域名解析及HTTP


域名解析

URL:統一資源定位符
http://www.sina.com.cn/web/index.html

  • http:// - 協議
  • www.sina.com.cn - 域名
  • /web/index.html - 路徑
    DNS - 域名解析服務
www.sina.com.cn -> 202.60.121.55, ...
...
#include <netdb.h>
struct hostent* gethostbyname (char const* name);
返回主機條目信息結構指針,失敗返回NULL,並通過h_errno(而非errno)給出錯誤原因。
hostent
     h_name - 字符指針,指向主機官方名字符串
     h_aliases - 指向字符指針數組的指針,該數組中的每個元素都是字符指針,指向一個別名字符串,最後一個元素是一個NULL指針
     h_addrtype - 地址類型,AF_INET(IPv4)
     h_length - 地址字節數, 4字節(IPv4)
     h_addr_list - 指向結構體指針數組的指針,該數組中的每個元素都指向一個struct in_addr類型的結構體,其中存放着主機一個IP地址,最後一個元素是一個空指針
#include <arpa/inet.h>
char* inet_ntoa (struct in_addr addr);

代碼示例

  • dns.c
#include <netdb.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Translate an h_errno code into a short human-readable message.
 *
 * code: one of the resolver codes from <netdb.h>
 *       (HOST_NOT_FOUND, TRY_AGAIN, NO_RECOVERY, NO_DATA).
 * Returns a pointer to a static string; never NULL.
 */
static char const* resolve_error_text (int code) {
	switch (code) {
		case HOST_NOT_FOUND:
			return "Unknown host";
		case TRY_AGAIN:
			return "Host name lookup failure";
		case NO_RECOVERY:
			return "Unknown server error";
		case NO_DATA:
			return "No address associated with name";
		default:
			return "Resolver internal error";
	}
}

/*
 * dns.c - resolve a host name with gethostbyname() and print the
 * official name, the alias list and the IPv4 address list.
 *
 * argv[1]: host domain name to resolve.
 * Returns EXIT_SUCCESS when the lookup succeeds, EXIT_FAILURE on a
 * usage error or a failed lookup.
 */
int main (int argc, char* argv[]) {
	if (argc < 2) {
		printf ("用法:%s <主機域名>\n",
			argv[0]);
		return EXIT_FAILURE;
	}
	struct hostent* host =
		gethostbyname (argv[1]);
	if (! host) {
		/* gethostbyname() reports its failure reason in h_errno and
		   leaves errno unspecified, so perror() would print an
		   unrelated message here. */
		fprintf (stderr, "gethostbyname: %s\n",
			resolve_error_text (h_errno));
		return EXIT_FAILURE;
	}
	/* Only IPv4 entries are printed: the address list is interpreted
	   as struct in_addr below. */
	if (host->h_addrtype == AF_INET) {
		printf ("主機官方名:\n");
		printf ("\t%s\n", host->h_name);
		printf ("主機別名表:\n");
		/* h_aliases is a NULL-terminated array of strings. */
		for (char** pp = host->h_aliases; *pp; ++pp) {
			printf ("\t%s\n", *pp);
		}
		printf ("主機地址表:\n");
		/* h_addr_list is a NULL-terminated array of pointers to
		   network-byte-order addresses. */
		for (struct in_addr** pa =
			(struct in_addr**)host->h_addr_list;
			*pa; ++pa) {
			printf ("\t%s\n", inet_ntoa (**pa));
		}
	}
	return EXIT_SUCCESS;
}
  • 執行結果
    在這裏插入圖片描述

超文本傳輸協議(HTTP)

  1. 請求
GET /web/index.html HTTP/1.0<CR><NL>
Host: www.sina.com.cn
Accept: */*
Connection: Close/Keep-Alive
User-Agent: Mozilla/5.0
Referer: www.sina.com.cn<CR><NL><CR><NL>
  2. 響應
HTTP/1.0 200 OK
Server: nginx
Date: Wed, 26 Oct 2016 10:52:04 GMT
Content-Type: text/html;charset=UTF-8
Content-length: 1234
Connection: Close/Keep-Alive<CR><NL><CR><NL>
<html>
<head> ... </head>
<body> ... </body>
</html>

代碼示例

  • http.c
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <strings.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * http.c - send a single HTTP/1.0 GET request to port 80 of the given
 * IPv4 address and copy the raw response (headers and body) to stdout.
 *
 * argv[1]: server IPv4 address in dotted-decimal form.
 * argv[2]: host domain name, used for the Host and Referer headers.
 * argv[3]: optional resource path without the leading '/'
 *          (defaults to the empty string, i.e. "/").
 * Returns EXIT_SUCCESS after the peer closes the connection,
 * EXIT_FAILURE on a usage error or any socket/formatting failure.
 */
int main (int argc, char* argv[]) {
	if (argc < 3) {
		printf ("用法:%s <主機地址> "
			"<主機域名> [<資源路徑>]\n",
			argv[0]);
		return EXIT_FAILURE;
	}
	char const* ip = argv[1];
	char const* domain = argv[2];
	char const* path = argc < 4 ?
		"" : argv[3];
	int sockfd = socket (PF_INET,
		SOCK_STREAM, 0);
	if (sockfd == -1) {
		perror ("socket");
		return EXIT_FAILURE;
	}
	/* From here on every exit goes through "cleanup" so the socket is
	   always closed. */
	int status = EXIT_FAILURE;
	struct sockaddr_in addr;
	memset (&addr, 0, sizeof (addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons (80);
	if (! inet_aton (ip,
		&addr.sin_addr)) {
		/* inet_aton() returns 0 for a malformed address and does not
		   set errno, so perror() would be misleading. */
		fprintf (stderr,
			"inet_aton: 無效的主機地址: %s\n", ip);
		goto cleanup;
	}
	if (connect (sockfd,
		(struct sockaddr*)&addr,
		sizeof (addr)) == -1) {
		perror ("connect");
		goto cleanup;
	}
	char request[1024];
	/* path and domain come from the command line; bound the write and
	   refuse to send a truncated request. */
	int reqlen = snprintf (request,
		sizeof (request),
		"GET /%s HTTP/1.0\r\n"
		"Host: %s\r\n"
		"Accept: */*\r\n"
		"Connection: Close\r\n"
		"User-Agent: Mozilla/5.0\r\n"
		"Referer: %s\r\n\r\n",
		path, domain, domain);
	if (reqlen < 0 ||
		(size_t)reqlen >= sizeof (request)) {
		fprintf (stderr, "snprintf: 請求過長\n");
		goto cleanup;
	}
	/* send() may accept fewer bytes than requested; keep going until
	   the whole request has been handed to the kernel. */
	size_t sent = 0;
	while (sent < (size_t)reqlen) {
		ssize_t slen = send (sockfd,
			request + sent,
			(size_t)reqlen - sent, 0);
		if (slen == -1) {
			perror ("send");
			goto cleanup;
		}
		sent += (size_t)slen;
	}
	for (;;) {
		char respond[1024];
		ssize_t rlen = recv (sockfd,
			respond,
			sizeof (respond), 0);
		if (rlen == -1) {
			perror ("recv");
			goto cleanup;
		}
		/* A zero-length read means the peer closed the connection. */
		if (! rlen) {
			break;
		}
		/* Write exactly the bytes received: the body may contain NUL
		   bytes, which a "%s" conversion would stop at. */
		fwrite (respond, 1, (size_t)rlen, stdout);
	}
	printf ("\n");
	status = EXIT_SUCCESS;
cleanup:
	close (sockfd);
	return status;
}
  • 執行結果
    在這裏插入圖片描述

正則表達式

包含頭文件

#include <regex.h>
  • regcomp - 編譯正則表達式
  • regexec - 執行正則匹配
  • regfree - 釋放正則表達式內存
... href=" http://www.sina.com.cn/web/index.html " ...
href="\s*\([^ >"]*\)\s*"
\s - 匹配任意空白字符(空格、製表、回車、換行)
* - 重複前一個匹配項任意次
[^ >"] - 匹配除空格、大於號、雙引號以外的任意單個字符
\(和\) - 定義子表達式

代碼示例

  • regex.c
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Read the whole file at `path` into a freshly allocated,
 * NUL-terminated buffer.
 *
 * Returns the buffer (caller frees it with free()), or NULL after
 * printing the failing call with perror(). The file is closed on
 * every path.
 */
static char* load_text (char const* path) {
	FILE* fp = fopen (path, "r");
	if (! fp) {
		perror ("fopen");
		return NULL;
	}
	char* buf = NULL;
	long size = -1;
	/* Seek to the end to learn the file size, then rewind to read. */
	if (fseek (fp, 0, SEEK_END) == -1) {
		perror ("fseek");
		goto fail;
	}
	size = ftell (fp);
	if (size == -1) {
		perror ("ftell");
		goto fail;
	}
	buf = malloc ((size_t)size + 1);
	if (! buf) {
		perror ("malloc");
		goto fail;
	}
	if (fseek (fp, 0, SEEK_SET) == -1) {
		perror ("fseek");
		goto fail;
	}
	if (fread (buf, 1, (size_t)size, fp) !=
		(size_t)size) {
		perror ("fread");
		goto fail;
	}
	buf[size] = '\0';
	fclose (fp);
	return buf;
fail:
	free (buf);
	fclose (fp);
	return NULL;
}

/*
 * Print, one per line, the first sub-expression of every match of
 * `ex` found in the NUL-terminated string `html`.
 */
static void print_hrefs (regex_t const* ex,
	char const* html) {
	regmatch_t match[2];
	/* Stop on ANY non-zero result: looping while the result is merely
	   "!= REG_NOMATCH" would spin forever on errors such as
	   REG_ESPACE, reusing stale offsets. */
	while (regexec (ex, html, 2, match, 0) == 0) {
		/* match[1] is the \( \) group, i.e. the URL without the
		   surrounding blanks and quotes. */
		int len = (int)(match[1].rm_eo -
			match[1].rm_so);
		printf ("%.*s\n", len,
			html + match[1].rm_so);
		/* Resume right after the whole match (closing quote). */
		html += match[0].rm_eo;
	}
}

/*
 * regex.c - list the targets of all href="..." attributes found in
 * an HTML file.
 *
 * argv[1]: path of the HTML file to scan.
 * Returns EXIT_SUCCESS after scanning, EXIT_FAILURE on a usage
 * error, a file error or a regex compilation error.
 */
int main (int argc, char* argv[]) {
	if (argc < 2) {
		printf ("用法:%s <HTML文件>\n",
			argv[0]);
		return EXIT_FAILURE;
	}
	char* buf = load_text (argv[1]);
	if (! buf) {
		return EXIT_FAILURE;
	}
	regex_t ex;
	/* Basic regular expression (cflags 0): \( \) delimit the group.
	   NOTE: \s is a GNU extension, not part of POSIX BRE. */
	int error = regcomp (&ex,
	"href=\"\\s*\\([^ >\"]*\\)\\s*\"",0);
	if (error) {
		char errInfo[1024];
		regerror (error, &ex, errInfo,
			sizeof (errInfo));
		printf ("regcomp: %s\n",
			errInfo);
		free (buf);
		return EXIT_FAILURE;
	}
	print_hrefs (&ex, buf);
	regfree (&ex);
	free (buf);
	return EXIT_SUCCESS;
}

  • 執行結果
    在這裏插入圖片描述
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章