域名解析
URL:統一資源定位符
http://www.sina.com.cn/web/index.html
- http:// - 協議
- www.sina.com.cn - 域名
- /web/index.html - 路徑
DNS - 域名解析服務
www.sina.com.cn -> 202.60.121.55, ...
...
#include <netdb.h>
struct hostent* gethostbyname (char const* name);
返回主機條目信息結構指針,失敗返回NULL,錯誤原因存放在h_errno(而非errno)中,可用herror/hstrerror輸出。
hostent
h_name - 字符指針,指向主機官方名字符串
h_aliases - 指向字符指針數組的指針,該數組中的每個元素都是字符指針,指向一個別名字符串,最後一個元素是一個NULL指針
h_addrtype - 地址類型,AF_INET(IPv4)
h_length - 地址字節數, 4字節(IPv4)
h_addr_list - 指向結構體指針數組的指針,該數組中的每個元素都指向一個struct in_addr類型的結構體,其中存放着主機一個IP地址,最後一個元素是一個空指針
#include <arpa/inet.h>
char* inet_ntoa (struct in_addr addr);
代碼示例
- dns.c
#include <netdb.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Translate an h_errno value left behind by gethostbyname() into a short
 * description. The texts follow the conventional resolver messages; any
 * code not listed here gets a generic fallback.
 */
static char const* resolver_strerror (int code) {
    switch (code) {
        case HOST_NOT_FOUND:
            return "Unknown host";
        case TRY_AGAIN:
            return "Host name lookup failure";
        case NO_RECOVERY:
            return "Unknown server error";
        case NO_DATA:
            return "No address associated with name";
        default:
            return "Unknown resolver error";
    }
}

/*
 * dns.c - resolve the host name given as argv[1] and print its official
 * name, its alias list and its IPv4 address list.
 *
 * Returns EXIT_FAILURE when the argument is missing or the lookup fails,
 * EXIT_SUCCESS otherwise. Nothing is printed for a non-AF_INET entry.
 */
int main (int argc, char* argv[]) {
    if (argc < 2) {
        printf ("用法:%s <主機域名>\n",
            argv[0]);
        return EXIT_FAILURE;
    }

    struct hostent* host = gethostbyname (argv[1]);
    if (! host) {
        /* gethostbyname() reports failure through h_errno and leaves
           errno unspecified, so perror() would print an unrelated
           message here. */
        fprintf (stderr, "gethostbyname: %s\n",
            resolver_strerror (h_errno));
        return EXIT_FAILURE;
    }

    if (host->h_addrtype == AF_INET) {
        printf ("主機官方名:\n");
        printf ("\t%s\n", host->h_name);

        /* h_aliases is a NULL-terminated array of C strings. */
        printf ("主機別名表:\n");
        for (char** alias = host->h_aliases; *alias; ++alias) {
            printf ("\t%s\n", *alias);
        }

        /* For AF_INET every h_addr_list entry points at a struct in_addr;
           the array is NULL-terminated as well. */
        printf ("主機地址表:\n");
        for (struct in_addr** pa =
                 (struct in_addr**) host->h_addr_list;
             *pa; ++pa) {
            printf ("\t%s\n", inet_ntoa (**pa));
        }
    }

    return EXIT_SUCCESS;
}
- 執行結果
超文本傳輸協議(HTTP)
- 請求
GET /web/index.html HTTP/1.0<CR><NL>
Host: www.sina.com.cn
Accept: */*
Connection: Close/Keep-Alive
User-Agent: Mozilla/5.0
Referer: www.sina.com.cn<CR><NL><CR><NL>
- 響應
HTTP/1.0 200 OK
Server: nginx
Date: Wed, 26 Oct 2016 10:52:04 GMT
Content-Type: text/html;charset=UTF-8
Content-Length: 1234
Connection: Close/Keep-Alive<CR><NL><CR><NL>
<html>
<head> ... </head>
<body> ... </body>
</html>
代碼示例
- http.c
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <strings.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * http.c - minimal HTTP/1.0 client.
 *
 * Usage: prog <host IPv4 address> <host domain> [<resource path>]
 *
 * Connects to port 80 of argv[1], sends a GET request for "/<path>" with
 * argv[2] as the Host and Referer header values, and copies the raw
 * response (headers and body) to stdout until the peer closes the
 * connection.
 *
 * Returns EXIT_SUCCESS when the whole response was read, EXIT_FAILURE on a
 * usage error or on any socket/formatting failure. The socket is closed on
 * every path once it has been created.
 */
int main (int argc, char* argv[]) {
    if (argc < 3) {
        printf ("用法:%s <主機地址> "
            "<主機域名> [<資源路徑>]\n",
            argv[0]);
        return EXIT_FAILURE;
    }
    char const* ip = argv[1];
    char const* domain = argv[2];
    char const* path = argc < 4 ? "" : argv[3];

    int sockfd = socket (PF_INET, SOCK_STREAM, 0);
    if (sockfd == -1) {
        perror ("socket");
        return EXIT_FAILURE;
    }

    /* From here on every exit goes through `done` so the socket is
       always closed. */
    int status = EXIT_FAILURE;

    struct sockaddr_in addr;
    memset (&addr, 0, sizeof (addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons (80);
    if (! inet_aton (ip, &addr.sin_addr)) {
        /* inet_aton() signals a malformed address by returning 0 and does
           not set errno, so perror() would be misleading. */
        fprintf (stderr, "inet_aton: invalid IPv4 address: %s\n", ip);
        goto done;
    }

    if (connect (sockfd, (struct sockaddr*) &addr,
            sizeof (addr)) == -1) {
        perror ("connect");
        goto done;
    }

    /* path and domain come straight from the command line, so the request
       is formatted with an explicit bound and rejected if it does not
       fit. */
    char request[1024];
    int reqlen = snprintf (request, sizeof (request),
        "GET /%s HTTP/1.0\r\n"
        "Host: %s\r\n"
        "Accept: */*\r\n"
        "Connection: Close\r\n"
        "User-Agent: Mozilla/5.0\r\n"
        "Referer: %s\r\n\r\n",
        path, domain, domain);
    if (reqlen < 0 || (size_t) reqlen >= sizeof (request)) {
        fprintf (stderr, "request does not fit in %zu bytes\n",
            sizeof (request));
        goto done;
    }

    /* send() may accept fewer bytes than requested; keep going until the
       whole request has been handed to the kernel. */
    size_t sent = 0;
    while (sent < (size_t) reqlen) {
        ssize_t n = send (sockfd, request + sent,
            (size_t) reqlen - sent, 0);
        if (n == -1) {
            perror ("send");
            goto done;
        }
        sent += (size_t) n;
    }

    for (;;) {
        char respond[1024];
        ssize_t rlen = recv (sockfd, respond, sizeof (respond), 0);
        if (rlen == -1) {
            perror ("recv");
            goto done;
        }
        if (rlen == 0) {
            break; /* peer closed the connection: response complete */
        }
        /* Write exactly the bytes received; a "%s" print would stop at
           the first NUL byte in the payload. */
        fwrite (respond, 1, (size_t) rlen, stdout);
    }
    printf ("\n");
    status = EXIT_SUCCESS;

done:
    close (sockfd);
    return status;
}
- 執行結果
正則表達式
包含頭文件
#include <regex.h>
- regcomp - 編譯正則表達式
- regexec - 執行正則匹配
- regfree - 釋放正則表達式內存
... href=" http://www.sina.com.cn/web/index.html " ...
href="\s*\([^ >"]*\)\s*"
\s - 匹配任意空白字符(空格、製表、回車、換行),屬GNU擴展,POSIX標準寫法是[[:space:]]
* - 重複前一個匹配項任意次
[^ >"] - 匹配除空格、大於號、雙引號以外的任意字符
\(和\) - 定義子表達式
代碼示例
- regex.c
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * regex.c - print the target of every href="..." attribute found in the
 * HTML file named by argv[1], one per line.
 *
 * The whole file is read into memory, NUL-terminated, and scanned
 * repeatedly with a compiled POSIX basic regular expression whose first
 * sub-expression captures the URL without surrounding whitespace.
 *
 * Returns EXIT_SUCCESS after a complete scan, EXIT_FAILURE on a usage
 * error or on any I/O, allocation or regex-compilation failure. The file
 * handle, the buffer and the compiled pattern are released on every path.
 */
int main (int argc, char* argv[]) {
    if (argc < 2) {
        printf ("用法:%s <HTML文件>\n",
            argv[0]);
        return EXIT_FAILURE;
    }

    int status = EXIT_FAILURE;
    char* buf = NULL;
    long size = 0;
    regex_t ex;
    int compiled = 0; /* set once regcomp() succeeded, so regfree() is safe */

    FILE* fp = fopen (argv[1], "r");
    if (! fp) {
        perror ("fopen");
        return EXIT_FAILURE;
    }

    /* Determine the file size by seeking to the end and back. */
    if (fseek (fp, 0, SEEK_END) == -1) {
        perror ("fseek");
        goto done;
    }
    size = ftell (fp);
    if (size == -1) {
        perror ("ftell");
        goto done;
    }
    if (fseek (fp, 0, SEEK_SET) == -1) {
        perror ("fseek");
        goto done;
    }

    /* One extra byte for the terminator regexec() relies on. */
    buf = (char*) malloc ((size_t) size + 1);
    if (! buf) {
        perror ("malloc");
        goto done;
    }
    if (fread (buf, 1, (size_t) size, fp) != (size_t) size) {
        if (ferror (fp)) {
            perror ("fread");
        } else {
            fprintf (stderr, "fread: unexpected end of file\n");
        }
        goto done;
    }
    buf[size] = '\0';
    fclose (fp);
    fp = NULL;

    /* Basic (non-extended) syntax: \( \) delimit the captured URL.
       NOTE: \s is a GNU extension rather than POSIX BRE. */
    int error = regcomp (&ex,
        "href=\"\\s*\\([^ >\"]*\\)\\s*\"", 0);
    if (error) {
        char errInfo[1024];
        regerror (error, &ex, errInfo, sizeof (errInfo));
        printf ("regcomp: %s\n", errInfo);
        goto done;
    }
    compiled = 1;

    /* match[0] is the whole href="..." attribute, match[1] the URL.
       Only a return value of 0 guarantees that match[] was filled in, so
       any other result (no match or an internal error) ends the scan. */
    char const* cursor = buf;
    regmatch_t match[2];
    while (regexec (&ex, cursor, 2, match, 0) == 0) {
        size_t len = (size_t) (match[1].rm_eo - match[1].rm_so);
        fwrite (cursor + match[1].rm_so, 1, len, stdout);
        putchar ('\n');
        /* Resume right after the closing quote of this attribute. */
        cursor += match[0].rm_eo;
    }
    status = EXIT_SUCCESS;

done:
    if (compiled) {
        regfree (&ex);
    }
    free (buf);
    if (fp) {
        fclose (fp);
    }
    return status;
}
- 執行結果