using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Xml.Serialization;
using System.Text.RegularExpressions;
namespace HttpHelper
{
public class HttpHelper
{
/// <summary>
/// Sends an HTTP GET request to <paramref name="url"/> and returns the response body stream.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="cookies">Cookie container assigned to the request.</param>
/// <returns>
/// The response stream, still open (this method does not close it), or <c>null</c>
/// when any exception is raised while building or sending the request.
/// </returns>
public Stream GetStream(string url, ref CookieContainer cookies)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.CookieContainer = cookies;
        request.Method = "GET";
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

        WebResponse response = request.GetResponse();
        return response.GetResponseStream();
    }
    catch (Exception)
    {
        // Every failure (malformed URL, non-HTTP scheme, network error) is reported as null.
        return null;
    }
}
/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET and returns the body decoded with code page 936.
/// </summary>
/// <param name="url">Address to request.</param>
/// <returns>
/// The decoded response body, or the literal string <c>"wrong page"</c> when any exception
/// is raised while building the request, sending it, or reading the response.
/// </returns>
public string GetHTML(string url)
{
    try
    {
        HttpWebRequest httpreq = (HttpWebRequest)WebRequest.Create(url);
        httpreq.Method = "GET";
        httpreq.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; CIBA)";

        // Dispose the response and the reader deterministically, even when decoding or
        // reading throws; an undisposed response keeps its underlying connection in use.
        using (WebResponse resp = httpreq.GetResponse())
        using (StreamReader reader = new StreamReader(resp.GetResponseStream(), Encoding.GetEncoding(936)))
        {
            return reader.ReadToEnd();
        }
    }
    catch
    {
        // Sentinel value kept from the original contract; callers may compare against it.
        return "wrong page";
    }
}
/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET, using the supplied cookie container,
/// and returns the body decoded with code page 936.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="cookies">Cookie container assigned to the request.</param>
/// <returns>
/// The decoded response body, or an empty string when any exception is raised while
/// building the request, sending it, or reading the response.
/// </returns>
public string doGet(string url, ref CookieContainer cookies)
{
    try
    {
        HttpWebRequest httpreq = (HttpWebRequest)WebRequest.Create(url);
        httpreq.CookieContainer = cookies;
        httpreq.Method = "GET";
        httpreq.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; CIBA)";

        // Dispose the response and the reader deterministically, even when decoding or
        // reading throws; an undisposed response keeps its underlying connection in use.
        using (WebResponse resp = httpreq.GetResponse())
        using (StreamReader reader = new StreamReader(resp.GetResponseStream(), Encoding.GetEncoding(936)))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Best-effort contract kept from the original: every failure yields "".
        return "";
    }
}
/// <summary>
/// Prepares a form-encoded HTTP POST to <paramref name="action"/> and returns the stream
/// the request body is written to.
/// </summary>
/// <param name="action">Address the POST is sent to.</param>
/// <param name="data">
/// Body text. Here it is only UTF-8 encoded to compute the Content-Length header;
/// nothing is written to the returned stream by this method.
/// </param>
/// <param name="cookies">Cookie container assigned to the request.</param>
/// <returns>
/// The open request stream, or <c>null</c> when any exception is raised while
/// preparing the request or opening the stream.
/// </returns>
public Stream PostStream(string action, string data, ref CookieContainer cookies)
{
    try
    {
        byte[] payload = Encoding.UTF8.GetBytes(data);

        HttpWebRequest post = (HttpWebRequest)WebRequest.Create(action);
        post.Method = "POST";
        post.ContentType = "application/x-www-form-urlencoded";
        post.Accept = "application/x-shockwave-flash, image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-silverlight, */*";
        post.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; CIBA)";
        post.ContentLength = payload.Length;
        post.CookieContainer = cookies;

        return post.GetRequestStream();
    }
    catch (Exception)
    {
        // Every failure is reported as null.
        return null;
    }
}
/// <summary>
/// Sends <paramref name="data"/> as a form-encoded HTTP POST to <paramref name="action"/>
/// and returns the response body decoded with code page 936.
/// </summary>
/// <param name="action">Address the POST is sent to.</param>
/// <param name="data">Request body; it is sent UTF-8 encoded.</param>
/// <param name="cookies">Cookie container assigned to the request.</param>
/// <returns>
/// The decoded response body, or an empty string when any exception is raised while
/// building the request, sending it, or reading the response.
/// </returns>
public string doPost(string action, string data, ref CookieContainer cookies)
{
    try
    {
        byte[] bytes = Encoding.UTF8.GetBytes(data);

        HttpWebRequest httpreq = (HttpWebRequest)WebRequest.Create(action);
        httpreq.Method = "POST";
        httpreq.ContentType = "application/x-www-form-urlencoded";
        httpreq.Accept = "application/x-shockwave-flash, image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-silverlight, */*";
        httpreq.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; CIBA)";
        httpreq.ContentLength = bytes.Length;
        httpreq.CookieContainer = cookies;

        // The request stream is closed even when the write fails.
        using (Stream body = httpreq.GetRequestStream())
        {
            body.Write(bytes, 0, bytes.Length);
        }

        // Dispose the response and the reader deterministically; an undisposed
        // response keeps its underlying connection in use.
        using (WebResponse resp = httpreq.GetResponse())
        using (StreamReader reader = new StreamReader(resp.GetResponseStream(), Encoding.GetEncoding(936)))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Best-effort contract kept from the original: every failure yields "".
        return "";
    }
}
/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET (no cookies) and returns the body
/// decoded with the "gb2312" encoding.
/// </summary>
/// <param name="url">Address to request.</param>
/// <returns>
/// The decoded response body, or an empty string when any exception is raised while
/// building the request, sending it, or reading the response.
/// </returns>
public static string doGet(string url)
{
    try
    {
        HttpWebRequest httpreq = (HttpWebRequest)WebRequest.Create(url);
        httpreq.Method = "GET";
        httpreq.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; CIBA)";

        // Dispose the response and the reader deterministically, even when decoding or
        // reading throws; an undisposed response keeps its underlying connection in use.
        using (WebResponse resp = httpreq.GetResponse())
        using (StreamReader reader = new StreamReader(resp.GetResponseStream(), Encoding.GetEncoding("gb2312")))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Best-effort contract kept from the original: every failure yields "".
        return "";
    }
}
/// <summary>
/// Returns every fragment of <paramref name="html"/> that lies between an occurrence of
/// <paramref name="start"/> and the nearest following <paramref name="end"/>.
/// </summary>
/// <param name="start">
/// Opening delimiter (e.g. an HTML tag). It is inserted into the regular expression
/// as-is, so regex metacharacters in it are interpreted, not matched literally.
/// </param>
/// <param name="end">Closing delimiter; inserted into the regular expression as-is.</param>
/// <param name="html">Text to search.</param>
/// <returns>
/// The captured fragments in document order (each at least one character long, line
/// breaks included). The list is empty when nothing matches or when the search throws.
/// </returns>
public static IList<string> GetHtmls(string start, string end, string html)
{
    IList<string> list = new List<string>();
    try
    {
        // [\s\S] matches any character, line breaks included. It replaces the earlier
        // (.|[\r\n]) alternation, whose two branches both accept '\r' and could therefore
        // backtrack exponentially on CR-heavy input when the end delimiter is missing.
        string pattern = string.Format(@"{0}(?<g>[\s\S]+?){1}", start, end);

        foreach (Match match in Regex.Matches(html, pattern))
        {
            list.Add(match.Groups["g"].Value);
        }
    }
    catch (Exception)
    {
        // Best-effort contract kept from the original: a null input or a delimiter that
        // forms an invalid pattern yields whatever was collected so far (normally nothing).
    }
    return list;
}
/// <summary>
/// Returns the first fragment of <paramref name="html"/> that lies between
/// <paramref name="start"/> and the nearest following <paramref name="end"/>.
/// </summary>
/// <param name="start">
/// Opening delimiter (e.g. an HTML tag). It is inserted into the regular expression
/// as-is, so regex metacharacters in it are interpreted, not matched literally.
/// </param>
/// <param name="end">Closing delimiter; inserted into the regular expression as-is.</param>
/// <param name="html">Text to search.</param>
/// <returns>
/// The captured fragment (line breaks included), or an empty string when nothing
/// matches, when the delimiters are adjacent, or when the search throws.
/// </returns>
public static string GetHtml(string start, string end, string html)
{
    try
    {
        // [\s\S] matches any character, line breaks included. It replaces the earlier
        // (.|[\r\n]) alternation, whose two branches both accept '\r' and could therefore
        // backtrack exponentially on CR-heavy input when the end delimiter is missing.
        // The group stays optional, exactly as before, so start immediately followed by
        // end is still a match (with an empty capture).
        string pattern = string.Format(@"{0}(?<g>[\s\S]+?)?{1}", start, end);
        return Regex.Match(html, pattern).Groups["g"].Value;
    }
    catch (Exception)
    {
        // Best-effort contract kept from the original: a null input or a delimiter that
        // forms an invalid pattern yields an empty string.
        return string.Empty;
    }
}
/// <summary>
/// Extracts the <c>src</c> value of every <c>&lt;img ... /&gt;</c> tag in
/// <paramref name="aText"/> that the pattern recognises (self-closing tags made up
/// only of <c>title=</c> and <c>src=</c> attributes).
/// </summary>
/// <param name="aText">Markup to search.</param>
/// <returns>
/// The <c>src</c> values of all matching tags concatenated in document order with no
/// separator; an empty string when no tag matches.
/// </returns>
public static string funcGetSrc(string aText)
{
    string pattern = @"(?is)<img (?:title=(['""]?)?(?<title>[^'""]*?)\1\s*|src=(['""]?)?(?<url>[^'""]*?)\2\s*)*/>";
    Regex r = new Regex(pattern, RegexOptions.IgnoreCase);

    StringBuilder sources = new StringBuilder();
    foreach (Match m in r.Matches(aText))
    {
        // Read the named "url" capture. Numbered group 1 is the quote character that
        // precedes the title value (unnamed groups are numbered before named ones),
        // so indexing by 1 never returned the image source.
        sources.Append(m.Groups["url"].Value);
    }
    return sources.ToString();
}
/// <summary>
/// Collects the <c>href</c> value of every <c>&lt;a&gt;</c> opening tag in <paramref name="html"/>.
/// </summary>
/// <remarks>
/// The match is confined to the opening tag. The earlier pattern required at least one
/// extra character between the href value and the tag's closing bracket and then ran
/// greedily to the end of the line, so a plain <c>&lt;a href="x"&gt;text&lt;/a&gt;</c>
/// was never matched and several links on one line collapsed into a single match.
/// Single-quoted values are accepted in addition to double-quoted ones.
/// </remarks>
/// <param name="html">Markup to search.</param>
/// <returns>The href values in document order; an empty list when there are none.</returns>
public static List<string> Get_A_Href(string html)
{
    // "<a", whitespace, optionally other attributes followed by whitespace, then
    // href="..." or href='...', then the rest of the opening tag up to '>'.
    const string pattern = @"<a\s+(?:[^>]*?\s+)?href=(?:""(?<href>[^""]*)""|'(?<href>[^']*)')[^>]*>";

    List<string> links = new List<string>();
    foreach (Match match in Regex.Matches(html, pattern, RegexOptions.IgnoreCase))
    {
        links.Add(match.Groups["href"].Value);
    }
    return links;
}
/// <summary>
/// Collects the text enclosed by each <paramref name="title"/> element found in
/// <paramref name="str"/> (case-insensitive match).
/// </summary>
/// <param name="str">Text to search.</param>
/// <param name="title">Tag name; it is inserted into the regular expression as-is.</param>
/// <returns>
/// One entry per matched element, holding the text between the opening and closing tag.
/// Only elements whose content contains no '&lt;' character are matched.
/// </returns>
public static List<string> GetTitleContent(string str, string title)
{
    string expression = string.Format("<{0}[^>]*?>(?<Text>[^<]*)</{1}>", title, title);
    MatchCollection hits = Regex.Matches(str, expression, RegexOptions.IgnoreCase);

    List<string> contents = new List<string>();
    for (int i = 0; i < hits.Count; i++)
    {
        contents.Add(hits[i].Groups["Text"].Value);
    }
    return contents;
}
/// <summary>
/// Collects the value of attribute <paramref name="attrib"/> from each
/// <paramref name="title"/> tag found in <paramref name="str"/> (case-insensitive match).
/// </summary>
/// <param name="str">Text to search.</param>
/// <param name="title">Tag name; it is inserted into the regular expression as-is.</param>
/// <param name="attrib">Attribute name; it is inserted into the regular expression as-is.</param>
/// <returns>
/// One entry per matched tag, holding the attribute value. The value may be wrapped in
/// single quotes, double quotes, or no quotes, and cannot contain quotes, whitespace or '&gt;'.
/// </returns>
public static List<string> GetTitleContent(string str, string title, string attrib)
{
    string expression = string.Format("<{0}[^>]*?{1}=(['\"\"]?)(?<url>[^'\"\"\\s>]+)\\1[^>]*>", title, attrib);
    MatchCollection hits = Regex.Matches(str, expression, RegexOptions.IgnoreCase);

    List<string> values = new List<string>();
    for (int i = 0; i < hits.Count; i++)
    {
        values.Add(hits[i].Groups["url"].Value);
    }
    return values;
}
// Usage example: downloads a page, cuts out one section of it, then pulls the
// link texts and the href values out of that section. The two lists are not used further.
private void DoIt()
{
    HttpHelper client = new HttpHelper();
    string page = client.GetHTML("http://www.17173.com");

    string section = HttpHelper.GetHtml("<div class=\"header-3-2-2-1\">", "<div class=header-3-3>", page);

    List<string> list_content = HttpHelper.GetTitleContent(section, "a");
    List<string> list_attrib = HttpHelper.GetTitleContent(section, "a", "href");
}
}
}
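// HTMLHelper
// (Residue from the web page this file was copied from:)
// Post a comment
// All comments
// No comments yet. Want to be the first? Enter your comment in the box above and click Publish.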