功能:
將 HTML 原碼解析成 IHTMLDocument2 對象,然後將 IHTMLDocument2 轉換成 IHTMLDocument3,再使用 IHTMLDOMNode 將 HTML 顯示成一棵樹。此解析不執行任何腳本,不從網上下載任何資料,是一個純文本的解析。
(方法 Parse(string s) 是一個輕量級的 Parsing 實現。這個代碼不會從網上下載任何資料,也不會執行任何腳本,純屬 Parsing。
Parsing 是通過 MSHTML 的 Markup Services(IMarkupServices)實現的。要正確使用這個代碼,需要添加 MSHTML 引用。)
要正確編譯如下代碼,還需要修改unsafe(啓用不安全模式)編譯器選項,將其開啓。
方法:在“項目”->“<應用程序名稱>屬性”對話框中打開“配置屬性”,選中“生成”項,修改“允許不安全代碼塊”的內容爲true.
[C#]
using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using mshtml;
using System.Runtime.InteropServices;
using System.IO;
namespace WindowsApplication1
{
/// <summary>
/// Managed declaration of the COM <c>IPersistStreamInit</c> interface
/// (IID 7FD52380-4E07-101B-AE2D-08002B2EC713), used here to initialise a freshly
/// created MSHTML document via <see cref="InitNew"/>.
/// </summary>
/// <remarks>
/// This is a <c>ComImport</c> IUnknown-based interface, so the members must stay in
/// this exact order: the runtime maps them onto the COM vtable by position.
/// </remarks>
[ComVisible(true), ComImport(), Guid("7FD52380-4E07-101B-AE2D-08002B2EC713"), InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
public interface IPersistStreamInit
{
    /// <summary>Retrieves the class identifier (CLSID) of the object.</summary>
    void GetClassID([In, Out] ref Guid pClassID);

    /// <summary>
    /// Returns the raw HRESULT (the signature is preserved, so no exception is
    /// thrown for non-zero results).
    /// </summary>
    [return: MarshalAs(UnmanagedType.I4)] [PreserveSig]
    int IsDirty();

    /// <summary>Initialises the object from the given stream.</summary>
    void Load([In, MarshalAs(UnmanagedType.Interface)] UCOMIStream pstm);

    /// <summary>Saves the object to the given stream.</summary>
    void Save([In, MarshalAs(UnmanagedType.Interface)] UCOMIStream pstm,
        [In, MarshalAs(UnmanagedType.I4)] int fClearDirty);

    /// <summary>
    /// Retrieves the size of the stream needed to save the object.
    /// </summary>
    /// <remarks>
    /// The native method writes a 64-bit value through a pointer, so the parameter
    /// is declared <c>out long</c>. The earlier by-value <c>long</c> marked as
    /// <c>LPArray</c> was not a valid marshalling combination.
    /// </remarks>
    void GetSizeMax(out long pcbSize);

    /// <summary>Initialises the object to its default (empty) state.</summary>
    void InitNew();
}
/// <summary>
/// Sample form: clicking the button reads an HTML file from disk, parses it with
/// the MSHTML markup services and displays the resulting DOM in a tree view.
/// </summary>
public class Form1 : System.Windows.Forms.Form
{
    private System.Windows.Forms.Button button1;
    private System.Windows.Forms.TreeView treeView1;
    /// <summary>
    /// Required designer variable.
    /// </summary>
    private System.ComponentModel.Container components = null;

    public Form1()
    {
        //
        // Required for Windows Form Designer support.
        //
        InitializeComponent();
        //
        // TODO: add any constructor code after the InitializeComponent call.
        //
    }

    /// <summary>
    /// Cleans up any resources being used.
    /// </summary>
    /// <param name="disposing">
    /// True when called from Dispose(); the designer component container is only
    /// disposed in that case.
    /// </param>
    protected override void Dispose( bool disposing )
    {
        if( disposing )
        {
            if (components != null)
            {
                components.Dispose();
            }
        }
        base.Dispose( disposing );
    }

    #region Windows Form Designer generated code
    /// <summary>
    /// Required method for Designer support - do not modify
    /// the contents of this method with the code editor.
    /// </summary>
    private void InitializeComponent()
    {
        this.button1 = new System.Windows.Forms.Button();
        this.treeView1 = new System.Windows.Forms.TreeView();
        this.SuspendLayout();
        //
        // button1
        //
        this.button1.Location = new System.Drawing.Point(24, 16);
        this.button1.Name = "button1";
        this.button1.Size = new System.Drawing.Size(88, 24);
        this.button1.TabIndex = 0;
        this.button1.Text = "button1";
        this.button1.Click += new System.EventHandler(this.button1_Click);
        //
        // treeView1
        //
        this.treeView1.ImageIndex = -1;
        this.treeView1.Location = new System.Drawing.Point(280, 96);
        this.treeView1.Name = "treeView1";
        this.treeView1.SelectedImageIndex = -1;
        this.treeView1.Size = new System.Drawing.Size(288, 224);
        this.treeView1.TabIndex = 1;
        //
        // Form1
        //
        this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
        this.ClientSize = new System.Drawing.Size(664, 333);
        this.Controls.Add(this.treeView1);
        this.Controls.Add(this.button1);
        this.Name = "Form1";
        this.Text = "Form1";
        this.ResumeLayout(false);
    }
    #endregion

    /// <summary>
    /// The main entry point for the application.
    /// </summary>
    [STAThread]
    static void Main()
    {
        Application.Run(new Form1());
    }

    /// <summary>
    /// Parses an HTML string into an MSHTML document using
    /// <c>IMarkupServices.ParseString</c>.
    /// </summary>
    /// <param name="s">The HTML text to parse. Must not be null.</param>
    /// <returns>
    /// The parsed markup container viewed as <c>IHTMLDocument2</c>, or null when the
    /// document does not expose the required interfaces or no container is produced.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    unsafe IHTMLDocument2 Parse(string s)
    {
        if (s == null)
        {
            // StringToHGlobalUni(null) yields IntPtr.Zero, which would be dereferenced below.
            throw new ArgumentNullException("s");
        }

        IHTMLDocument2 pDocument = new HTMLDocumentClass();

        // Put the new document into its initial state before asking it for markup services.
        IPersistStreamInit pPersist = pDocument as IPersistStreamInit;
        if (pPersist == null)
        {
            return null;
        }
        pPersist.InitNew();
        pPersist = null;

        IMarkupServices ms = pDocument as IMarkupServices;
        if (ms == null)
        {
            return null;
        }

        IMarkupContainer pMC = null;
        IMarkupPointer pStart, pEnd;
        ms.CreateMarkupPointer(out pStart);
        ms.CreateMarkupPointer(out pEnd);

        // ParseString takes a pointer to UTF-16 text, so copy the string to unmanaged memory.
        IntPtr pSource = Marshal.StringToHGlobalUni(s);
        try
        {
            ms.ParseString(ref *(ushort*)pSource.ToPointer(), 0, out pMC, pStart, pEnd);
        }
        finally
        {
            // The buffer is HGlobal memory, not a COM interface pointer, so it must be
            // released with FreeHGlobal (Marshal.Release is only valid for IUnknown pointers).
            Marshal.FreeHGlobal(pSource);
        }

        return pMC as IHTMLDocument2;
    }

    /// <summary>
    /// Button handler: loads the sample HTML file, parses it, writes the number of
    /// style sheets to the console and appends the DOM under a new "HTML" root node.
    /// </summary>
    private void button1_Click(object sender, System.EventArgs e)
    {
        string filename="D://NetC#Program//html//163.htm";
        if (!File.Exists(filename))
        {
            Console.WriteLine("文件不存在");
            return;
        }

        // Read the whole file in one call; the using block closes the reader (and the
        // underlying file stream) even if reading throws.
        string html;
        using (StreamReader reader = new StreamReader(
            File.OpenRead(filename), System.Text.Encoding.Default))
        {
            html = reader.ReadToEnd();
        }

        IHTMLDocument2 doc2 = Parse(html);
        if (doc2 == null)
        {
            // Parse signals failure by returning null.
            Console.WriteLine("HTML parsing failed");
            return;
        }

        Console.WriteLine(doc2.styleSheets.length);
        IHTMLDocument3 doc3 = (IHTMLDocument3)doc2;
        IHTMLDOMNode rootDomNode = (IHTMLDOMNode)doc3.documentElement;

        // Suspend repainting while the nodes are being added.
        treeView1.BeginUpdate();
        try
        {
            TreeNode root = treeView1.Nodes.Add("HTML");
            InsertDOMNodes(rootDomNode, root);
        }
        finally
        {
            treeView1.EndUpdate();
        }
    }

    /// <summary>
    /// Recursively mirrors the children of a DOM node into a tree node.
    /// </summary>
    /// <param name="parentnode">DOM node whose children are added; null is ignored.</param>
    /// <param name="tree_node">Tree node that receives the new child nodes.</param>
    /// <remarks>
    /// Nodes named "#text" are added as leaves showing their trimmed value, and are
    /// skipped when that value is empty. Every other node is added under its node
    /// name and then processed recursively.
    /// </remarks>
    private void InsertDOMNodes(IHTMLDOMNode parentnode,TreeNode tree_node)
    {
        if (parentnode == null || !parentnode.hasChildNodes())
        {
            return;
        }

        IHTMLDOMChildrenCollection children = (IHTMLDOMChildrenCollection)parentnode.childNodes;
        int count = children.length;
        for (int i = 0; i < count; i++)
        {
            IHTMLDOMNode child = (IHTMLDOMNode)children.item(i);
            string name = child.nodeName;

            if (name == "#text")
            {
                object rawValue = child.nodeValue;
                string text = "";
                if (rawValue != null)
                {
                    text = rawValue.ToString().Trim();
                }
                // Whitespace-only text nodes would only add noise to the tree.
                if (text.Length > 0)
                {
                    tree_node.Nodes.Add(text);
                }
            }
            else
            {
                TreeNode branch = tree_node.Nodes.Add(name);
                InsertDOMNodes(child, branch);
            }
        }
    }
}
}