(二)爬蟲htmlunit 練習例子,模擬下單。

下載 htmlunit 所需的 jar 包:

https://download.csdn.net/download/final0402/12158044

 

import java.net.URL;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

public class ExampleHtmlUnit {

    
    public  static void main(String args[]) throws Exception{
        
        
        System.out.println("-----------------------------------------開始執行代碼----------------------------------------------");
        final WebClient webClient = new WebClient(BrowserVersion.CHROME);
        webClient.getOptions().setThrowExceptionOnScriptError(false);//當JS執行出錯的時候是否拋出異常, 
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);//當HTTP的狀態非200時是否拋出異常, 
        webClient.getOptions().setActiveXNative(false);
        webClient.getOptions().setCssEnabled(false);//是否啓用CSS, 因爲不需要展現頁面, 
        webClient.getOptions().setJavaScriptEnabled(true); //啓用JS
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());//設置支持AJAX
        System.out.println("-----------------------------------------初始化瀏覽器對象完成----------------------------------------------");
                


        //下單網頁地址,
        URL link=new URL("https://xxz.xxxx.xxx/ssss146.html"); 
        WebRequest request=new WebRequest(link); 

 

 

      //通過F12查看,進行設置,有多少,加多少
        request.setAdditionalHeader("Referer", "https://passport.jd.com/new/login.aspx?ReturnUrl=https%3A%2F%2Fitem.jd.com%2F100011385146.html");//設置請求報文頭裏的refer字段
        ////設置請求報文頭裏的User-Agent字段
        request.setAdditionalHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0");
        request.setAdditionalHeader("Connection", "keep-alive");
        request.setAdditionalHeader("upgrade-insecure-requests", "1");
        request.setAdditionalHeader("accept-language", "zh-CN,zh;q=0.9");
        request.setAdditionalHeader("accept-encoding", "gzip, deflate, br");
        request.setAdditionalHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        request.setAdditionalHeader("authority", "item.jd.com");
        request.setAdditionalHeader("method", "GET");
        request.setAdditionalHeader("path", "/100ddd.html");
        request.setAdditionalHeader("scheme", "https");
        
        
        
        //設置個人用戶cookie遊覽器參數,可以登錄,F12查看,整體拷貝就行了。
        request.setAdditionalHeader("Cookie", 
                "shshshfpb=; "
                + " __jdu=; "
                + " shshshfpa=xxx-xx-xx-xx-xxx-xx;"
                + " areaId=1; "
                + " pinId=xxxxxxxx-x-f3wj7;"
                + " pin=jd_4xxxxxxx;"
                + " unick=jd_xxxxxxx;"
                + " _tp=vhxxhrLR2BQCBLcNTxxjA%3D;"
                + " _pst=jd_4xx9;"
                + " user-key=xx-b482-xx-xx-xx;"
                + " ipLoc-djd=x-2901-x-x;"
                + " ipLocation=%xx%xx;"
                + " cn=31; "
                + " unpl=V2_ZzNtbxxxxxwUB3MRWANhUBQNclRCFnxxxxcRxBFCEdkexhdBxxxECdSbDVkAyJdQxxx9GlQHbgMQVEVXQxN2C0NRSylbNVczxxxVjChReSlVxxgYiXHJU;"
                + "  __jdv=xx|123.sogou.com|t_1000003625_sogoumz|tuiguang|xxxxxx|xxx; "
                + " PCSYCityID=CN_110000_110100_110114;"
                + " __jda=122270672.1581587293813861172244.1581587294.1581601331.1581777054.3;"
                + " __jdc=234234; "
                + " shshshfp=234234234; wlfstk_smdl=234234234; "
                + " TrackID=234234-WleEb0cj2GzLIeO-234234234234; "
                + " thor=234234234234; "
                + " ceshi3.com=201; __jdb=23423|23423423;"
                + " shshshsID=234234234234; "
                + " 3AB9D23F7A4B3C9B=xxxxxx");


        
        
        System.out.println("-----------------------------------------設置瀏覽器 參數----------------------------------------------");
        HtmlPage page = null;
        try {
            //page = webClient.getPage("https://item.jd.com/100011385146.html");//嘗試加載上面圖片例子給出的網頁
            page = webClient.getPage(request);
        } catch (Exception e) {
            //e.printStackTrace();
        }finally {
            webClient.close();
        }
        
        webClient.waitForBackgroundJavaScript(30000);//異步JS執行需要耗時,所以這裏線程要阻塞30秒,等待異步JS執行結束
     
        System.err.println("-----------------------------------------發出客戶端請求----------------------------------------------"+page.getWebResponse().getLoadTime());      
        
        //assertEquals("HtmlUnit - Welcome to HtmlUnit", page.getTitleText());  
        //System.err.println(  page.getDocumentURI() );
        
        System.err.println(  page.getTitleText() );
        //System.err.println(  page.getHead() );
        //System.err.println(  page.getLocalName() );
        //System.err.println(  page.asText() );
        //System.err.println(  page.querySelector("") );
        //System.err.println(  page.getInputEncoding() );
        //System.err.println(  page.getBody() );
        
        HtmlPage pageResult = page;
        //System.err.println(  pageResult.getElementsByIdAndOrName("btn-reservation").c );
        
        
        //獲得  等待預約  對象
        DomElement onclick = pageResult.getElementById("btn-reservation");
        System.err.println(   onclick.getTextContent()   );
        
        
        //模擬用戶點擊預約對象
        Page p = onclick.click();
        System.err.println( p.getWebResponse().getLoadTime() );
        System.err.println( p.getWebResponse().getStatusMessage() );
        
    }

 

}

發佈了14 篇原創文章 · 獲贊 5 · 訪問量 3萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章