爬進gdut新聞網的實現

 /**
         * Set up a CookieStore so that HttpClient manages cookies automatically:
         * cookies received on one response are stored and offered to later
         * requests made through the same client.
         */
        CookieStore store = new BasicCookieStore();
        RequestConfig config = RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build();
        /**
         * Log in to the news site.
         *
         * NOTE(review): the form fields (including __VIEWSTATE and __EVENTVALIDATION)
         * are sent in the query string of the POST and are fixed, previously captured
         * values; presumably the server still accepts them -- confirm if login starts failing.
         */
        HttpClient client = HttpClients.custom()
                .setDefaultRequestConfig(config)
                .setDefaultCookieStore(store)
                .build();
        HttpPost post=new HttpPost("http://news.gdut.edu.cn/UserLogin.aspx?preURL=http%3a%2f%2fnews.gdut.edu.cn%2fdefault.aspx&__VIEWSTATE=%2FwEPDwUKLTQwOTA4NzE2NmQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFI2N0bDAwJENvbnRlbnRQbGFjZUhvbGRlcjEkQ2hlY2tCb3gxBufpEJuDDaf6eTj0A4Cn2Erf8u98KcGrQqATTB3mEaQ%3D&__EVENTVALIDATION=%2FwEWBQKb37HjDwLgvLy9BQKi4MPwCQL%2BzqO2BAKA4sljg4IvzC7ksG01o7aN0RZUOKEC4lV0bTeXI4zrbaQsj0c%3D&ctl00%24ContentPlaceHolder1%24userEmail=gdutnews&ctl00%24ContentPlaceHolder1%24userPassWord=newsgdut&ctl00%24ContentPlaceHolder1%24CheckBox1=on&ctl00%24ContentPlaceHolder1%24Button1=%E7%99%BB%E5%BD%95");
        HttpResponse response = client.execute(post);
        System.out.println(response.getStatusLine());
        // Drain the login response body (a null entity is tolerated) so that its
        // connection is handed back to the pool before the next request is issued.
        org.apache.http.util.EntityUtils.consume(response.getEntity());


        /**
         * Open the specific notice (article) page.
         */
        HttpGet get = new HttpGet("http://news.gdut.edu.cn/ViewArticle.aspx?articleid=144748");
        response = client.execute(get);

        /**
         * Parse the page.
         *
         * The raw byte stream is given to jsoup directly instead of being decoded
         * line by line with the platform default charset: passing a null charset
         * lets jsoup detect the encoding from the BOM / <meta> tag, line breaks in
         * the markup are preserved, and try-with-resources closes the stream (which
         * also releases the connection) even if parsing fails.
         */
        Document doc;
        try (InputStream input = response.getEntity().getContent()) {
            doc = Jsoup.parse(input, null, get.getURI().toString());
        }
        // Print the element whose id is "content" (prints "null" if the page has none).
        System.out.println(doc.getElementById("content"));
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章