/**
 * Logs in to news.gdut.edu.cn, fetches one article page with the same
 * cookie-carrying client, and prints the element whose id is "content".
 *
 * Set up a CookieStore so that HttpClient manages cookies automatically:
 * cookies set by the login response are stored here and sent again on the
 * article request.
 */
CookieStore store = new BasicCookieStore();
RequestConfig config = RequestConfig.custom()
        .setCookieSpec(CookieSpecs.STANDARD)
        .build();
HttpClient client = HttpClients.custom()
        .setDefaultRequestConfig(config)
        .setDefaultCookieStore(store)
        .build();

/**
 * Log in to the news site.
 *
 * All form fields travel in the query string of the POST; the pieces below
 * concatenate at compile time to exactly the URL that was used originally.
 * NOTE(review): the __VIEWSTATE / __EVENTVALIDATION values and the account
 * name/password are hard-coded captures - confirm they are still valid and
 * are meant to be kept in source.
 */
HttpPost post = new HttpPost(
        "http://news.gdut.edu.cn/UserLogin.aspx"
        + "?preURL=http%3a%2f%2fnews.gdut.edu.cn%2fdefault.aspx"
        + "&__VIEWSTATE=%2FwEPDwUKLTQwOTA4NzE2NmQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFI2N0bDAwJENvbnRlbnRQbGFjZUhvbGRlcjEkQ2hlY2tCb3gxBufpEJuDDaf6eTj0A4Cn2Erf8u98KcGrQqATTB3mEaQ%3D"
        + "&__EVENTVALIDATION=%2FwEWBQKb37HjDwLgvLy9BQKi4MPwCQL%2BzqO2BAKA4sljg4IvzC7ksG01o7aN0RZUOKEC4lV0bTeXI4zrbaQsj0c%3D"
        + "&ctl00%24ContentPlaceHolder1%24userEmail=gdutnews"
        + "&ctl00%24ContentPlaceHolder1%24userPassWord=newsgdut"
        + "&ctl00%24ContentPlaceHolder1%24CheckBox1=on"
        + "&ctl00%24ContentPlaceHolder1%24Button1=%E7%99%BB%E5%BD%95");
HttpResponse response = client.execute(post);
System.out.println(response.getStatusLine());
// The login body itself is not needed, but its content stream must be closed
// so the underlying connection is handed back before the next request.
if (response.getEntity() != null) {
    response.getEntity().getContent().close();
}

/**
 * Open the specific notice page.
 */
String articleUrl = "http://news.gdut.edu.cn/ViewArticle.aspx?articleid=144748";
HttpGet get = new HttpGet(articleUrl);
response = client.execute(get);

/**
 * Parse the page.
 *
 * The raw byte stream is given to jsoup instead of being decoded line by line:
 * a null charset lets jsoup pick the encoding from the document (BOM / meta
 * tag) rather than the JVM's platform default, and line breaks inside the
 * markup are no longer dropped by joining readLine() results back to back.
 * try-with-resources closes the stream when parsing finishes or fails.
 */
Document doc;
try (InputStream input = response.getEntity().getContent()) {
    doc = Jsoup.parse(input, null, articleUrl);
}
System.out.println(doc.getElementById("content"));
爬進gdut新聞網的實現
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.