import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.http.*;
import org.springframework.http.HttpMethod;
import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import javax.swing.plaf.metal.OceanTheme;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.regex.Matcher;
/**
 * Crawls the guest reviews of one Ctrip hotel and queues every review for insertion.
 *
 * <p>Flow: {@link #getHotel()} downloads the hotel page, {@link #oceanball} obtains the
 * {@code eleven} token required by the review endpoint, and {@link #getEiinfo} walks the
 * paginated review list, adding one {@code Object[12]} row per review to an internal queue.
 * {@link #main} passes that queue, together with the insert statement, to
 * {@code URLDemo2.insert}.
 *
 * <p>Created by Administrator on 2018/9/28.
 */
public class GetHotelJudge {
    /** Consecutive attempts allowed for one page whose review list is missing. */
    private static final int MAX_PAGE_ATTEMPTS = 5;

    /** Characters a generated callback name may contain after its {@code CAS} prefix. */
    private static final String CALLBACK_ALPHABET =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

    /** CSS path of the pager links inside a review-list response. */
    private static final String PAGER_LINKS =
            "#divCtripComment > div.c_page_box > div > div.c_page_list.layoutfix > a";

    /** Review rows (column order matches the insert statement in {@link #main}). */
    private static final BlockingQueue<Object[]> insertqueuej = new LinkedBlockingQueue<>();

    /**
     * Entry point: registers the review queue with the inserter, then starts the crawl.
     *
     * @param args unused
     * @throws IOException declared for compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        // The insert statement can be adapted to your own database schema.
        String sql="insert into hoteljude(HOTELNAME,USERIMG,USERNAME,SCORE,GGSCORE,HOTELTYPE,CHECKINTIME,BADTYPE,JUDGETIME,JUDGEINTRO,JUDGEREPLAY,JUDGEIMG) values(?,?,?,?,?,?,?,?,?,?,?,?)";
        URLDemo2.insert(sql, insertqueuej);
        getHotel();
    }

    /**
     * Downloads the page of the hard-coded hotel, reads the hotel name from it and starts
     * collecting reviews from page 0.
     *
     * <p>If the hotel name element is missing (for example because an error or anti-bot page
     * was returned) the crawl is aborted with a message instead of failing with a
     * {@code NullPointerException}.
     */
    public static void getHotel() {
        try {
            String hotelid = "441351"; // hotel id
            String uri = "http://hotels.ctrip.com/hotel/" + hotelid + ".html";
            HttpClient h = new JavaHttpClient();
            // Any GET implementation works here; this one is the client shipped with this code.
            HttpResponse s = h.doGet(uri, null);
            Document doc = Jsoup.parse(s.getResponseString());
            Element nameElement = doc.selectFirst("#J_htl_info > div.name > h2.cn_n");
            if (nameElement == null) {
                System.err.println("Hotel name element not found in " + uri + "; aborting.");
                return;
            }
            String hotelname = nameElement.text();
            Map cookies = s.getCookies();
            Map head = gethead(hotelid);
            String even = oceanball(hotelid, head, cookies); // "eleven" token
            int currentPage = 0;
            getEiinfo(currentPage, hotelid, "", head, even, cookies, hotelname);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Fetches review pages starting at {@code currentPage} and queues every review found.
     *
     * <p>After each page the pager is read; while the displayed page is not the last one, a
     * fresh token is requested via {@link #oceanball} and the next page is fetched. A page
     * whose review list is missing is retried with a fresh token, at most
     * {@value #MAX_PAGE_ATTEMPTS} times in a row. The crawl also stops when the pager cannot
     * be read or when the server returns a page that was already processed.
     *
     * @param currentPage page number sent in the first request
     * @param hotelid     Ctrip hotel id
     * @param sub         unused; kept so existing callers keep compiling
     * @param headMap     request headers, see {@link #gethead}
     * @param eleven      token for the first request, see {@link #oceanball}
     * @param cookies     cookies sent with every request
     * @param hotelname   hotel name stored in column 0 of every queued row
     */
    public static void getEiinfo(int currentPage, String hotelid, String sub, Map headMap,
                                 String eleven, Map cookies, String hotelname) {
        int page = currentPage;
        String token = eleven;
        int attempts = 0;
        int lastShownPage = -1;
        try {
            HttpClient client = new JavaHttpClient();
            while (true) {
                // Fix: the original literal read "orderBy=2¤tPage=" ("&curren" had been turned
                // into the currency sign), so the page number never reached the server.
                String pl = "http://hotels.ctrip.com/Domestic/tool/AjaxHotelCommentList.aspx?" +
                        "MasterHotelID=" + hotelid + "&hotel=" + hotelid + "&NewOpenCount=0&AutoExpiredCount=0&RecordCount=2365&OpenDate=&keywordPress=1&card=-1&property=-1" +
                        "&UserType=&productcode=&keyword=&roomName=&orderBy=2&currentPage=" + page + "&viewVersion=c&contyped=0" +
                        "&eleven=" + token + "&callback=" + getcallback(15) + "&_=" + System.currentTimeMillis();
                HttpResponse r = client.doGet2(pl, headMap, cookies);
                Document doc = Jsoup.parse(r.getResponseString());

                // Container holding all review blocks of this page.
                Element list = doc.selectFirst("#divCtripComment > div.comment_detail_list");
                if (list == null) {
                    attempts++;
                    if (attempts >= MAX_PAGE_ATTEMPTS) {
                        System.err.println("No review list for page " + page + " after "
                                + attempts + " attempts; stopping.");
                        return;
                    }
                    token = oceanball(hotelid, headMap, cookies);
                    continue;
                }
                attempts = 0;

                int shownPage = pageNumberOf(doc.selectFirst(PAGER_LINKS + ".current"));
                int totalPages = pageNumberOf(doc.select(PAGER_LINKS).last());
                if (shownPage >= 0 && shownPage <= lastShownPage) {
                    // The server did not advance; going on would queue the same reviews again.
                    System.err.println("Server returned page " + shownPage
                            + " again; stopping.");
                    return;
                }

                for (Element review : list.getElementsByClass("comment_block J_asyncCmt")) {
                    insertqueuej.add(toReviewRow(review, hotelname)); // queued for insertion
                }

                if (shownPage < 0 || totalPages < 0 || shownPage + 1 > totalPages) {
                    return; // no readable pager, or last page reached
                }
                lastShownPage = shownPage;
                page = shownPage + 1;
                token = oceanball(hotelid, headMap, cookies);
            }
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }

    /** Returns the number shown by a pager link, or -1 if the link is absent or not numeric. */
    private static int pageNumberOf(Element link) {
        if (link == null) {
            return -1;
        }
        try {
            return Integer.parseInt(link.text().trim());
        } catch (NumberFormatException notANumber) {
            return -1;
        }
    }

    /** Returns the text of the first element matching {@code css} below {@code root}, or null. */
    private static String textOrNull(Element root, String css) {
        Element found = root.selectFirst(css);
        return found == null ? null : found.text();
    }

    /** Returns attribute {@code attr} of the first element matching {@code css}, or null. */
    private static String attrOrNull(Element root, String css, String attr) {
        Element found = root.selectFirst(css);
        return found == null ? null : found.attr(attr);
    }

    /** Converts one review block into the 12-column row expected by the insert statement. */
    private static Object[] toReviewRow(Element review, String hotelname) {
        String img = attrOrNull(review, "div.user_info > p.head > span.img > img", "src"); // avatar
        String name = textOrNull(review, "div.user_info > p.name > span"); // user name
        String score = textOrNull(review, "div.comment_main > p > span.score > span"); // total score
        if (score == null) {
            score = "0";
        }
        // Per-dimension scores are carried in the data-value attribute.
        String ggscore = attrOrNull(review, "div.comment_main > p > span.small_c", "data-value");
        String type = textOrNull(review, "div.comment_main > p > span.type"); // trip type
        String time = textOrNull(review, "div.comment_main > p > span.date"); // stay date
        String bad = textOrNull(review, "div.comment_main > p > a"); // room type
        String date = textOrNull(review,
                "div.comment_main > div.comment_txt > div.comment_bar > p > span"); // review date
        if (date != null) {
            // NOTE(review): this prefix must match the page's wording exactly - confirm.
            date = date.replace("發表於", "");
        }
        String intro = textOrNull(review,
                "div.comment_main > div.comment_txt > div.J_commentDetail"); // review text
        System.out.println("intro:" + intro);
        String replay = textOrNull(review, "div.comment_main > div.htl_reply > p.text"); // hotel reply

        // Image URLs are joined as "url;url;"; pictures without an <img> are skipped
        // (the original stored the literal text "null;" for them).
        StringBuilder purl = new StringBuilder();
        Element picturediv = review.selectFirst("div.comment_main > div > div.comment_pic");
        if (picturediv != null) {
            for (Element pic : picturediv.getElementsByClass("pic")) {
                Element imgs = pic.selectFirst("img.p");
                if (imgs != null) {
                    purl.append(imgs.attr("src")).append(';');
                }
            }
        }

        return new Object[] {
            hotelname, img, name, score, ggscore, type, time, bad, date, intro, replay,
            purl.toString()
        };
    }

    /**
     * Builds the request headers sent with the review and token requests.
     *
     * @param hotelid hotel id used to build the {@code Referer} header
     * @return a new mutable map of header name to header value
     */
    public static Map gethead(String hotelid) {
        Map<String, String> map = new HashMap<>();
        map.put("Host", "hotels.ctrip.com");
        map.put("Accept", "*/*");
        map.put("Cache-Control", "max-age=0");
        map.put("If-Modified-Since", "Thu, 01 Jan 1970 00:00:00 GMT");
        map.put("Content-Type","application/x-javascript; charset=utf-8");
        map.put("Accept-Language", "zh-CN,zh;q=0.8");
        map.put("Referer", "http://hotels.ctrip.com/hotel/" + hotelid + ".html");
        map.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36");
        return map;
    }

    /**
     * Obtains the {@code eleven} token by downloading the "oceanball" script and running it.
     *
     * <p>The downloaded script is first unpacked: its {@code eval} calls are replaced by
     * {@code JSON.stringify} so that evaluating it yields the inner source as text. That
     * source is then evaluated in a second engine that provides stub browser globals and a
     * callback which stores its argument in {@code getEleven}; finally {@code getEleven} is
     * invoked and its result returned.
     *
     * <p>SECURITY: this executes JavaScript received over plain HTTP inside the JVM. It is
     * kept because the token cannot be produced otherwise, but it should only be run in an
     * environment where executing remote code is acceptable.
     *
     * @param hotelid hotel id exposed to the script as {@code hotel_id}
     * @param headMap request headers
     * @param cookies cookies sent with the request
     * @return the token; an empty string if the engine cannot invoke functions; {@code null}
     *         if no JavaScript engine is installed or the request or script fails
     */
    public static String oceanball(String hotelid, Map headMap, Map cookies) {
        try {
            ScriptEngine engine = new ScriptEngineManager().getEngineByName("javascript");
            ScriptEngine engine1 = new ScriptEngineManager().getEngineByName("javascript");
            if (engine == null || engine1 == null) {
                // getEngineByName returns null when the runtime ships no JavaScript engine.
                System.err.println("No JavaScript ScriptEngine available; cannot compute the eleven token.");
                return null;
            }

            String callback = getcallback(15);
            HttpClient h = new JavaHttpClient();
            long currtime = System.currentTimeMillis();
            String oceanball = "http://hotels.ctrip.com/domestic/cas/oceanball?callback="+callback+"&_="+currtime+"";
            HttpResponse res = h.doGet2(oceanball, headMap, cookies);

            // Step 1: unpack. The first eval returns the inner source as a quoted string
            // literal; the second eval turns that literal back into plain text.
            String ocean = res.getResponseString();
            ocean = ocean.replace("eval","JSON.stringify");
            ocean = String.valueOf(engine.eval(ocean));
            ocean = ocean.replace(callback,"var eleven=" + callback);
            ocean = String.valueOf(engine.eval(new StringReader(ocean)));

            // Step 2: run the unpacked source against stub browser objects.
            engine1.eval("var hotel_id = \""+hotelid+"\"; var site = {}; site.getUserAgent = function(){}; var Image = function(){}; var window = {}; window.document = window.document = {body:{innerHTML:\"1\"}, documentElement:{attributes:{webdriver:\"1\"}}, createElement:function(x){return {innerHTML:\"1\"}}}; var document = window.document;window.navigator = {\"appCodeName\":\"Mozilla\", \"appName\":\"Netscape\", \"language\":\"zh-CN\", \"platform\":\"Win\"}; window.navigator.userAgent = site.getUserAgent(); var navigator = window.navigator; window.location = {}; window.location.href = \"http://hotels.ctrip.com/hotel/\"+hotel_id+\".html\"; var location = window.location;" +
                    " var navigator = {userAgent:{indexOf: function(x){return \"1\"}}, geolocation:\"1\"};var getEleven = 'zgs'; " );
            engine1.eval("var "+callback+" = function(a){getEleven = a;};");
            engine1.eval(ocean);

            // Fix: the original tested `engine` but cast `engine1`.
            if (!(engine1 instanceof Invocable)) {
                return "";
            }
            // invokeFunction: first argument is the function name, further arguments would
            // be passed to the script function.
            return (String) ((Invocable) engine1).invokeFunction("getEleven");
        } catch (IOException | NoSuchMethodException | ScriptException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Generates a random JSONP callback name: {@code "CAS"} followed by {@code number}
     * ASCII letters.
     *
     * <p>Every letter is equally likely. The original picked the index with
     * {@code Math.ceil(51 * Math.random())}, which selects index 0 only when the random
     * value is exactly 0.
     *
     * @param number how many random letters to append; values below 1 yield {@code "CAS"}
     * @return the callback name
     */
    public static String getcallback(int number) {
        StringBuilder cal = new StringBuilder("CAS");
        for (int i = 0; i < number; i++) {
            int t = (int) (Math.random() * CALLBACK_ALPHABET.length());
            cal.append(CALLBACK_ALPHABET.charAt(t));
        }
        return cal.toString();
    }
}
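// GET/POST request helper code follows. It is spread over several classes and is somewhat involved, but it can be copied and used as-is.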
import javax.net.ssl.*;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @Author: liaog
* @Date: 2018-08-01 13:28
* @description:
*/
public class JavaHttpClient extends AbstractHttpClient implements HttpClient{
protected String defaultCharset = "utf-8";
protected int timeout = 20000;
protected boolean useProxy;
protected String hostname;
protected int port;
public HttpResponse doRequest(HttpMethod method, String url, Map<String, String> userHeaders, InputStream data,Map<String, String> cookies) throws IOException {
URL urlObject = new URL(url);
HttpURLConnection urlConnection = openConnection(urlObject);
if (urlObject.getProtocol().equalsIgnoreCase("https")) {
HttpsURLConnection httpsURLConnection = (HttpsURLConnection) urlConnection;
prepareForHttps(httpsURLConnection);
}
String sessionid="";
if (cookies != null) {
for (Map.Entry<String, String> entry : cookies.entrySet()) {
//urlConnection.addRequestProperty(entry.getKey(), entry.getValue());
sessionid=sessionid+entry.getKey()+"="+entry.getValue()+";";
}
urlConnection.addRequestProperty("Cookie", sessionid);
}
urlConnection.setRequestMethod(method.name());
urlConnection.setConnectTimeout(timeout);
urlConnection.setReadTimeout(timeout);
urlConnection.setInstanceFollowRedirects(false);
if (userHeaders != null) {
for (Map.Entry<String, String> entry : userHeaders.entrySet()) {
urlConnection.addRequestProperty(entry.getKey(), entry.getValue());
}
}
if (data != null) {
int len = data.available();
urlConnection.addRequestProperty("Content-Length", String.valueOf(len));
urlConnection.setDoInput(true);
urlConnection.setDoOutput(true);
OutputStream outputStream = urlConnection.getOutputStream();
IOUtil.copyAndClose(data, outputStream);
}
urlConnection.setInstanceFollowRedirects( false );
InputStream responseInputStream = urlConnection.getInputStream();
int responseCode = urlConnection.getResponseCode();
ByteArrayOutputStream bos = new ByteArrayOutputStream(64);
IOUtil.copyAndClose(responseInputStream, bos);
Map<String, List<String>> headers = new HashMap<String, List<String>>(urlConnection.getHeaderFields());
String sessionId = "";
String cookieVal = "";
String key = null;
Map<String,String > map=new HashMap();
for(int i = 1; (key = urlConnection.getHeaderFieldKey(i)) != null; i++){//獲取cookies
if(key.equalsIgnoreCase("set-cookie")){
cookieVal = urlConnection.getHeaderField(i);
cookieVal = cookieVal.substring(0, cookieVal.indexOf(";"));
String s[]=cookieVal.split("=");
map.put(s[0],s[1]);
sessionId = sessionId + cookieVal + ";";
System.out.println("==="+cookieVal);
}
}
// System.out.println("session"+sessionId);
String redirect=urlConnection.getHeaderField( "location" );//獲得302轉發地址
//System.out.println("location:"+redirect);
/*if(redirect!=null){
doRequest(HttpMethod.GET,redirect,userHeaders,null,map);
}*/
return new HttpResponse(defaultCharset, responseCode, headers, bos.toByteArray(),map);
}
private HttpURLConnection openConnection(URL url) throws IOException {
if (isUseProxy()) {
return (HttpURLConnection)url.openConnection(new Proxy(Proxy.Type.HTTP, new InetSocketAddress(getHostname(), getPort())));
} else {
return (HttpURLConnection)url.openConnection();
}
}
private void prepareForHttps(HttpsURLConnection httpsURLConnection) {
try {
SSLContext sslContext = SSLContext.getInstance("SSL");
sslContext.init(null, new TrustManager[]{new AbstractHttpClient.TrustAnyTrustManager()}, secureRandom);
httpsURLConnection.setSSLSocketFactory(sslContext.getSocketFactory());
httpsURLConnection.setHostnameVerifier(new TrustAnyHostnameVerifier());
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
} catch (KeyManagementException e) {
throw new RuntimeException(e);
}
}
public String getDefaultCharset() {
return defaultCharset;
}
public void setDefaultCharset(String defaultCharset) {
this.defaultCharset = defaultCharset;
}
public int getTimeout() {
return timeout;
}
public void setTimeout(int timeout) {
this.timeout = timeout;
}
public boolean isUseProxy() {
return useProxy;
}
public void setUseProxy(boolean useProxy) {
this.useProxy = useProxy;
}
public String getHostname() {
return hostname;
}
public void setHostname(String hostname) {
this.hostname = hostname;
}
public int getPort() {
return port;
}
public void setPort(int port) {
this.port = port;
}
}
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
/**
 * Minimal blocking HTTP client abstraction.
 *
 * <p>{@link #doRequest} is the general operation; the remaining methods are GET/POST
 * shortcuts with the same parameters.
 *
 * @author liaogk (2018-08-01 13:18)
 */
public interface HttpClient {

    /**
     * Sends a request.
     *
     * @param method      HTTP method
     * @param url         target URL
     * @param userHeaders request headers
     * @param data        request body
     * @param cookies     cookies to send
     * @return the response
     * @throws IOException on I/O failure
     */
    HttpResponse doRequest(HttpMethod method,
                           String url,
                           Map<String, String> userHeaders,
                           InputStream data,
                           Map<String, String> cookies) throws IOException;

    /** GET returning only the response body bytes. */
    byte[] doGet(String url) throws IOException;

    /** GET with request headers. */
    HttpResponse doGet(String url, Map<String, String> headers) throws IOException;

    /** GET with request headers and cookies. */
    HttpResponse doGet2(String url,
                        Map<String, String> headers,
                        Map<String, String> cookies) throws IOException;

    /** POST with request headers, a body and cookies. */
    HttpResponse dopost(String url,
                        Map<String, String> headers,
                        InputStream data,
                        Map<String, String> cookies) throws IOException;
}
/**
 * HTTP request methods accepted by {@link HttpClient}.
 *
 * <p>The constant name is what gets used as the verb on the wire (the bundled
 * {@code JavaHttpClient} passes {@code name()} to
 * {@code HttpURLConnection.setRequestMethod}), so each name must be a valid HTTP method.
 *
 * @author liaogk (2018-08-01 13:22)
 */
public enum HttpMethod {
    POST,
    GET,
    PUT,
    DELETE,
    /**
     * @deprecated {@code HEADER} is not an HTTP method and is rejected by
     *     {@code HttpURLConnection.setRequestMethod}; use {@link #HEAD}. Kept so existing
     *     references and ordinals stay valid.
     */
    @Deprecated
    HEADER,
    OPTIONS,
    /** The standard HEAD method; appended last so existing ordinals are unchanged. */
    HEAD;
}
import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.Map;
/**
 * Immutable-by-convention snapshot of an HTTP response: status code, headers, body bytes
 * and the cookies extracted from it.
 *
 * @author liaog (2018-08-01 13:24)
 */
public class HttpResponse {
    private final int responseCode;
    private final Map<String, List<String>> headers;
    private final byte[] responseData;
    private final String defaultCharset;
    private Map<String ,String> cookies;

    /**
     * @param defaultCharset charset used when the response does not declare one
     * @param responseCode   HTTP status code
     * @param headers        response headers, header name to values; may be {@code null}
     * @param responseData   raw response body
     * @param cookies        cookies set by the response
     */
    public HttpResponse(String defaultCharset, int responseCode, Map<String, List<String>> headers, byte[] responseData,Map<String ,String> cookies) {
        this.defaultCharset = defaultCharset;
        this.responseCode = responseCode;
        this.headers = headers;
        this.responseData = responseData;
        this.cookies = cookies;
    }

    public int getResponseCode() {
        return responseCode;
    }

    /** Returns the raw body (the internal array, not a copy). */
    public byte[] getResponseData() {
        return responseData;
    }

    /**
     * Decodes the body using {@link #getResponseCharset()}.
     *
     * @throws RuntimeException wrapping {@link UnsupportedEncodingException} if the charset
     *     name is not supported by this JVM
     */
    public String getResponseString() {
        try {
            return new String(responseData, getResponseCharset());
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns all values of a header.
     *
     * <p>An exact-case match is tried first; otherwise header names are compared
     * case-insensitively, because HTTP header names are case-insensitive and servers do not
     * all use the same capitalisation.
     *
     * @param name header name
     * @return the values, or {@code null} if the header is absent
     */
    public List<String> getHeaders(String name) {
        if (headers == null) {
            return null;
        }
        List<String> exact = headers.get(name);
        if (exact != null || name == null) {
            return exact;
        }
        for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
            // equalsIgnoreCase(null) is false, so a null key (status line) never matches.
            if (name.equalsIgnoreCase(entry.getKey())) {
                return entry.getValue();
            }
        }
        return null;
    }

    /** Returns the first value of a header, or {@code null} if the header is absent. */
    public String getHeader(String name) {
        List<String> theHeaders = getHeaders(name);
        if (theHeaders == null || theHeaders.isEmpty()) {
            return null;
        }
        return theHeaders.get(0);
    }

    /**
     * Returns the charset declared in the {@code Content-Type} header, or the default
     * charset when none is declared.
     *
     * <p>The {@code charset} parameter name is matched case-insensitively and surrounding
     * double quotes around the value are removed (e.g. {@code charset="utf-8"}).
     */
    public String getResponseCharset() {
        String contentType = getHeader("Content-Type");
        if (contentType == null || contentType.length() == 0) {
            return defaultCharset;
        }
        for (String part : contentType.split(";")) {
            String[] kvParts = part.trim().split("=", 2);
            if (kvParts.length < 2 || !kvParts[0].trim().equalsIgnoreCase("charset")) {
                continue;
            }
            String value = kvParts[1].trim();
            if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
                value = value.substring(1, value.length() - 1).trim();
            }
            if (value.length() > 0) {
                return value;
            }
        }
        return defaultCharset;
    }

    public Map<String ,String> getCookies() {
        return cookies;
    }

    public void setCookies(Map<String ,String> cookies) {
        this.cookies = cookies;
    }
}
import java.io.*;
/**
 * Small stream and file helpers.
 *
 * @author liaog (2018-08-01 13:39)
 */
public class IOUtil {
    /** Size of the copy buffer in bytes. */
    private static final int BUF_LEN = 1024 * 8;

    /**
     * One copy buffer per thread, so repeated copies do not allocate a new array each time.
     */
    private static final ThreadLocal<byte[]> bufTl = new ThreadLocal<byte[]>() {
        @Override
        protected byte[] initialValue() {
            return new byte[BUF_LEN];
        }
    };

    private static byte[] getBuf() {
        return bufTl.get();
    }

    /**
     * Copies everything from {@code is} to {@code os}, then closes both streams.
     *
     * <p>Both streams are closed even when reading or writing fails (the original left them
     * open in that case). Errors raised while closing are ignored, see
     * {@link #close(Closeable)}.
     *
     * @param is source stream
     * @param os destination stream
     * @throws IOException if reading or writing fails
     */
    public static void copyAndClose(InputStream is, OutputStream os) throws IOException {
        try {
            byte[] buf = getBuf();
            int len;
            while ((len = is.read(buf)) >= 0) {
                os.write(buf, 0, len);
            }
        } finally {
            close(is);
            close(os);
        }
    }

    /** Reads a whole file into memory. */
    public static byte[] readAsBytes(File file) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream(BUF_LEN);
        copyAndClose(readAsStream(file), bos);
        return bos.toByteArray();
    }

    /** Writes {@code data} to {@code file}, replacing any existing content. */
    public static void writeBytesToFile(File file, byte[] data) throws IOException{
        FileOutputStream fos = new FileOutputStream(file);
        copyAndClose(new ByteArrayInputStream(data), fos);
    }

    /** Opens a file for reading; the caller is responsible for closing the stream. */
    public static InputStream readAsStream(File file) throws IOException {
        return new FileInputStream(file);
    }

    /** Closes {@code c} if it is non-null, ignoring any exception thrown by {@code close()}. */
    public static void close(Closeable c) {
        if (c != null) {
            try {
                c.close();
            } catch (Exception ignored) {
                // best-effort close: there is nothing useful the caller could do here
            }
        }
    }

    /**
     * Returns the suffix of a file name including the dot (e.g. {@code ".txt"}).
     *
     * @param name file name, may be {@code null}
     * @return the suffix from the last {@code '.'} onwards, or {@code null} if {@code name}
     *     is {@code null} or contains no dot
     */
    public static String getFileNameSuffix(String name) {
        if (name == null) {
            return null;
        }
        int pos = name.lastIndexOf('.');
        if (pos < 0) {
            return null;
        }
        return name.substring(pos);
    }
}
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.SSLSession;
import javax.net.ssl.X509TrustManager;
import java.io.IOException;
import java.io.InputStream;
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.Map;
/**
 * Base class for {@link HttpClient} implementations: maps the GET/POST shortcuts onto
 * {@link #doRequest} and supplies the TLS helper types used by subclasses.
 *
 * @author liaogk (2018-08-01 13:19)
 */
public abstract class AbstractHttpClient implements HttpClient{

    /** Randomness source available to subclasses (for example when initialising TLS). */
    protected final SecureRandom secureRandom = new SecureRandom();

    /** The single operation a concrete client has to implement. */
    @Override
    public abstract HttpResponse doRequest(HttpMethod method, String url, Map<String, String> userHeaders, InputStream data ,Map<String,String> cookies) throws IOException;

    /** GET without headers or cookies; returns only the body bytes. */
    @Override
    public byte[] doGet(String url) throws IOException {
        HttpResponse response = doGet(url, null);
        return response.getResponseData();
    }

    /** GET with headers and no cookies. */
    @Override
    public HttpResponse doGet(String url, Map<String, String> headers) throws IOException {
        final InputStream noBody = null;
        final Map<String, String> noCookies = null;
        return doRequest(HttpMethod.GET, url, headers, noBody, noCookies);
    }

    /** GET with headers and cookies. */
    @Override
    public HttpResponse doGet2(String url, Map<String, String> headers,Map<String,String> cookies) throws IOException {
        final InputStream noBody = null;
        return doRequest(HttpMethod.GET, url, headers, noBody, cookies);
    }

    /** POST with headers, a body and cookies. */
    @Override
    public HttpResponse dopost(String url, Map<String, String> headers,InputStream data, Map<String, String> cookies) throws IOException {
        return doRequest(HttpMethod.POST, url, headers, data, cookies);
    }

    /**
     * Trust manager that accepts every certificate chain.
     *
     * <p>SECURITY: both check methods are empty, so no certificate validation takes place.
     */
    protected static class TrustAnyTrustManager implements X509TrustManager {
        @Override
        public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
            // intentionally accepts everything
        }

        @Override
        public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
            // intentionally accepts everything
        }

        @Override
        public X509Certificate[] getAcceptedIssuers() {
            return new X509Certificate[0];
        }
    }

    /**
     * Hostname verifier that accepts every host.
     *
     * <p>SECURITY: always returns {@code true}, so the certificate's host name is not checked.
     */
    protected static class TrustAnyHostnameVerifier implements HostnameVerifier {
        @Override
        public boolean verify(String host, SSLSession session) {
            return true;
        }
    }
}