jsoup參考資料:
https://www.jianshu.com/p/fd5caaaa950d
深坑:
爬蟲爬到的網頁源碼和按F12查看的網頁源碼不一致。爲什麼?
網頁最終顯示的頁面源碼是經過瀏覽器解析後的,get或者post請求到的源碼是服務器直接返回的,不一樣是正常的。
審查元素(或者用開發者工具、Firebug)看到的是當前的實時內容(已經過js修改),而網頁源代碼看到的就是瀏覽器最初收到的HTTP響應內容
這個原因,就是頁面加載的時候瀏覽器會渲染,把對應的class填充內容,但是爬蟲的時候沒有渲染的功能
開始不知道,爬取數據的時候發現有的字段返回爲null
如,爬取愛奇藝的網頁,我嘗試了JS/HTML格式化(http://tool.chinaz.com/Tools/jsformat.aspx)
嘗試了json格式,但本身是HTML(https://www.json.cn/#)
嘗試了VScode...
但是最後發現在谷歌瀏覽器直接開發者模式下查看Elements比較好,格式清晰一目瞭然,由於開發者模式下查詢比較卡,可以打開查看網頁源碼,進行搜索查找元素
分層爲<div><h3><span>
寫代碼之前,要學習jsoup,很簡單,看懂了再去寫效率高。。。
第一次寫爬蟲,對照競品爬取代碼debug,仿照寫
選擇器 select:按class取元素時,直接寫 select(".classname")
如遇:
解決報錯:javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException
參考:https://blog.csdn.net/u010248330/article/details/70161899
javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
at sun.security.ssl.Alerts.getSSLException(Alerts.java:192)
at sun.security.ssl.SSLSocketImpl.fatal(SSLSocketImpl.java:1949)
at sun.security.ssl.Handshaker.fatalSE(Handshaker.java:302)
at sun.security.ssl.Handshaker.fatalSE(Handshaker.java:296)
at sun.security.ssl.ClientHandshaker.serverCertificate(ClientHandshaker.java:1509)
at sun.security.ssl.ClientHandshaker.processMessage(ClientHandshaker.java:216)
at sun.security.ssl.Handshaker.processLoop(Handshaker.java:979)
at sun.security.ssl.Handshaker.process_record(Handshaker.java:914)
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:1062)
at sun.security.ssl.SSLSocketImpl.performInitialHandshake(SSLSocketImpl.java:1375)
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1403)
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1387)
at sun.net.www.protocol.https.HttpsClient.afterConnect(HttpsClient.java:559)
at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:185)
at sun.net.www.protocol.https.HttpsURLConnectionImpl.connect(HttpsURLConnectionImpl.java:153)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:746)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:722)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:306)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:295)
at com.alibaba.pingce.jingpin.BliHandler.getBliPcResult(BliHandler.java:44)
at com.alibaba.pingce.jingpin.BliHandler.main(BliHandler.java:199)
Caused by: sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
at sun.security.validator.PKIXValidator.doBuild(PKIXValidator.java:387)
at sun.security.validator.PKIXValidator.engineValidate(PKIXValidator.java:292)
at sun.security.validator.Validator.validate(Validator.java:260)
at sun.security.ssl.X509TrustManagerImpl.validate(X509TrustManagerImpl.java:324)
at sun.security.ssl.X509TrustManagerImpl.checkTrusted(X509TrustManagerImpl.java:229)
at sun.security.ssl.X509TrustManagerImpl.checkServerTrusted(X509TrustManagerImpl.java:124)
at sun.security.ssl.ClientHandshaker.serverCertificate(ClientHandshaker.java:1491)
... 16 more
Caused by: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
at sun.security.provider.certpath.SunCertPathBuilder.build(SunCertPathBuilder.java:141)
at sun.security.provider.certpath.SunCertPathBuilder.engineBuild(SunCertPathBuilder.java:126)
at java.security.cert.CertPathBuilder.build(CertPathBuilder.java:280)
at sun.security.validator.PKIXValidator.doBuild(PKIXValidator.java:382)
... 22 more
Exception in thread "main" java.lang.NullPointerException
at com.alibaba.pingce.jingpin.BliHandler.getBliPcResult(BliHandler.java:189)
at com.alibaba.pingce.jingpin.BliHandler.main(BliHandler.java:199)
Disconnected from the target VM, address: '127.0.0.1:56813', transport: 'socket'
在網上查閱了信息說是證書問題,可以在代碼中寫一段邏輯忽略證書:
下面是網上下載的代碼:http://www.sojson.com/blog/195.html
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
/**
 * Switches off HTTPS certificate and host-name verification for every
 * {@link HttpsURLConnection} opened afterwards in this JVM.
 *
 * <p><b>Security warning:</b> once {@link #ignoreSsl()} has run, any server
 * certificate is accepted for any host, so connections are open to
 * man-in-the-middle attacks. The change is process-wide (it replaces the
 * {@code HttpsURLConnection} defaults). Use it only for crawling or test
 * tooling, never for traffic that carries sensitive data.
 */
public class SslUtils {

    /** Shared empty issuer list; the trust-manager contract requires a non-null array. */
    private static final X509Certificate[] NO_ACCEPTED_ISSUERS = new X509Certificate[0];

    /**
     * Installs a default SSL socket factory whose trust manager accepts every
     * certificate chain.
     *
     * @throws Exception if the SSL context cannot be created or initialised
     */
    public static void trustAllHttpsCertificates() throws Exception {
        TrustManager[] trustAllCerts = {new miTM()};
        // "TLS" is the standard context name; "SSL" is a legacy name for the same thing.
        SSLContext sc = SSLContext.getInstance("TLS");
        sc.init(null, trustAllCerts, null);
        HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /** Trust manager that performs no validation at all. */
    static class miTM implements TrustManager, X509TrustManager {

        /**
         * @return an empty array (never {@code null}), as required by
         *         {@link X509TrustManager#getAcceptedIssuers()}
         */
        @Override
        public X509Certificate[] getAcceptedIssuers() {
            return NO_ACCEPTED_ISSUERS;
        }

        /** Always reports the server chain as trusted. */
        public boolean isServerTrusted(X509Certificate[] certs) {
            return true;
        }

        /** Always reports the client chain as trusted. */
        public boolean isClientTrusted(X509Certificate[] certs) {
            return true;
        }

        /** Accepts every server certificate chain without inspecting it. */
        @Override
        public void checkServerTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: trust everything.
        }

        /** Accepts every client certificate chain without inspecting it. */
        @Override
        public void checkClientTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: trust everything.
        }
    }

    /**
     * Ignores SSL certificates for HTTPS requests. Must be called before
     * {@code openConnection()}, because it only changes the defaults picked up
     * by connections created afterwards.
     *
     * @throws Exception if the trust-all socket factory cannot be installed
     */
    public static void ignoreSsl() throws Exception {
        HostnameVerifier hv = new HostnameVerifier() {
            @Override
            public boolean verify(String urlHostName, SSLSession session) {
                // Accept any host name, whatever the certificate says.
                return true;
            }
        };
        trustAllHttpsCertificates();
        HttpsURLConnection.setDefaultHostnameVerifier(hv);
    }
}
//在URLConnection con = url.openConnection()之前使用就行
/**
 * Demo entry point: relaxes HTTPS verification through {@code SslUtils.ignoreSsl()}
 * and then hands a sample image URL to {@code imageDownLoad} (defined elsewhere).
 */
public static void main(String[] args) {
    final String fileName = "test";
    final String downToFilePath = "d:/download/image/";
    // Alternative plain-HTTP sample:
    // http://wx1.sinaimg.cn/mw690/006sl6kBgy1fel3aq0nyej30i20hxq7i.jpg
    final String url = "https://05.imgmini.eastday.com/mobile/20170413/20170413053046_4a5e70ed0b39c824517630e6954861f2_1.jpeg";

    try {
        SslUtils.ignoreSsl();
    } catch (Exception sslSetupFailure) {
        // Best effort: report the problem and still attempt the download.
        sslSetupFailure.printStackTrace();
    }

    imageDownLoad(url, downToFilePath, fileName);
}
在代碼中,增加如上工具類方法的異常信息捕獲即可
BliHandler
package com.alibaba.pingce.jingpin;
import com.alibaba.algo.dao.SokuTopQueryCompareSnapshotInfoDao;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.pingce.component.Constants;
import com.alibaba.pingce.model.JingPinModle;
import com.alibaba.util.http.handler.SslUtil;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
/**
 * Crawls the Bilibili PC search page for a query and converts the result cards
 * (bangumi/programme cards first, then ugc video cards) into {@link JingPinModle}
 * rows, followed by one extra row that carries the stored snapshot picture.
 *
 * <p>Only the HTML returned by the server is parsed; nothing is rendered, so
 * fields that the page fills in via JavaScript may be missing. Missing text
 * fields are reported as the literal string {@code "null"}.
 */
@Service
public class BliHandler {

    /** Maximum number of crawled cards returned per query (snapshot row excluded). */
    private static final int MAX_RESULTS = 5;

    /** Placeholder stored in a model field when the page offers no value. */
    private static final String MISSING = "null";

    private static final String SITE_NAME = "B站";

    /** Exact {@code class} attribute of a programme card. */
    private static final String BANGUMI_CLASS = "bangumi-item-wrap";

    /** Exact {@code class} attribute of a ugc video card. */
    private static final String UGC_CLASS = "video-item matrix";

    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

    @Autowired
    SokuTopQueryCompareSnapshotInfoDao sokuTopQueryCompareSnapshotInfoDao;

    /**
     * Fetches the search result page for {@code query} and extracts up to
     * {@value #MAX_RESULTS} cards, ranked in page order starting at 1.
     *
     * <p>Any failure while fetching or parsing is printed and swallowed; the
     * cards collected so far are still returned.
     *
     * @param query search keyword; it is URL-encoded before being sent
     * @param num   NOTE(review): currently ignored, the limit is fixed at
     *              {@value #MAX_RESULTS}; confirm with callers before honouring it
     * @return the crawled cards followed by one snapshot row whose rank is
     *         {@code cards + 1}; never {@code null}
     */
    public List<JingPinModle> getBliPcResult(String query, int num) {
        List<JingPinModle> jingPinModles = new ArrayList<>();
        try {
            disableCertificateChecks();

            // Encode the keyword so non-ASCII text and characters such as '&' or '#'
            // cannot corrupt the query string.
            String url = "https://search.bilibili.com/all?keyword="
                    + java.net.URLEncoder.encode(query, "UTF-8")
                    + "&from_source=nav_suggest_new";
            Document doc = Jsoup.connect(url).userAgent(USER_AGENT).get();

            // Container of all card lists (programme, ugc, ...) for this query.
            Elements docList = doc.select(".mixin-list");

            // Programme cards. Each list is walked exactly once, so no card is
            // emitted twice and ranks keep increasing across card types.
            for (Element card : docList.select(".bangumi-item-wrap")) {
                if (jingPinModles.size() >= MAX_RESULTS) {
                    break;
                }
                if (!BANGUMI_CLASS.equals(card.attr("class").trim())) {
                    continue;
                }
                jingPinModles.add(toBangumiModel(card, query, jingPinModles.size() + 1));
            }

            // Ugc cards. Iterating the list avoids indexing past its end.
            for (Element card : docList.select("li[class=video-item matrix]")) {
                if (jingPinModles.size() >= MAX_RESULTS) {
                    break;
                }
                if (!UGC_CLASS.equals(card.attr("class").trim())) {
                    continue;
                }
                jingPinModles.add(toUgcModel(card, query, jingPinModles.size() + 1));
            }
        } catch (Exception e) {
            // Best effort: a crawl failure must not prevent the snapshot row below.
            e.printStackTrace();
        }

        JingPinModle capture_model = new JingPinModle();
        // The DAO is only injected when Spring creates this bean; main() below
        // builds the handler with "new", which used to end in a NullPointerException.
        if (sokuTopQueryCompareSnapshotInfoDao != null) {
            capture_model.setPic(
                    sokuTopQueryCompareSnapshotInfoDao.selectUrlBySiteAndQuery(query, Constants.BliBli));
        }
        capture_model.setQuery(query);
        capture_model.setRank(jingPinModles.size() + 1);
        jingPinModles.add(capture_model);
        return jingPinModles;
    }

    /** Relaxes HTTPS verification; a failure is printed and the crawl is still attempted. */
    private static void disableCertificateChecks() {
        try {
            SslUtil.ignoreSsl();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Builds the model for one programme card. */
    private static JingPinModle toBangumiModel(Element card, String query, int rank) {
        Elements rightInfo = card.select(".right-info");
        // A card that carries a bangumi label is a programme; anything else is a topic card.
        boolean hasBangumiLabel =
                !rightInfo.select("span[class=bangumi-label]").text().trim().isEmpty();

        JingPinModle model = new JingPinModle();
        model.setRank(rank);
        model.setQuery(query);
        model.setVdo_title(rightInfo.select("a[href]").attr("title").trim());
        // attr() takes an attribute name, not a selector: pick the <img> first, then read "src".
        // NOTE(review): the cover appears to be lazily loaded, so "src" may be empty
        // in the server-side HTML.
        model.setPic(toHttpUrl(card.select(".lazy-img").select("img").attr("src")));
        model.setSite(SITE_NAME);
        model.setCreate_time(MISSING);
        model.setRel_people(MISSING);
        model.setSeconds(MISSING);
        model.setUrl(toHttpUrl(card.select("a").attr("href")));
        model.setType(hasBangumiLabel ? "節目(番劇)" : "專題");
        return model;
    }

    /** Builds the model for one ugc video card. */
    private static JingPinModle toUgcModel(Element card, String query, int rank) {
        Elements info = card.select(".info");
        Elements tags = info.select(".tags");
        Elements titleAnchor = info.select(".headline").select("a[class=title]");

        // Uploader: primary selector first, then the alternative markup.
        String uploader = tags.select("span[class=so-icon]").select("a[class=up-name]").text();
        if (uploader.isEmpty()) {
            uploader = card.select("div[class=result-right]")
                    .select("div[desc=上傳者]").select("a[class=uploader-name]").attr("title").trim();
        }

        // NOTE(review): the cover and link selectors below look like they target
        // another site's markup; verify them against a real result page.
        String cover = card.select("div[class=result-figure]")
                .select("img[class=qy-mod-cover]").attr("src");
        String link = card.select("div[class=result-right]")
                .select("a[class=main-tit]").attr("href").trim();
        if (link.isEmpty()) {
            // Presumably the title anchor links to the video page - TODO confirm.
            link = titleAnchor.attr("href");
        }

        JingPinModle model = new JingPinModle();
        model.setRank(rank);
        model.setQuery(query);
        model.setVdo_title(titleAnchor.attr("title").trim());
        model.setPic(toHttpUrl(cover));
        model.setSite(SITE_NAME);
        // Upload time.
        model.setCreate_time(tags.select("span[class=so-icon time]").text());
        model.setRel_people(uploader);
        // Video duration.
        model.setSeconds(card.select(".img").select("span[class=so-imgTag_rb]").text());
        model.setUrl(toHttpUrl(link));
        model.setType("ugc");
        // Play count.
        model.setPlay_count(tags.select("span[class=so-icon watch-num]").text());
        return model;
    }

    /**
     * Normalises a link taken from the page: a blank value becomes the
     * {@code "null"} placeholder, a value that already has a scheme is kept,
     * and anything else (typically a protocol-relative {@code //host/path})
     * gets the {@code http:} prefix.
     */
    private static String toHttpUrl(String raw) {
        String value = raw == null ? "" : raw.trim();
        if (value.isEmpty()) {
            return MISSING;
        }
        if (value.startsWith("http://") || value.startsWith("https://")) {
            return value;
        }
        return "http:" + value;
    }

    /** Manual smoke test; runs without Spring, so the snapshot DAO is not injected. */
    public static void main(String[] args) {
        BliHandler handler = new BliHandler();
        List<JingPinModle> modles = handler.getBliPcResult("輝夜大小姐", 5);
        System.out.println(modles.size());
    }
}
遇到的問題
調試的時候,發現圖片取不到,爲null
以下是開發者模式下抓取到的字段img
換一種方式,不用jsoup改用json解析:截取“顯示網絡源碼”裏的json,從window.__INITIAL_STATE__=到;(function(){var s;之前的json。pic取值如下(拼接https)
videoid取值如下 https://www.bilibili.com/video/av 拼接json裏的id
爬取結果如下:
jsoup源碼:
源碼:
//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by Fernflower decompiler)
//
package org.jsoup.nodes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.jsoup.SerializationException;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document.OutputSettings;
import org.jsoup.parser.Parser;
import org.jsoup.select.NodeFilter;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
/**
 * Base class of the node tree. Each node keeps a reference to its parent and
 * its index within the parent's child list; attribute storage, child storage
 * and HTML rendering are delegated to abstract methods.
 *
 * <p>This listing was recreated by a decompiler (see the header above), so
 * local names such as {@code var3} are artifacts rather than original names.
 */
public abstract class Node implements Cloneable {
static final String EmptyString = "";
// Parent of this node; null while the node is detached.
Node parentNode;
// Position of this node in the parent's child list.
int siblingIndex;
protected Node() {
}
/** Returns the name of this node. */
public abstract String nodeName();
/** Tells whether this node holds attributes; {@link #attr(String)} uses it to skip the lookup. */
protected abstract boolean hasAttributes();
/** Returns true when this node currently has a parent. */
public boolean hasParent() {
return this.parentNode != null;
}
/**
 * Reads an attribute value by key (case-insensitive lookup).
 *
 * <p>The key is an attribute name, not a selector. Returns "" when the node
 * has no attributes or the stored value is empty; in the latter case a key
 * starting with "abs:" is resolved through {@link #absUrl(String)} using the
 * part after the prefix.
 */
public String attr(String attributeKey) {
Validate.notNull(attributeKey);
if (!this.hasAttributes()) {
return "";
} else {
String val = this.attributes().getIgnoreCase(attributeKey);
if (val.length() > 0) {
return val;
} else {
return attributeKey.startsWith("abs:") ? this.absUrl(attributeKey.substring("abs:".length())) : "";
}
}
}
/** Returns the attribute collection of this node. */
public abstract Attributes attributes();
/** Stores an attribute value (key matched case-insensitively) and returns this node for chaining. */
public Node attr(String attributeKey, String attributeValue) {
this.attributes().putIgnoreCase(attributeKey, attributeValue);
return this;
}
/**
 * Tests for an attribute. For an "abs:"-prefixed key the result is true when
 * the unprefixed attribute exists and resolves to a non-empty absolute URL;
 * otherwise the full key is looked up as given.
 */
public boolean hasAttr(String attributeKey) {
Validate.notNull(attributeKey);
if (attributeKey.startsWith("abs:")) {
String key = attributeKey.substring("abs:".length());
if (this.attributes().hasKeyIgnoreCase(key) && !this.absUrl(key).equals("")) {
return true;
}
}
return this.attributes().hasKeyIgnoreCase(attributeKey);
}
/** Removes the attribute with the given key (case-insensitive) and returns this node. */
public Node removeAttr(String attributeKey) {
Validate.notNull(attributeKey);
this.attributes().removeIgnoreCase(attributeKey);
return this;
}
/** Removes every attribute through the attribute iterator and returns this node. */
public Node clearAttributes() {
Iterator it = this.attributes().iterator();
while(it.hasNext()) {
it.next();
it.remove();
}
return this;
}
/** Returns the base URI used to resolve relative URLs for this node. */
public abstract String baseUri();
/** Sets the base URI on this single node; called by {@link #setBaseUri(String)} per visited node. */
protected abstract void doSetBaseUri(String var1);
/** Applies the base URI to every node visited by {@link #traverse(NodeVisitor)} from this node. */
public void setBaseUri(final String baseUri) {
Validate.notNull(baseUri);
this.traverse(new NodeVisitor() {
public void head(Node node, int depth) {
node.doSetBaseUri(baseUri);
}
public void tail(Node node, int depth) {
}
});
}
/**
 * Resolves an attribute value against {@link #baseUri()}.
 * Returns "" when the attribute is not present.
 */
public String absUrl(String attributeKey) {
Validate.notEmpty(attributeKey);
return !this.hasAttr(attributeKey) ? "" : StringUtil.resolve(this.baseUri(), this.attr(attributeKey));
}
/** Returns the child list of this node; the rest of this class mutates the returned list directly. */
protected abstract List<Node> ensureChildNodes();
/** Returns the child at the given index. */
public Node childNode(int index) {
return (Node)this.ensureChildNodes().get(index);
}
/** Returns an unmodifiable view of the child list. */
public List<Node> childNodes() {
return Collections.unmodifiableList(this.ensureChildNodes());
}
/** Returns a new list holding a clone of every child. */
public List<Node> childNodesCopy() {
List<Node> nodes = this.ensureChildNodes();
ArrayList<Node> children = new ArrayList(nodes.size());
Iterator var3 = nodes.iterator();
while(var3.hasNext()) {
Node node = (Node)var3.next();
children.add(node.clone());
}
return children;
}
/** Returns the number of children. */
public abstract int childNodeSize();
/** Copies the children into a new array sized by {@link #childNodeSize()}. */
protected Node[] childNodesAsArray() {
return (Node[])this.ensureChildNodes().toArray(new Node[this.childNodeSize()]);
}
/** Returns the parent node, or null when detached. */
public Node parent() {
return this.parentNode;
}
/** Same value as {@link #parent()}, but final. */
public final Node parentNode() {
return this.parentNode;
}
/** Follows the parent links upwards and returns the topmost node (this node if it has no parent). */
public Node root() {
Node node;
for(node = this; node.parentNode != null; node = node.parentNode) {
;
}
return node;
}
/** Returns the root if it is a {@link Document}, otherwise null. */
public Document ownerDocument() {
Node root = this.root();
return root instanceof Document ? (Document)root : null;
}
/** Detaches this node from its parent; the parent must not be null. */
public void remove() {
Validate.notNull(this.parentNode);
this.parentNode.removeChild(this);
}
/** Parses the HTML and inserts the result into the parent at this node's index (before it). */
public Node before(String html) {
this.addSiblingHtml(this.siblingIndex, html);
return this;
}
/** Inserts the given node into the parent at this node's index (before it). */
public Node before(Node node) {
Validate.notNull(node);
Validate.notNull(this.parentNode);
this.parentNode.addChildren(this.siblingIndex, node);
return this;
}
/** Parses the HTML and inserts the result into the parent right after this node. */
public Node after(String html) {
this.addSiblingHtml(this.siblingIndex + 1, html);
return this;
}
/** Inserts the given node into the parent right after this node. */
public Node after(Node node) {
Validate.notNull(node);
Validate.notNull(this.parentNode);
this.parentNode.addChildren(this.siblingIndex + 1, node);
return this;
}
// Parses html as a fragment (using the parent as context when it is an Element)
// and inserts the parsed nodes into the parent at the given index.
private void addSiblingHtml(int index, String html) {
Validate.notNull(html);
Validate.notNull(this.parentNode);
Element context = this.parent() instanceof Element ? (Element)this.parent() : null;
List<Node> nodes = Parser.parseFragment(html, context, this.baseUri());
this.parentNode.addChildren(index, (Node[])nodes.toArray(new Node[nodes.size()]));
}
/**
 * Wraps this node in the parsed HTML: the first parsed node replaces this
 * node in the parent, and this node is appended to that wrapper's deepest
 * first-child descendant.
 *
 * @return this node, or null when the first parsed node is not an Element
 */
public Node wrap(String html) {
Validate.notEmpty(html);
Element context = this.parent() instanceof Element ? (Element)this.parent() : null;
List<Node> wrapChildren = Parser.parseFragment(html, context, this.baseUri());
Node wrapNode = (Node)wrapChildren.get(0);
if (wrapNode != null && wrapNode instanceof Element) {
Element wrap = (Element)wrapNode;
Element deepest = this.getDeepChild(wrap);
this.parentNode.replaceChild(this, wrap);
deepest.addChildren(new Node[]{this});
// NOTE(review): every entry still in wrapChildren is detached from its parent and
// appended to the wrapper. Whether wrapChildren is a live view (and therefore no
// longer contains the wrapper at this point) cannot be told from this file - confirm
// against Parser.parseFragment before changing this loop.
if (wrapChildren.size() > 0) {
for(int i = 0; i < wrapChildren.size(); ++i) {
Node remainder = (Node)wrapChildren.get(i);
remainder.parentNode.removeChild(remainder);
wrap.appendChild(remainder);
}
}
return this;
} else {
return null;
}
}
/**
 * Replaces this node in its parent with this node's children.
 *
 * @return the first child, or null when there are no children
 */
public Node unwrap() {
Validate.notNull(this.parentNode);
List<Node> childNodes = this.ensureChildNodes();
Node firstChild = childNodes.size() > 0 ? (Node)childNodes.get(0) : null;
this.parentNode.addChildren(this.siblingIndex, this.childNodesAsArray());
this.remove();
return firstChild;
}
// Descends recursively through first children and returns the deepest element.
private Element getDeepChild(Element el) {
List<Element> children = el.children();
return children.size() > 0 ? this.getDeepChild((Element)children.get(0)) : el;
}
// Empty in this class.
void nodelistChanged() {
}
/** Replaces this node in its parent with the given node; the parent must not be null. */
public void replaceWith(Node in) {
Validate.notNull(in);
Validate.notNull(this.parentNode);
this.parentNode.replaceChild(this, in);
}
/** Sets the parent, first detaching this node from any current parent. */
protected void setParentNode(Node parentNode) {
Validate.notNull(parentNode);
if (this.parentNode != null) {
this.parentNode.removeChild(this);
}
this.parentNode = parentNode;
}
/**
 * Puts {@code in} at the position of child {@code out}. {@code in} is first
 * detached from its old parent, and {@code out} ends up without a parent.
 */
protected void replaceChild(Node out, Node in) {
Validate.isTrue(out.parentNode == this);
Validate.notNull(in);
if (in.parentNode != null) {
in.parentNode.removeChild(in);
}
int index = out.siblingIndex;
this.ensureChildNodes().set(index, in);
in.parentNode = this;
in.setSiblingIndex(index);
out.parentNode = null;
}
/** Removes the given child, renumbers the children from its old index on and clears its parent. */
protected void removeChild(Node out) {
Validate.isTrue(out.parentNode == this);
int index = out.siblingIndex;
this.ensureChildNodes().remove(index);
this.reindexChildren(index);
out.parentNode = null;
}
/** Appends the given nodes to the end of the child list, reparenting each one. */
protected void addChildren(Node... children) {
List<Node> nodes = this.ensureChildNodes();
Node[] var3 = children;
int var4 = children.length;
for(int var5 = 0; var5 < var4; ++var5) {
Node child = var3[var5];
this.reparentChild(child);
nodes.add(child);
child.setSiblingIndex(nodes.size() - 1);
}
}
/** Inserts the given nodes at the index, reparenting each one, then renumbers from that index on. */
protected void addChildren(int index, Node... children) {
Validate.noNullElements(children);
List<Node> nodes = this.ensureChildNodes();
Node[] var4 = children;
int var5 = children.length;
for(int var6 = 0; var6 < var5; ++var6) {
Node child = var4[var6];
this.reparentChild(child);
}
nodes.addAll(index, Arrays.asList(children));
this.reindexChildren(index);
}
/** Makes this node the parent of the given child. */
protected void reparentChild(Node child) {
child.setParentNode(this);
}
// Rewrites siblingIndex for every child from position start to the end of the list.
private void reindexChildren(int start) {
List<Node> childNodes = this.ensureChildNodes();
for(int i = start; i < childNodes.size(); ++i) {
((Node)childNodes.get(i)).setSiblingIndex(i);
}
}
/** Returns the parent's children except this node (identity comparison); empty list when detached. */
public List<Node> siblingNodes() {
if (this.parentNode == null) {
return Collections.emptyList();
} else {
List<Node> nodes = this.parentNode.ensureChildNodes();
List<Node> siblings = new ArrayList(nodes.size() - 1);
Iterator var3 = nodes.iterator();
while(var3.hasNext()) {
Node node = (Node)var3.next();
if (node != this) {
siblings.add(node);
}
}
return siblings;
}
}
/** Returns the sibling that follows this node, or null when there is none or no parent. */
public Node nextSibling() {
if (this.parentNode == null) {
return null;
} else {
List<Node> siblings = this.parentNode.ensureChildNodes();
int index = this.siblingIndex + 1;
return siblings.size() > index ? (Node)siblings.get(index) : null;
}
}
/** Returns the sibling that precedes this node, or null when this is the first child or has no parent. */
public Node previousSibling() {
if (this.parentNode == null) {
return null;
} else {
return this.siblingIndex > 0 ? (Node)this.parentNode.ensureChildNodes().get(this.siblingIndex - 1) : null;
}
}
/** Returns the stored index of this node within its parent's child list. */
public int siblingIndex() {
return this.siblingIndex;
}
/** Stores the index of this node within its parent's child list. */
protected void setSiblingIndex(int siblingIndex) {
this.siblingIndex = siblingIndex;
}
/** Runs the visitor through {@code NodeTraversor.traverse} starting at this node; returns this node. */
public Node traverse(NodeVisitor nodeVisitor) {
Validate.notNull(nodeVisitor);
NodeTraversor.traverse(nodeVisitor, this);
return this;
}
/** Runs the filter through {@code NodeTraversor.filter} starting at this node; returns this node. */
public Node filter(NodeFilter nodeFilter) {
Validate.notNull(nodeFilter);
NodeTraversor.filter(nodeFilter, this);
return this;
}
/** Renders this node to an HTML string. */
public String outerHtml() {
StringBuilder accum = new StringBuilder(128);
this.outerHtml(accum);
return accum.toString();
}
/** Renders this node into the given appendable using the effective output settings. */
protected void outerHtml(Appendable accum) {
NodeTraversor.traverse(new Node.OuterHtmlVisitor(accum, this.getOutputSettings()), this);
}
// Output settings of the owning document, or those of a fresh empty Document
// when this node does not belong to one.
OutputSettings getOutputSettings() {
Document owner = this.ownerDocument();
return owner != null ? owner.outputSettings() : (new Document("")).outputSettings();
}
// Writes the opening part of this node's HTML; invoked from OuterHtmlVisitor.head.
abstract void outerHtmlHead(Appendable var1, int var2, OutputSettings var3) throws IOException;
// Writes the closing part of this node's HTML; invoked from OuterHtmlVisitor.tail.
abstract void outerHtmlTail(Appendable var1, int var2, OutputSettings var3) throws IOException;
/** Writes the outer HTML of this node to the appendable and returns that appendable. */
public <T extends Appendable> T html(T appendable) {
this.outerHtml(appendable);
return appendable;
}
/** Returns {@link #outerHtml()}. */
public String toString() {
return this.outerHtml();
}
/** Appends a newline followed by padding of {@code depth * out.indentAmount()}. */
protected void indent(Appendable accum, int depth, OutputSettings out) throws IOException {
accum.append('\n').append(StringUtil.padding(depth * out.indentAmount()));
}
/** Identity comparison only; use {@link #hasSameValue(Object)} to compare content. */
public boolean equals(Object o) {
return this == o;
}
/** True when the other object is this node, or has the same class and equal outer HTML. */
public boolean hasSameValue(Object o) {
if (this == o) {
return true;
} else {
return o != null && this.getClass() == o.getClass() ? this.outerHtml().equals(((Node)o).outerHtml()) : false;
}
}
/**
 * Clones this node and the nodes below it. A queue is used instead of
 * recursion: for each cloned parent taken from the queue, every entry in its
 * child list is replaced by a clone parented to it, and that clone is queued
 * in turn. The returned clone has no parent.
 */
public Node clone() {
Node thisClone = this.doClone((Node)null);
LinkedList<Node> nodesToProcess = new LinkedList();
nodesToProcess.add(thisClone);
while(!nodesToProcess.isEmpty()) {
Node currParent = (Node)nodesToProcess.remove();
int size = currParent.childNodeSize();
for(int i = 0; i < size; ++i) {
List<Node> childNodes = currParent.ensureChildNodes();
Node childClone = ((Node)childNodes.get(i)).doClone(currParent);
childNodes.set(i, childClone);
nodesToProcess.add(childClone);
}
}
return thisClone;
}
/** Returns the result of {@link #doClone(Node)} with no parent; children are not processed here. */
public Node shallowClone() {
return this.doClone((Node)null);
}
/**
 * Copies this node via {@code Object.clone()} and attaches the copy to the
 * given parent. The sibling index is kept when a parent is given and reset to
 * 0 otherwise.
 */
protected Node doClone(Node parent) {
Node clone;
try {
clone = (Node)super.clone();
} catch (CloneNotSupportedException var4) {
throw new RuntimeException(var4);
}
clone.parentNode = parent;
clone.siblingIndex = parent == null ? 0 : this.siblingIndex;
return clone;
}
// Visitor that renders nodes into an Appendable. An IOException raised while
// writing is rethrown as SerializationException.
private static class OuterHtmlVisitor implements NodeVisitor {
private Appendable accum;
private OutputSettings out;
OuterHtmlVisitor(Appendable accum, OutputSettings out) {
this.accum = accum;
this.out = out;
out.prepareEncoder();
}
public void head(Node node, int depth) {
try {
node.outerHtmlHead(this.accum, depth, this.out);
} catch (IOException var4) {
throw new SerializationException(var4);
}
}
public void tail(Node node, int depth) {
// Nodes named "#text" get no closing part.
if (!node.nodeName().equals("#text")) {
try {
node.outerHtmlTail(this.accum, depth, this.out);
} catch (IOException var4) {
throw new SerializationException(var4);
}
}
}
}
}