App 移動端沒有後臺數據,沒辦法,只能自己寫唄。湊合着用,日子湊合着過,於是慢慢開始寫爬蟲了。
這是一個 Python Scrapy 項目,關於 Scrapy 的具體知識請自行擴展學習。
首先來一張自己稍稍總結的導圖:
代碼不做過多解釋
功能:獲取動態請求數據的 URL
class HomeNewsSpidersSpider(scrapy.Spider):
    """Spider that reads a JavaScript variable from the CCTV news home page.

    The page embeds an inline ``<script>``; this spider converts that
    JavaScript into an XML tree with js2xml and prints the string literal
    assigned to the second ``var`` declared in the script.
    """

    name = 'home_news_spiders'
    allowed_domains = ['news.cctv.com']
    start_urls = ['http://news.cctv.com/']

    def parse(self, response):
        """Extract and print a string variable from an inline script.

        Args:
            response: The downloaded response for one of ``start_urls``.

        Returns:
            None. The extracted value is only printed; nothing is yielded.
        """
        # NOTE(review): the element id looks auto-generated and may change
        # whenever the site is republished -- confirm before relying on it.
        script_text = response.xpath(
            '//*[@id="SUBD1563517622685109"]/script[2]/text()'
        ).extract_first()
        if script_text is None:
            # extract_first() yields None when the node is absent; passing
            # None on to js2xml would fail with an opaque error instead.
            self.logger.warning(
                'Inline script not found on %s; page layout may have changed',
                response.url,
            )
            return

        # Convert the JavaScript source into an XML tree, then serialise it.
        js_tree = js2xml.parse(script_text, encoding='utf-8', debug=False)
        js_xml = js2xml.pretty_print(js_tree)

        # Selector(text=...) parses the markup as HTML, which wraps it in
        # <html><body>; that is why the XPath below starts at /html/body.
        selector = Selector(text=js_xml)
        content = selector.xpath(
            "/html/body/program/var[2]/string/text()"
        ).extract_first()
        print('content=', content)