《Python網絡爬蟲從入門到實踐》動態抓取數據

import json

import requests

# Comment-list endpoint on api-zero.livere.com. The query string carries a
# `callback=` parameter, so the response is expected to be JSONP rather than
# bare JSON.
link = """https://api-zero.livere.com/v1/comments/list?callback=jQuery112407157432933558674_1566909798809&limit=10&repSeq=4272904&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1566909798811"""
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'}

# Without a timeout, requests waits indefinitely on a stalled connection.
r = requests.get(link, headers=headers, timeout=10)
# Fail with a clear HTTP error instead of a confusing JSON decode error later.
r.raise_for_status()
print(r.text)

# Extract the JSON object from the JSONP wrapper: keep everything from the
# first '{' to the last '}'. This does not depend on how many characters
# (")", ");", a trailing newline, ...) follow the payload.
json_string = r.text
json_start = json_string.find('{')
json_end = json_string.rfind('}')
if json_start == -1 or json_end < json_start:
    raise ValueError("response does not contain a JSON object")
json_string = json_string[json_start:json_end + 1]
json_data = json.loads(json_string)
print(json_data)

# The list of comments is stored under results -> parents.
comment_list = json_data['results']['parents']
print(comment_list)
for eachone in comment_list:
    # Each entry's 'content' field is the text that gets printed.
    message = eachone['content']
    print(message)

 

import requests
import json
def _strip_jsonp_wrapper(text):
    """Return the JSON object embedded in a JSONP response body.

    Keeps everything from the first '{' to the last '}', so the result does
    not depend on what follows the payload (")", ");", a newline, ...).

    Raises:
        ValueError: if *text* contains no '{' ... '}' span.
    """
    start = text.find('{')
    end = text.rfind('}')
    if start == -1 or end < start:
        raise ValueError("response does not contain a JSON object")
    return text[start:end + 1]


def single_page_comment(link):
    """Fetch one comment-list page and print the content of every comment.

    Args:
        link: Full URL of the comment-list request; the response is expected
            to be JSONP wrapping an object with a results -> parents list.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the body holds no JSON object or the JSON is invalid
            (json.JSONDecodeError is a ValueError subclass).
        KeyError: if the decoded object lacks 'results', 'parents' or an
            entry lacks 'content'.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    r = requests.get(link, headers=headers, timeout=10)
    # Surface HTTP failures directly rather than as a JSON decode error.
    r.raise_for_status()
    json_data = json.loads(_strip_jsonp_wrapper(r.text))
    comment_list = json_data['results']['parents']
    for eachone in comment_list:
        message = eachone['content']
        print(message)
# The two fixed halves of the request URL; the page number is inserted
# between them as the value of the `offset` query parameter.
url_head = "https://api-zero.livere.com/v1/comments/list?callback=jQuery112403473268296510956_1531502963311&limit=10&offset="
url_tail = "&repSeq=4272904&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1531502963316"

# Request offsets 1, 2 and 3 in turn, echoing each URL before fetching it.
for page_no in range(1, 4):
    page_url = "".join((url_head, str(page_no), url_tail))
    print(page_url)
    single_page_comment(page_url)

 

發表評論
所有評論
還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。
相關文章