import time
from datetime import datetime
from urllib import request
from urllib.parse import quote
from urllib.request import urlopen

import requests

from crawler_project.common_bases import CommonBases
class CrawlerZhiHuInfo(CommonBases):
    """Crawler that pages through Zhihu's ``search_v3`` API for one keyword.

    Request headers come from ``self.get_headers()``, which this class does
    not define (presumably inherited from ``CommonBases`` -- confirm there).
    """

    # Search endpoint template; ``search_key`` must already be URL-quoted.
    _SEARCH_URL = (
        'https://www.zhihu.com/api/v4/search_v3?t=general&q={search_key}'
        '&correction=1&offset={offset}&limit={limit}&lc_idx=62'
        '&show_all_topics=0'
    )
    _PAGE_SIZE = 20                # results requested per page
    _MAX_PAGES = 9999              # hard upper bound on pages per crawl
    _REQUEST_DELAY = 2             # seconds to wait between requests
    _REQUEST_TIMEOUT = 10          # seconds before one request is abandoned
    _MAX_CONSECUTIVE_FAILURES = 3  # give up after this many failures in a row

    def requests_data(self, url):
        """
        Fetch ``url`` with the crawler's headers and decode the JSON body.

        :param url: fully built request URL
        :return: the object decoded by ``response.json()``
        :raises ValueError: if the response body is not valid JSON
        :raises requests.RequestException: on network failure or timeout
        """
        headers = self.get_headers()
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(
            url=url, headers=headers, timeout=self._REQUEST_TIMEOUT
        )
        return response.json()

    def treatment_data(self, data):
        """
        Placeholder for post-processing fetched data; not implemented yet.

        :param data: currently ignored
        :return: None
        """
        return None

    def page_info(self, search_key='不方便'):
        """
        Handle pagination: fetch result pages until the API reports the end.

        Pages that fail to download or decode are reported and skipped. The
        crawl stops early after ``_MAX_CONSECUTIVE_FAILURES`` failures in a
        row, or when a response carries no usable ``paging.is_end`` flag.

        :param search_key: keyword to search for; it is URL-quoted here
        :return: list of decoded JSON payloads, one per fetched page
        """
        quoted_key = quote(search_key, safe='')
        data_lists = []
        consecutive_failures = 0
        for page_index in range(self._MAX_PAGES):
            # Page number and offset both derive from the loop index, so the
            # printed label always matches the offset actually requested.
            pag_num = page_index + 1
            url = self._SEARCH_URL.format(
                search_key=quoted_key,
                offset=page_index * self._PAGE_SIZE,
                limit=self._PAGE_SIZE,
            )
            try:
                json_data = self.requests_data(url)
            except (ValueError, requests.RequestException):
                print(f'知乎程序,正在獲取失敗是第{pag_num}頁數據')
                consecutive_failures += 1
                if consecutive_failures >= self._MAX_CONSECUTIVE_FAILURES:
                    # The service looks unavailable; keep what we have.
                    break
                time.sleep(self._REQUEST_DELAY)
                continue

            consecutive_failures = 0
            data_lists.append(json_data)
            print(f'知乎程序,正在獲取第{pag_num}頁數據')
            print(json_data)

            # Without paging information there is no way to know whether
            # more pages exist, so treat a missing flag as the end.
            paging = None
            if isinstance(json_data, dict):
                paging = json_data.get('paging')
            if not isinstance(paging, dict) or paging.get('is_end', True):
                break
            time.sleep(self._REQUEST_DELAY)

        print(data_lists)
        return data_lists

    def main(self):
        """
        Main entry point: run the crawl and print the elapsed time.

        :return: None
        """
        start_time = datetime.now()
        self.page_info()
        end_time = datetime.now()
        print('程序執行時間', end_time - start_time)
# Module-level crawler instance; it is created at import time, not only when
# this file is run as a script.
crawler_zhihu_info = CrawlerZhiHuInfo()

if __name__ == '__main__':
    # Start the crawl only when the file is executed directly.
    crawler_zhihu_info.main()
知乎簡單抓取
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼?請在上方評論欄輸入並且點擊發布。