import requests
import lxml.etree as etree
def getBook(url, timeout=10):
    """Download a book-listing page and print one record per listed book.

    Every ``<div class="book-mid-info">`` element on the page is treated as
    one book. For each of them a dict with the keys ``ID`` (1-based position
    on the page), ``Title``, ``BookLink``, ``Author`` and ``Intro`` is built
    and printed to stdout. Nothing is returned.

    Fields are looked up relative to each book's own container, so a book
    with a missing field yields an empty string for that field instead of
    shifting the data of every following book (or raising ``IndexError``).

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up; prevents a stalled connection from hanging the crawl.
    """
    response = requests.get(url, timeout=timeout)
    # Force UTF-8 decoding before response.text is read.
    response.encoding = 'utf-8'

    # Build the selector object used for the XPath queries.
    selector = etree.HTML(response.text)
    if selector is None:
        # Defensive: the parser produced no document tree, nothing to list.
        return

    def first(node, path):
        """Return the first XPath match under *node*, or '' if none."""
        matches = node.xpath(path)
        return matches[0] if matches else ''

    books = selector.xpath('//div[@class="book-mid-info"]')
    for n, book in enumerate(books, start=1):
        data = {
            'ID': n,
            # text() is required so the query yields strings, not elements.
            'Title': first(book, 'h4/a/text()'),
            # NOTE(review): hrefs are presumably protocol-relative
            # ("//host/path"), hence the scheme prefix -- confirm.
            'BookLink': "https:" + first(book, 'h4/a/@href'),
            'Author': first(book, 'p[1]/a[1]/text()'),
            'Intro': first(book, 'p[2]/text()').strip(),
        }
        print(data)
# Example of a single page address:
#   https://www.qidian.com/rank/collect?chn=21&page=1
# Collect the addresses of pages 1 through 99, then scrape them in order.
urlList = []
for page in range(1, 100):
    urlList.append('https://www.qidian.com/rank/collect?chn=21&page=' + str(page))

for url in urlList:
    getBook(url)
xpath 爬取起點小說 寫入mysql前端
發表評論
所有評論
還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。