XPath 爬取起點小說 寫入 MySQL 前端

import requests
import lxml.etree as etree

def getBook(url, *, start_id=1, timeout=10):
    """Fetch one ranking page and print a record for every book listed on it.

    Each book is located by its ``div.book-mid-info`` container and all
    fields are read relative to that container, so a book with a missing
    author link or a multi-part intro cannot shift the fields of the
    books that follow it.

    Args:
        url: Address of the ranking page to download.
        start_id: Value used for the ``'ID'`` of the first book on the
            page; later books count up from it. Defaults to 1.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts with the keys ``'ID'``, ``'Title'``,
        ``'BookLink'``, ``'Author'`` and ``'Intro'``, in page order.
        Fields that are absent from the page are empty strings.

    Raises:
        requests.RequestException: If the request fails, times out, or
            the server answers with an HTTP error status.
    """
    # Imported locally so the block does not depend on the file header.
    from urllib.parse import urljoin

    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as "no books".
    response.raise_for_status()
    response.encoding = 'utf-8'

    html = response.text
    if not html.strip():
        return []

    # Build the selector object used for the XPath queries.
    selector = etree.HTML(html)
    if selector is None:
        return []

    books = []
    containers = selector.xpath('//div[@class="book-mid-info"]')
    for book_id, node in enumerate(containers, start=start_id):
        # Relative queries keep every field tied to its own book node.
        titles = node.xpath('./h4/a/text()')
        hrefs = node.xpath('./h4/a/@href')
        authors = node.xpath('./p[1]/a[1]/text()')
        intro_parts = node.xpath('./p[2]/text()')

        data = {
            'ID': book_id,
            'Title': titles[0] if titles else '',
            # urljoin resolves protocol-relative ("//host/path") and
            # site-relative links against the page URL, and leaves
            # already-absolute links untouched.
            'BookLink': urljoin(url, hrefs[0]) if hrefs else '',
            'Author': authors[0] if authors else '',
            # The intro paragraph may be split into several text nodes.
            'Intro': ''.join(intro_parts).strip(),
        }

        print(data)
        books.append(data)

    return books

# Ranking pages share one URL pattern and differ only in the page number,
# e.g. https://www.qidian.com/rank/collect?chn=21&page=1
# range(1, 100) yields pages 1 through 99.
urlList = [
    'https://www.qidian.com/rank/collect?chn=21&page={}'.format(page)
    for page in range(1, 100)
]


# Only crawl when executed as a script, so importing this module does not
# trigger any network requests.
if __name__ == '__main__':
    for url in urlList:
        try:
            getBook(url)
        except requests.RequestException as exc:
            # One unreachable page should not abort the remaining pages.
            print('Failed to fetch {}: {}'.format(url, exc))




發表評論
所有評論
還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。
相關文章