"""
爬取mzitu網站的美女圖片
使用requests庫請求鏈接獲取響應
使用lxml庫解析html獲取資源鏈接
"""
import os

import requests
from lxml import html
etree = html.etree
# HTTP headers sent with every request: a desktop Chrome User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/77.0.3865.120 Safari/537.36',
}
# Tag listing page; every gallery linked from it is downloaded.
url = 'https://www.mzitu.com/tag/meitun/'

# Directory the images are written to, relative to the current working
# directory. It is created on start-up if it does not exist yet.
IMAGE_DIR = 'image'
# Seconds to wait on a request before giving up, so a stalled connection
# cannot hang the script forever.
REQUEST_TIMEOUT = 10


def page_urls(gallery_url, page_count):
    """Return the URLs of pages 1..page_count (inclusive) of a gallery.

    Each page URL is the gallery URL followed by ``/<page number>``.
    """
    # range() excludes its end value, hence the "+ 1" to reach the last page.
    return [gallery_url + '/' + str(x) for x in range(1, page_count + 1)]


def image_path(index):
    """Return the file path used to store the image with running number *index*."""
    return IMAGE_DIR + '/' + str(index) + '.png'


def first_match(matches):
    """Return the first element of an XPath result list, or None if it is empty."""
    return matches[0] if matches else None


def select_first(tree, expression):
    """Return the first XPath match of *expression* in *tree*, or None.

    None is also returned when *tree* itself is None (no parsed document).
    """
    if tree is None:
        return None
    return first_match(tree.xpath(expression))


def fetch_tree(page_url):
    """Download *page_url* and return the body parsed with ``etree.HTML``.

    Raises:
        requests.RequestException: on connection problems, timeouts, or an
            HTTP error status.
    """
    response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return etree.HTML(response.text)


def download_gallery(gallery_url, next_index):
    """Save the image of every page of one gallery.

    Args:
        gallery_url: URL of the gallery's first page.
        next_index: running number to use for the first saved image.

    Returns:
        The running number to use for the next image; it grows by one for
        every image actually written.

    Raises:
        requests.RequestException: if the gallery's first page cannot be
            fetched. Failures on individual pages are reported and skipped.
    """
    gallery = fetch_tree(gallery_url)
    # NOTE(review): this span is assumed to hold the number of the last page
    # of the gallery - confirm against the live page layout.
    last_page = select_first(gallery, '/html/body/div[2]/div[1]/div[4]/a[5]/span/text()')
    try:
        page_count = int(last_page)
    except (TypeError, ValueError):
        print('skipping %s: page count not found' % gallery_url)
        return next_index

    # Image list pages
    for page_url in page_urls(gallery_url, page_count):
        try:
            img_url = select_first(fetch_tree(page_url),
                                   '/html/body/div[2]/div[1]/div[3]/p/a/img/@src')
            if img_url is None:
                print('skipping %s: image link not found' % page_url)
                continue
            image = requests.get(img_url, headers=headers, timeout=REQUEST_TIMEOUT)
            # Do not store an error page under an image file name.
            image.raise_for_status()
        except requests.RequestException as exc:
            print('skipping %s: %s' % (page_url, exc))
            continue
        # Images are stored in IMAGE_DIR under the current directory; point
        # IMAGE_DIR at an absolute path (e.g. D:/image) to store them elsewhere.
        with open(image_path(next_index), 'wb') as f:
            f.write(image.content)
        next_index += 1
    return next_index


def main():
    """Download the images of every gallery linked from the listing page ``url``."""
    os.makedirs(IMAGE_DIR, exist_ok=True)
    listing = fetch_tree(url)
    items = listing.xpath('//*[@id="pins"]/li') if listing is not None else []
    i = 0
    for item in items:
        gallery_url = first_match(item.xpath('./a/@href'))
        if gallery_url is None:
            continue
        try:
            i = download_gallery(gallery_url, i)
        except requests.RequestException as exc:
            # One unreachable gallery should not abort the remaining ones.
            print('skipping %s: %s' % (gallery_url, exc))


if __name__ == '__main__':
    main()