建立一個 session 會話對象
首先建立一個 session 會話對象,利用會話對象 session 去訪問網頁
訪問 python 官網。async 與 await 關鍵字用來將函數定義爲異步操作(協程),這是 aiohttp 的基本使用方式。
import aiohttp
import asyncio
async def hello(URL):
    """Fetch ``URL`` with a fresh aiohttp session and print the body text.

    Args:
        URL: Address to request with HTTP GET.
    """
    # Both objects are used as async context managers so they are closed
    # when their block exits.
    async with aiohttp.ClientSession() as session:
        async with session.get(URL) as response:
            # Print the decoded body text, not the response object itself.
            text = await response.text()
            print(text)
if __name__ == '__main__':
    # Script entry point: run the hello() coroutine to completion on the
    # event loop returned by asyncio.get_event_loop().
    loop = asyncio.get_event_loop()
    URl = 'http://python.org'
    loop.run_until_complete(hello(URl))
請求頭,超時,cookies,代理
在第二段代碼修改
from aiohttp import ClientSession
import aiohttp
import asyncio
# Request headers sent along with the GET request
headers = {'content-type': "application/json"}


async def hello(URL):
    """Request ``URL`` with the module-level ``headers`` and print the body.

    Args:
        URL: Address to request with HTTP GET.
    """
    async with ClientSession() as session, \
            session.get(URL, headers=headers) as reply:
        body = await reply.text()
        print(body)
if __name__ == '__main__':
    # Script entry point: run the hello() coroutine to completion on the
    # event loop returned by asyncio.get_event_loop().
    loop = asyncio.get_event_loop()
    URl = 'http://python.org'
    loop.run_until_complete(hello(URl))
# Timeout configured at the session level
timeout = aiohttp.ClientTimeout(total=60)


async def hello(URL):
    """GET ``URL`` through a session created with ``timeout``; print the body.

    Args:
        URL: Address to request with HTTP GET.
    """
    async with ClientSession(timeout=timeout) as session, \
            session.get(URL) as reply:
        body = await reply.text()
        print(body)
# Timeout configured on the individual request
timeout = aiohttp.ClientTimeout(total=60)


async def hello(URL):
    """GET ``URL`` passing ``timeout`` to the request itself; print the body.

    Args:
        URL: Address to request with HTTP GET.
    """
    async with ClientSession() as session, \
            session.get(URL, timeout=timeout) as reply:
        body = await reply.text()
        print(body)
# Cookies handed to the session when it is created
cookies = {'cookies': 'working'}


async def hello(URL):
    """GET ``URL`` through a session created with ``cookies``; print the body.

    Args:
        URL: Address to request with HTTP GET.
    """
    async with ClientSession(cookies=cookies) as session, \
            session.get(URL) as reply:
        body = await reply.text()
        print(body)
# Proxy address passed to the request
proxy = 'http://117.191.11.72:8080'


async def hello(URL):
    """GET ``URL`` via the module-level ``proxy`` and print the body text.

    Args:
        URL: Address to request with HTTP GET.
    """
    async with ClientSession() as session, \
            session.get(URL, proxy=proxy) as reply:
        body = await reply.text()
        print(body)
# Proxy that requires authentication
async def hello(URL):
    """GET ``URL`` through an authenticating proxy and print the body text.

    Args:
        URL: Address to request with HTTP GET.
    """
    # Credentials for the proxy, supplied via the ``proxy_auth`` argument.
    proxy_auth = aiohttp.BasicAuth('user', 'pass')
    async with ClientSession() as session:
        # Request the caller's URL instead of a hard-coded address, so the
        # ``URL`` parameter is actually honoured.
        async with session.get(URL,
                               proxy='http://proxy.com',
                               proxy_auth=proxy_auth) as response:
            text = await response.text()
            print(text)
get 請求方法
get 請求分爲兩種:不帶參數的請求,以及帶參數的請求。
# Without parameters
async def hello(URL):
    """Send a plain GET request to ``URL`` and print the body text.

    Args:
        URL: Address to request with HTTP GET.
    """
    async with ClientSession() as session, session.get(URL) as reply:
        body = await reply.text()
        print(body)
# With parameters
# Parameters embedded directly in the URL's query string
async def hello(URL='http://httpbin.org/get?key=python'):
    """GET a URL that already carries its query string; print the body text.

    Args:
        URL: Address to request, query string included. Defaults to the
            httpbin demo URL, which was previously assigned to a misspelled
            local and never used.
    """
    async with ClientSession() as session:
        async with session.get(URL) as response:
            text = await response.text()
            print(text)
# Parameters supplied through the ``params`` argument
async def hello(URL='http://httpbin.org/get'):
    """GET ``URL`` with query parameters passed via ``params``; print the body.

    Args:
        URL: Address to request, without a query string. Defaults to the
            httpbin demo URL, which was previously assigned to a misspelled
            local and never used.
    """
    params = {'wd': 'python'}
    async with ClientSession() as session:
        async with session.get(URL, params=params) as response:
            text = await response.text()
            print(text)
post 請求
# Payload supplied as a dict
async def hello(URL='http://httpbin.org/post'):
    """POST a dict payload to ``URL`` via ``data=`` and print the body text.

    Args:
        URL: Address to send the POST request to. Defaults to the httpbin
            demo URL, which was previously assigned to a misspelled local
            and never used.
    """
    data = {'wd': 'python'}
    async with ClientSession() as session:
        # This section demonstrates POST, so use session.post rather than
        # session.get.
        async with session.post(URL, data=data) as response:
            text = await response.text()
            print(text)
# Payload supplied as JSON
async def hello(URL='http://httpbin.org/post'):
    """POST a dict to ``URL`` via ``json=`` and print the body text.

    Args:
        URL: Address to send the POST request to. Defaults to the httpbin
            demo URL, which was previously assigned to a misspelled local
            and never used.
    """
    data = {'wd': 'python'}
    async with ClientSession() as session:
        # This section demonstrates POST, so use session.post rather than
        # session.get.
        async with session.post(URL, json=data) as response:
            text = await response.text()
            print(text)
# Payload supplied as a string
async def hello(URL='http://httpbin.org/post'):
    """POST a string payload to ``URL`` via ``data=`` and print the body text.

    Args:
        URL: Address to send the POST request to. Defaults to the httpbin
            demo URL, which was previously assigned to a misspelled local
            and never used.
    """
    data = 'python'
    async with ClientSession() as session:
        # This section demonstrates POST, so use session.post rather than
        # session.get.
        async with session.post(URL, data=data) as response:
            text = await response.text()
            print(text)
# Payload supplied as a byte stream (file upload)
async def hello(URL='http://httpbin.org/post'):
    """POST a bytes payload to ``URL`` via ``data=`` and print the body text.

    Args:
        URL: Address to send the POST request to. Defaults to the httpbin
            demo URL, which was previously assigned to a misspelled local
            and never used.
    """
    # Send real bytes so this example differs from the string one.
    # NOTE(review): for an actual upload, a file opened in 'rb' mode is
    # presumably passed as ``data`` instead - confirm against aiohttp docs.
    data = b'python'
    async with ClientSession() as session:
        # This section demonstrates POST, so use session.post rather than
        # session.get.
        async with session.post(URL, data=data) as response:
            text = await response.text()
            print(text)
獲取響應內容方法
# 設置編碼格式
response = await response.text(encoding='utf-8')
# 以字節流格式返回
response = await response.read()
# 以 json 格式返回
response = await response.json()
# 獲取響應狀態碼
response = await response.status
# 獲取響應的請求頭
response = await response.headers
# 獲取 url 地址
url = response.url
異步爬取小說排行榜
import asyncio
import csv
from aiohttp import ClientSession
from bs4 import BeautifulSoup
# Page fetcher: requests a page and hands back its content
async def getData(url, headers):
    """Send a GET request to ``url`` with ``headers`` and return the body text.

    Args:
        url: Address of the page to fetch.
        headers: Request headers to send with the GET request.

    Returns:
        The value produced by awaiting ``response.text()``.
    """
    # Session and request are both entered as async context managers.
    async with ClientSession() as session, \
            session.get(url, headers=headers) as reply:
        page = await reply.text()
        return page
def savaData(result):
    """Parse the fetched pages and append one CSV row per book to data.csv.

    Each page is parsed with BeautifulSoup using the ``lxml`` parser. For
    every ``div`` with class ``book-mid-info``, the text of its ``h4``
    (title), its ``a.name`` (author) and its ``p.update`` (update info) is
    collected as one row.

    Args:
        result: Iterable of HTML page sources.
    """
    rows = []
    for page in result:
        soup = BeautifulSoup(page, 'lxml')
        for info in soup.find_all('div', class_='book-mid-info'):
            rows.append([
                info.find('h4').getText(),                    # book title
                info.find('a', class_='name').getText(),      # author
                info.find('p', class_='update').getText(),    # update info
            ])

    # Nothing parsed: leave the file untouched, as the per-row version did.
    if not rows:
        return

    # Open the file once for the whole batch; the context manager closes it
    # even if writing fails.
    with open('data.csv', 'a', encoding='utf8', newline='') as csv_file:
        csv.writer(csv_file).writerows(rows)
def run():
    """Fetch pages 1-25 of ``url`` concurrently, save them, print the count.

    Uses the module-level ``url`` template, ``headers``, ``tasks`` list and
    ``loop``, which the ``__main__`` section defines before calling this.
    """
    # Schedule one getData() request per page number and add each to the
    # shared ``tasks`` list in place.
    tasks.extend(
        asyncio.ensure_future(getData(url.format(page), headers))
        for page in range(1, 26)
    )
    # Wait for every request to finish and collect all bodies together.
    pages = loop.run_until_complete(asyncio.gather(*tasks))
    savaData(pages)
    print(len(pages))
if __name__ == '__main__':
    import time

    # Start time, used to print the elapsed run time at the end
    start = time.time()

    # Module-level state that run() and getData() read
    tasks = []
    url = 'https://www.qidian.com/rank/hotsales?page={}'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36'
    }

    # Obtain the event loop object that run() uses
    loop = asyncio.get_event_loop()
    # Call run()
    run()

    end = time.time()
    print(end - start)