While learning multi-coroutine programming, I tried out a small project: fetching the pages behind a thousand or so URLs. The code is as follows:
import gevent
from gevent.queue import Queue
import time
import requests
from gevent import monkey  # mark the blocking-IO calls below
monkey.patch_all()  # patch the standard library so blocking IO yields to other coroutines
link_list = []
with open(r'D:\...\alexa.txt', 'r') as file:
    file_list = file.readlines()
    for eachone in file_list:
        # take the second tab-separated field and strip the trailing newline
        link = eachone.split('\t')[1]
        link = link.replace('\n', '')
        link_list.append(link)
start = time.time()
def crawler(index):
    Process_id = 'Process-' + str(index)
    while not workQueue.empty():
        # another coroutine may drain the queue between empty() and get();
        # after 2 s on an empty queue, get() raises gevent.queue.Empty
        url = workQueue.get(timeout=2)
        try:
            r = requests.get(url, timeout=20)
            print(Process_id, workQueue.qsize(), r.status_code, url)
        except Exception as e:
            print(Process_id, workQueue.qsize(), url, 'Error: ', e)
def boss():
    for url in link_list:
        workQueue.put_nowait(url)  # enqueue without blocking
if __name__ == '__main__':
    workQueue = Queue(1000)
    # the listing was truncated here; the lines below restore the usual
    # gevent + Queue pattern: fill the queue first, then start the crawlers
    gevent.spawn(boss).join()
    jobs = []
    for i in range(10):  # 10 crawler coroutines; adjust to taste
        jobs.append(gevent.spawn(crawler, i))
    gevent.joinall(jobs)
    end = time.time()
    print('gevent + Queue crawler total time:', end - start)
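For comparison, the same crawl can be written more compactly with gevent.pool.Pool, which caps the number of concurrent coroutines without a hand-rolled queue. The following is a minimal sketch, not part of the original listing: fetch_one is a hypothetical helper, and the pool size of 10 is an arbitrary choice.

from gevent import monkey
monkey.patch_all()  # patch blocking IO before the other imports, as above
import requests
from gevent.pool import Pool

def fetch_one(url):
    # hypothetical helper: fetch a single page and report its status
    try:
        r = requests.get(url, timeout=20)
        print(r.status_code, url)
    except Exception as e:
        print(url, 'Error:', e)

pool = Pool(10)  # at most 10 coroutines in flight at once
pool.map(fetch_one, link_list)  # blocks until every URL has been tried

Pool trades the explicit boss/worker structure for brevity: instead of pre-filling a bounded Queue and checking empty(), the pool hands each URL to the next free coroutine, so the race noted in crawler() cannot occur.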