# coding=utf-8
import threading, queue, time, urllib
from urllib import request
import work.baidu_pc as bd_pc
# Load keywords from seo.txt (one CSV row per line) and enqueue the first
# field of each row as a work item for the crawler threads.
urlQueue = queue.Queue()
lines = []
with open('seo.txt', encoding='UTF-8') as f:
    for line in f:
        # split(',') already returns a list — no extra list() wrapper needed.
        lines.append(line.strip('\n').split(','))
for wd in lines:
    # Only the first CSV field (the keyword) is queued.
    urlQueue.put(str(wd[0]))
def fetchUrl(urlQueue):
    """Worker loop: drain keywords from *urlQueue* and query each one's rank.

    Runs until the queue is empty, then returns. A failure while ranking a
    single keyword is swallowed so one bad keyword cannot kill the worker.

    :param urlQueue: queue.Queue of keyword strings, shared across workers.
    """
    while True:
        try:
            # Non-blocking read; queue.Empty is the documented signal that
            # all work has been consumed (broad `except Exception` would
            # hide real bugs here).
            wd = urlQueue.get_nowait()
        except queue.Empty:
            break
        # current_thread() replaces currentThread(), deprecated and removed
        # in Python 3.12.
        print('Current Thread Name %s, wd: %s ' % (threading.current_thread().name, wd))
        try:
            # Rank lookup for this keyword against the target site.
            result = bd_pc.getOrder(wd, 'www.51seo.net')
            print(result)
        except Exception:
            # Best-effort scraping: skip keywords whose lookup fails.
            continue
if __name__ == '__main__':
    start = time.time()
    # Worker count — tune this to control crawl speed.
    threadNum = 10
    workers = [
        threading.Thread(target=fetchUrl, args=(urlQueue,))
        for _ in range(threadNum)
    ]
    for worker in workers:
        worker.start()
    # Joining every worker in turn guarantees the main thread exits last,
    # without any worker blocking another.
    for worker in workers:
        worker.join()
    print('Done, Time cost: %s ' % (time.time() - start))
# Source article: 搜索引擎關鍵詞排名爬蟲,採集獲取
# (search-engine keyword-ranking crawler / scraper)