Using coroutines to download the images on a web page (Python)

The script below uses asyncio coroutines: it first collects image URLs from a range of pages, then downloads each image and writes it to disk.

# coding: utf-8
import requests
from pyquery import PyQuery as pq  # note: the class is PyQuery; only used by the optional helper below
import re
import uuid
import asyncio
import time

now = lambda: time.time()


# Range of page URLs to fetch (the host is omitted in the original post; substitute the real site)
url_list = ['http:/index{}.html'.format(i) for i in range(50, 60)]


# Download one image and save it to disk
async def down_img(url, uuid_id):
    try:
        session = requests.session()
        # requests is blocking, so this call ties up the event loop;
        # only the sleep below actually yields control to other coroutines.
        img_data = session.get(url, verify=False).content
        await asyncio.sleep(3)
        # uuid_id already carries the per-page counter, so it is enough for a unique filename
        with open('E:\\img\\' + uuid_id + '.jpg', 'wb') as ff:
            ff.write(img_data)
        print(now())
    except Exception as e:
        print(e)


# Fetch a page and extract the image URLs on it
async def get_pictures(url):
    try:
        session = requests.session()
        response = session.get(url)
        print(url)
        print(response)

        # Grab everything that looks like <img src="...".jpg (the extension is re-added later)
        img_list = re.findall(r'<img src="(.*?)\.jpg', response.text)
        # await asyncio.sleep(3)
        print('111:', now())
        return img_list
    except Exception as e:
        print(e)
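

# An alternative to the regex above, assuming regular HTML markup: parse the page with the
# PyQuery import at the top (the original post imports pyquery but never uses it). This is
# only a sketch; unlike the regex, it returns complete image URLs including the .jpg suffix.
def extract_img_urls(html):
    doc = pq(html)
    return [img.attr('src') for img in doc('img').items()
            if img.attr('src') and img.attr('src').endswith('.jpg')]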


if __name__ == '__main__':
    start = now()

    # First, fetch every page and collect the image URLs it contains
    tasks_list = []
    for url in url_list:
        tasks_list.append(asyncio.ensure_future(get_pictures(url)))

    loop = asyncio.get_event_loop()
    loop.run_until_complete(asyncio.wait(tasks_list))

    # Build a mapping of unique filename -> image URL
    image_dict = {}
    for task in tasks_list:
        uuid_id = str(uuid.uuid4()).replace('-', '')
        count = 0
        try:
            for t in task.result() or []:
                count += 1
                image_dict[uuid_id + '_' + str(count)] = t + '.jpg'
        except Exception:
            pass

    # Then download every image on the same event loop
    img_task_list = []
    for k, v in image_dict.items():
        img_task_list.append(asyncio.ensure_future(down_img(v, k)))

    loop.run_until_complete(asyncio.wait(img_task_list))

    print('use time:', now() - start)
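
One caveat: requests is a blocking library, so the coroutines above only yield to each other at the await asyncio.sleep calls; the HTTP requests themselves still run one at a time. A genuinely concurrent version needs an asynchronous HTTP client. Below is a minimal sketch using aiohttp (not part of the original code); the URL list, the regex, and the E:\img output directory are carried over from the script above as placeholders, so adjust them for a real site.

import asyncio
import re
import uuid

import aiohttp


async def fetch_page(session, url):
    # Fetch one page and return the image URLs found on it
    async with session.get(url) as resp:
        html = await resp.text()
    return re.findall(r'<img src="(.*?\.jpg)"', html)


async def save_image(session, url, path):
    # Download one image and write it to disk
    async with session.get(url, ssl=False) as resp:  # ssl=False mirrors verify=False above
        data = await resp.read()
    with open(path, 'wb') as fh:
        fh.write(data)


async def main(url_list):
    async with aiohttp.ClientSession() as session:
        # Fetch all pages concurrently; exceptions are returned instead of raised
        pages = await asyncio.gather(*(fetch_page(session, u) for u in url_list),
                                     return_exceptions=True)

        # Flatten the per-page results into (image url, target path) pairs
        downloads = []
        for img_urls in pages:
            if isinstance(img_urls, Exception):
                continue
            for img_url in img_urls:
                downloads.append((img_url, 'E:\\img\\{}.jpg'.format(uuid.uuid4().hex)))

        # Download all images concurrently
        await asyncio.gather(*(save_image(session, u, p) for u, p in downloads),
                             return_exceptions=True)


# Usage: asyncio.run(main(url_list)) with the url_list defined at the top of the script

Here asyncio.gather replaces the two run_until_complete passes: page fetching and image downloading each become one concurrent batch sharing a single ClientSession.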