python爬取百思不得姐視頻

聲明:本文僅用於學習 Python,切勿用於非法用途

  1. #coding:utf-8

  2. from  Tkinter import *

  3. from ScrolledText import ScrolledText   ##scrollbar

  4. import re

  5. import threading

  6. import requests

  7. import sys

  8. import urllib

  9. reload(sys)

  10. sys.setdefaultencoding('utf-8')   ## output encodig utf-8

  11. url_name = [] ###url + name

  12. a = 1

  13. ## get the url_html

  14. def get():

  15.    global a

  16.    hd = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 \

  17.         (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}

  18.    url = 'http://www.budejie.com/'+str(a)

  19.    var1.set('have scrapyed page %s  film' % (a))

  20.    html = requests.get(url, headers=hd).text  ##.text if used for get html code

  21.    a += 1

  22.    url_pattern = re.compile(r'(<div class="j-r-list-c">.*?</div>.*?</div>)', re.S)  ##bianyi up

  23.    for i in url_content:

  24.        url_reg = r'data-mp4="(.*?)">' ### r  zhuanyi

  25.        url_items = re.findall(url_reg, i)

  26.        if url_items: #####if exists

  27.            name_reg = re.compile(r'<a href="/detail-.{8}.html">(.*?)</a>')

  28.            name_items = re.findall(name_reg, i)

  29.            for j,k in zip(name_items, url_items):#zip  two list yi yi dui yin

  30.                url_name.append([j,k])

  31.                print j,k

  32.    return url_name


  33. ### how to download films

  34. id = 1 # film's number

  35. def write():

  36.    global id

  37.    while id < 2:

  38.        url_name = get()          ### url + name

  39.        #print url_name

  40.        for i in url_name:

  41.            urllib.urlretrieve(i[1], 'video/%s.mp4' % (i[0].decode('utf-8')))

  42.            text.insert(END, str(id)+'.'+i[1]+'\n'+i[0]+'\n')

  43.            url_name.pop(0)

  44.            id += 1  

  45.    var1.set('scrapy over')          


  46. def start():

  47.    th = threading.Thread(target=write)

  48.    th.start()




  49. root = Tk()

  50. root.title('comk專屬')

  51. text = ScrolledText(root, font=('微軟雅黑', 10))

  52. text.grid()   ## made setting active

  53. button = Button(root, text='開始爬取', font=('微軟雅黑', 10), command=start)

  54. button.grid()  

  55. var1 = StringVar()

  56. label = Label(root, font=('微軟雅黑', 10), fg='red', textvariable=var1)

  57. label.grid()

  58. var1.set('comk來了...ready~~~')


  59. root.mainloop()


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章