import urllib.request
import re as gg
import os,string,sys
import easygui as g
# NOTE(review): module-level list that no function in the visible code reads
# or appends to; presumably meant to collect matches - confirm before removing.
result = []
# urllib.request.urlopen(url): fetch the source of a web page.
# urllib.request.urlretrieve(url, 'filename'): download a web resource to a local file.
def getyuanma(_url, save_path=r'C:\Users\Administrator\Desktop\python\爬蟲\源碼1.txt'):
    """Download the page at ``_url`` and save its text to ``save_path``.

    The response body is decoded as UTF-8 and written back out as UTF-8,
    replacing any previous content of the target file.

    Args:
        _url: Address of the page to fetch; passed straight to
            ``urllib.request.urlopen``.
        save_path: File the page text is written to. The default is the
            fixed location this function has always written to.

    Returns:
        None. The only result is the file written to ``save_path``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the body is not valid UTF-8.
        OSError: If ``save_path`` cannot be opened for writing.
    """
    # Use context managers so both the response and the output file are
    # closed even when reading, decoding or writing fails.
    with urllib.request.urlopen(_url) as response:
        # Decode before opening the output file: a decode failure must not
        # truncate a previously saved page.
        page_text = response.read().decode('UTF-8')
    with open(save_path, 'w', encoding='utf-8') as out_file:
        out_file.write(page_text)
def getQQnum(file_url):
    """Print the address-like strings found on each line of a saved page.

    A match is 8 to 11 digits, an ``@``, up to 10 ASCII letters/digits and
    then up to 4 further characters (presumably QQ mail addresses such as
    ``123456789@qq.com``). For every line with at least one match, the list
    of that line's matches is printed.

    Args:
        file_url: Path of the UTF-8 text file to scan.

    Returns:
        None. Matches are only printed.
    """
    # NOTE(review): the trailing ``.`` is unescaped, so it matches any
    # character, not just a literal dot. Kept as-is to preserve the output.
    pattern = gg.compile(r'([0-9]{8,11}\@[0-9a-zA-Z]{0,10}.{0,4})')
    # ``with`` guarantees the file is closed; it was previously left open.
    with open(file_url, 'r', encoding='utf-8') as page_file:
        for line in page_file:
            matches = pattern.findall(line)
            if matches:
                print(matches)
def getyeshu(file_url):
    """Return the page number attached to the "last page" link of a saved page.

    The file is scanned line by line for ``pn=<1-3 digits>">尾頁``; the
    digits of the first occurrence are returned.

    Args:
        file_url: Path of the UTF-8 text file to scan.

    Returns:
        The digits as a string (e.g. ``"45"``), or ``None`` when no line
        contains the link.
    """
    pattern = gg.compile(r'pn=([0-9]{1,3})\">尾頁')
    # ``with`` closes the file on every exit path, including the early return.
    with open(file_url, 'r', encoding='utf-8') as page_file:
        for line in page_file:
            found = pattern.search(line)
            if found:
                return found.group(1)
    return None
def main():
    """Ask for a URL, then scan every numbered page of it for addresses.

    Steps:
      1. Prompt for the start URL with an easygui input box.
      2. Download that page and read the page count from its "last page" link.
      3. For each page ``1..count``, download ``<url>?pn=<page>`` and print
         the address-like matches found in it.

    Nothing is done when the dialog is cancelled or left empty, or when the
    first page carries no page count.
    """
    # File that getyuanma() writes by default and that the scanners read back.
    saved_page = r'C:\Users\Administrator\Desktop\python\爬蟲\源碼1.txt'

    _url1 = g.enterbox("請輸入鏈接地址:","輸入地址")
    # enterbox() returns None on Cancel; urlopen() would crash on that.
    if not _url1:
        return

    getyuanma(_url1)
    print(_url1)

    _num = getyeshu(saved_page)
    if _num is None:
        return

    for page_no in range(1, int(_num) + 1):
        print(page_no)
        # Each download overwrites the saved file, so scan it right away.
        getyuanma(_url1 + r'?pn=' + str(page_no))
        getQQnum(saved_page)
# Run the scraper only when this file is executed as a script, so importing
# the module does not pop up the input dialog.
if __name__ == "__main__":
    main()