用 Python 寫的基礎下載程序,可以下載多頁
# coding: utf-8
"""Download the articles linked from a Sina blog's article-list pages.

The script walks list pages FIRST_PAGE..LAST_PAGE, collects the article
links found on each page, then fetches every article and stores the raw
response body in OUTPUT_DIR, pausing DOWNLOAD_DELAY seconds between
downloads.

Page content is parsed as bytes so the helpers behave the same on
Python 2 (where ``bytes`` is ``str``) and Python 3.
"""
import os
import time

try:
    from urllib2 import urlopen  # Python 2, as used by the original script
except ImportError:
    from urllib.request import urlopen  # Python 3

# Article-list page template; %d is replaced by the page number.
LIST_URL = 'http://blog.sina.com.cn/s/articlelist_1191258123_0_%d.html'
FIRST_PAGE = 1
LAST_PAGE = 7
# Upper bound on the number of links taken from one list page.
MAX_LINKS_PER_PAGE = 50
OUTPUT_DIR = 'hanhan'
# Seconds to wait after each article download.
DOWNLOAD_DELAY = 4


def _native_str(raw):
    """Return *raw* as the interpreter's native ``str`` type.

    On Python 2 byte strings already are ``str`` and pass through
    untouched; on Python 3 they are decoded.
    """
    if isinstance(raw, str):
        return raw
    # NOTE(review): assumes the pages are UTF-8 encoded -- confirm.
    return raw.decode('utf-8', 'replace')


def extract_article_urls(page_html, limit=MAX_LINKS_PER_PAGE):
    """Return the article URLs found in one list page.

    Args:
        page_html: raw bytes of an article-list page.
        limit: maximum number of URLs to return.

    Returns:
        A list of URL strings in page order. A link is recognised as an
        ``<a title=`` marker followed by ``href=`` and then ``.html``;
        scanning stops at the first link missing any of the three.
    """
    urls = []
    anchor = page_html.find(b'<a title=')
    while anchor != -1 and len(urls) < limit:
        href = page_html.find(b'href=', anchor)
        if href == -1:
            break
        suffix = page_html.find(b'.html', href)
        if suffix == -1:
            break
        # +6 skips 'href=' and one more character (presumably the opening
        # quote); +5 keeps the '.html' suffix in the URL.
        urls.append(_native_str(page_html[href + 6:suffix + 5]))
        anchor = page_html.find(b'<a title=', suffix)
    return urls


def extract_title(article_html):
    """Return the article title, or ``''`` when the markers are missing.

    The title is the text between the ``SG_txta`` marker and the next
    ``</h``.
    """
    marker = article_html.find(b'SG_txta')
    if marker == -1:
        return ''
    end = article_html.find(b'</h', marker)
    if end == -1:
        return ''
    # +9 skips 'SG_txta' (7 characters) and two more characters
    # (presumably the closing quote and '>') -- confirm against the markup.
    return _native_str(article_html[marker + 9:end])


def article_filename(url):
    """Return the last path component of *url*, used as the file name.

    For the usual 26-character ``blog_<id>.html`` names this equals the
    original ``url[-26:]`` slice, but it can never contain a ``/``.
    """
    return url.rsplit('/', 1)[-1]


def collect_article_urls():
    """Fetch every list page and return all article URLs found."""
    urls = []
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        page_html = urlopen(LIST_URL % page).read()
        # Append rather than index from 0 on each page, so links from
        # earlier pages are not overwritten by later ones.
        for url in extract_article_urls(page_html):
            print(url)
            urls.append(url)
        print('%d find end ' % page)
    print('all down ')
    return urls


def download_articles(urls, output_dir=OUTPUT_DIR, delay=DOWNLOAD_DELAY):
    """Download each URL into *output_dir* and return the article titles.

    Args:
        urls: article URLs to fetch.
        output_dir: directory for the saved pages; created if missing.
        delay: seconds to sleep after each download.

    Returns:
        The list of extracted titles, in download order.
    """
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    titles = []
    # Iterate only over URLs that were actually collected; the original
    # fixed 350-iteration loop tried to open empty strings.
    for url in urls:
        content = urlopen(url).read()
        title = extract_title(content)
        titles.append(title)
        print(title)
        # Binary mode stores the response bytes unchanged, and the
        # ``with`` block guarantees the file is closed.
        with open(os.path.join(output_dir, article_filename(url)), 'wb') as out:
            out.write(content)
        print('downing  %s' % url)
        time.sleep(delay)
    print('down fished')
    return titles


def main():
    """Collect the article links, then download every article."""
    download_articles(collect_article_urls())


if __name__ == '__main__':
    main()