閒暇之餘在玩《率土之濱》這個遊戲,感覺還不錯,想做個對戰模擬器。查了查官網有哪些數據可以用,發現只有基本的武將數據可用,而且沒有武將的成長數據。算了,能爬到什麼數據就爬什麼數據……
以下是代碼(Python 版本為 2.7)。本人是 Python 新手,請多多指教。
# -*- coding: utf-8 -*-
"""Scrape hero pages from stzb.163.com and print one comma-separated row each.

Written for Python 2.7 with BeautifulSoup 3 (``from BeautifulSoup import ...``).
Pages that cannot be fetched, or that contain no hero block, are skipped.
"""
from BeautifulSoup import BeautifulSoup  # For processing HTML
import urllib2
import sys
import re

# Python 2 only: make UTF-8 the implicit str<->unicode codec, so the UTF-8
# byte-string labels below can be matched against the unicode text that
# BeautifulSoup returns without raising UnicodeDecodeError.
reload(sys)
sys.setdefaultencoding('utf-8')

# Pages are numbered 100001 .. 100646 on the site.
HERO_COUNT = 646
HERO_URL = "http://stzb.163.com/herolist/100%03d.html"

# (label searched for in a <span>'s text, heroInfo attribute that receives it).
# NOTE(review): the labels must match the page text exactly -- confirm they are
# spelled the way the site spells them.
SPAN_FIELDS = (
    ('cost', 'heroCost'),
    ('兵種', 'herobingzhong'),
    ('攻擊距離', 'herojuli'),
    ('謀略', 'heromoulue'),
    ('初始攻擊', 'herogongji'),
    ('初始攻城', 'herogongcheng'),
    ('防禦', 'herofangyu'),
    ('速度', 'herosudu'),
)

# Column order of the printed row.
ROW_FIELDS = (
    'heroName', 'herobingzhong', 'heroCost', 'herojuli', 'herogongji',
    'heromoulue', 'herosudu', 'herogongcheng', 'herofangyu', 'herojineng',
    'heroother',
)


class heroInfo:
    """Plain record of the text scraped for one hero; every field is a string."""

    def __init__(self):
        self.heroName = ''       # text of the <h1> inside the hero block
        self.heroCost = ''       # span containing 'cost'
        self.herobingzhong = ''  # span containing the troop-type label
        self.herojuli = ''       # span containing the attack-range label
        self.heromoulue = ''     # span containing the strategy label
        self.herogongji = ''     # span containing the initial-attack label
        self.herogongcheng = ''  # span containing the initial-siege label
        self.herofangyu = ''     # span containing the defence label
        self.herosudu = ''       # span containing the speed label
        self.herojineng = ''     # <dd> text of the first <dl class="group">
        self.heroother = ''      # <dd> text of the last later <dl class="group">


def _fetch_page(url):
    """Return the body of *url* decoded from GBK, closing the connection."""
    page = urllib2.urlopen(url)
    try:
        return page.read().decode('gbk')
    finally:
        page.close()


def _parse_hero(html):
    """Build a heroInfo from one page, or return None if it has no hero block."""
    soup = BeautifulSoup(html)
    content = soup.find(name='div', attrs={'class': 'role-content'})
    # Compare with ``is None``: an empty Tag is falsy, so ``not content``
    # would wrongly reject a present-but-empty block.
    if content is None or content.h1 is None:
        return None

    hinfo = heroInfo()
    hinfo.heroName = content.h1.text

    groups = content.findAll(name='dl', attrs={'class': 'group'})
    for index, group in enumerate(groups):
        if group.dd is None:
            continue
        if index == 0:
            hinfo.herojineng = group.dd.text
        else:
            # Later groups overwrite each other; the last one wins.
            hinfo.heroother = group.dd.text

    for span in content.findAll('span'):
        text = span.text
        # Independent checks on purpose: one span may match several labels.
        for label, attr in SPAN_FIELDS:
            if label in text:
                setattr(hinfo, attr, text)
    return hinfo


def _format_row(hinfo):
    """Join the hero's fields into one comma-separated line."""
    return ','.join(getattr(hinfo, attr) for attr in ROW_FIELDS)


# Every hero parsed successfully during this run.
herolist = []

for i in range(HERO_COUNT):
    url = HERO_URL % (i + 1)
    try:
        html = _fetch_page(url)
    except urllib2.URLError as err:
        # Missing pages (e.g. removed heroes) are reported and skipped.
        print(err)
        continue

    hinfo = _parse_hero(html)
    if hinfo is None:
        # Report on stderr so the data rows on stdout stay clean.
        sys.stderr.write("skipped %s: no role-content block\n" % url)
        continue

    herolist.append(hinfo)
    print(_format_row(hinfo))
爬下來的數據稍有瑕疵:網易似乎刪除了部分武將的數據,網頁上實際存在的武將只有大約 430 個。理論上裝了 BeautifulSoup 就能直接運行,喜歡的可以拿去一試。