從天氣網爬取歷史溫度數據:想去重慶玩,先看看往年的溫度數據,這個夏天重慶是否會像火爐呢?
我找的是一個可以查詢每日溫度範圍的站點,數據從2011年才開始有,而且有些天的數據缺失。
import urllib.request as request
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
import datetime
# Fetch the daily temperature ranges of one city for a span of months within
# one year and write them to a file.
# city: the city as a string; year, month_from, month_to: integers (the month
# span is inclusive); fileName: name of the file to write.
def getData(city, year, month_from, month_to, fileName):
    """Scrape daily temperature ranges from m.tianqi.com into a text file.

    One page is requested per month, at
    ``https://m.tianqi.com/lishi/<city>/<YYYYMM>.html``. For every day whose
    link is present on the page, one line ``YYYYMMDD,<range>`` is written,
    where ``<range>`` is the text that precedes "℃" in the day's entry
    (something like ``10~20``). Days with no link on the page are skipped.

    Args:
        city: City name as used in the site's URL path, e.g. "chongqing".
        year: Year as an integer.
        month_from: First month to fetch (inclusive).
        month_to: Last month to fetch (inclusive).
        fileName: Path of the output file; it is created or truncated.

    Raises:
        Errors from ``urlopen`` propagate unchanged. ``AttributeError`` or
        ``ValueError`` is raised if a day's entry lacks the expected
        ``dd.txt2`` node or the "℃" marker.
    """
    # A browser-like User-Agent, to avoid being rejected as a crawler.
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    site = "https://m.tianqi.com"
    is_leap = (year % 4 == 0 and year % 100 != 0) or year % 400 == 0

    # The context manager closes the file even when a request or the parsing
    # of a page raises part-way through.
    with open(fileName, 'w') as out:
        for month in range(month_from, month_to + 1):
            # Months are zero-padded in the URL, e.g. 201808.
            year_month = str(year) + str(month).zfill(2)
            # Site-relative path; the per-day links are this path plus "DD".
            path = "/lishi/" + city + "/" + year_month
            url = site + path + ".html"

            req = request.Request(url=url, headers=headers)
            html = request.urlopen(req).read()
            soup = BeautifulSoup(html, "html.parser")

            if month == 2:
                # Only February needs the leap-year check.
                days = 29 if is_leap else 28
            else:
                # Per the original author, the site has no data for day 31.
                days = 30

            for day_number in range(1, days + 1):
                day = str(day_number).zfill(2)
                link = soup.find("a", href=path + day)
                if link is None:
                    # Some days are missing from the site; skip them.
                    continue
                temperature = link.find("dd", class_="txt2").get_text()
                # Keep only the part before the unit, e.g. "10~20".
                temperature = temperature[:temperature.index("℃")]
                out.write(year_month + day + "," + str(temperature) + "\n")
# Fetch the data of several cities for several years in one go.
def getMoreData(cities, from_year, to_year):
    """Download one file per city and year via ``getData``.

    Each file is named ``<city>_<year>.csv``. For the current year only the
    months from January up to the current month are requested; for every
    other year months 1 to 12 are requested.

    Args:
        cities: Iterable of city names as used in the site's URL path.
        from_year: First year to fetch (inclusive).
        to_year: Last year to fetch (inclusive).
    """
    # Read the clock once so that the year test and the month limit come from
    # the same instant; two separate now() calls can disagree across a
    # year boundary and truncate a finished year to January.
    now = datetime.datetime.now()
    for city in cities:
        for year in range(from_year, to_year + 1):
            fileName = city + "_" + str(year) + ".csv"  # naming scheme
            last_month = now.month if year == now.year else 12
            getData(city, year, 1, last_month, fileName)
# getMoreData(["chongqing","chengdu"],2011,2019)
# Plotting
def plotData(fileName):
    """Plot the daily low and high temperatures stored in one data file.

    The file is expected to hold lines of the form ``YYYYMMDD,low~high`` and
    to be named ``<city>_<year>.csv`` (the naming used by ``getMoreData``);
    the city and year shown in the title are taken from the file name. A new
    figure is created but not shown; the caller decides when to call
    ``plt.show()``.

    Args:
        fileName: Path of the data file to plot.

    Raises:
        ValueError: If the file name contains no "_" or a temperature field
            cannot be parsed as ``<int>~<int>``.
    """
    separator = fileName.index("_")
    city = fileName[:separator]
    year = fileName[separator + 1:-4]  # drop the 4-character ".csv" suffix

    x = []      # month and day (MMDD)
    lows = []   # low temperatures
    highs = []  # high temperatures
    # Read line by line: blank lines are skipped and a last row without a
    # trailing newline is still kept. The file is closed even if parsing fails.
    with open(fileName, 'r') as file:
        for line in file:
            line = line.strip()
            if not line:
                continue
            fields = line.split(",")
            date = fields[0]
            low_text, _, high_text = fields[1].partition("~")
            x.append(date[4:])
            lows.append(int(low_text))
            highs.append(int(high_text))

    plt.figure()
    plt.plot(x,lows,'b^--',label="lowest T")
    plt.plot(x,highs,'ro--',label="highest T")
    plt.title("Temperature in year "+year+" of city:"+city)
    # Horizontal reference line marking high temperature (35 degrees).
    plt.hlines(35,0,len(x),colors='r',linestyles="dashed",label="35°C")
    plt.xlabel("Date")
    plt.ylabel("Temperature(°C)")
    # Label only every 15th data point to keep the axis readable.
    plt.xticks(x[::15],fontsize=8,rotation=45)
    # Show the legend; let matplotlib pick its position.
    plt.legend(loc='best')
# Draw several plots in one go.
def plotMoreData(fileNames):
    """Plot every given data file in its own figure and show them together.

    Args:
        fileNames: Iterable of data file paths, each passed to ``plotData``.
    """
    # Remember which figures already existed so that only the ones created
    # here are closed afterwards.
    existing = set(plt.get_fignums())
    for fileName in fileNames:
        plotData(fileName)
    plt.show()
    # plotData opens one figure per file; a bare plt.close() would close only
    # the current one and leave the rest open on non-blocking backends.
    for number in plt.get_fignums():
        if number not in existing:
            plt.close(number)
# plotMoreData(["chengdu_2011.csv","chengdu_2012.csv"])