Python 爬取天氣預報數據

安裝依賴

更新pip(需要以管理員身份運行)

pip install --upgrade pip

如果執行pip install的時候就一直提示 Script file ‘E:\Anaconda\Scripts\pip-script.py’ is not present. 這個錯誤

使用 easy_install pip 

這個命令,再重新安裝一下pip就好了。

安裝lxml

參考:https://www.jianshu.com/p/6734a20fa6dd

1. 到 lxml 預編譯包的下載頁面(鏈接見上面的參考文章), 搜索lxml, 下載與本機 Python 版本和系統位數對應的 lxml .whl 文件(預編譯過的)

如:我本機python的版本是3.9, 64位win10系統, 那麼就選擇 lxml-4.6.4-cp39-cp39-win_amd64.whl

注意: 如果沒有安裝wheel, 要先安裝wheel

pip install wheel

2. 下載完成後, 把下載好的.whl文件放在python的根目錄下, 比如我的是: D:\Python\Python39

在該目錄下執行:

python -m pip install lxml-4.6.4-cp39-cp39-win_amd64.whl

安裝beautifulsoup4

pip install beautifulsoup4

編寫代碼 weather.py

參考:https://zhuanlan.zhihu.com/p/85783510

from bs4 import BeautifulSoup
from bs4 import UnicodeDammit  # BS 內置庫,猜測文檔編碼
import urllib.request

# Forecast page on weather.com.cn; 101080101 is the city code in the URL.
url = 'http://www.weather.com.cn/weather/101080101.shtml'

try:
    # Send a browser-like User-Agent header with the request.
    headers = {'User-Agent':'Mozilla/5.0(Windows;U;Windows NT 6.0 x64;en-US;rv:1.9pre)Gecko/20191008 Minefield/3.0.2pre'}
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the HTTP response even when read() fails;
    # the timeout keeps the script from blocking forever on a silent server.
    with urllib.request.urlopen(req, timeout=10) as response:
        raw = response.read()
    # Let UnicodeDammit try utf-8 first, then gbk, when decoding the page.
    dammit = UnicodeDammit(raw, ['utf-8', 'gbk'])
    soup = BeautifulSoup(dammit.unicode_markup, 'lxml')
    # Each <li> under the "t clearfix" list holds one forecast entry.
    for li in soup.select("ul[class='t clearfix'] li"):
        try:
            date = li.select('h1')[0].text  # text of the first <h1>
            weather = li.select("p[class='wea']")[0].text
            temp = li.find_all('span')[0].text + '/' + li.find_all('i')[0].text
            print(date, weather, temp)
        except Exception as err:
            # An entry missing one of the expected tags is reported and
            # skipped so the remaining entries are still printed.
            print(err)
except Exception as err:
    # Top-level boundary of the script: report the failure instead of
    # dumping a traceback.
    print(err)

測試

python weather.py

成功爬取中國天氣網某城市七天的天氣情況。

升級版

爬取的升級版,是爬取幾個城市的天氣情況:

from bs4 import BeautifulSoup
from bs4 import UnicodeDammit
import urllib.request
import sqlite3

class WeatherDB:
    """SQLite-backed store for crawled weather forecast rows.

    Rows live in the ``weathers`` table with the columns ``wCity``,
    ``wDate``, ``wWeather`` and ``wTemp``; ``(wCity, wDate)`` is the
    primary key.
    """

    # printf-style layout shared by the header line and every data row.
    _ROW_FORMAT = '%-16s%-16s%-32s%-16s'

    def openDB(self, path='weathers.db'):
        """Open the database and start from an empty ``weathers`` table.

        Args:
            path: SQLite database to open. Defaults to ``weathers.db`` in
                the current working directory.
        """
        self.con = sqlite3.connect(path)
        self.cursor = self.con.cursor()
        # Create the table on the first run; on later runs it already exists
        # and is simply emptied, so each crawl starts from a clean table.
        self.cursor.execute('create table if not exists weathers (wCity varchar(16),wDate varchar(16),wWeather varchar(64),wTemp varchar(32),constraint pk_weather primary key(wCity,wDate))')
        self.cursor.execute('delete from weathers')

    def closeDB(self):
        """Commit pending changes and close the connection."""
        self.con.commit()
        self.con.close()

    def insert(self, city, date, weather, temp):
        """Insert one forecast row.

        Database errors (for example a duplicate ``(city, date)`` key) are
        printed rather than raised, so one bad row does not stop a crawl.
        """
        try:
            # The table is ``weathers`` and all four columns must be listed
            # to match the four placeholders.
            self.cursor.execute(
                'insert into weathers (wCity,wDate,wWeather,wTemp) values (?,?,?,?)',
                (city, date, weather, temp),
            )
        except sqlite3.Error as err:
            print(err)

    def show(self):
        """Print every stored row as a fixed-width table with a header."""
        self.cursor.execute('select * from weathers')
        rows = self.cursor.fetchall()
        print(self._ROW_FORMAT % ('city', 'date', 'weather', 'temp'))
        for row in rows:
            print(self._ROW_FORMAT % (row[0], row[1], row[2], row[3]))

class WeatherForecast:
    """Crawl weather.com.cn forecast pages for known cities and store them."""

    def __init__(self):
        # Browser-like User-Agent header sent with every request.
        self.headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US;rv:1.9pre)Gecko/2019100821 Minefield/3.0.2pre'}
        # City name -> city code used in the forecast page URL.
        self.cityCode = {'北京':'101010100','上海':'101020100','廣州':'101280101','深圳':'101280601'}

    def forecastCity(self, city):
        """Fetch, print and store the forecast entries for one city.

        Args:
            city: A key of ``self.cityCode``. An unknown city is reported
                on stdout and skipped.

        Network and parsing errors are printed rather than raised, so a
        failure for one city or one entry does not abort the whole crawl.
        """
        if city not in self.cityCode:
            print(city+'code cannot be found')
            return

        url = 'http://www.weather.com.cn/weather/'+self.cityCode[city]+'.shtml'
        try:
            req = urllib.request.Request(url, headers=self.headers)
            # The context manager closes the HTTP response even when read()
            # fails; the timeout keeps one dead connection from stalling
            # the remaining cities.
            with urllib.request.urlopen(req, timeout=10) as response:
                raw = response.read()
            # Both candidate encodings go in the list: UnicodeDammit's third
            # positional parameter is smart_quotes_to, not an encoding.
            dammit = UnicodeDammit(raw, ['utf-8', 'gbk'])
            soup = BeautifulSoup(dammit.unicode_markup, 'lxml')
            # Each <li> under the "t clearfix" list holds one forecast entry.
            for li in soup.select("ul[class='t clearfix'] li"):
                try:
                    date = li.select('h1')[0].text
                    weather = li.select('p[class="wea"]')[0].text
                    temp = li.select('p[class="tem"] span')[0].text+'/'+li.select('p[class="tem"] i')[0].text
                    print(city, date, weather, temp)
                    self.db.insert(city, date, weather, temp)  # persist the row
                except Exception as err:
                    # An entry missing an expected tag is reported and skipped.
                    print(err)
        except Exception as err:
            print(err)

    def process(self, cities):
        """Crawl every city in ``cities`` into a freshly opened database.

        Args:
            cities: Iterable of city names to pass to ``forecastCity``.
        """
        self.db = WeatherDB()
        self.db.openDB()
        try:
            for city in cities:
                self.forecastCity(city)
        finally:
            # Commit and close even if the crawl is interrupted.
            self.db.closeDB()

# Crawl the forecast for each listed city, then report completion.
target_cities = ['北京','上海','廣州','深圳']
ws = WeatherForecast()
ws.process(target_cities)
print('completed')

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章