python-pyecharts 數據可視化學習 (數據來源:丁香園)

目錄

一、數據準備

二、疫情地圖

2.1全國疫情地圖

2.2湖北省疫情地圖

三、疫情增長趨勢圖


效果圖

所需文件:pycharts_city.txt(規範化的城市名稱列表,2.2 節用它來規範化丁香園數據中的城市簡稱)

 

             

             

 

一、數據準備

爬取丁香園的數據,並以 JSON 文件保存到 data 目錄下。

import json
import re
import requests
import datetime

# Today's date as a compact YYYYMMDD string, e.g. 20200315.
today = '{:%Y%m%d}'.format(datetime.date.today())

def crawl_dxy_data():
    """
    Crawl the DXY real-time statistics page and save its data as JSON.

    The page embeds the data in a script as ``window.getAreaStat = [...]``.
    That array is extracted with a regular expression, parsed as JSON and
    written to ``data/<today>.json``, where ``today`` is the module-level
    date string. The ``data`` directory is created if it does not exist.

    Decoding, parsing and file errors are reported on stdout and nothing
    further is done; errors raised by the HTTP request itself propagate.
    """
    import os  # local import: only needed to make sure the output directory exists

    # A timeout keeps the script from hanging forever on an unresponsive server.
    response = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia', timeout=30)
    print(response.status_code)  # show the HTTP status code

    try:
        url_text = response.content.decode()
        # re.S lets '.' match newlines, so the page is searched as one string.
        # The group captures the array up to and including its closing '}]'.
        url_content = re.search(r'window\.getAreaStat = (.*?\}\])\}catch', url_text, re.S)
        if url_content is None:
            raise ValueError('window.getAreaStat was not found in the page')
        json_data = json.loads(url_content.group(1))
        os.makedirs('data', exist_ok=True)
        with open('data/' + today + '.json', 'w', encoding='UTF-8') as f:
            json.dump(json_data, f, ensure_ascii=False)
    except (ValueError, OSError) as err:
        # ValueError also covers UnicodeDecodeError and json.JSONDecodeError.
        print('<Response [%s]>' % response.status_code)
        print('Failed to save DXY data: %s' % err)


def crawl_statistics_data():
    """
    Download the historical statistics of every province and save them as JSON.

    Reads ``data/<today>.json``, requests the URL stored under each entry's
    'statisticsData' key, and stores the 'data' field of every JSON reply
    under the entry's 'provinceShortName'. The combined dict is written to
    ``data/statistics_data.json``.

    A province whose request fails, or whose reply cannot be parsed, is
    reported on stdout and skipped so the remaining provinces are still
    collected.
    """
    with open('data/' + today + '.json', 'r', encoding='UTF-8') as file:
        json_array = json.load(file)

    statistics_data = {}
    for province in json_array:
        url = province['statisticsData']
        try:
            # A timeout keeps one unresponsive URL from blocking the whole crawl.
            response = requests.get(url, timeout=30)
        except requests.RequestException as err:
            print('<Request failed: %s> for url: [%s]' % (err, url))
            continue
        try:
            statistics_data[province['provinceShortName']] = json.loads(response.content.decode())['data']
        except (ValueError, KeyError, TypeError):
            # ValueError covers both undecodable bytes and invalid JSON.
            print('<Response [%s]> for url: [%s]' % (response.status_code, url))

    with open("data/statistics_data.json", "w", encoding='UTF-8') as f:
        json.dump(statistics_data, f, ensure_ascii=False)


if __name__ == '__main__':
    # Run the two stages in order: the second reads the file the first writes.
    for _stage in (crawl_dxy_data, crawl_statistics_data):
        _stage()

 

 

二、疫情地圖

 

2.1全國疫情地圖

請根據自己的環境,自行修改代碼中的文件保存位置(即 m.render 的 path 參數)。

import json
import datetime
from pyecharts.charts import Map
from pyecharts import options as opts

# Load the raw data file saved under today's date.
today = '{:%Y%m%d}'.format(datetime.date.today())   # e.g. 20200315
datafile = 'data/%s.json' % today
with open(datafile, 'r', encoding='UTF-8') as file:
    json_array = json.load(file)

# Nationwide confirmed cases: pair each entry's short name with its
# 'confirmedCount' field, then order the pairs from largest to smallest count.
china_data = [(entry['provinceShortName'], entry['confirmedCount']) for entry in json_array]
china_data.sort(key=lambda pair: pair[1], reverse=True)

print(china_data)
# Nationwide epidemic map.
# Custom value ranges for the piecewise visual map, each with its own colour.
pieces = [{'min': 10000, 'color': '#540d0d'}]
pieces.extend(
    {'max': upper, 'min': lower, 'color': colour}
    for upper, lower, colour in (
        (9999, 1000, '#9c1414'),
        (999, 500, '#d92727'),
        (499, 100, '#ed3232'),
        (99, 10, '#f27777'),
        (9, 1, '#f7adad'),
    )
)
pieces.append({'max': 0, 'color': '#f7e4e4'})

labels = [name for name, _ in china_data]
counts = [count for _, count in china_data]

m = Map()
m.add("累計確診", [[name, count] for name, count in zip(labels, counts)], 'china')

# Series options (element, text, label, line styles and so on).
label_style = opts.LabelOpts(font_size=12)
m.set_series_opts(label_opts=label_style, is_show=False)

# Global options (title, legend, visual map and so on).
title = opts.TitleOpts(title='全國實時確診數據', subtitle='數據來源:丁香園')
legend = opts.LegendOpts(is_show=False)
visual_map = opts.VisualMapOpts(
    pieces=pieces,
    is_piecewise=True,   # use the discrete ranges defined above
    is_show=True,        # display the visual map component
)
m.set_global_opts(title_opts=title, legend_opts=legend, visualmap_opts=visual_map)

# render() writes a local HTML file; the output location is given by `path`.
m.render(path='/home/aistudio/data/全國實時確診數據.html')

 

2.2湖北省疫情地圖

import json
import datetime
from pyecharts.charts import Map
from pyecharts import options as opts

# Load the raw data file saved under today's date.
today = '{:%Y%m%d}'.format(datetime.date.today())   # e.g. 20200315
datafile = 'data/%s.json' % today
with open(datafile, 'r', encoding='UTF-8') as file:
    json_array = json.load(file)

# Hubei real-time confirmed cases.
# Read the canonical city names (one per line, surrounding whitespace removed);
# they are used to normalise the abbreviated city names in the DXY data.
with open('/home/aistudio/data/data24815/pycharts_city.txt', 'r', encoding='UTF-8') as f:
    defined_cities = list(map(str.strip, f))


def format_city_name(name, defined_cities):
    """
    Map an abbreviated city name onto its canonical form.

    A canonical city matches when it contains every character of ``name``
    (order is ignored, so '恩施州' matches '恩施土家族苗族自治州'). The first
    match in ``defined_cities`` wins. If the matched name does not already
    end in '市', '區', '縣' or '自治州', the suffix '市' is appended.

    Args:
        name: abbreviated city name to normalise.
        defined_cities: iterable of canonical city names, searched in order.

    Returns:
        The canonical name, or None when ``name`` is empty or nothing matches.
    """
    if not name:
        # An empty name would otherwise "match" the first canonical city.
        return None

    wanted = set(name)
    for defined_city in defined_cities:
        # Subset test rather than comparing the overlap size with len(name):
        # len(name) counts repeated characters, so names such as '齊齊哈爾'
        # could never match.
        if wanted <= set(defined_city):
            if defined_city.endswith(('市', '區', '縣', '自治州')):
                return defined_city
            return defined_city + '市'
    return None


province_name = '湖北'
# Start with an empty list so the code below still runs (producing an empty
# map) when the province is not present in the data.
hubei_data = []
for province in json_array:
    if province['provinceName'] == province_name or province['provinceShortName'] == province_name:
        json_array_province = province['cities']
        # Pair each normalised city name with its confirmed count,
        # ordered from largest to smallest count.
        hubei_data = sorted(
            ((format_city_name(city['cityName'], defined_cities), city['confirmedCount'])
             for city in json_array_province),
            key=lambda x: x[1],
            reverse=True,
        )

        print(hubei_data)
        break  # the province has been found; no need to scan the rest

labels = [name for name, _ in hubei_data]
counts = [count for _, count in hubei_data]

# Custom value ranges for the piecewise visual map, each with its own colour.
pieces = [{'min': 10000, 'color': '#540d0d'}]
pieces.extend(
    {'max': upper, 'min': lower, 'color': colour}
    for upper, lower, colour in (
        (9999, 1000, '#9c1414'),
        (999, 500, '#d92727'),
        (499, 100, '#ed3232'),
        (99, 10, '#f27777'),
        (9, 1, '#f7adad'),
    )
)
pieces.append({'max': 0, 'color': '#f7e4e4'})

m = Map()
m.add("累計確診", [[name, count] for name, count in zip(labels, counts)], '湖北')

# Series options.
label_style = opts.LabelOpts(font_size=12)
m.set_series_opts(label_opts=label_style, is_show=False)

# Global options: title, hidden legend and the piecewise visual map.
title = opts.TitleOpts(title='湖北省實時確診數據', subtitle='數據來源:丁香園')
legend = opts.LegendOpts(is_show=False)
visual_map = opts.VisualMapOpts(pieces=pieces, is_piecewise=True, is_show=True)
m.set_global_opts(title_opts=title, legend_opts=legend, visualmap_opts=visual_map)

m.render(path='/home/aistudio/data/湖北省實時確診數據.html')

 

三、疫情增長趨勢圖

import numpy as np
import json
from pyecharts.charts import Line
from pyecharts import options as opts

# Load the historical statistics file (a JSON object keyed by province name).
datafile = 'data/statistics_data.json'
with open(datafile, 'r', encoding='UTF-8') as file:
    json_dict = json.load(file)

# Dates from 1 Feb 2020 onwards, taken from Hubei's records; they define the x axis.
date_ids = [da['dateId'] for da in json_dict['湖北'] if da['dateId'] >= 20200201]
# Turn each numeric YYYYMMDD id into an 'MM-DD' label.
dateId = [str(date_id)[4:6] + '-' + str(date_id)[6:8] for date_id in date_ids]

# Newly confirmed cases ('confirmedIncr') per province for those dates.
# Each province is aligned to the shared date list by its 'dateId', so a day
# the province did not report counts as 0 wherever it falls, and every series
# has exactly len(dateId) values.
statistics__data = {}
for province in json_dict:
    incr_by_date = {da['dateId']: da['confirmedIncr'] for da in json_dict[province]}
    statistics__data[province] = [incr_by_date.get(date_id, 0) for date_id in date_ids]

# Nationwide daily increase: element-wise sum of every province's series.
all_statis = np.array([0] * len(dateId))
for daily_increase in statistics__data.values():
    all_statis = all_statis + np.array(daily_increase)
all_statis = all_statis.tolist()

# Hubei's daily increase.
hubei_statis = statistics__data['湖北']

# Daily increase outside Hubei: nationwide total minus Hubei, day by day.
other_statis = [all_statis[day] - hubei_statis[day] for day, _ in enumerate(dateId)]

line = Line()
line.add_xaxis(dateId)

# Nationwide series: thick line with large markers, all in one colour.
national_colour = '#B44038'
line.add_yaxis(
    "全國新增確診病例",   # legend entry
    all_statis,           # data
    is_smooth=True,       # draw a smoothed curve
    linestyle_opts=opts.LineStyleOpts(width=4, color=national_colour),
    itemstyle_opts=opts.ItemStyleOpts(color=national_colour,
                                      border_color=national_colour,
                                      border_width=10),
)

# The Hubei and non-Hubei series share the same styling apart from colour;
# their labels are placed below the line.
for series_name, series_values, series_colour in (
    ("湖北新增確診病例", hubei_statis, '#4E87ED'),
    ("其他省份新增病例", other_statis, '#F1A846'),
):
    line.add_yaxis(
        series_name,
        series_values,
        is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(width=2, color=series_colour),
        label_opts=opts.LabelOpts(position='bottom'),
        itemstyle_opts=opts.ItemStyleOpts(color=series_colour,
                                          border_color=series_colour,
                                          border_width=3),
    )

# Logarithmic y axis from 1 to 16000 with split lines and the axis line shown.
y_axis = opts.AxisOpts(
    max_=16000,
    min_=1,
    type_="log",
    splitline_opts=opts.SplitLineOpts(is_show=True),
    axisline_opts=opts.AxisLineOpts(is_show=True),
)
chart_title = opts.TitleOpts(title="新增確診病例", subtitle='數據來源:丁香園')
line.set_global_opts(title_opts=chart_title, yaxis_opts=y_axis)

line.render(path='/home/aistudio/data/新增確診趨勢圖.html')

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章