你看你也懂，seaborn強加matplotlib的進階模式

這是一份關於 seaborn 和 matplotlib 的進階筆記。我個人覺得畫圖還是用 seaborn，相對更簡單、更容易上手。這裏面還有很多函數我沒有整理到，有些函數的參數具體有什麼、能幹什麼，我也不是很清楚；不過這些基本操作應該也能對你我有所幫助。有一起學習這方面知識的朋友可以留個言，我們一起加油吧！！

函數庫

import matplotlib
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
%matplotlib inline

顯示中文

# Select the SimSun font for both the default family and the sans-serif
# fallback list so that Chinese text can be displayed in figures.
matplotlib.rcParams.update({
    'font.family': 'SimSun',
    'font.sans-serif': ['SimSun'],
})

數據預覽

# Load the Pokémon dataset (path is relative to the working directory)
# and preview its first rows.
pok_mon = pd.read_csv('pokemon.csv')
pok_mon.head()

	id	species	generation_id	height	weight	base_experience	type_1	type_2	hp	attack	defense	speed	special-attack	special-defense
0	1	bulbasaur	1	0.7	6.9	64	grass	poison	45	49	49	45	65	65
1	2	ivysaur	1	1.0	13.0	142	grass	poison	60	62	63	60	80	80
2	3	venusaur	1	2.0	100.0	236	grass	poison	80	82	83	80	100	100
3	4	charmander	1	0.6	8.5	62	fire	NaN	39	52	43	65	60	50
4	5	charmeleon	1	1.1	19.0	142	fire	NaN	58	64	58	80	80	65

# Load the fuel-economy dataset (path is relative to the working directory)
# and preview its first rows.
ful_econ = pd.read_csv(r'fuel_econ.csv')
ful_econ.head()

	id	make	model	year	VClass	drive	trans	fuelType	cylinders	displ	pv2	pv4	city	UCity	highway	UHighway	comb	co2	feScore	ghgScore
0	32204	Nissan	GT-R	2013	Subcompact Cars	All-Wheel Drive	Automatic (AM6)	Premium Gasoline	6	3.8	79	0	16.4596	20.2988	22.5568	30.1798	18.7389	471	4	4
1	32205	Volkswagen	CC	2013	Compact Cars	Front-Wheel Drive	Automatic (AM-S6)	Premium Gasoline	4	2.0	94	0	21.8706	26.9770	31.0367	42.4936	25.2227	349	6	6
2	32206	Volkswagen	CC	2013	Compact Cars	Front-Wheel Drive	Automatic (S6)	Premium Gasoline	6	3.6	94	0	17.4935	21.2000	26.5716	35.1000	20.6716	429	5	5
3	32207	Volkswagen	CC 4motion	2013	Compact Cars	All-Wheel Drive	Automatic (S6)	Premium Gasoline	6	3.6	94	0	16.9415	20.5000	25.2190	33.5000	19.8774	446	5	5
4	32208	Chevrolet	Malibu eAssist	2013	Midsize Cars	Front-Wheel Drive	Automatic (S6)	Regular Gasoline	4	2.4	0	95	24.7726	31.9796	35.5340	51.8816	28.6813	310	8	8

折線圖

plt中的errorbar折線圖

# Line plot of the mean 'comb' value per 'displ' bin, with the standard error
# of the mean as error bars.
bin_size = 0.25

# Bins are half-open ([left, right)) because of right=False below, so the last
# edge must lie strictly above the maximum displacement; otherwise rows equal
# to the maximum would fall outside every bin and be silently dropped.
xbin_edges = np.arange(0.5, ful_econ['displ'].max() + 2 * bin_size, bin_size)
xbin_centers = (xbin_edges + bin_size / 2)[:-1]

# compute statistics in each bin
data_xbins = pd.cut(ful_econ['displ'], xbin_edges, right=False, include_lowest=True)
# observed=False keeps empty bins (as NaN) so the results stay aligned with
# xbin_centers regardless of the pandas default for categorical groupers.
binned_comb = ful_econ['comb'].groupby(data_xbins, observed=False)
y_means = binned_comb.mean()
y_sems = binned_comb.sem()

plt.errorbar(x=xbin_centers, y=y_means, yerr=y_sems)

條形圖

seaborn中的countplot條形圖

# Count plot of generation_id drawn twice: vertical bars (left panel) and
# horizontal bars (right panel).
base_color = sns.color_palette()[1]  # second colour tuple of the current palette
base_order = pok_mon['generation_id'].value_counts().index  # most to least frequent

plt.figure(figsize=[8, 4])
panels = [('x', 'x軸的條形圖'), ('y', 'y軸的條形圖')]
for position, (axis_name, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    sns.countplot(
        data=pok_mon,
        color=base_color,
        order=base_order,
        **{axis_name: 'generation_id'},
    )
    plt.title(panel_title)

# Applies to the right-hand panel only (the current axes): rotate the y tick
# labels 35 degrees counter-clockwise from horizontal.
plt.yticks(rotation=35)
plt.show()

seaborn中的barplot條形圖這裏用來圖形調整

# Bar plot of hp per generation.
#   errwidth: thickness of the error-bar lines (0 hides them)
#   ci='sd':  draw the standard deviation as the error bar
hp_axes = sns.barplot(
    data=pok_mon,
    x='generation_id',
    y='hp',
    errwidth=2,
    ci='sd',
)
hp_axes.set_title('代數和血量的關係')
plt.show()

seaborn中barplot條形圖這裏用來計算缺失值統計

# Number of missing values in each column of the Pokémon table.
na_count = pok_mon.isna().sum()
# x: the column names, y: the missing-value count of each column.
# Passed by keyword because newer seaborn releases no longer accept the data
# vectors positionally; keywords work on old and new versions alike.
sns.barplot(x=na_count.index.values, y=na_count)
plt.xticks(rotation=35)

seaborn中的pointplot平均值圖

# Point plot of hp per generation.
# Passing linestyles='' would remove the connecting line.
mean_axes = sns.pointplot(data=pok_mon, x='generation_id', y='hp')
mean_axes.set_title('平均值圖')
plt.show()

分組條形圖

seaborn中的countplot分組條形圖

# Grouped count plot: number of cars per vehicle class, split by transmission type.

# Keep only the first word of the transmission description,
# e.g. 'Automatic (AM6)' -> 'Automatic'.
ful_econ['trans_type'] = ful_econ['trans'].apply(lambda x: x.split()[0])

# Desired left-to-right order of the vehicle classes.
car_list = ['Minicompact Cars', 'Subcompact Cars', 'Compact Cars', 'Midsize Cars', 'Large Cars']

# The order is handed to seaborn so that bars and tick labels stay in sync.
# Relabelling ticks with plt.xticks(positions, labels) would only rename the
# ticks without reordering the bars, attaching class names to the wrong bars.
ax = sns.countplot(data=ful_econ, x='VClass', hue='trans_type', order=car_list)
plt.title('車型和檔型之間的關係')
plt.xlabel('車型')
plt.ylabel('數量')  # the y axis of a count plot is the number of rows
plt.xticks(rotation=25)
# Legend options: location code, number of columns, transparency, title, background colour.
ax.legend(loc=1, ncol=1, framealpha=0.5, title='trans_type', facecolor='pink')

plt.show()

·loc參數數值對應的圖例位置·

Location String Location Code
best 0
upper right 1
upper left 2
lower left 3
lower right 4
right 5
center left 6
center right 7
lower center 8
upper center 9
center 10

餅圖

plt中的pie圖和擴展環圖

# Share of each generation as a pie chart (left) and a donut chart (right).
base_data = pok_mon['generation_id'].value_counts()

plt.figure(figsize=[8, 4])
# The donut is the same pie with wedges narrowed to a ring via wedgeprops.
pie_variants = [
    ('餅圖的使用', {}),
    ('環圖的使用', {'wedgeprops': {'width': 0.4}}),
]
for slot, (caption, extra_options) in enumerate(pie_variants, start=1):
    plt.subplot(1, 2, slot)
    plt.pie(
        base_data,
        labels=base_data.index,
        startangle=90,
        counterclock=False,
        **extra_options,
    )
    plt.axis('square')  # equal x and y axis lengths
    plt.title(caption)
plt.show()

直方圖

plt中的hist直方圖

# Histogram of speed using bins of width 5 starting at 0.
bin_width = 5
base_bins = np.arange(0, pok_mon['speed'].max() + bin_width, bin_width)

plt.hist(data=pok_mon, x='speed', bins=base_bins, alpha=0.6, color='red')
speed_axes = plt.gca()
speed_axes.set_title('hist直方圖')
speed_axes.set_xlabel('speed')
speed_axes.set_ylabel('count')
plt.show()

seaborn 中的distplot直方圖

# Speed histogram drawn with sns.distplot, once with the kernel density
# estimate overlaid (left) and once without it (right).
# NOTE(review): distplot is deprecated in newer seaborn releases — confirm the
# installed version still provides it.
plt.figure(figsize=[8, 4])
distplot_variants = [
    (True, 'distplot直方圖帶核密度估計'),
    (False, 'distplot直方圖不帶核密度估計'),
]
for slot, (show_kde, caption) in enumerate(distplot_variants, start=1):
    plt.subplot(1, 2, slot)
    sns.distplot(
        pok_mon['speed'],
        bins=base_bins,
        kde=show_kde,
        hist_kws={'alpha': 0.6},
        color='Tan',
    )
    plt.title(caption)
    plt.xlabel('speed')
    plt.ylabel('count')
plt.show()

散點圖

plt中的scatter散點圖

# Scatter plot of displ against comb; the low alpha makes dense regions
# show up darker where points overlap.
plt.scatter(data=ful_econ, x='displ', y='comb', alpha=1 / 10)
scatter_axes = plt.gca()
scatter_axes.set_title('displ 與 comb 之間的相關性')
scatter_axes.set_xlabel('displ(L)')
scatter_axes.set_ylabel('comb(L)')
plt.show()

seaborn中的regplot散點圖

# Scatter plot of displ against comb with a fitted regression line,
# semi-transparent points and a small horizontal jitter.
reg_axes = sns.regplot(
    data=ful_econ,
    x='displ',
    y='comb',
    fit_reg=True,
    scatter_kws={'alpha': 1/5},
    x_jitter=0.02,
)
reg_axes.set_title('displ 與 comb 之間的相關性')
reg_axes.set_xlabel('displ(L)')
reg_axes.set_ylabel('comb(L)')
plt.show()

透明度：alpha 爲介於 0 到 1 之間的數值，具體用法如上。迴歸曲線：fit_reg 默認爲 True（繪製迴歸線），設置爲 False 即可關閉。抖動：x_jitter、y_jitter 可以讓每個點沿 x 軸或 y 軸在真實值附近的小範圍內隨機偏移，便於看清重疊的點。

熱圖

plt中的hist2d熱圖

# Summary statistics of the two plotted columns; the min/max values guide
# the choice of bin edges for the 2-D histogram.
ful_econ[['displ', 'comb']].describe()

	displ	comb
count	3929.000000	3929.000000
mean	2.950573	24.791339
std	1.305901	6.003246
min	0.600000	12.821700
25%	2.000000	20.658100
50%	2.500000	24.000000
75%	3.600000	28.227100
max	7.000000	57.782400

# 2-D histogram (heat map) of displ against comb, drawn twice:
# left without annotations, right with the count written into every cell.
plt.figure(figsize=[8, 4])
bins_x = np.arange(0.6, 7+0.5, 0.5)
bins_y = np.arange(12, 58+5, 5)

plt.subplot(1, 2, 1)
# cmin=0.5 leaves cells with a count below 0.5 (i.e. empty cells) uncoloured.
plt.hist2d(data=ful_econ, x='displ', y='comb', bins=[bins_x, bins_y], cmap='BuPu', cmin=0.5)
plt.colorbar()  # colour scale
plt.title('熱圖不加文本註釋')
plt.xlabel('displ')
plt.ylabel('comb')

plt.subplot(1, 2, 2)
# hist2d returns a tuple whose first element is the 2-D array of counts.
ax = plt.hist2d(data=ful_econ, x='displ', y='comb', bins=[bins_x, bins_y], cmap='BuPu')
plt.colorbar()  # colour scale
plt.title('熱圖加文本註釋')
plt.xlabel('displ')
plt.ylabel('comb')
count = ax[0]

# Write each count at the centre of its cell. The centres are derived from the
# bin edges so the text stays centred whatever bin widths are chosen.
x_centers = (bins_x[:-1] + bins_x[1:]) / 2
y_centers = (bins_y[:-1] + bins_y[1:]) / 2
for i, x_mid in enumerate(x_centers):
    for j, y_mid in enumerate(y_centers):
        c = count[i, j]
        # White text on the darkest (most populated) cells, black elsewhere.
        text_color = 'white' if c > 400 else 'black'
        plt.text(x_mid, y_mid, int(c), ha='center', va='center', color=text_color)

plt.show()

調色板網址

seaborn 中的heatmap熱圖

# Heat map of the number of cars for every (vehicle class, transmission type) pair.
# Relies on the 'trans_type' column having been added to ful_econ beforehand.

# Desired top-to-bottom order of the vehicle classes.
car_list = ['Minicompact Cars', 'Subcompact Cars', 'Compact Cars', 'Midsize Cars', 'Large Cars']

fu_counts = ful_econ.groupby(['VClass', 'trans_type']).size()  # Series of group sizes
fu_counts = fu_counts.reset_index(name='count')  # back to a DataFrame
fu_counts = fu_counts.pivot(index='VClass', columns='trans_type', values='count')
# Reorder the rows themselves. Overwriting the tick labels afterwards would
# only rename the ticks, leaving the rows in their original order and
# therefore attaching class names to the wrong rows.
fu_counts = fu_counts.reindex(car_list)

# annot=True writes the value into each cell; fmt='d' formats it as an integer.
# If the table contains NaN (a missing combination), use fmt='.0f' instead.
sns.heatmap(fu_counts, annot=True, fmt='d')
plt.title('檔性和車型之間的關係')
plt.xlabel('檔型')
plt.ylabel('車型')
plt.show()

小提琴圖和箱線圖

seaborn 中的violinplot 小提琴圖

# Preview the fuel-economy table again before plotting distributions per class.
ful_econ.head()

	id	make	model	year	VClass	drive	trans	fuelType	cylinders	displ	pv2	pv4	city	UCity	highway	UHighway	comb	co2	feScore	ghgScore
0	32204	Nissan	GT-R	2013	Subcompact Cars	All-Wheel Drive	Automatic (AM6)	Premium Gasoline	6	3.8	79	0	16.4596	20.2988	22.5568	30.1798	18.7389	471	4	4
1	32205	Volkswagen	CC	2013	Compact Cars	Front-Wheel Drive	Automatic (AM-S6)	Premium Gasoline	4	2.0	94	0	21.8706	26.9770	31.0367	42.4936	25.2227	349	6	6
2	32206	Volkswagen	CC	2013	Compact Cars	Front-Wheel Drive	Automatic (S6)	Premium Gasoline	6	3.6	94	0	17.4935	21.2000	26.5716	35.1000	20.6716	429	5	5
3	32207	Volkswagen	CC 4motion	2013	Compact Cars	All-Wheel Drive	Automatic (S6)	Premium Gasoline	6	3.6	94	0	16.9415	20.5000	25.2190	33.5000	19.8774	446	5	5
4	32208	Chevrolet	Malibu eAssist	2013	Midsize Cars	Front-Wheel Drive	Automatic (S6)	Regular Gasoline	4	2.4	0	95	24.7726	31.9796	35.5340	51.8816	28.6813	310	8	8

# Violin plot of displ for each vehicle class.
#   inner=None       removes the small box plot drawn inside each violin
#   inner='quartile' draws the quartile lines instead

# Desired left-to-right order of the vehicle classes.
car_list = ['Minicompact Cars', 'Subcompact Cars', 'Compact Cars', 'Midsize Cars', 'Large Cars']

# The order is handed to seaborn so violins and tick labels stay in sync;
# relabelling the ticks afterwards would only rename them, not reorder the
# violins, and so would attach class names to the wrong distributions.
sns.violinplot(data=ful_econ, x='VClass', y='displ', order=car_list)
plt.title('車型和displ之間的關係')
plt.xlabel('車型')
plt.xticks(rotation=25)
plt.show()

seaborn中 boxplot 箱線圖

# Box plot of displ for each vehicle class.

# Desired left-to-right order of the vehicle classes.
car_list = ['Minicompact Cars', 'Subcompact Cars', 'Compact Cars', 'Midsize Cars', 'Large Cars']

# The order is handed to seaborn so boxes and tick labels stay in sync;
# relabelling the ticks afterwards would only rename them, not reorder the
# boxes, and so would attach class names to the wrong distributions.
sns.boxplot(data=ful_econ, x='VClass', y='displ', order=car_list)
plt.title('車型和displ之間的關係')
plt.xlabel('車型')
plt.xticks(rotation=25)
plt.show()

箱線圖說明：箱子中間的線是數據的中位數，箱子的上邊緣是第三個四分位點，下邊緣是第一個四分位點；上下兩根鬚線默認延伸到距離箱子 1.5 倍四分位距以內的最遠數據點（沒有異常值時即爲最大值和最小值），超出鬚線的點會作爲異常值單獨畫出。

絕對頻率和相對頻率

就是對數據表達的二種方式，分別作用在標籤或者長條上，我個人傾向後者的使用

# Two ways of showing relative frequency on a count plot: rescale the y tick
# labels to proportions (left panel) or write the percentage onto each bar
# (right panel).
id_base = pok_mon['generation_id'].value_counts()  # rows per generation, most frequent first
base_order = id_base.index
n_sum = pok_mon['generation_id'].shape[0]  # total number of rows
n_max = id_base.max()  # size of the most frequent generation
n_sca = n_max / n_sum  # largest proportion
n_loc = np.arange(0, n_sca+0.01, 0.05)  # tick positions expressed as proportions
n_label = ['{:.2f}'.format(i) for i in n_loc]  # tick labels

plt.figure(figsize=[8, 4])

# Left panel: the bars still show counts; only the y tick labels are
# replaced by the corresponding proportions.
plt.subplot(1, 2, 1)
sns.countplot(data=pok_mon, x='generation_id', order=base_order)
plt.title('第一種方式')
plt.yticks(n_loc * n_sum, n_label)

# Right panel: percentage text placed on every bar.
plt.subplot(1, 2, 2)
sns.countplot(data=pok_mon, x='generation_id', order=base_order)
# Bars are drawn at x = 0, 1, 2, ... in base_order, so enumerating the order
# yields each bar's position and category directly; no need to parse the tick
# label text back into a value with eval().
for loc, generation in enumerate(base_order):
    count = id_base[generation]
    sts = '{:0.1f}%'.format(100 * count / n_sum)
    print(sts)
    plt.text(loc, count - 10, sts, ha='center', color='black')
plt.title('第二種方式')
plt.show()

19.3%
18.7%
16.7%
13.3%
12.4%
10.7%
8.9%

標尺和變化

# Two ways of plotting the skewed weight distribution on a log scale:
# transform the data (left) or transform the axis (right).
# NOTE(review): both sets of bins start at 10**0, so any weight below 1 falls
# outside the histograms — confirm this is intended.
log_pk = np.log10(pok_mon['weight'])
log_bins = np.arange(0, log_pk.max() + 0.11, 0.1)
bins = 10 ** np.arange(0, np.log10(pok_mon['weight'].max())+0.1, 0.1)
col = [1, 3, 10, 30, 100, 300, 1000, 3000]

plt.figure(figsize=[8, 4])

# Left panel: histogram of the log10-transformed values on a linear axis.
plt.subplot(1, 2, 1)
plt.hist(log_pk, bins=log_bins)
plt.title('對數據採用對數處理')
plt.xlabel('log10(values)')

# Right panel: raw values with logarithmically spaced bins and a log x axis.
plt.subplot(1, 2, 2)
plt.hist(data=pok_mon, x='weight', bins=bins)
plt.xscale('log')  # switch the x axis to a logarithmic scale
plt.xticks(col, col)  # readable tick values instead of powers of ten
plt.title('對標尺的修改')
plt.xlabel('特定下標')
plt.show()

分面

分面是一種通用的可視化技巧，幫助你處理包含兩個或多個變量的圖表。在分面操作中，數據被劃分爲不相交的子集，通常根據分類變量的不同類別進行劃分。對於每個子集，對其他變量繪製相同的圖表。分面是比較不同變量級別分佈或關係的一種方式，尤其是有三個或多個感興趣的變量時。

# Preview the table again; it now also carries the derived 'trans_type' column.
ful_econ.head()

	id	make	model	year	VClass	drive	trans	fuelType	cylinders	displ	...	pv4	city	UCity	highway	UHighway	comb	co2	feScore	ghgScore	trans_type
0	32204	Nissan	GT-R	2013	Subcompact Cars	All-Wheel Drive	Automatic (AM6)	Premium Gasoline	6	3.8	...	0	16.4596	20.2988	22.5568	30.1798	18.7389	471	4	4	Automatic
1	32205	Volkswagen	CC	2013	Compact Cars	Front-Wheel Drive	Automatic (AM-S6)	Premium Gasoline	4	2.0	...	0	21.8706	26.9770	31.0367	42.4936	25.2227	349	6	6	Automatic
2	32206	Volkswagen	CC	2013	Compact Cars	Front-Wheel Drive	Automatic (S6)	Premium Gasoline	6	3.6	...	0	17.4935	21.2000	26.5716	35.1000	20.6716	429	5	5	Automatic
3	32207	Volkswagen	CC 4motion	2013	Compact Cars	All-Wheel Drive	Automatic (S6)	Premium Gasoline	6	3.6	...	0	16.9415	20.5000	25.2190	33.5000	19.8774	446	5	5	Automatic
4	32208	Chevrolet	Malibu eAssist	2013	Midsize Cars	Front-Wheel Drive	Automatic (S6)	Regular Gasoline	4	2.4	...	95	24.7726	31.9796	35.5340	51.8816	28.6813	310	8	8	Automatic

5 rows × 21 columns

# Faceted histograms of comb: one panel per vehicle class, sharing the same bins.
bin_step = 2
bins = np.arange(12, 58 + bin_step, bin_step)

# col_wrap sets how many panels are placed on one row before wrapping.
g = sns.FacetGrid(data=ful_econ, col='VClass', col_wrap=3)
# map() returns the grid, so the per-panel titles can be set in the same chain.
g.map(plt.hist, 'comb', bins=bins).set_titles('{col_name}')

# Other FacetGrid options mentioned in these notes:
#   sharey / sharex = False  give every panel its own axis instead of a shared one
#   size                     changes the height of each panel
#                            NOTE(review): newer seaborn calls this 'height' — confirm
#   hue                      adds a categorical variable, so the grid can be
#                            mapped several times with hist or other plots

高階模式:

待定待定…