利用python進行數據分析pandas_數據重排

# -*- coding: utf-8 -*-
"""
Created on Fri Oct 25 16:41:03 2019

@author: weiping
"""

import pandas as pd
import numpy as np
# Two small sample frames that share the 'name' and 'key1' columns; they are
# the inputs for the merge examples that follow.
df1 = pd.DataFrame(dict(
    name=list('abcdfg'),
    key1=[1, 1, 1, 2, 2, 2],
    date1=[3, 34, 5, 6, 4, 8],
))
df2 = pd.DataFrame(dict(
    name=list('abcdeh'),
    key1=[1, 1, 1, 3, 3, 3],
    date2=[33, 44, 23, 45, 66, 77],
))
df1, df2  # shown only when run interactively

# Left/right joins between DataFrames (comparable to joining tables in SQL).

# Left join on 'name'; when `how` is omitted the default is 'inner'.
df = df1.merge(df2, on='name', how='left')
df

# Right join, naming the key column separately for each side.
df = df1.merge(df2, left_on='name', right_on='name', how='right')
df

# Left join on a composite key made of two columns.
df = df1.merge(df2, on=['name', 'key1'], how='left')
df
# Joining on an index: df1's 'key' column is matched against df2's row labels.
df1 = pd.DataFrame({"key": list('qwerqee'), "data": range(7)})
df2 = pd.DataFrame({"da": [3, 6, 7]}, index=list('aqe'))
df1, df2  # shown only when run interactively
# Right join, so every row label of df2 is kept in the result.
df1.merge(df2, left_on='key', right_index=True, how='right')

# Axis-wise concatenation (np.concatenate here, pd.concat further below).

# Concatenating NumPy arrays along an axis.
arr = np.arange(12).reshape(3, 4)
arr1 = np.arange(16).reshape(4, 4)
np.concatenate([arr, arr], axis=1)  # join column-wise -> shape (3, 8)

# Concatenating along axis=1 requires every array to have the same number of
# rows. arr has 3 rows and arr1 has 4, so this call raises ValueError. The
# error is caught so the demonstration does not abort the rest of the script.
try:
    np.concatenate([arr, arr1], axis=1)
except ValueError as exc:
    print("np.concatenate failed as expected:", exc)

# Axis-wise concatenation of DataFrames.

df1 = pd.DataFrame(arr, columns=list('abcd'))
# axis defaults to 0: the frames are stacked row-wise.
pd.concat([df1, df1])
# axis=1: the frames are placed side by side, column-wise.
pd.concat([df1, df1], axis=1)


# Combining overlapping data ("patching" missing values from a second source).

# Column c1 is created as float so NaN can be written without relying on
# implicit int -> float upcasting, which newer pandas versions warn about.
a = pd.DataFrame({'c1': np.arange(7, dtype=float)})
# `.ix` was removed in pandas 1.0; `.loc` is the label-based replacement.
# Label slices include both endpoints, so rows 2, 3, 4 and 5 become NaN.
a.loc[2:3, 'c1'] = np.nan
a.loc[4:5, 'c1'] = np.nan
b = pd.DataFrame(list(range(4, 11)), columns=['c2'])

# np.where works like an element-wise if/else and returns a NumPy array:
# take the value from b wherever a is missing, otherwise keep a.
c = pd.DataFrame(np.where(pd.isnull(a), b, a), columns=['c3'])

# combine_first fills the same gaps as np.where above; called on a Series it
# returns a Series, aligned on the index.
a['c1'].combine_first(b['c2'])

# Reshaping a long table into a wide table.

df = pd.DataFrame({'t1': [11, 11, 11, 22, 22, 22, 33, 33, 33],
                   'item': ['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c'],
                   'value': [3.4, 3, 4, 5, 6, 7, 8, 8, 9]})
df
# Long form, one row per (t1, item) pair:
#    t1 item  value
# 0  11    a    3.4
# 1  11    b    3.0
# 2  11    c    4.0
# 3  22    a    5.0
# 4  22    b    6.0
# 5  22    c    7.0
# 6  33    a    8.0
# 7  33    b    8.0
# 8  33    c    9.0

# DataFrame.pivot accepts keyword arguments only since pandas 2.0, so index
# and columns are named explicitly instead of being passed positionally.
# No `values` is given, so the remaining column ('value') becomes the outer
# level of the resulting column MultiIndex.
df2 = df.pivot(index='item', columns='t1')
df2
# Rows are items, columns are t1 values:
#      value
# t1      11   22   33
# item
# a      3.4  5.0  8.0
# b      3.0  6.0  8.0
# c      4.0  7.0  9.0

df2 = df.pivot(index='t1', columns='item')
df2
# Rows are t1 values, columns are items:
#      value
# item     a    b    c
# t1
# 11     3.4  3.0  4.0
# 22     5.0  6.0  7.0
# 33     8.0  8.0  9.0
# Wide table.
# `fake_data` was never defined in this script, so the DataFrame call below
# raised NameError. The values here are reconstructed from the printed output
# shown underneath.
fake_data = {'subject': ['math', 'english'],
             'A': [88, 90],
             'B': [70, 80],
             'C': [60, 78]}
test = pd.DataFrame(fake_data, columns=['subject', 'A', 'B', 'C'])
test
# Expected display:
#    subject   A   B   C
# 0     math  88  70  60
# 1  english  90  80  78

# Convert to a long table: 'subject' is kept as the identifier column and
# every other column is unpivoted into (variable, value) pairs.
pd.melt(test, id_vars=['subject'])
# Expected display:
#    subject variable  value
# 0     math        A     88
# 1  english        A     90
# 2     math        B     70
# 3  english        B     80
# 4     math        C     60
# 5  english        C     78

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章