In [1]: import pandas as pd
In [2]: df1 = pd.DataFrame({'key':['b','b','a','c','a','a','b'],'data1':range(7)})
In [3]: df2 = pd.DataFrame({'key':['a','b','d'],'data2':range(3)})
In [4]: df1
Out[4]:
data1 key
0 0 b
1 1 b
2 2 a
3 3 c
4 4 a
5 5 a
6 6 b
In [5]: df2
Out[5]:
data2 key
0 0 a
1 1 b
2 2 d
In [6]: pd.concat([df1,df2])
Out[6]:
data1 data2 key
0 0.0 NaN b
1 1.0 NaN b
2 2.0 NaN a
3 3.0 NaN c
4 4.0 NaN a
5 5.0 NaN a
6 6.0 NaN b
0 NaN 0.0 a
1 NaN 1.0 b
2 NaN 2.0 d
In [7]: pd.concat([df1,df2], keys=['x','y'])
Out[7]:
data1 data2 key
x 0 0.0 NaN b
1 1.0 NaN b
2 2.0 NaN a
3 3.0 NaN c
4 4.0 NaN a
5 5.0 NaN a
6 6.0 NaN b
y 0 NaN 0.0 a
1 NaN 1.0 b
2 NaN 2.0 d
In [8]:
In [8]: pd.concat([df1,df2], axis=1)
Out[8]:
data1 key data2 key
0 0 b 0.0 a
1 1 b 1.0 b
2 2 a 2.0 d
3 3 c NaN NaN
4 4 a NaN NaN
5 5 a NaN NaN
6 6 b NaN NaN
In [10]: pd.concat([df1,df2], axis=1, join='inner')
Out[10]:
data1 key data2 key
0 0 b 0 a
1 1 b 1 b
2 2 a 2 d
In [11]:
In [11]: pd.concat([df1, df2.reindex(df1.index)], axis=1) # join_axes was removed in pandas 1.0; align df2 to df1's index before concatenating
Out[11]:
data1 key data2 key
0 0 b 0.0 a
1 1 b 1.0 b
2 2 a 2.0 d
3 3 c NaN NaN
4 4 a NaN NaN
5 5 a NaN NaN
6 6 b NaN NaN
In [12]: pd.concat([df1.reindex(df2.index), df2], axis=1) # join_axes was removed in pandas 1.0; align df1 to df2's index before concatenating
Out[12]:
data1 key data2 key
0 0 b 0 a
1 1 b 1 b
2 2 a 2 d
In [13]: pd.concat([df1,df2], axis=1, ignore_index=True)
Out[13]:
0 1 2 3
0 0 b 0.0 a
1 1 b 1.0 b
2 2 a 2.0 d
3 3 c NaN NaN
4 4 a NaN NaN
5 5 a NaN NaN
6 6 b NaN NaN
In [14]: pd.concat([df1,df2], axis=0, ignore_index=True)
Out[14]:
data1 data2 key
0 0.0 NaN b
1 1.0 NaN b
2 2.0 NaN a
3 3.0 NaN c
4 4.0 NaN a
5 5.0 NaN a
6 6.0 NaN b
7 NaN 0.0 a
8 NaN 1.0 b
9 NaN 2.0 d
In [15]:
In [15]:
In [15]:
In [15]:
In [15]:
In [15]: df = pd.DataFrame({'time':[1504108800,1504281600], 'data':range(2)})
In [16]: df
Out[16]:
data time
0 0 1504108800
1 1 1504281600
In [17]: df['date'] = [20170909,20170911]
In [18]: df
Out[18]:
data time date
0 0 1504108800 20170909
1 1 1504281600 20170911
In [19]: import time
In [20]: from datetime import datetime
In [21]: def datetime_to_stamp(date_time):
...: # Convert a date-time string into an integer timestamp (seconds).
...: # date_time: text in the "%Y-%m-%d %H:%M:%S" layout parsed below.
...: # Returns int(time.mktime(...)); mktime reads the parsed fields as local
...: # time, so the result depends on the timezone of the machine running it.
...: time_array = time.strptime(date_time, "%Y-%m-%d %H:%M:%S")
...: time_stamp = int(time.mktime(time_array))
...: return time_stamp
...:
In [23]: df['time2'] = df['time'].apply(lambda x: datetime.fromtimestamp(x).strftime('%Y-%m-%d %H:%M:%S'))
In [24]: df
Out[24]:
data time date time2
0 0 1504108800 20170909 2017-08-31 00:00:00
1 1 1504281600 20170911 2017-09-02 00:00:00
In [25]: df['time2'][0] # the converted value is a plain string
Out[25]: '2017-08-31 00:00:00'
In [27]: df['date'] = df['date'].astype(str) # must be converted to str before it can be passed to strptime, and the text must match the '%Y%m%d' format used below
In [29]: df['date2'] = df['date'].apply(lambda x: datetime.strptime(x,'%Y%m%d'))
In [30]: df
Out[30]:
data time date time2 date2
0 0 1504108800 20170909 2017-08-31 00:00:00 2017-09-09
1 1 1504281600 20170911 2017-09-02 00:00:00 2017-09-11
In [31]: df['date'][0]
Out[31]: '20170909'
In [32]: df['date2'][0]
Out[32]: Timestamp('2017-09-09 00:00:00')
In [33]: df['date2'][0].day
Out[33]: 9
In [34]: df['date2'][0].year
Out[34]: 2017
In [35]: g = df['date2'][0]
In [36]: g
Out[36]: Timestamp('2017-09-09 00:00:00')
In [37]: datetime.strftime(g,'%Y%m%d') # convert back to the original string form
Out[37]: '20170909'
In [38]: df
Out[38]:
data time date time2 date2
0 0 1504108800 20170909 2017-08-31 00:00:00 2017-09-09
1 1 1504281600 20170911 2017-09-02 00:00:00 2017-09-11
In [39]: df['ceshi'] = pd.to_datetime(df['date'])
In [40]: df
Out[40]:
data time date time2 date2 ceshi
0 0 1504108800 20170909 2017-08-31 00:00:00 2017-09-09 2017-09-09
1 1 1504281600 20170911 2017-09-02 00:00:00 2017-09-11 2017-09-11
In [41]: df['ceshi'][0]
Out[41]: Timestamp('2017-09-09 00:00:00')
In [43]:
In [43]: df['ddddddd'] = df['time2'].apply(lambda x: datetime_to_stamp(x)) # convert the standard date-time string to a timestamp
In [44]: df
Out[44]:
data time date time2 date2 ceshi \
0 0 1504108800 20170909 2017-08-31 00:00:00 2017-09-09 2017-09-09
1 1 1504281600 20170911 2017-09-02 00:00:00 2017-09-11 2017-09-11
ddddddd
0 1504108800
1 1504281600
In [45]: df['ddddddd'][0]
Out[45]: 1504108800
In [46]: