#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Walk-through of the pandas DataFrame data structure.

Covers building a DataFrame from dictionaries, reading columns and rows,
overwriting and adding columns, transposing, and naming the axes.  Every
step prints its result so the script can be run as a console demo.
"""
__author__ = '未昔/angelfate'
__date__ = '2019/8/6 10:52'

import numpy as np
import pandas as pd

# --- Build a DataFrame from a dict of equal-length lists -------------------
print('用字典生成DataFrame,key爲列名(默認key是無序的)。')
data = {
    'name': ['wang', 'wei', 'RPA', 'python', 'linux', 'C'],
    'year': [2000, 2010, 2020, 2030, 2040, 2050],
    'pop': [1.0, 1.1, 1.2, 1.3, 1.4, 1.5],
}
print('\n')
print(pd.DataFrame(data))  # dict keys become the column labels

print('----指定索引順序----')
print(pd.DataFrame(data, columns=['name', 'year', 'pop']))  # explicit column order

print('----指定索引,不存在的列,默認使用數據NaN----')
# 'state' and 'con' are not keys of `data`, so those columns start out as NaN.
data2 = pd.DataFrame(
    data,
    columns=['name', 'year', 'pop', 'state', 'con'],
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)
print(data2)

# --- Read columns and rows ---------------------------------------------------
print('----查詢指定 列索引 數據---')
print(data2['name'])  # column lookup by key
print(data2.year)  # column lookup by attribute

print('----查詢指定 行索引 數據----')
# `.ix` was removed in pandas 1.0; `.loc` is the label-based row accessor.
print(data2.loc['two'])

# --- Overwrite columns -------------------------------------------------------
print('----修改列值----')
data2['state'] = 16.5  # a scalar is broadcast to the whole column
print(data2)
# Item assignment instead of `data2.con = ...`: attribute assignment only
# updates a column when it already exists, otherwise it silently sets a plain
# Python attribute on the object.
data2['con'] = np.arange(6)
print(data2)
print('\n')

print('用Serice指定要修改的索引及對應值,沒有指定的數據用Nan。')
# Assigning a Series aligns on the index; labels missing from `val` become NaN.
val = pd.Series([-1.0, -1.1, -1.2], index=['two', 'three', 'five'])
data2['con'] = val
print(data2)
print('\n')

# --- Add a new column --------------------------------------------------------
print('---賦值給新列---')
data2['new_data'] = (data2.name == 'wang')  # boolean mask stored as a column
print(data2)
print(data2.columns)  # column labels
print('\n')

# --- Nested dicts, transposition, and axis names ----------------------------
print('---DataFrame轉置---')
values = {
    'Nevada': {2001: 1.1, 2002: 2.2},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
# Outer keys become columns, inner keys become the row index.  The rows are
# sorted explicitly so the year order (and the positional slices taken below)
# does not depend on how a given pandas version orders the merged inner keys.
data3 = pd.DataFrame(values).sort_index()
print(data3)
print(data3.T)  # swap rows and columns
print('\n')

print('---指定索引順序,使用切片初始化順序---')
print(pd.DataFrame(values, index=[2001, 2002, 2003]))
# `.iloc` makes it explicit that these are positional slices, not year labels.
data4 = {
    'Ohio': data3['Ohio'].iloc[:-1],
    'Nevada': data3['Nevada'].iloc[:2],
}
print(pd.DataFrame(data4))

print('---指定索引和列名---')
data3.index.name = 'year'
data3.columns.name = 'state'
print(data3)
print(data3.values)  # underlying data only, without labels
print(data2.values)
結果
用字典生成DataFrame,key爲列名(默認key是無序的)。
     name  pop  year
0    wang  1.0  2000
1     wei  1.1  2010
2     RPA  1.2  2020
3  python  1.3  2030
4   linux  1.4  2040
5       C  1.5  2050
----指定索引順序----
     name  year  pop
0    wang  2000  1.0
1     wei  2010  1.1
2     RPA  2020  1.2
3  python  2030  1.3
4   linux  2040  1.4
5       C  2050  1.5
----指定索引,不存在的列,默認使用數據NaN----
         name  year  pop state  con
one      wang  2000  1.0   NaN  NaN
two       wei  2010  1.1   NaN  NaN
three     RPA  2020  1.2   NaN  NaN
four   python  2030  1.3   NaN  NaN
five    linux  2040  1.4   NaN  NaN
six         C  2050  1.5   NaN  NaN
----查詢指定 列索引 數據---
one wang
two wei
three RPA
four python
five linux
six C
Name: name, dtype: object
one 2000
two 2010
three 2020
four 2030
five 2040
six 2050
Name: year, dtype: int64
----查詢指定 行索引 數據----
name wei
year 2010
pop 1.1
state NaN
con NaN
Name: two, dtype: object
----修改列值----
         name  year  pop  state  con
one      wang  2000  1.0   16.5  NaN
two       wei  2010  1.1   16.5  NaN
three     RPA  2020  1.2   16.5  NaN
four   python  2030  1.3   16.5  NaN
five    linux  2040  1.4   16.5  NaN
six         C  2050  1.5   16.5  NaN
         name  year  pop  state  con
one      wang  2000  1.0   16.5    0
two       wei  2010  1.1   16.5    1
three     RPA  2020  1.2   16.5    2
four   python  2030  1.3   16.5    3
five    linux  2040  1.4   16.5    4
six         C  2050  1.5   16.5    5
用Serice指定要修改的索引及對應值,沒有指定的數據用Nan。
         name  year  pop  state  con
one      wang  2000  1.0   16.5  NaN
two       wei  2010  1.1   16.5 -1.0
three     RPA  2020  1.2   16.5 -1.1
four   python  2030  1.3   16.5  NaN
five    linux  2040  1.4   16.5 -1.2
six         C  2050  1.5   16.5  NaN
---賦值給新列---
         name  year  pop  state  con  new_data
one      wang  2000  1.0   16.5  NaN      True
two       wei  2010  1.1   16.5 -1.0     False
three     RPA  2020  1.2   16.5 -1.1     False
four   python  2030  1.3   16.5  NaN     False
five    linux  2040  1.4   16.5 -1.2     False
six         C  2050  1.5   16.5  NaN     False
Index(['name', 'year', 'pop', 'state', 'con', 'new_data'], dtype='object')
---DataFrame轉置---
      Nevada  Ohio
2000     NaN   1.5
2001     1.1   1.7
2002     2.2   3.6
        2000  2001  2002
Nevada   NaN   1.1   2.2
Ohio     1.5   1.7   3.6
---指定索引順序,使用切片初始化順序---
      Nevada  Ohio
2001     1.1   1.7
2002     2.2   3.6
2003     NaN   NaN
      Nevada  Ohio
2000     NaN   1.5
2001     1.1   1.7
---指定索引和列名---
state  Nevada  Ohio
year
2000      NaN   1.5
2001      1.1   1.7
2002      2.2   3.6
[[nan 1.5]
 [1.1 1.7]
 [2.2 3.6]]
[['wang' 2000 1.0 16.5 nan True]
 ['wei' 2010 1.1 16.5 -1.0 False]
 ['RPA' 2020 1.2 16.5 -1.1 False]
 ['python' 2030 1.3 16.5 nan False]
 ['linux' 2040 1.4 16.5 -1.2 False]
 ['C' 2050 1.5 16.5 nan False]]
Process finished with exit code 0