數據結構 DataFrame

#!/usr/bin/env Python3
__author__ = '未昔/angelfate'
__date__ = '2019/8/6 10:52'
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np

"""
數據結構 DataFrame
創建,讀寫
"""
# Section 1: build DataFrames from a dict of equal-length lists.
# Each dict key becomes a column label.
print('用字典生成DataFrame,key爲列名(默認key是無序的)。')
data = {
    'name': ['wang', 'wei', 'RPA', 'python', 'linux', 'C'],
    'year': [2000, 2010, 2020, 2030, 2040, 2050],
    'pop': [1.0, 1.1, 1.2, 1.3, 1.4, 1.5],
}
print('\n')

# Default construction: a 0..n-1 integer row index is generated.
print(pd.DataFrame(data))
print('----指定索引順序----')
# `columns=` fixes the column order explicitly.
print(pd.DataFrame(data, columns=['name', 'year', 'pop']))

print('----指定索引,不存在的列,默認使用數據NaN----')
# Columns requested here but absent from `data` ('state', 'con') are
# created and filled with NaN; `index=` supplies the row labels.
data2 = pd.DataFrame(
    data,
    columns=['name', 'year', 'pop', 'state', 'con'],
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)
print(data2)
print('----查詢指定 列索引 數據---')
print(data2['name'])  # select a column by key
print(data2.year)     # select a column by attribute access
print('----查詢指定 行索引 數據----')
# Select a row by label. `.ix` was removed in pandas 1.0, so the
# label-based `.loc` indexer is used instead.
print(data2.loc['two'])
print('----修改列值----')
data2['state'] = 16.5  # a scalar is broadcast to every row of the column
print(data2)
# Assign a NumPy array to the column; its length must match the row count.
# Bracket assignment is used because attribute assignment only works for
# columns that already exist.
data2['con'] = np.arange(6)
print(data2)
print('\n')

# Section 2: assign a Series to a column. Values are aligned on the row
# labels; rows whose label is missing from the Series receive NaN.
print('用Serice指定要修改的索引及對應值,沒有指定的數據用Nan。')
val = pd.Series({'two': -1.0, 'three': -1.1, 'five': -1.2})
data2['con'] = val
print(data2)
print('\n')

# Section 3: assigning to a column label that does not exist yet creates
# the column. Here it holds an element-wise boolean comparison result.
print('---賦值給新列---')
data2['new_data'] = data2['name'].eq('wang')
print(data2)
print(data2.columns)  # column labels, now including the new one
print('\n')

# Section 4: a dict of dicts builds a DataFrame whose outer keys are the
# columns and whose inner keys form the row index; gaps become NaN.
print('---DataFrame轉置---')
values = {
    'Nevada': {2001: 1.1, 2002: 2.2},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
data3 = pd.DataFrame(values)
print(data3)
print(data3.transpose())  # swap rows and columns
print('\n')

print('---指定索引順序,使用切片初始化順序---')
# An explicit index selects/reorders rows; labels absent from the inner
# dicts (2003) yield NaN rows.
print(pd.DataFrame(values, index=[2001, 2002, 2003]))
# A dict of Series slices is also a valid DataFrame source.
data4 = dict(Ohio=data3['Ohio'][:-1], Nevada=data3['Nevada'][:2])
print(pd.DataFrame(data4))

# Section 5: name the row and column axes, then show the underlying
# NumPy arrays of both frames.
print('---指定索引和列名---')
data3.columns.name = 'state'
data3.index.name = 'year'
print(data3)
for frame in (data3, data2):
    print(frame.values)  # raw data only, without labels

結果

用字典生成DataFrame,key爲列名(默認key是無序的)。


     name  pop  year
0    wang  1.0  2000
1     wei  1.1  2010
2     RPA  1.2  2020
3  python  1.3  2030
4   linux  1.4  2040
5       C  1.5  2050
----指定索引順序----
     name  year  pop
0    wang  2000  1.0
1     wei  2010  1.1
2     RPA  2020  1.2
3  python  2030  1.3
4   linux  2040  1.4
5       C  2050  1.5
----指定索引,不存在的列,默認使用數據NaN----
         name  year  pop state  con
one      wang  2000  1.0   NaN  NaN
two       wei  2010  1.1   NaN  NaN
three     RPA  2020  1.2   NaN  NaN
four   python  2030  1.3   NaN  NaN
five    linux  2040  1.4   NaN  NaN
six         C  2050  1.5   NaN  NaN
----查詢指定 列索引 數據---
one        wang
two         wei
three       RPA
four     python
five      linux
six           C
Name: name, dtype: object
one      2000
two      2010
three    2020
four     2030
five     2040
six      2050
Name: year, dtype: int64
----查詢指定 行索引 數據----
name      wei
year     2010
pop       1.1
state     NaN
con       NaN
Name: two, dtype: object
----修改列值----
         name  year  pop  state  con
one      wang  2000  1.0   16.5  NaN
two       wei  2010  1.1   16.5  NaN
three     RPA  2020  1.2   16.5  NaN
four   python  2030  1.3   16.5  NaN
five    linux  2040  1.4   16.5  NaN
six         C  2050  1.5   16.5  NaN
         name  year  pop  state  con
one      wang  2000  1.0   16.5    0
two       wei  2010  1.1   16.5    1
three     RPA  2020  1.2   16.5    2
four   python  2030  1.3   16.5    3
five    linux  2040  1.4   16.5    4
six         C  2050  1.5   16.5    5


用Serice指定要修改的索引及對應值,沒有指定的數據用Nan。
         name  year  pop  state  con
one      wang  2000  1.0   16.5  NaN
two       wei  2010  1.1   16.5 -1.0
three     RPA  2020  1.2   16.5 -1.1
four   python  2030  1.3   16.5  NaN
five    linux  2040  1.4   16.5 -1.2
six         C  2050  1.5   16.5  NaN


---賦值給新列---
         name  year  pop  state  con  new_data
one      wang  2000  1.0   16.5  NaN      True
two       wei  2010  1.1   16.5 -1.0     False
three     RPA  2020  1.2   16.5 -1.1     False
four   python  2030  1.3   16.5  NaN     False
five    linux  2040  1.4   16.5 -1.2     False
six         C  2050  1.5   16.5  NaN     False
Index(['name', 'year', 'pop', 'state', 'con', 'new_data'], dtype='object')


---DataFrame轉置---
      Nevada  Ohio
2000     NaN   1.5
2001     1.1   1.7
2002     2.2   3.6
        2000  2001  2002
Nevada   NaN   1.1   2.2
Ohio     1.5   1.7   3.6


---指定索引順序,使用切片初始化順序---
      Nevada  Ohio
2001     1.1   1.7
2002     2.2   3.6
2003     NaN   NaN
      Nevada  Ohio
2000     NaN   1.5
2001     1.1   1.7
---指定索引和列名---
state  Nevada  Ohio
year               
2000      NaN   1.5
2001      1.1   1.7
2002      2.2   3.6
[[nan 1.5]
 [1.1 1.7]
 [2.2 3.6]]
[['wang' 2000 1.0 16.5 nan True]
 ['wei' 2010 1.1 16.5 -1.0 False]
 ['RPA' 2020 1.2 16.5 -1.1 False]
 ['python' 2030 1.3 16.5 nan False]
 ['linux' 2040 1.4 16.5 -1.2 False]
 ['C' 2050 1.5 16.5 nan False]]

Process finished with exit code 0
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章