之前分享過一期《Excel 讀寫【一】》,這兒分享下其他的收穫。
一些嘗試
openpyxl
A. 普通寫入內容 【可寫可讀模式】
# Column titles written to the first row of every sheet.
header = ['ID', 'Product', 'Status', 'Fee', 'Tr updated']

# Sample payload: one inner list per sheet, one tuple per row.
data = [
    [
        (10, 'zyoooo00', '100', 50, '2020-05-16'),
        (1, 'zyoooo01', '10', 5, '2020-05-15'),
    ],
    [
        (11, 'zyoooo1', '100', 50, '2020-05-16'),
        (10, 'zyoooo11', '10', 5, '2020-05-15'),
    ],
    [
        (12, 'zyoooo2', '100', 50, '2020-05-16'),
        (10, 'zyoooo0', '10', 5, '2020-05-15'),
    ],
    [
        (3, 'zyoooo3', '100', 50, '2020-05-16'),
        (10, 'zyoooo0', '10', 5, '2020-05-15'),
    ],
]
def insert_excel_data(self, header, all_data):
    """
    Create a new workbook (normal read/write mode) with one sheet per dataset.

    Each sheet gets ``header`` in row 1 followed by one row per record. Every
    value is written as text via ``str()``. The workbook is saved to a
    timestamped ``test_*.xlsx`` file in a hard-coded directory.

    Every value of a record is written, so the number of data columns follows
    the record itself instead of being fixed at five.

    :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
    :param all_data: [list1, list2, ...]; list1 holds all rows of the first sheet
    :return: None
    """
    import time

    import openpyxl

    workbook = openpyxl.Workbook()
    # A fresh Workbook always contains one default sheet; keep a handle on it
    # so it can be dropped once the real sheets exist.
    default_sheet = workbook.active
    for sheet_index, rows in enumerate(all_data):
        sheet = workbook.create_sheet(index=sheet_index)
        Log.info(sheet)
        for column, title in enumerate(header, start=1):
            sheet.cell(row=1, column=column, value=str(title))
        # Data starts on row 2, directly below the header row.
        for row_number, record in enumerate(rows, start=2):
            for column, value in enumerate(record, start=1):
                sheet.cell(row=row_number, column=column, value=str(value))
        Log.info('當前sheet數據生成成功')
    # Drop the default sheet only when data sheets were created: a workbook
    # without any sheet cannot be saved.
    if len(workbook.sheetnames) > 1:
        workbook.remove(default_sheet)
    ex_time = time.strftime("%Y%m%d_%H%M%S")
    workbook.save(r'D:\work\gys\test_' + ex_time + '.xlsx')
    print('excel創建成功')
def insert_excel_data_a(self, header, all_data):
    """
    Create a new workbook (normal read/write mode) with one sheet per dataset.

    Each sheet gets ``header`` in row 1 followed by one row per record. At most
    ``len(header)`` values of each record are written; a record shorter than
    the header simply leaves its trailing cells empty. Every value is written
    as text via ``str()``. The workbook is saved to a timestamped
    ``test_*.xlsx`` file in a hard-coded directory.

    :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
    :param all_data: [list1, list2, ...]; list1 holds all rows of the first sheet
    :return: None
    """
    import time

    import openpyxl

    workbook = openpyxl.Workbook()
    # A fresh Workbook always contains one default sheet; keep a handle on it
    # so it can be dropped once the real sheets exist.
    default_sheet = workbook.active
    width = len(header)
    for sheet_index, rows in enumerate(all_data):
        sheet = workbook.create_sheet(index=sheet_index)
        Log.info(sheet)
        for column, title in enumerate(header, start=1):
            sheet.cell(row=1, column=column, value=str(title))
        # Data starts on row 2, directly below the header row.
        for row_number, record in enumerate(rows, start=2):
            # Slicing caps the row at the header width without raising when
            # the record is shorter than the header.
            for column, value in enumerate(record[:width], start=1):
                sheet.cell(row=row_number, column=column, value=str(value))
        Log.info('當前sheet數據生成成功')
    # Drop the default sheet only when data sheets were created: a workbook
    # without any sheet cannot be saved.
    if len(workbook.sheetnames) > 1:
        workbook.remove(default_sheet)
    ex_time = time.strftime("%Y%m%d_%H%M%S")
    workbook.save(r'D:\work\gys\test_' + ex_time + '.xlsx')
    print('excel創建成功')
B.只寫模式
其實這個也是比較推薦的; 但我電腦配置沒那麼好(一跑起來,內存要用近90%),所以最推薦的是 前面的 insert_excel_new()
def insert_excel(self, all_data, header):
    """
    Create a new workbook in openpyxl write-only mode, one sheet per dataset.

    Each sheet receives ``header`` as its first row, then every record of the
    matching dataset, appended in order. The caller's ``all_data`` lists are
    left untouched. The workbook is saved to a timestamped ``test_*.xlsx``
    file in a hard-coded directory.

    :param all_data: [list1, list2, ...]; list1 holds all rows of the first sheet
    :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
    :return: None
    """
    import time

    import openpyxl

    workbook = openpyxl.Workbook(write_only=True)
    for sheet_index, rows in enumerate(all_data):
        sheet = workbook.create_sheet(index=sheet_index)
        Log.info(sheet)
        # Append the header directly rather than inserting it into the caller's
        # list: that would mutate the argument (and duplicate the header if
        # the same data is written twice).
        sheet.append(header)
        for record in rows:
            sheet.append(record)
        Log.info('當前sheet數據生成成功')
    ex_time = time.strftime("_%Y%m%d_%H%M%S")
    workbook.save(r'D:\work\gys\test_' + ex_time + '.xlsx')
    print('excel創建成功')
pandas
def get_excel_data_new_1(self, file, sheet_index=0):
    """
    Read one sheet of an Excel file with pandas and print it as a list of rows.

    :param file: path of the Excel file
    :param sheet_index: position of the sheet to read
    :return: None (the rows are printed)
    """
    import numpy as np
    import pandas as pd

    # DataFrame -> ndarray -> nested Python lists, one inner list per row.
    table = np.array(pd.read_excel(file, sheet_name=sheet_index))
    print(table.tolist())
def get_excel_data_new_2(self, file, sheet_name):
    """
    Read a sheet by name with pandas and print it as a list of rows.

    The whole workbook is loaded first (``sheet_name=None``), then the
    requested sheet is looked up by its name.

    :param file: path of the Excel file
    :param sheet_name: name of the sheet to read
    :return: None (the rows are printed)
    """
    import numpy as np
    import pandas as pd

    every_sheet = pd.read_excel(file, sheet_name=None)
    # DataFrame -> ndarray -> nested Python lists, one inner list per row.
    rows = np.array(every_sheet[sheet_name]).tolist()
    print(rows)
xlwings
xlwings 好像有時候需要 打開某個Excel文件。
【下方代碼:若已打開此 Excel,不報錯;若沒打開,就報錯 ‘pywintypes.com_error: (-2147221164, ‘沒有註冊類’, None, None)’】
def get_excel_data_new_3(self, file, sheet_index=0):
    """
    Read one sheet through xlwings, row by row, and print the collected rows.

    The region read is the table that expands from cell A1. The row count and
    the column count are printed before the data.

    :param file: path of the Excel file
    :param sheet_index: position of the sheet to read
    :return: None (the rows are printed)
    """
    import xlwings as xw
    from openpyxl.utils import get_column_letter

    book = xw.Book(file)
    sheet = book.sheets[sheet_index]
    table = sheet.range('a1').expand('table')
    tot_rows = table.rows.count
    print(tot_rows)
    tot_ncols = table.columns.count
    print(tot_ncols)
    # get_column_letter turns a 1-based column number into its letters
    # (1 -> 'A', 267 -> 'JG'), so it also works past column 26, unlike a
    # chr()-based lookup. column_index_from_string is the reverse conversion.
    last_column = get_column_letter(tot_ncols)
    data = [
        sheet.range('A{shu}:{lie}{shu}'.format(shu=row_number, lie=last_column)).value
        for row_number in range(1, tot_rows + 1)
    ]
    print(data)
一種有缺陷的思路
需求:將Excel轉CSV
思路:快速獲取Excel的內容,再使用 pandas 的 to_csv() 保存;
openpyxl的 Worksheet.columns 方法不能在只讀模式下使用。
def delete_header(self, data_list):
    """
    Split one column of values into its data and its header.

    The first element is taken as the header. Every element equal to the
    header is removed from the data, not only the first one, so header cells
    repeated further down the column are dropped as well.

    :param data_list: values of one column, header first
    :return: ``(values, header)``; ``([], None)`` when ``data_list`` is empty
    """
    # An empty column has no header to split off.
    if len(data_list) == 0:
        return [], None
    header = data_list[0]
    values = [value for value in data_list if value != header]
    return values, header
def get_excel_data_4(self, file):
    """
    Merge every sheet of an Excel workbook into one CSV file.

    All sheets are expected to share the same header row. The header is taken
    from the first non-empty sheet; on every sheet a leading row equal to that
    header is skipped and the remaining rows are stacked in sheet order.

    Rows are kept whole and the header is passed as column labels, so columns
    with identical names are all preserved and a data cell that happens to
    equal a header title is not dropped.

    The CSV is written next to the source file, with ``_HHMMSS`` appended to
    the base name and the extension replaced by ``.csv``.

    :param file: path of the Excel file
    :return: None
    """
    import os
    import time

    import pandas as pd
    from openpyxl import load_workbook

    excel = load_workbook(file, read_only=True)
    header = None
    rows = []
    try:
        for name in excel.sheetnames:
            print('當前sheet Name是 {}'.format(name))
            sheet_rows = iter(excel[name].values)
            first_row = next(sheet_rows, None)
            if first_row is None:
                # Sheet without any row: nothing to collect.
                continue
            if header is None:
                header = list(first_row)
                pending = []
            elif list(first_row[:len(header)]) == header:
                pending = []
            else:
                # No header row on this sheet: its first row is data.
                pending = [first_row]
            width = len(header)
            for row in pending:
                cells = list(row[:width])
                cells.extend([None] * (width - len(cells)))
                rows.append(cells)
            for row in sheet_rows:
                # Normalise every row to the header width so the frame stays
                # rectangular even when a sheet is narrower or wider.
                cells = list(row[:width])
                cells.extend([None] * (width - len(cells)))
                rows.append(cells)
    finally:
        # A read-only workbook keeps the file open until closed explicitly.
        excel.close()

    if header is None:
        data_df = pd.DataFrame()
    else:
        data_df = pd.DataFrame(rows, columns=header)
    new_time = time.strftime("_%H%M%S")
    # splitext swaps only the real extension; a plain str.replace could leave
    # the path unchanged (e.g. '.XLSX') and overwrite the source workbook.
    base_name, _extension = os.path.splitext(file)
    new_file = ''.join([base_name, new_time, '.csv'])
    data_df.to_csv(new_file, index=False)
缺陷:用pandas的 to_csv(),去生成CSV文件,表頭若是有重複字段,在生成data_dict時候,update(),會把前面的 key_value 覆蓋掉;
【我的Excel文件 確實是有 2個表頭字段是一樣的】;
獲取Excel的sheet總數量 和 sheet_name
def read_excel_sheetname(self, file):
    """
    Print the sheet names and the sheet count of an Excel file, five ways.

    The same information is fetched through openpyxl, xlwings, xlrd (via
    ``sheet_names()`` and via ``sheets()``) and pandas; each variant prints
    the list of names followed by the count.

    NOTE(review): xlrd 2.x reportedly reads only ``.xls`` files; confirm the
    installed version before pointing this at an ``.xlsx`` file.

    :param file: path of the Excel file
    :return: None
    """
    from openpyxl import load_workbook
    import xlwings as xw
    import xlrd
    import pandas as pd

    # openpyxl: a read-only workbook keeps the file open until it is closed.
    workbook = load_workbook(file, read_only=True, keep_vba=False)
    try:
        names = workbook.sheetnames
        print(names)
        print(len(names), 'zyooooxie_csdn')
    finally:
        workbook.close()

    # xlwings: attaches to the workbook in Excel (the file was opened there
    # beforehand), so it is deliberately left open.
    xw_book = xw.Book(file)
    chang = len(xw_book.sheets)
    print([s.name for s in xw_book.sheets])
    print(chang, 'zyooooxie_csdn')

    # xlrd: one open serves both the sheet_names() and the sheets() variant.
    xlrd_book = xlrd.open_workbook(file)
    names = xlrd_book.sheet_names()
    print(names)
    print(len(names), 'zyooooxie_csdn')

    all_sheets = xlrd_book.sheets()
    print([a.name for a in all_sheets])
    print(len(all_sheets), 'zyooooxie_csdn')

    # pandas: the context manager closes the underlying file handle.
    with pd.ExcelFile(file) as excel_file:
        names = excel_file.sheet_names
        print(names)
        print(len(names), 'zyooooxie_csdn')
這些方法我並沒有對大數據量的 Excel 做實踐;若對大數據量的讀寫有興趣,可以看下我寫的第一期。
交流技術 歡迎+QQ 153132336 zy
個人博客 https://blog.csdn.net/zyooooxie