Python腳本之讀寫Excel【二】

之前分享過一期 Excel 讀寫【一】，這兒分享下其他的收穫。

一些嘗試

openpyxl

A. 普通寫入內容【可寫可讀模式】

        # Sample inputs for the writers below: one header row, plus one list of
        # row tuples per sheet (four sheets with two rows each in this example).
        header = ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        data = [[(10, 'zyoooo00', '100', 50, '2020-05-16'), (1, 'zyoooo01', '10', 5, '2020-05-15')], [(11, 'zyoooo1', '100', 50, '2020-05-16') , (10, 'zyoooo11', '10', 5, '2020-05-15')], [(12, 'zyoooo2', '100', 50, '2020-05-16'), (10, 'zyoooo0', '10', 5, '2020-05-15')], [(3, 'zyoooo3', '100', 50, '2020-05-16'), (10, 'zyoooo0', '10', 5, '2020-05-15')]]

    def insert_excel_data(self, header, all_data):
        """
        Create a new workbook and write one sheet per entry of ``all_data``.

        The header is written to row 1 of every sheet. Each record then fills
        the first five columns of its own row, starting at row 2. Every value
        is stored as text via ``str()``.

        :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        :param all_data: [list1, list2, list3, list4]; list1 holds all rows of the first sheet
        :return: None
        """
        import openpyxl
        book = openpyxl.Workbook()

        for sheet_pos, sheet_rows in enumerate(all_data):
            ws = book.create_sheet(index=sheet_pos)
            Log.info(ws)

            for col_no, title in enumerate(header, start=1):
                ws.cell(row=1, column=col_no, value=str(title))

            for row_no, record in enumerate(sheet_rows, start=2):
                # This variant always writes exactly five fields per record.
                for field_pos in range(5):
                    ws.cell(row=row_no, column=field_pos + 1, value=str(record[field_pos]))

            Log.info('當前sheet數據生成成功')

        # A fresh Workbook() comes with one extra sheet named "Sheet"; drop it.
        book.remove(book["Sheet"])

        stamp = time.strftime("%Y%m%d_%H%M%S")
        book.save(r'D:\work\gys\test_' + stamp + '.xlsx')

        print('excel創建成功')

    def insert_excel_data_a(self, header, all_data):
        """
        Create a new workbook and write one sheet per entry of ``all_data``.

        The header is written to row 1 of every sheet. Each record then fills
        as many columns as the header has, starting at row 2. Every value is
        stored as text via ``str()``.

        :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        :param all_data: [list1, list2, list3, list4]; list1 holds all rows of the first sheet
        :return: None
        """
        import openpyxl
        book = openpyxl.Workbook()

        for sheet_pos, sheet_rows in enumerate(all_data):
            ws = book.create_sheet(index=sheet_pos)
            Log.info(ws)

            for col_no, title in enumerate(header, start=1):
                ws.cell(row=1, column=col_no, value=str(title))

            for row_no, record in enumerate(sheet_rows, start=2):
                # The number of fields written follows the header length.
                for col_no, _ in enumerate(header, start=1):
                    ws.cell(row=row_no, column=col_no, value=str(record[col_no - 1]))

            Log.info('當前sheet數據生成成功')

        # A fresh Workbook() comes with one extra sheet named "Sheet"; drop it.
        book.remove(book["Sheet"])

        stamp = time.strftime("%Y%m%d_%H%M%S")
        book.save(r'D:\work\gys\test_' + stamp + '.xlsx')

        print('excel創建成功')

B.只寫模式

其實這個也是比較推薦的；但我電腦配置沒那麼好（一跑起來，內存要用近90%），所以最推薦的是前面的 insert_excel_new()

    def insert_excel(self, all_data, header):
        """
        Create a new workbook in openpyxl's write-only mode and fill it.

        Each entry of ``all_data`` becomes one sheet: the header is appended
        as the first row, followed by that entry's rows in order.

        The header is appended straight to the sheet instead of being inserted
        into the caller's list, so ``all_data`` is left unmodified and calling
        this method again with the same data does not duplicate the header.

        :param all_data: [list1, list2, list3, list4]; list1 holds all rows of the first sheet
        :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        :return: None
        """
        import openpyxl
        workbook = openpyxl.Workbook(write_only=True)
        for sheet_pos, sheet_rows in enumerate(all_data):
            sheet = workbook.create_sheet(index=sheet_pos)
            Log.info(sheet)

            # Write-only sheets are filled strictly row by row via append().
            sheet.append(header)
            for row in sheet_rows:
                sheet.append(row)
            Log.info('當前sheet數據生成成功')

        ex_time = time.strftime("_%Y%m%d_%H%M%S")
        workbook.save(r'D:\work\gys\test_' + ex_time + '.xlsx')

        print('excel創建成功')

pandas

    def get_excel_data_new_1(self, file, sheet_index=0):
        """
        Read one sheet with pandas and print its rows as a nested list.

        :param file: path of the Excel file
        :param sheet_index: position of the sheet to read
        :return: None (the rows are printed)
        """
        import numpy as np
        import pandas as pd

        frame = pd.read_excel(file, sheet_name=sheet_index)

        # DataFrame -> ndarray -> plain Python list of rows.
        matrix = np.array(frame)
        rows = matrix.tolist()

        print(rows)

    def get_excel_data_new_2(self, file, sheet_name):
        """
        Read a sheet by name with pandas and print its rows as a nested list.

        :param file: path of the Excel file
        :param sheet_name: name of the sheet to read
        :return: None (the rows are printed)
        """
        import numpy as np
        import pandas as pd

        # sheet_name=None loads every sheet; the wanted one is picked by name.
        frames_by_sheet = pd.read_excel(file, sheet_name=None)
        rows = np.array(frames_by_sheet[sheet_name]).tolist()

        print(rows)

xlwings

xlwings 好像有時候需要打開某個Excel文件。
【下方代碼在此 Excel 已打開時不報錯；若沒打開，就報錯 "pywintypes.com_error: (-2147221164, '沒有註冊類', None, None)"】

    def get_excel_data_new_3(self, file, sheet_index=0):
        """
        Read one sheet with xlwings and print its rows as a nested list.

        The table expanding from cell A1 determines the row and column
        counts, which are printed before each row is read in turn.

        :param file: path of the Excel file
        :param sheet_index: position of the sheet to read
        :return: None (the rows are printed)
        """
        import xlwings as xw
        from openpyxl.utils import get_column_letter

        book = xw.Book(file)
        worksheet = book.sheets[sheet_index]

        table = worksheet.range('a1').expand('table')
        tot_rows = table.rows.count
        print(tot_rows)
        tot_ncols = table.columns.count
        print(tot_ncols)

        # openpyxl.utils.get_column_letter turns a 1-based column number into
        # its letters (1 -> 'A', 267 -> 'JG'); column_index_from_string is the
        # inverse. This is preferred over chr(97 + n), which only covers the
        # single-letter columns.
        last_col = get_column_letter(tot_ncols)

        data = [
            worksheet.range('A{shu}:{lie}{shu}'.format(shu=row_no, lie=last_col)).value
            for row_no in range(1, tot_rows + 1)
        ]

        print(data)

一種有缺陷的思路

需求：將Excel轉CSV
思路：快速獲取Excel的內容，再使用 pandas 的 to_csv() 保存；

openpyxl的 Worksheet.columns 方法不能在只讀模式下使用。

    def delete_header(self, data_list):
        """
        Split a column into its header and its remaining values.

        The first element is taken as the header. Every element equal to it
        is dropped, so repeated header cells further down are removed too.

        :param data_list: non-empty list whose first element is the header
        :return: (values without the header, header)
        """
        header = data_list[0]

        remaining = []
        for item in data_list:
            if item != header:
                remaining.append(item)

        return remaining, header

    def get_excel_data_4(self, file):
        """
        Convert an .xlsx workbook to CSV by collecting every sheet column-wise.

        All sheets must share the same header row. The values of every sheet
        are gathered per column, the header cells are stripped from each
        column, and the result is written through pandas ``to_csv()``.

        Known limitation: columns are keyed by their header text, so when two
        header fields are identical the later column overwrites the earlier one.

        :param file: path of the .xlsx file; the CSV is written to the same
            path with ``.xlsx`` replaced by ``_HHMMSS.csv``
        :return: None
        """
        from openpyxl import load_workbook
        import pandas as pd
        import xlrd

        # The column count is taken from the first sheet via xlrd.
        # NOTE(review): xlrd 2.0+ only reads .xls files - confirm the installed
        # version still opens .xlsx here.
        xls_book = xlrd.open_workbook(file)
        tot_ncols = xls_book.sheet_by_index(0).ncols

        all_data = [list() for _ in range(tot_ncols)]

        workbook = load_workbook(file, read_only=True)
        try:
            for name in workbook.sheetnames:
                print('當前sheet Name是 {}'.format(name))
                # workbook[name] replaces the deprecated get_sheet_by_name().
                sheet = workbook[name]

                for row in sheet.values:
                    for col in range(tot_ncols):
                        all_data[col].append(row[col])
        finally:
            # Read-only workbooks keep the file open until closed explicitly.
            workbook.close()

        data_dict = dict()
        for column in all_data:
            # Split each column once into (values, header).
            values, title = self.delete_header(column)
            data_dict[title] = values

        print(data_dict)

        data_df = pd.DataFrame(data_dict)
        new_time = time.strftime("_%H%M%S")
        new_file = file.replace('.xlsx', ''.join([new_time, '.csv']))

        data_df.to_csv(new_file, index=False)

缺陷：用 pandas 的 to_csv() 去生成 CSV 文件時，表頭若是有重複字段，在生成 data_dict 的時候，後寫入的同名 key 會把前面的 key-value 覆蓋掉；
【我的Excel文件確實是有 2個表頭字段是一樣的】；

獲取Excel的sheet總數量和 sheet_name

    def read_excel_sheetname(self, file):
        """
        Print the sheet names and the sheet count of a workbook in five ways.

        The same file is opened in turn with openpyxl, xlwings, xlrd (twice,
        through two different calls) and pandas; each time the list of sheet
        names is printed, followed by the number of sheets.

        :param file: path of the Excel file
        :return: None
        """
        from openpyxl import load_workbook
        import xlwings as xw
        import xlrd
        import pandas as pd

        # openpyxl, read-only mode
        opx_book = load_workbook(file, read_only=True, keep_vba=False)
        sheet_total = len(opx_book.sheetnames)
        print(opx_book.sheetnames)
        print(sheet_total, 'zyooooxie_csdn')

        # xlwings; the xlsx file was already opened beforehand.
        # The book is not closed here.
        xw_book = xw.Book(file)
        sheet_total = len(xw_book.sheets)
        print([sheet.name for sheet in xw_book.sheets])
        print(sheet_total, 'zyooooxie_csdn')

        # xlrd via sheet_names()
        xlrd_book = xlrd.open_workbook(file)
        sheet_total = len(xlrd_book.sheet_names())
        print(xlrd_book.sheet_names())
        print(sheet_total, 'zyooooxie_csdn')

        # xlrd via sheets()
        xlrd_book = xlrd.open_workbook(file)
        sheet_objects = xlrd_book.sheets()
        sheet_total = len(sheet_objects)
        print([sheet.name for sheet in sheet_objects])
        print(sheet_total, 'zyooooxie_csdn')

        # pandas ExcelFile
        pd_book = pd.ExcelFile(file)
        sheet_total = len(pd_book.sheet_names)
        print(pd_book.sheet_names)
        print(sheet_total, 'zyooooxie_csdn')

這些方法我並沒對大數據量的 Excel 做實踐；若對大數據量的處理有興趣，可以看下我寫的第一期。

交流技術歡迎+QQ 153132336 zy
個人博客 https://blog.csdn.net/zyooooxie

Python腳本之讀寫Excel【二】

一些嘗試

一種有缺陷的思路

[轉帖]使用NMT和pmap解決JVM資源泄漏問題原創

Python實現大麥網搶票的四大關鍵技術點解析

Python 安裝庫指令大全

salesforce零基礎學習（一百三十八）零碎知識點小總結（十）

一款開源的.NET程序集反編譯、編輯和調試神器

關於接口協議，你必須要知道這些！

2020年上半年數據庫系統工程師考試

基於 Milvus + LlamaIndex 實現高級 RAG

【2024-05-21】以茶會友

數據準確性測試之【四】表記錄新增、修改、刪除，對其他字段值的影響

數據完整性測試之【二】導出的CSV、Excel文件和數據庫表裏的記錄

數據準確性測試之【二】前端顯示的值、接口返回值和數據庫表裏的記錄

數據完整性測試之【一】接口返回值和數據庫表裏的記錄

Python腳本之連接數據庫【一】

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結