【pandas 小記】pandas 讀寫xml文件


import xml.etree.ElementTree as ET
import pandas as pd

def iter_records(records):
    """
    Lazily convert each record node into a plain dictionary.

    :param records: iterable of record elements; every child of a record
        must carry a ``var_name`` attribute (a missing one raises KeyError).
    :return: generator yielding one ``{var_name: element text}`` dict per
        record, in document order.
    """
    for node in records:
        # Each child element contributes one key/value pair:
        # its var_name attribute -> its text content.
        yield {child.attrib['var_name']: child.text for child in node}


def read_xml(xmlFileName):
    """
    Read an XML file and return its records as a DataFrame.

    :param xmlFileName: path of the XML file to parse.
    :return: ``pandas.DataFrame`` built from the dictionaries that
        ``iter_records`` yields for the children of the root element
        (one row per child).
    """
    # Open in binary mode so the XML parser decodes the bytes itself, using
    # the encoding declared in the XML prolog. Text mode would decode with
    # the platform's locale encoding first, corrupting non-ASCII content
    # whenever the locale differs from the file's real encoding.
    with open(xmlFileName, 'rb') as xml_file:
        root = ET.parse(xml_file).getroot()

    # Walk the records under the root node and collect them into a frame.
    return pd.DataFrame(list(iter_records(root)))


def xml_encode(row):
    """
    Serialize one DataFrame row as a ``<record>`` XML fragment.

    Each (label, value) pair of the row becomes a
    ``<var var_name="label">value</var>`` line. Labels and values are
    XML-escaped so that characters such as ``&``, ``<``, ``>`` and ``"``
    cannot produce malformed output.

    :param row: ``pandas.Series`` whose index holds the field names.
    :return: the record as a newline-joined string (no trailing newline).
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    # Opening <record> tag; the tag name can be changed as needed.
    lines = ['  <record>']

    # items() pairs each label with its own value, which also stays correct
    # when the index contains duplicate labels.
    for field, value in row.items():
        # format() yields the same text the original str.format() call did;
        # the attribute value additionally needs its double quotes escaped.
        name = escape(format(field), {'"': '&quot;'})
        text = escape(format(value))
        lines.append('  <var var_name="{0}">{1}</var>'.format(name, text))

    # Closing </record> tag.
    lines.append('  </record>')

    return '\n'.join(lines)


def write_xml(xmlFileName, data):
    """
    Write a DataFrame to an XML file, one ``<record>`` per row.

    :param xmlFileName: path of the output file (overwritten if present).
    :param data: ``pandas.DataFrame`` to serialize; each row is rendered
        with ``xml_encode``.
    :return: None
    """
    # The header declares UTF-8, so the file must really be written as UTF-8
    # instead of the platform's locale encoding.
    with open(xmlFileName, 'w', encoding='utf-8') as xmlfile:
        # Header.
        xmlfile.write(
            '<?xml version="1.0" encoding="UTF-8"?>'
        )
        xmlfile.write('<records>\n')   # root tag name can be changed as needed

        # Body. Rows are iterated explicitly: DataFrame.apply(axis=1) on an
        # empty frame returns an empty DataFrame, and joining that would
        # write the column labels as if they were data.
        xmlfile.write(
            '\n'.join(xml_encode(row) for _, row in data.iterrows())
        )
        xmlfile.write('\n</records>')   # closing root tag


# Demo run: parse the sample XML file into a DataFrame and print it.
# NOTE(review): "xml_filenane" looks like a typo for "xml_filename"; the name
# is left unchanged here because it is a module-level variable.
xml_filenane = r'E:\data\realEstate_trans.xml'
new_filename = r'E:\data\realEstate_trans_output.xml'
xml_df = read_xml(xml_filenane)    # load the XML file into a DataFrame
print(xml_df)
# write_xml(new_filename, xml_df)  # write the DataFrame to a new XML file

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章