import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape, quoteattr

import pandas as pd
def iter_records(records):
    """Yield one dict per record node, mapping variable names to text.

    :param records: iterable of record elements (for example the XML root
        element); every child of a record must carry a ``var_name``
        attribute.
    :return: generator producing one ``{var_name: text}`` dict per record,
        in iteration order. For ElementTree elements the text is ``None``
        when the child element is empty.
    :raises KeyError: if a child element has no ``var_name`` attribute.
    """
    for record in records:
        # Each child of the record contributes one key/value pair; the
        # resulting dict is the data of a single record node.
        yield {var.attrib['var_name']: var.text for var in record}
def read_xml(xmlFileName):
    """Read an XML file of records and return its contents as a DataFrame.

    :param xmlFileName: path of the XML file to parse.
    :return: ``pandas.DataFrame`` built from the dicts that
        ``iter_records`` yields for the children of the root element
        (one row per record, one column per ``var_name``).
    """
    # Open in binary mode so the parser sees the raw bytes and honours the
    # encoding named in the XML declaration; text mode would decode the
    # file with the platform's default encoding before parsing.
    with open(xmlFileName, 'rb') as xml_file:
        tree = ET.parse(xml_file)
    # Access the root node.
    root = tree.getroot()
    # Walk the records under the root and collect them into a DataFrame.
    return pd.DataFrame(list(iter_records(root)))
def xml_encode(row):
    """Serialize one DataFrame row as a ``<record>`` XML fragment.

    Field names and values are XML-escaped so that characters such as
    ``&``, ``<``, ``>`` and quotes cannot produce malformed output.

    :param row: ``pandas.Series`` whose index labels become ``var_name``
        attributes and whose values become the text of ``<var>`` elements.
    :return: multi-line string containing the ``<record>`` element.
    """
    # Opening <record> tag; the element name can be changed as needed.
    xmlItem = [' <record>']
    # Emit one <var> element per field of the row.
    for field, value in row.items():
        # quoteattr() returns the attribute value already wrapped in
        # quotes; escape() protects the element text.
        line = ' <var var_name={0}>{1}</var>'.format(
            quoteattr(str(field)), escape(str(value))
        )
        xmlItem.append(line)
    # Closing </record> tag.
    xmlItem.append(' </record>')
    return '\n'.join(xmlItem)
def write_xml(xmlFileName, data):
    """Write a DataFrame to an XML file, one ``<record>`` per row.

    :param xmlFileName: path of the XML file to create or overwrite.
    :param data: ``pandas.DataFrame`` whose rows are serialized with
        ``xml_encode``.
    :return: None
    """
    # Encode explicitly as UTF-8 so the bytes on disk match the encoding
    # declared in the XML header, whatever the platform default is.
    with open(xmlFileName, 'w', encoding='utf-8') as xmlfile:
        # Write the XML declaration.
        xmlfile.write(
            '<?xml version="1.0" encoding="UTF-8"?>'
        )
        # Opening root tag; the element name can be changed as needed.
        xmlfile.write('<records>\n')
        # Write the data: each row becomes one <record> fragment.
        xmlfile.write(
            '\n'.join(data.apply(xml_encode, axis=1))
        )
        # Closing root tag; keep in sync with the opening tag above.
        xmlfile.write('\n</records>')
# Script section: hard-coded input path and output path for the example run.
xml_filenane = r'E:\data\realEstate_trans.xml'
new_filename = r'E:\data\realEstate_trans_output.xml'
xml_df = read_xml(xml_filenane)  # load the XML records into a DataFrame
print(xml_df)
# write_xml(new_filename, xml_df)  # write the DataFrame to a new XML file
【pandas 小記】pandas 讀寫xml文件
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.