1.在腳本所在的目錄下創建數據目錄,比如202004,表示爲今年4月的數據目錄,5月則創建202005,以此類推(這個目錄必須事先創建)
2.將原始的數據文件放入到這個目錄(文件的編碼格式必須爲utf-8;如果不是,要先用文本編輯器(例如記事本 Notepad)打開,然後另存爲utf-8編碼)
3.運行腳本,腳本運行成功則會在202004目錄下生成一個新的202004_sql目錄,相應的sql文件也在這裏面
4.進入202004_sql目錄,連接到數據庫mysql -uroot -p(回車後輸入數據庫密碼)
5.在相應的數據庫下運行這些sql文件
6.無報錯表示數據導入成功
import io
import os
# Directory the script is launched from; the month's data directory is
# expected to live directly beneath it.
base_dir = os.getcwd()

# Name of the data directory to process, typed in by the operator.
date_dir = input('date_dir:')

# Output directory for the generated .sql files: <base>/<date>/<date>_sql.
sql_dir = "/".join((base_dir, date_dir, date_dir + '_sql'))

# Create the output directory on first run only. os.mkdir does not create
# missing parents, so the data directory itself must already exist.
if not os.path.exists(sql_dir):
    os.mkdir(sql_dir)
# Columns of the target table, in the order the raw fields are mapped onto them.
_COLUMNS = (
    'msg_type', 'sy_fn', 'trans_date', 'SP_code', 'trans_handle_code',
    'ser_condition_code', 'trans_type_zh', 'acc_trans_date',
    'acc_trans_time', 'trans_amount', 'term_no', 'term_serial_no',
    'mer_code', 'term_no_cups', 'mer_code_cups', 'pay_code_type', 'pay_no',
    'bill_no_type', 'bill_no', 'user_area_code', 'user_att_area_code',
    'card_no', 'industry_org_code', 'bank_code', 'term_org_trans_flow',
    'system_org_trans_flow', 'liq_date', 'Ret_Reference_no', 'Auth_code_38',
    '54_domain', 'Receive_mechanism_id', 'trans_status',
    'cups_response_code', 'Acc_response_code', 'Industry_respons_code',
    'trans_channel_60', 'trans_code', 'acc_reservation_122',
    'multi_channel_error_code', 'add_industry_info', 'retain_add_info',
    'send_mechanism_code', 'std_bus_model', 'std_enter_org_name',
    'std_bus_no', 'std_bus_name', 'std_bus_auth_no', 'std_user_no_types',
    'std_user_no', 'std_user_area_no', 'std_user_no_area_add_code',
    'rece_institution_id_code', 'convert_org_code', 'msg_resv_fld',
    'mer_code2', 'mix', 'ind_mer_code', 'mer_pay_add',
)


def _line_to_insert(line, table):
    """Return the INSERT statement for one raw line, or None for a blank line."""
    # Drop the line terminator so it neither defeats the blank-line check
    # nor ends up inside the last field's value.
    line = line.rstrip('\n')
    if not line:
        return None

    # Fields are separated by a backtick; commas inside the data are
    # rewritten to '&' first, exactly as before.
    fields = line.replace(',', '&').split('`')

    # Empty fields and missing trailing fields are both filled with the text
    # 'NULL'. A list must be rebuilt here: assigning to a loop variable does
    # not change the list.
    fields = [field if field != '' else 'NULL' for field in fields]
    fields.extend(['NULL'] * (len(_COLUMNS) - len(fields)))

    # NOTE(review): values are rendered through Python's tuple repr, so the
    # filler is the quoted string 'NULL', not an SQL NULL, and quoting follows
    # Python rules -- confirm this is what the target table expects.
    # NOTE(review): a line with more fields than columns still yields a
    # statement with too many values; it is left for the database to reject.
    column_list = ", ".join("`{}`".format(name) for name in _COLUMNS)
    return "insert into {}({}) values{};\n".format(
        table, column_list, tuple(fields))


def convert_data(filename):
    """Convert one raw data file into a file of INSERT statements.

    Reads *filename* as UTF-8 and, for every non-blank line, appends one
    ``insert into table_<date_dir>`` statement to ``<filename>.sql`` inside
    ``sql_dir``. Blank lines produce no statement. Uses the module-level
    ``date_dir`` and ``sql_dir`` values.

    Args:
        filename: Path of the raw data file, normally a bare name relative
            to the current working directory.
    """
    target = os.path.join(sql_dir, filename + ".sql")
    table = "table_" + date_dir

    # The output is opened once, not once per line, and written as UTF-8 so
    # it matches the input instead of depending on the platform default.
    # NOTE(review): append mode is kept from the original, so running the
    # script twice for the same file duplicates every statement.
    with io.open(filename, 'r', encoding='utf-8') as source, \
            io.open(target, 'a', encoding='utf-8') as output:
        statements = (_line_to_insert(line, table) for line in source)
        output.writelines(
            statement for statement in statements if statement is not None)
def _count_lines_in(directory):
    """Return the total number of lines in the non-directory entries of *directory*.

    Changes the working directory to *directory*, because the names returned
    by ``os.listdir`` are relative to it.
    """
    names = os.listdir(directory)
    os.chdir(directory)
    total = 0
    for name in names:
        if os.path.isdir(name):
            continue
        # Only line breaks matter here, so undecodable bytes are replaced
        # instead of aborting the count; the encoding is set explicitly so
        # the result does not depend on the platform default. Iterating the
        # file avoids loading it into memory as a whole.
        with open(name, 'r', encoding='utf-8', errors='replace') as handle:
            total += sum(1 for _ in handle)
    return total


def check_data():
    """
    Print the number of lines in the source data files and in the generated
    SQL files so the two totals can be compared.

    Reads the module-level ``base_dir``, ``date_dir`` and ``sql_dir`` values.
    The working directory is left at ``sql_dir`` when the function returns.
    """
    print("source_data_lines:", _count_lines_in(os.path.join(base_dir, date_dir)))
    print("sql_data_lines:", _count_lines_in(sql_dir))
if __name__ == '__main__':
    # Work from inside the data directory so the bare names returned by
    # os.listdir can be opened and tested directly.
    source_data_dir = os.path.join(base_dir, date_dir)
    files = os.listdir(source_data_dir)
    os.chdir(source_data_dir)

    for filename in files:
        # Sub-directories (including the generated *_sql output directory)
        # are not data files.
        if os.path.isdir(filename):
            continue
        convert_data(filename)

    # Report source and generated line counts for a manual sanity check.
    check_data()