python腳本將txt文本轉爲sql並簡單校驗數據

1.在腳本所在的目錄下創建數據目錄,比如202004,表示爲今年4月的數據目錄,5月則創建202005,以此類推(這個目錄必須事先創建)
2.將原始的數據文件放入到這個目錄(文件的編碼格式必須爲utf-8;如果不是,要先用Notepad++等文本編輯器打開,然後轉存爲utf-8)
3.運行腳本,腳本運行成功則會在202004目錄下生成一個新的202004_sql目錄,相應的sql文件也在這裏面
4.進入202004_sql目錄,執行 mysql -uroot -p 連接到數據庫(回車後輸入數據庫密碼)
5.在相應的數據庫下運行這些sql文件
6.無報錯表示數據導入成功

#!/usr/bin/python 
# translate txt_data to sql
#
import io
import os

# filename = input("filename:")
# print(filename)
# All paths are resolved against the directory the script is launched from.
base_dir = os.getcwd()
# Name of the month directory holding the source files (e.g. 202004).
date_dir = input('date_dir:')
# The generated SQL goes into a "<date_dir>_sql" sub-directory of the month
# directory; it is created here when it does not exist yet.
sql_dir = "/".join([base_dir, date_dir, date_dir + '_sql'])
if not os.path.exists(sql_dir):
  os.mkdir(sql_dir)
# Column names of the target table, in the order the source fields appear.
_SQL_COLUMNS = (
  'msg_type', 'sy_fn', 'trans_date', 'SP_code', 'trans_handle_code',
  'ser_condition_code', 'trans_type_zh', 'acc_trans_date', 'acc_trans_time',
  'trans_amount', 'term_no', 'term_serial_no', 'mer_code', 'term_no_cups',
  'mer_code_cups', 'pay_code_type', 'pay_no', 'bill_no_type', 'bill_no',
  'user_area_code', 'user_att_area_code', 'card_no', 'industry_org_code',
  'bank_code', 'term_org_trans_flow', 'system_org_trans_flow', 'liq_date',
  'Ret_Reference_no', 'Auth_code_38', '54_domain', 'Receive_mechanism_id',
  'trans_status', 'cups_response_code', 'Acc_response_code',
  'Industry_respons_code', 'trans_channel_60', 'trans_code',
  'acc_reservation_122', 'multi_channel_error_code', 'add_industry_info',
  'retain_add_info', 'send_mechanism_code', 'std_bus_model',
  'std_enter_org_name', 'std_bus_no', 'std_bus_name', 'std_bus_auth_no',
  'std_user_no_types', 'std_user_no', 'std_user_area_no',
  'std_user_no_area_add_code', 'rece_institution_id_code',
  'convert_org_code', 'msg_resv_fld', 'mer_code2', 'mix', 'ind_mer_code',
  'mer_pay_add',
)


def convert_data(filename, out_dir=None, table_suffix=None):
  """Convert one backtick-delimited text file into a file of SQL INSERTs.

  Every non-blank line of ``filename`` becomes one
  ``insert into table_<table_suffix>(...) values(...);`` statement in
  ``<out_dir>/<basename of filename>.sql``. The output file is rewritten
  from scratch on each call, so running the conversion twice does not
  duplicate rows.

  Per line:
    * commas inside the data are replaced by ``&``;
    * the line is split into fields on the backtick character;
    * rows shorter than the column list are padded with ``NULL``;
    * empty fields are replaced by ``NULL``.

  Args:
    filename: Path of the source file. It is decoded as UTF-8; a leading
      byte-order mark is dropped.
    out_dir: Directory receiving the ``.sql`` file. Defaults to the
      module-level ``sql_dir``.
    table_suffix: Text appended to ``table_`` to form the table name.
      Defaults to the module-level ``date_dir``.

  Returns:
    The number of INSERT statements written.
  """
  if out_dir is None:
    out_dir = sql_dir
  if table_suffix is None:
    table_suffix = date_dir

  width = len(_SQL_COLUMNS)
  # The statement prefix is identical for every row, so build it once.
  head = ("insert into " + "table_" + table_suffix
          + "(" + ", ".join("`" + name + "`" for name in _SQL_COLUMNS)
          + ") values")
  target = os.path.join(out_dir, os.path.basename(filename) + ".sql")

  written = 0
  with io.open(filename, 'r', encoding='utf-8-sig') as source, \
       io.open(target, 'w', encoding='utf-8') as sink:
    for raw in source:
      # Drop the line terminator so it does not end up in the last field.
      line = raw.rstrip('\r\n')
      # Skip blank lines; they would otherwise become all-NULL rows.
      if not line.strip():
        continue
      # Commas in the data become '&'; the backtick is the real separator.
      fields = line.replace(',', '&').split('`')
      # Rough handling: pad short rows up to the column count.
      # Rows with MORE fields than columns are written unchanged.
      if len(fields) < width:
        fields.extend(['NULL'] * (width - len(fields)))
      fields = [value if value != '' else 'NULL' for value in fields]
      # NOTE(review): values are rendered with Python's tuple repr, so the
      # NULL markers are emitted as the quoted string 'NULL', not as SQL
      # NULL, and quoting follows Python rather than SQL escaping rules.
      sink.write(head + str(tuple(fields)) + ";\n")
      written += 1
  return written

def _count_lines_in_dir(directory):
  """Return the total number of lines over all regular files in directory."""
  total = 0
  for name in os.listdir(directory):
    path = os.path.join(directory, name)
    if os.path.isdir(path):
      continue
    # Binary mode: counting lines needs no decoding, so this cannot fail on
    # files whose encoding differs from the locale default, and iterating
    # the handle avoids loading the whole file into memory.
    with open(path, 'rb') as handle:
      total += sum(1 for _ in handle)
  return total


def check_data(source_dir=None, output_dir=None):
  """Print the line count of the source data and of the generated SQL.

  Sub-directories are skipped in both locations. Unlike the previous
  implementation this does not change the process working directory.

  Args:
    source_dir: Directory holding the source data files. Defaults to
      ``<base_dir>/<date_dir>`` built from the module-level names.
    output_dir: Directory holding the generated ``.sql`` files. Defaults
      to the module-level ``sql_dir``.

  Returns:
    A ``(source_lines, sql_lines)`` tuple with the two totals printed.
  """
  if source_dir is None:
    source_dir = os.path.join(base_dir, date_dir)
  if output_dir is None:
    output_dir = sql_dir

  source_total = _count_lines_in_dir(source_dir)
  print("source_data_lines:", source_total)

  sql_total = _count_lines_in_dir(output_dir)
  print("sql_data_lines:", sql_total)
  return source_total, sql_total



if __name__ == '__main__':
  # Convert every regular file found in the month directory, then print
  # the line totals of the inputs and of the generated SQL.
  source_data_dir = os.path.join(base_dir, date_dir)
  entries = os.listdir(source_data_dir)
  # convert_data is handed bare file names, so they must resolve against
  # the month directory.
  os.chdir(source_data_dir)
  for entry in entries:
    if os.path.isdir(entry):
      # Sub-directories (such as the SQL output directory) are not data.
      continue
    convert_data(entry)
  check_data()


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章