參考:https://blog.csdn.net/BabyFish13/article/details/103516408
1、固定庫名替換成參數腳本
/Users/nisj/Documents/wptDataGit-nisj/wptData/pyScript/pyWpt/sqlfile_replace_key.py
# -*- coding=utf-8 -*-
import re
import sys
import os
def replace_chart_by_condition(schema, target_dir):
    """Replace hard-coded ``SCHEMA.`` prefixes under ``target_dir`` with parameters.

    Six shell commands are issued through ``os.system``; each one edits files
    in place with ``sed -i ""`` (the empty-suffix form, as accepted by
    BSD/macOS sed) and takes its file list from a backquoted recursive
    ``grep`` that skips any ``table-creation`` directory.  Exit statuses are
    not inspected.

    Args:
        schema: Schema (database) name whose ``SCHEMA.`` prefix is rewritten.
        target_dir: Directory tree searched by ``grep -rl``.
    """
    # The backslashes are intentional: "\." is a literal dot for sed, and
    # "\$" keeps the shell from expanding "${...}" inside double quotes.
    literal_prefix = schema + '\\.'
    write_param = '\\${c.to.' + schema + '}\\.'
    read_param = '\\${c.from.' + schema + '}\\.'

    def run_sed(script, grep_args):
        # Assemble exactly: sed -i "" "SCRIPT" `grep ARGS -rl --exclude-dir=table-creation DIR`
        os.system(
            'sed -i "" "' + script + '" `grep ' + grep_args
            + ' -rl --exclude-dir=table-creation ' + target_dir + '` ')

    # Pass 1: on lines matching insert / INSERT / "SET " / "set ", turn the
    # schema prefix into the ${c.to.SCHEMA}. form.  Only files that contain
    # "insert" (case-insensitive) are touched.
    for line_filter in ('insert', 'INSERT', 'SET ', 'set '):
        run_sed(
            '/' + line_filter + '/ s/' + literal_prefix + '/' + write_param + '/g',
            '-i "insert"')

    # Pass 2: every remaining occurrence of the schema prefix becomes the
    # ${c.from.SCHEMA}. form, in every file that still contains it.
    run_sed(
        's/' + literal_prefix + '/' + read_param + '/g',
        '"' + literal_prefix + '"')

    # Pass 3: undo pass 2 where the schema name was really the tail of a
    # longer identifier (e.g. "_tmp." / "_mid."): "_${c.from.SCHEMA}." is
    # put back to "_SCHEMA.".
    run_sed(
        's/_' + read_param + '/_' + literal_prefix + '/g',
        '"' + schema + '"')
# Root of the SQL tree that is rewritten in place.
target_dir = "/Users/nisj/WptWork/gitOnline/etl/sql-coral"

# Schema names whose hard-coded prefixes are parameterized, processed in
# this order.
schema_names = (
    'api', 'ctl', 'da', 'dim', 'dwf', 'dwr', 'dws', 'dwt', 'ext', 'jds',
    'mid', 'ods', 'plat', 'sds', 'sds_data', 'sds_valid', 'tmp',
)

for schema in schema_names:
    # Progress output: one schema name per line.
    print(schema)
    replace_chart_by_condition(schema=schema, target_dir=target_dir)
2、添加依賴及目標任務名腳本
/Users/nisj/Documents/wptDataGit-nisj/wptData/pyScript/pyWpt/get_depends_from_sql_first_level.py
# -*- coding=utf-8 -*-
import os
import re
import datetime
import warnings
import time
import threadpool
# Install an "ignore" filter so warnings are suppressed from here on.
warnings.filterwarnings("ignore")
def get_depends(sql_path):
    """Return the table names a SQL script reads from (first level only).

    The script text is stripped of ``--`` line comments, lower-cased and
    split into whitespace-separated tokens.  A token that directly follows
    ``from`` or ``join`` and has the form ``schema.table`` contributes
    ``table`` to the result.  This is a textual heuristic, not a SQL parser.

    Args:
        sql_path: Path of the ``.sql`` file to scan.

    Returns:
        A sorted list of distinct table names (schema prefix removed),
        excluding the script's own name.
    """
    # Name of the script itself (path prefix and ".sql" removed); a table
    # with this exact name is not reported as a dependency of itself.
    sql_name_first = sql_path.replace('/Users/nisj/WptWork/gitOnline/etl/sql/', '').replace('.sql', '')

    # Context manager so the handle is closed even if read() raises.
    with open(sql_path, "r") as sql_handle:
        sql_text = sql_handle.read()

    # Drop "--" comments.  The pattern stops at the end of the line without
    # requiring a newline, so a comment on the last line is removed as well.
    sql_text = re.sub(r'--.*', ' ', sql_text)
    # Statement terminators and closing parentheses become separators.
    sql_text = sql_text.replace(';', ' ').replace(')', ' ').lower()
    # Collapse every whitespace run (spaces, tabs, CR, LF) to one space so
    # that "from\tdb.t" or CRLF files tokenize the same as space-separated SQL.
    sql_text = re.sub(r'\s+', ' ', sql_text)
    # Glue the keyword to the token that follows it: "from x.y" -> "from|x.y".
    sql_text = sql_text.replace('( select', '(select').replace('from ', 'from|').replace('join ', 'join|')

    # Substrings that disqualify a token: sub-selects, a few known false
    # positives, and anything containing "tmp".
    skip_markers = ('|(select', 'convert.join', 'from\\_', 'tmp', 'ejoin', 'platfrom')

    depend_tabs = set()
    for token in sql_text.split(' '):
        if 'join|' not in token and 'from|' not in token:
            continue
        if any(marker in token for marker in skip_markers):
            continue
        # Skip the 5-character "from|" / "join|" marker (assumes the marker
        # sits at the start of the token).
        qualified = token[5:]
        if '.' not in qualified:
            continue
        # Keep what follows the first dot, i.e. the table without its schema.
        depend_tab = qualified[qualified.index('.') + 1:]
        if depend_tab != sql_name_first:
            depend_tabs.add(depend_tab)
    # Sorted so the output is deterministic from run to run.
    return sorted(depend_tabs)
def insert_sql_file_first_line(sql_name_first):
    """Prepend a task-name / dependency header to a script's sql-coral copy.

    Dependencies are computed by ``get_depends`` from the script of the same
    name under the ``sql`` directory; the header is then written in front of
    the existing content of the file under ``sql-coral``.

    Args:
        sql_name_first: Script name without directory and without ``.sql``.
    """
    source_sql = "/Users/nisj/WptWork/gitOnline/etl/sql/" + sql_name_first + ".sql"
    coral_sql = "/Users/nisj/WptWork/gitOnline/etl/sql-coral/" + sql_name_first + ".sql"

    with open(coral_sql, 'r+') as coral_file:
        existing_body = coral_file.read()
        # Rewind so the header plus the old content overwrite from offset 0.
        coral_file.seek(0, 0)
        depends_repr = str(get_depends(source_sql))
        header = '--目標任務名(建議):\n--{sql_name_first}\n--依賴任務表(參考):\n--{depend_list_str}\n\n'.format(
            sql_name_first=sql_name_first, depend_list_str=depends_repr)
        coral_file.write(header + existing_body)
def list_file(sql_dir, postfix):
    """List the regular files directly inside ``sql_dir`` whose name ends with ``postfix``.

    Subdirectories are not descended into.

    Args:
        sql_dir: Directory to scan, with or without a trailing path separator.
        postfix: Required file-name ending, e.g. ``'sql'``.

    Returns:
        A list of ``[sql_dir, file_name]`` pairs ordered by file name, where
        ``sql_dir`` is the argument exactly as it was passed in.
    """
    file_list = []
    # sorted() makes the result independent of the OS listing order.
    for sql_name in sorted(os.listdir(sql_dir)):
        # os.path.join instead of string concatenation, so a directory given
        # without a trailing separator is still resolved correctly.
        if sql_name.endswith(postfix) and os.path.isfile(os.path.join(sql_dir, sql_name)):
            file_list.append([sql_dir, sql_name])
    return file_list
# Directory that holds the source SQL scripts.
sql_dir = "/Users/nisj/WptWork/gitOnline/etl/sql/"

# Add the header to the sql-coral copy of every script found in sql_dir.
sql_entries = list_file(sql_dir, 'sql')
for onlyList in sql_entries:
    file_name = onlyList[1]
    # Drop the last four characters (presumably the ".sql" extension).
    sql_name_first = file_name[:-4]
    insert_sql_file_first_line(sql_name_first)
3、說明
sqlfile_replace_key.py處理了上一版本中因原表名中出現"set"而導致參數替換錯誤的問題。
get_depends_from_sql_first_level.py將要添加的任務名、任務的依賴都列在腳本上端,方便遷移時的操作。