配合任務遷移數倉ETL腳本按需替換方案2

參考https://blog.csdn.net/BabyFish13/article/details/103516408

1、固定庫名替換成參數腳本

/Users/nisj/Documents/wptDataGit-nisj/wptData/pyScript/pyWpt/sqlfile_replace_key.py

# -*- coding=utf-8 -*-
import re
import sys
import os


def replace_chart_by_condition(schema, target_dir):
    """Turn hard-coded ``<schema>.`` prefixes into ``${c.to.*}`` / ``${c.from.*}`` parameters.

    The work is done by shelling out to ``sed`` and ``grep`` over the files
    below *target_dir*; every grep skips the ``table-creation`` directory.
    The passes run in this order:

    1. In files that grep (case-insensitively) finds "insert" in, on lines
       matching ``insert``, ``INSERT``, ``SET `` or ``set ``, replace
       ``<schema>.`` with ``${c.to.<schema>}.``.
    2. In files still containing ``<schema>.``, replace every remaining
       occurrence with ``${c.from.<schema>}.``.
    3. In files containing *schema*, turn ``_${c.from.<schema>}.`` back into
       ``_<schema>.`` so table-name suffixes such as ``_tmp.`` / ``_mid.``
       keep their original form.

    :param schema: schema (database) name to parameterise, e.g. ``'ods'``.
    :param target_dir: directory handed to ``grep -rl``.
    :return: None; exit codes of the shell commands are not inspected.
    """
    # "\." keeps the dot literal for sed; "\$" stops the shell from expanding
    # "${...}" inside the double-quoted sed script.
    schema_dot = schema + '\\.'
    write_param = '\\${c.to.' + schema + '}\\.'
    read_param = '\\${c.from.' + schema + '}\\.'

    # NOTE(review): `sed -i ""` passes the backup suffix as a separate empty
    # argument, which looks like the BSD/macOS sed form -- confirm before
    # running this on GNU sed.
    addressed_cmd = (
        'sed -i "" "/{address}/ s/{pattern}/{replacement}/g" '
        '`grep -i "insert" -rl --exclude-dir=table-creation {target_dir}` '
    )
    global_cmd = (
        'sed -i "" "s/{pattern}/{replacement}/g" '
        '`grep "{needle}" -rl --exclude-dir=table-creation {target_dir}` '
    )

    # Pass 1: write-side references on insert / set lines.
    for address in ('insert', 'INSERT', 'SET ', 'set '):
        os.system(addressed_cmd.format(
            address=address,
            pattern=schema_dot,
            replacement=write_param,
            target_dir=target_dir,
        ))

    # Pass 2: everything that is still a plain "<schema>." is a read-side reference.
    os.system(global_cmd.format(
        pattern=schema_dot,
        replacement=read_param,
        needle=schema_dot,
        target_dir=target_dir,
    ))

    # Pass 3: restore table-name suffixes (_tmp., _mid.) that pass 2 rewrote.
    os.system(global_cmd.format(
        pattern='_' + read_param,
        replacement='_' + schema_dot,
        needle=schema,
        target_dir=target_dir,
    ))


# Root of the migrated SQL tree whose schema prefixes get parameterised.
target_dir = "/Users/nisj/WptWork/gitOnline/etl/sql-coral"
for schema in (
        'api', 'ctl', 'da', 'dim', 'dwf', 'dwr', 'dws', 'dwt', 'ext', 'jds', 'mid', 'ods', 'plat', 'sds', 'sds_data',
        'sds_valid', 'tmp'):
    # Progress line; the parenthesised form prints the same text on
    # Python 2 and is also valid syntax on Python 3.
    print(schema)
    replace_chart_by_condition(schema=schema, target_dir=target_dir)

2、添加依賴及目標任務名腳本

/Users/nisj/Documents/wptDataGit-nisj/wptData/pyScript/pyWpt/get_depends_from_sql_first_level.py

# -*- coding=utf-8 -*-
import os
import re
import datetime
import warnings
import time
import threadpool

warnings.filterwarnings("ignore")


def get_depends(sql_path, sql_root='/Users/nisj/WptWork/gitOnline/etl/sql/'):
    """Return the table names a SQL script reads via ``from`` / ``join``.

    The script is flattened to lower-case, space-separated tokens; every
    token of the form ``from|<schema>.<table>`` or ``join|<schema>.<table>``
    contributes ``<table>`` (the text after the first dot). Sub-selects and
    tokens containing one of the excluded markers (``tmp``, ``ejoin``,
    ``platfrom``, ``convert.join`` ...) are skipped, and so is a table whose
    name equals the script's own name.

    :param sql_path: path of the ``.sql`` file to analyse.
    :param sql_root: prefix removed from *sql_path* (together with ``.sql``)
        to obtain the script's own name for the self-reference check.
        Defaults to the original hard-coded ETL directory.
    :return: list of distinct table names, in no particular order.
    :raises IOError: if *sql_path* cannot be opened (OSError on Python 3).
    """
    sql_name_first = sql_path.replace(sql_root, '').replace('.sql', '')

    # Context manager so the handle is closed even if reading fails.
    with open(sql_path, "r") as sql_handle:
        sql_text = sql_handle.read()

    # Drop "--" line comments. No trailing newline is required, so a comment
    # on the very last line of the file is removed as well.
    sql_text = re.sub(r'--.*', ' ', sql_text)

    # Flatten to single-space-separated lower-case text.
    for separator in ('\n', ';', '\t', ')'):
        sql_text = sql_text.replace(separator, ' ')
    sql_text = re.sub(' +', ' ', sql_text.lower())

    # Glue each keyword to the token that follows it so one split() token
    # carries both the keyword and the table reference.
    sql_text = sql_text.replace('( select', '(select')
    sql_text = sql_text.replace('from ', 'from|').replace('join ', 'join|')

    # NOTE(review): 'from\\_' matches a literal backslash followed by an
    # underscore; it may have been meant as 'from_' -- confirm before changing.
    excluded_markers = ('|(select', 'convert.join', 'from\\_', 'tmp', 'ejoin', 'platfrom')

    depend_tables = set()
    for token in sql_text.split(' '):
        if 'join|' not in token and 'from|' not in token:
            continue
        if any(marker in token for marker in excluded_markers):
            continue
        # Skip the 5-character "from|" / "join|" prefix.
        reference = token[5:]
        if '.' not in reference:
            continue
        depend_tab = reference[reference.index('.') + 1:]
        if depend_tab != sql_name_first:
            depend_tables.add(depend_tab)
    return list(depend_tables)


def insert_sql_file_first_line(sql_name_first):
    """Prepend a migration header to the ``sql-coral`` copy of a script.

    The header is a block of ``--`` SQL comments holding the suggested target
    task name (*sql_name_first*) and the dependency list that
    ``get_depends`` extracts from the same-named script in the ``sql``
    directory, followed by a blank line and the file's previous content.

    :param sql_name_first: script name without directory and ``.sql`` suffix.
    :return: None; the ``sql-coral`` file is rewritten in place.
    """
    source_sql = "/Users/nisj/WptWork/gitOnline/etl/sql/" + sql_name_first + ".sql"
    coral_sql = "/Users/nisj/WptWork/gitOnline/etl/sql-coral/" + sql_name_first + ".sql"
    header_template = '--目標任務名(建議):\n--{sql_name_first}\n--依賴任務表(參考):\n--{depend_list_str}\n\n'

    with open(coral_sql, 'r+') as coral_file:
        previous_body = coral_file.read()
        # Rewind so the header plus the old body overwrite the file from the start.
        coral_file.seek(0, 0)
        header = header_template.format(
            sql_name_first=sql_name_first,
            depend_list_str=str(get_depends(source_sql)),
        )
        coral_file.write(header + previous_body)


def list_file(sql_dir, postfix):
    """List the regular files directly inside *sql_dir* whose name ends with *postfix*.

    Sub-directories are not descended into, and a directory whose own name
    ends with *postfix* is not reported.

    :param sql_dir: directory to scan; a trailing path separator is optional.
    :param postfix: required file-name ending, e.g. ``'sql'``.
    :return: list of ``[sql_dir, file_name]`` pairs in ``os.listdir`` order.
    :raises OSError: if *sql_dir* cannot be listed.
    """
    # os.path.join instead of plain concatenation: without it a sql_dir that
    # lacks the trailing separator makes every isfile() check fail and the
    # function silently returns an empty list.
    return [
        [sql_dir, sql_name]
        for sql_name in os.listdir(sql_dir)
        if sql_name.endswith(postfix)
        and os.path.isfile(os.path.join(sql_dir, sql_name))
    ]


# Directory holding the original SQL scripts (trailing slash included).
sql_dir = "/Users/nisj/WptWork/gitOnline/etl/sql/"

# For every script found directly in sql_dir, strip the 4-character ".sql"
# suffix and write the header into the script of that name.
for _listed_dir, sql_file_name in list_file(sql_dir, 'sql'):
    sql_name_first = sql_file_name[:-len('.sql')]
    insert_sql_file_first_line(sql_name_first)

3、說明

sqlfile_replace_key.py 修正了上一版本的問題:原表名中含有"set"時,參數會被錯誤替換。
get_depends_from_sql_first_level.py 會把建議添加的任務名及該任務的依賴表寫在每個SQL腳本的開頭,方便遷移時操作。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章