Python Word Automation

# generate.py
import win32com.client as win32
from datetime import datetime, date
import re
import os
import sys
import json
import logging
from enumerations import *

# Shared record layout: "<timestamp> - <level> - <message>".
log_formater = logging.Formatter(
    fmt='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# Module logger: lets everything from DEBUG upwards through to its handlers.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Console handler: writes DEBUG and above to stdout with the shared layout.
cmd_handler = logging.StreamHandler(stream=sys.stdout)
cmd_handler.setFormatter(log_formater)
cmd_handler.setLevel(logging.DEBUG)
logger.addHandler(cmd_handler)


def set_log_file(log_file):
    """Attach a UTF-8 file handler writing to *log_file* to the module logger.

    The file is opened in ``'w'`` mode, so an existing file is overwritten.
    The handler only accepts records at INFO level or above and uses the
    module-level ``log_formater`` layout.
    """
    handler = logging.FileHandler(log_file, 'w', 'utf-8')
    handler.setFormatter(log_formater)
    handler.setLevel(logging.INFO)
    logger.addHandler(handler)


def search_all(pattern, string):
    """Return the text of every non-overlapping match of *pattern* in *string*.

    Args:
        pattern: A compiled regular expression object.
        string: The text to scan.

    Returns:
        A list holding ``group(0)`` of each match, in left-to-right order.
        The list is empty when nothing matches.
    """
    # finditer resumes exactly where the previous match ended and steps past
    # empty matches by itself, so back-to-back matches are all reported and
    # the scan always terminates.
    return [m.group(0) for m in pattern.finditer(string)]


def package_list_key(item):
    """Return the sort key for a file name.

    A name ending in ``.pks`` is keyed as the text before its first dot plus
    ``.pka``; every other name is its own key.
    """
    if not item.endswith('.pks'):
        return item
    stem = item.split('.', 1)[0]
    return stem + '.pka'


class Generator:
    """Fill a Word template with a "mod list" extracted from source files.

    The document is driven through Word COM automation. Source files under
    the task directory are scanned for comments that carry the mod code, and
    every hit is copied into the document inside a bordered one-cell table.
    """

    def __init__(self, template_filename, task_dir, rel_date, mod_code, prep_by, prep_date, review_by, review_date,
                 summary_list, context_lines=5):
        """Start Word, open the template and save it under the output name.

        Args:
            template_filename: Path of the Word template to open.
            task_dir: Directory that holds the source sub-directories; the
                output is saved as ``<task_dir>/MOD_LIST/MOD<mod_code[4:]>.docx``.
            rel_date: Release date in ``dd/mm/YYYY`` form.
            mod_code: Code looked for in source comments.
            prep_by: Text written into the "prepared by" cell.
            prep_date: ``dd/mm/YYYY`` string, or ``None`` for today's date.
            review_by: Text written into the "reviewed by" cell.
            review_date: ``dd/mm/YYYY`` string, or ``None`` for today's date.
            summary_list: Texts for the bullets under "Change Summary".
            context_lines: Stored as ``self.context_lines``.

        Raises:
            ValueError: If a date string does not match ``dd/mm/YYYY``.
            Exception: Whatever Word raises when the template cannot be
                opened; the failure is logged and re-raised unchanged.
        """
        self.task_dir = task_dir
        self.mod_filename = os.path.join(task_dir, 'MOD_LIST', f'MOD{mod_code[4:]}.docx')
        self.template_filename = template_filename
        self.rel_date = self._parse_date(rel_date)
        self.mod_code = mod_code
        self.prep_by = prep_by
        self.prep_date = self._parse_date(prep_date) if prep_date is not None else date.today()
        self.review_by = review_by
        self.review_date = self._parse_date(review_date) if review_date is not None else date.today()
        self.summary_list = summary_list
        self.context_lines = context_lines
        self.word = win32.Dispatch('word.Application')
        self.word.visible = True
        try:
            self.document = self.word.Documents.Open(self.template_filename)
        except Exception:
            msg = f'ERROR: failed to open template {self.template_filename}'
            logger.error(msg)
            # Bare raise keeps the original traceback intact.
            raise
        else:
            logger.info(f'save as: {self.mod_filename}')
            self.document.SaveAs(self.mod_filename)
            self.section = self.document.sections(1)
            self.range = self.section.Range

    @staticmethod
    def _parse_date(text):
        """Parse a ``dd/mm/YYYY`` string into a ``date``."""
        return datetime.strptime(text, '%d/%m/%Y').date()

    def build_regex_pattern(self,
                            context_lines=0,
                            header_regex='(.*\\n)? *-- Mod. Date.+\\n(^.+\\n).+{mod_code}\\n(^.+\\n)+ *(-+\\n)',
                            line_regex='(.*\\n){{{context_lines}}}.+ -- ?{mod_code}.*\\n(.*\\n){{{context_lines}}}',
                            block_regex='(.*\\n){{{context_lines}}}\\s*--<< ?{mod_code}.*\\n(.*\\n)*?\\s*-->> ?{'
                                        'mod_code}.*\\n(.*\\n){{{context_lines}}}'
                            ):
        """Compile the pattern that finds this mod's marks in a source text.

        Args:
            context_lines: Number of whole lines matched before and after a
                line or block hit.
            header_regex: Template for the header alternative; may use the
                ``{mod_code}`` placeholder.
            line_regex: Template for a single marked line; may use
                ``{mod_code}`` and ``{context_lines}``.
            block_regex: Template for a ``<<`` ... ``>>`` marked block; may
                use ``{mod_code}`` and ``{context_lines}``.

        Returns:
            A compiled ``re.MULTILINE`` pattern of the form
            ``header|line|block``.
        """
        # The mod code is data, not a regex: escape it so characters such as
        # "." or "+" only match themselves.
        mod_code = re.escape(self.mod_code)
        header_regex = header_regex.format(mod_code=mod_code)
        line_regex = line_regex.format(mod_code=mod_code, context_lines=context_lines)
        block_regex = block_regex.format(mod_code=mod_code, context_lines=context_lines)
        regex = f'{header_regex}|{line_regex}|{block_regex}'
        return re.compile(pattern=regex, flags=re.MULTILINE)

    def generate(self):
        """Write header, document log, summary and mod sections, then save."""
        self.gen_header()
        self.gen_doc_log()
        self.gen_summary()
        self.gen_mod()
        self.document.Save()

    def gen_mod(self):
        """Emit every "Heading 3" section followed by its generated content."""
        logger.info('generate content')
        # Insertion order of the dict is the order of the sections.
        h3_dict = {
            'Table structure': self.gen_ddl,
            'Table setup': self.gen_dml,
            'Module related': self.gen_plsql,
            'Form related': self.gen_form,
            'Report related': self.gen_report,
            'UNIX related': self.gen_unix,
            'NT related': self.gen_nt,
            'Grant Script related': self.gen_grant
        }
        for title, build_section in h3_dict.items():
            self.insert_h3(text=title)
            build_section()

    def gen_header(self):
        """Rewrite paragraph 4 of the primary page header.

        The new text is ``HCAS <year> <month> Release``, 90 spaces, then the
        mod code, set to distributed alignment at font size 9.5.
        """
        logger.info('generate header')
        h = self.section.Headers(WdHeaderFooterIndex.wdHeaderFooterPrimary)
        spaces = ' ' * 90
        header_text = f'HCAS {self.rel_date.strftime("%Y %b")} Release{spaces}{self.mod_code}'
        h.Range.Paragraphs(4).Range.Delete()
        h.Range.InsertParagraphAfter()
        h.Range.Paragraphs(4).Format.Alignment = WdParagraphAlignment.wdAlignParagraphDistribute
        h.Range.Paragraphs(4).Range.Font.Size = 9.5
        h.Range.InsertAfter(header_text)

    def gen_doc_log(self):
        """Fill the first table of the document with the mod's log data.

        Column 2 receives mod code, preparer and preparation date (rows 1-3);
        column 4 receives reviewer and review date (rows 1-2).
        """
        logger.info('generate doc log')
        table = self.document.Tables(1)
        col_1 = table.Columns(2)
        col_2 = table.Columns(4)
        col_1.Cells(1).Range.Text = self.mod_code
        col_1.Cells(2).Range.Text = self.prep_by
        col_1.Cells(3).Range.Text = self.prep_date.strftime('%d/%m/%Y')
        col_2.Cells(1).Range.Text = self.review_by
        col_2.Cells(2).Range.Text = self.review_date.strftime('%d/%m/%Y')

    def gen_summary(self):
        """Write the "Change Summary" bullets and the "Mod List" heading."""
        logger.info('generate summary')
        heading_index = self._insert_heading('Change Summary', "Heading 2")
        self._insert_bullets(self.summary_list, heading_index)
        self.insert_normal()
        self._insert_heading('Mod List', "Heading 2")
        self.insert_normal()

    def _insert_heading(self, text, style):
        """Add a paragraph, give the last paragraph *style*, then insert *text*.

        Returns the paragraph index that received the style.
        """
        self.range.InsertParagraphAfter()
        last_paragraph_index = self.range.Paragraphs.Count
        self.range.Paragraphs(last_paragraph_index).Style = style
        self.range.InsertAfter(text)
        return last_paragraph_index

    def _insert_bullets(self, text_list, last_paragraph_index):
        """Insert each text of *text_list* after a new paragraph mark.

        Bullet formatting is applied to the newest paragraph whenever the
        paragraph at *last_paragraph_index* is not a bulleted list item; that
        paragraph then becomes the one checked on the next pass.
        """
        for text in text_list:
            self.range.InsertParagraphAfter()
            if self.range.Paragraphs(last_paragraph_index).Range.ListFormat.ListType != WdListType.wdListBullet:
                last_paragraph_index = self.range.Paragraphs.Count
                self.range.Paragraphs(last_paragraph_index).Range.ListFormat.ApplyBulletDefault()
            self.range.InsertAfter(text)

    def insert_normal(self):
        """Add a paragraph and set the last paragraph's style to "Normal"."""
        self.range.InsertParagraphAfter()
        last_paragraph_index = self.range.Paragraphs.Count
        self.range.Paragraphs(last_paragraph_index).Style = "Normal"

    def insert_normal_text(self, text, target_range=None, add_indent=False):
        """Insert *text* after a new paragraph mark without highlighting.

        Args:
            text: Text to insert.
            target_range: Word range to write into; defaults to ``self.range``.
            add_indent: When true, ``Indent()`` is called on each processed
                paragraph.

        Paragraphs from the count taken before the insertion up to, but not
        including, the count taken after it are set to "Normal" style with no
        highlight.
        """
        if target_range is None:
            target_range = self.range
        target_range.InsertParagraphAfter()
        p_start = target_range.Paragraphs.Count
        target_range.InsertAfter(text)
        p_end = target_range.Paragraphs.Count
        for i in range(p_start, p_end):
            target_range.Paragraphs(i).Style = "Normal"
            target_range.Paragraphs(i).Range.HighlightColorIndex = WdColorIndex.wdNoHighlight
            if add_indent:
                target_range.Paragraphs(i).Indent()

    def insert_highlight_text(self, text, target_range=None, add_indent=False, context_lines=0):
        """Insert *text* after a new paragraph mark and highlight it in yellow.

        Args:
            text: Text to insert.
            target_range: Word range to write into; defaults to ``self.range``.
            add_indent: When true, ``Indent()`` is called on each highlighted
                paragraph.
            context_lines: Number of paragraphs at each end of the inserted
                text that are left without highlight.
        """
        if target_range is None:
            target_range = self.range
        target_range.InsertParagraphAfter()
        p_start = target_range.Paragraphs.Count
        target_range.InsertAfter(text)
        p_end = target_range.Paragraphs.Count
        for i in range(p_start + context_lines, p_end - context_lines):
            # NOTE(review): the style is set on paragraph p_start on every
            # pass, while insert_normal_text styles paragraph i - confirm
            # which one is intended before changing it.
            target_range.Paragraphs(p_start).Style = "Normal"
            target_range.Paragraphs(i).Range.HighlightColorIndex = WdColorIndex.wdYellow
            if add_indent:
                target_range.Paragraphs(i).Indent()

    def insert_text(self, text):
        """Reset the last paragraph to "Normal"/no highlight, then insert *text*."""
        last_paragraph_index = self.range.Paragraphs.Count
        self.range.Paragraphs(last_paragraph_index).Style = "Normal"
        self.range.Paragraphs(last_paragraph_index).Range.HighlightColorIndex = WdColorIndex.wdNoHighlight
        self.range.InsertAfter(text)

    def insert_h3(self, text):
        """Insert *text* as a "Heading 3" paragraph."""
        self._insert_heading(text, "Heading 3")

    def insert_h4(self, text):
        """Insert *text* as a "Heading 4" paragraph."""
        self._insert_heading(text, "Heading 4")

    def insert_list(self, text_list):
        """Insert *text_list* as bullets, then add a "Normal" paragraph."""
        self._insert_bullets(text_list, self.range.Paragraphs.Count)
        self.insert_normal()

    def gen_mod_list(self, source_file, pattern, context_lines=0):
        """Copy every match of *pattern* in *source_file* into a new table.

        Args:
            source_file: Path of the UTF-8 text file to scan.
            pattern: Compiled pattern handed to ``search_all``.
            context_lines: Passed on to ``insert_highlight_text``.

        Raises:
            Exception: If *source_file* is not an existing file.
        """
        if not os.path.isfile(source_file):
            msg = f'ERROR: {source_file} is not a file.'
            logger.error(msg)
            raise Exception(msg)
        logger.info(f'    {source_file}')
        self.insert_normal()
        # The context manager closes the file even when reading fails.
        with open(file=source_file, encoding='utf-8') as f:
            text = f.read()
        rs = search_all(pattern=pattern, string=text)
        # A bordered 1x1 table placed on the last character of the range
        # receives all the snippets.
        code_start = self.range.End
        code_range = self.document.Range(code_start - 1, code_start)
        self.document.Tables.Add(code_range, 1, 1)
        code_table = self.document.Tables(self.document.Tables.Count)
        code_table.Borders.Enable = True
        cell_range = code_table.Cell(1, 1).Range
        for r in rs:
            # Each snippet is framed by "..." lines.
            self.insert_normal_text(target_range=cell_range, text='...')
            self.insert_highlight_text(target_range=cell_range, text=r, context_lines=context_lines)
            self.insert_normal_text(target_range=cell_range, text='...')

    def gen_source_list(self, source_dir, pattern, startswith=None, endswith=None, context_lines=0):
        """Generate a "Heading 4" plus snippet table per file of *source_dir*.

        Args:
            source_dir: Directory whose entries are listed.
            pattern: Compiled pattern passed to ``gen_mod_list``.
            startswith: Optional required file name prefix.
            endswith: Optional required file name suffix.
            context_lines: Passed on to ``gen_mod_list``.

        Returns:
            The number of selected entries, or 0 when *source_dir* is not a
            directory (which is only logged).
        """
        if not os.path.isdir(source_dir):
            msg = f'{source_dir} not a directory'
            logger.info(msg)
            return 0
        source_list = sorted(
            (fn for fn in os.listdir(source_dir)
             if (startswith is None or fn.startswith(startswith))
             and (endswith is None or fn.endswith(endswith))),
            key=package_list_key)
        for source_filename in source_list:
            self.insert_h4(text=source_filename)
            src_file = os.path.join(source_dir, source_filename)
            self.gen_mod_list(source_file=src_file, pattern=pattern, context_lines=context_lines)
        return len(source_list)

    def gen_oracle_like(self, src_dir, startswith=None, endswith=None, context_lines=0):
        """Scan *src_dir* with the default ``--`` comment patterns.

        Returns the number of files processed.
        """
        p = self.build_regex_pattern(context_lines=context_lines)
        return self.gen_source_list(source_dir=src_dir, pattern=p, startswith=startswith, endswith=endswith,
                                    context_lines=context_lines)

    def gen_ddsql_like(self, mast_dir, temp_dir, startswith):
        """Scan ``.sql`` files with prefix *startswith* in both directories.

        Inserts an "N/A" bullet when neither directory yields a file.
        """
        m_cnt = self.gen_oracle_like(src_dir=mast_dir, startswith=startswith, endswith='.sql')
        t_cnt = self.gen_oracle_like(src_dir=temp_dir, startswith=startswith, endswith='.sql')
        if m_cnt == 0 and t_cnt == 0:
            self.insert_list(text_list=('N/A',))

    def gen_dml(self):
        """Generate the section for ``DML*.sql`` files under ``DDSQL``."""
        logger.info('    generate dml')
        master_dml_dir = os.path.join(self.task_dir, 'DDSQL')
        temp_dml_dir = os.path.join(master_dml_dir, 'temp')
        self.gen_ddsql_like(mast_dir=master_dml_dir, temp_dir=temp_dml_dir, startswith='DML')

    def gen_ddl(self):
        """Generate the section for ``DDL*.sql`` files under ``DDSQL``."""
        logger.info('    generate ddl')
        master_ddl_dir = os.path.join(self.task_dir, 'DDSQL')
        temp_ddl_dir = os.path.join(master_ddl_dir, 'temp')
        self.gen_ddsql_like(mast_dir=master_ddl_dir, temp_dir=temp_ddl_dir, startswith='DDL')

    def gen_grant(self):
        """Generate the section for ``GRANT*.sql`` files under ``DDSQL``."""
        logger.info('    generate grant')
        master_grant_dir = os.path.join(self.task_dir, 'DDSQL')
        temp_grant_dir = os.path.join(master_grant_dir, 'temp')
        self.gen_ddsql_like(mast_dir=master_grant_dir, temp_dir=temp_grant_dir, startswith='GRANT')

    def gen_plsql(self):
        """Generate the section for every file under ``PLSQL``.

        One context line is kept around each hit; an "N/A" bullet is inserted
        when no file was processed.
        """
        logger.info('    generate plsql')
        src_dir = os.path.join(self.task_dir, 'PLSQL')
        src_cnt = self.gen_oracle_like(src_dir=src_dir, context_lines=1)
        if src_cnt == 0:
            self.insert_list(text_list=('N/A',))

    def gen_form(self):
        """Insert an "N/A" bullet; forms are not processed."""
        logger.info('    Form not supported')
        self.insert_list(text_list=('N/A',))

    def gen_report(self):
        """Insert an "N/A" bullet; reports are not processed."""
        logger.info('    Report not supported')
        self.insert_list(text_list=('N/A',))

    def gen_unix(self):
        """Generate the section for ``UNIX_SCRIPT/ML12B`` and ``ML16B``.

        Uses ``#``-prefixed variants of the comment patterns; an "N/A" bullet
        is inserted when neither directory yields a file.
        """
        logger.info('    generate UNIX')
        unix_dir = os.path.join(self.task_dir, 'UNIX_SCRIPT')
        header_regex = '(.*\\n)#.+{mod_code}.+\\n(.*\\n)*# *\\*+\\n'
        line_regex = '(.*\\n){{{context_lines}}}.+ # ?-- ?{mod_code}.*\\n(.*\\n){{{context_lines}}}'
        block_regex = '(.*\\n){{{context_lines}}}\\s*#--<< ?{mod_code}.*\\n(.*\\n)*?\\s*#-->> ?{mod_code}.*\\n(' \
                      '.*\\n){{{context_lines}}}'
        p = self.build_regex_pattern(header_regex=header_regex, block_regex=block_regex, line_regex=line_regex)
        ml12b = os.path.join(unix_dir, 'ML12B')
        count12b = self.gen_source_list(source_dir=ml12b, pattern=p)
        ml16b = os.path.join(unix_dir, 'ML16B')
        count16b = self.gen_source_list(source_dir=ml16b, pattern=p)
        if count12b == 0 and count16b == 0:
            self.insert_list(text_list=('N/A',))

    def gen_nt(self):
        """Insert an "N/A" bullet for the NT section."""
        logger.info('    generate NT')
        self.insert_list(text_list=('N/A',))


if __name__ == "__main__":
    # Usage: generate.py [CONF_FILE]
    # Without an argument, generate_mod_list.json next to this script is used.
    # The log file generate_mod_list.log is written beside the conf file.
    args = sys.argv[1:]
    if len(args) > 1:
        # Extra arguments used to be ignored silently; report them instead.
        logger.error('ERROR: expected at most one argument (the conf file).')
        sys.exit(2)

    if args:
        conf_file = args[0]
        if not os.path.isfile(conf_file):
            msg = f'ERROR: {conf_file} is not a file.'
            logger.error(msg)
            raise Exception(msg)
        working_dir = os.path.dirname(conf_file)
    else:
        working_dir = os.path.dirname(__file__)
        conf_file = os.path.join(working_dir, 'generate_mod_list.json')

    set_log_file(log_file=os.path.join(working_dir, 'generate_mod_list.log'))
    if args:
        logger.info(f'Using conf file: {conf_file}')

    # The context manager closes the conf file as soon as it is parsed.
    with open(conf_file, mode='r', encoding='utf-8') as conf:
        arguments = json.load(conf)
    g = Generator(**arguments)
    g.generate()
class WdHeaderFooterIndex:
    """Integer constants selecting a header or footer."""

    # Header or footer on all pages other than the first page of a document
    # or section.
    wdHeaderFooterPrimary = 1
    # First header or footer in a document or section.
    wdHeaderFooterFirstPage = 2
    # All headers or footers on even-numbered pages.
    wdHeaderFooterEvenPages = 3

class WdParagraphAlignment:
    """Integer constants for paragraph alignment."""

    # Left-aligned.
    wdAlignParagraphLeft = 0
    # Center-aligned.
    wdAlignParagraphCenter = 1
    # Right-aligned.
    wdAlignParagraphRight = 2
    # Fully justified.
    wdAlignParagraphJustify = 3
    # Characters are distributed to fill the entire width of the paragraph.
    wdAlignParagraphDistribute = 4
    # Justified with a medium character compression ratio.
    wdAlignParagraphJustifyMed = 5
    # Justified with a high character compression ratio.
    wdAlignParagraphJustifyHi = 7
    # Justified with a low character compression ratio.
    wdAlignParagraphJustifyLow = 8
    # Justified according to Thai formatting layout.
    wdAlignParagraphThaiJustify = 9


class WdReferenceType:
    """Integer constants naming reference types."""

    # Numbered item.
    wdRefTypeNumberedItem = 0
    # Heading.
    wdRefTypeHeading = 1
    # Bookmark.
    wdRefTypeBookmark = 2
    # Footnote.
    wdRefTypeFootnote = 3
    # Endnote.
    wdRefTypeEndnote = 4

class WdListType:
    """Integer constants naming list types."""

    # List with no bullets, numbering, or outlining.
    wdListNoNumbering = 0
    # ListNum fields that can be used in the body of a paragraph.
    wdListListNumOnly = 1
    # Bulleted list.
    wdListBullet = 2
    # Simple numeric list.
    wdListSimpleNumbering = 3
    # Outlined list.
    wdListOutlineNumbering = 4
    # Mixed numeric list.
    wdListMixedNumbering = 5
    # Picture bulleted list.
    wdListPictureBullet = 6

class WdColorIndex:
    """Integer constants naming colors; two names share the value 0."""

    # Color defined by document author.
    wdByAuthor = -1
    # Automatic color. Default; usually black.
    wdAuto = 0
    # Removes highlighting that has been applied.
    wdNoHighlight = 0
    # Black color.
    wdBlack = 1
    # Blue color.
    wdBlue = 2
    # Turquoise color.
    wdTurquoise = 3
    # Bright green color.
    wdBrightGreen = 4
    # Pink color.
    wdPink = 5
    # Red color.
    wdRed = 6
    # Yellow color.
    wdYellow = 7
    # White color.
    wdWhite = 8
    # Dark blue color.
    wdDarkBlue = 9
    # Teal color.
    wdTeal = 10
    # Green color.
    wdGreen = 11
    # Violet color.
    wdViolet = 12
    # Dark red color.
    wdDarkRed = 13
    # Dark yellow color.
    wdDarkYellow = 14
    # Shade 50 of gray color.
    wdGray50 = 15
    # Shade 25 of gray color.
    wdGray25 = 16

class WdBorderType:
    """Integer constants naming borders."""

    # A top border.
    wdBorderTop = -1
    # A left border.
    wdBorderLeft = -2
    # A bottom border.
    wdBorderBottom = -3
    # A right border.
    wdBorderRight = -4
    # Horizontal borders.
    wdBorderHorizontal = -5
    # Vertical borders.
    wdBorderVertical = -6
    # A diagonal border starting in the upper-left corner.
    wdBorderDiagonalDown = -7
    # A diagonal border starting in the lower-left corner.
    wdBorderDiagonalUp = -8

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章