1.導入第三方包,以python-docx爲例
在PyCharm中:File -> Settings -> Project: *** -> Project Interpreter -> 點擊「+」 -> 搜索 python-docx -> Install Package
2.輸入代碼
import os
import re

# One pass over the source: string/char literals are captured (group 1) so
# that comment markers inside them -- e.g. "http://host" -- are left alone,
# while real // and /* */ comments match without a group and are dropped.
_JAVA_STRING = r'"(?:\\.|[^"\\\n])*"'
_JAVA_CHAR = r"'(?:\\.|[^'\\\n])*'"
_JAVA_COMMENT = r'//[^\n]*|/\*.*?\*/'
_TOKEN_RE = re.compile(
    '(' + _JAVA_STRING + '|' + _JAVA_CHAR + ')|' + _JAVA_COMMENT,
    re.DOTALL,
)


def getAllSub(path):
    """Walk *path* recursively and collect every directory and file under it.

    Returns:
        A ``(Dirlist, Filelist)`` tuple of full paths (each entry is joined
        with the directory that ``os.walk`` reported it in).
    """
    Dirlist = []
    Filelist = []
    for home, dirs, files in os.walk(path):
        Dirlist.extend(os.path.join(home, dirname) for dirname in dirs)
        Filelist.extend(os.path.join(home, filename) for filename in files)
    return Dirlist, Filelist


def getSuffile(fileList, suffix):
    """Return the entries of *fileList* whose name ends with *suffix*."""
    return [ff for ff in fileList if ff.endswith(suffix)]


def getcode(file):
    """Read *file* (UTF-8) and strip its comments and blank lines.

    ``//`` and ``/* ... */`` comments are removed in a single pass that skips
    over string and character literals, so text such as ``"http://x"`` is
    preserved and a ``//`` inside a block comment cannot hide its closing
    ``*/``.  Lines that end up empty or whitespace-only are dropped, and
    trailing whitespace left behind by a removed comment is trimmed.

    Returns:
        A ``(code, line_count)`` tuple.  ``code`` is the remaining lines
        joined with ``'\\n'`` plus a final newline, or ``''`` when nothing
        remains; ``line_count`` is the number of remaining lines.
    """
    with open(file, 'r', encoding='UTF-8') as ff:
        text = ff.read()

    # Keep literals (group 1), replace comments (no group) with nothing.
    text = _TOKEN_RE.sub(lambda m: m.group(1) or '', text)

    kept = [line.rstrip() for line in text.splitlines() if line.strip()]
    if not kept:
        return '', 0
    return '\n'.join(kept) + '\n', len(kept)


def saveDocFile(flist, savepath, max_lines=3000):
    """Write the stripped code of the files in *flist* into one Word document.

    Files are appended in order into a single paragraph.  As soon as adding
    the next file would push the running total above *max_lines*, the
    document is saved without that file and the function returns.

    Args:
        flist: paths of the source files to include.
        savepath: where the document is saved.
        max_lines: upper bound on the number of code lines written
            (the default 3000 is 60 pages at 50 lines per page).
    """
    # Imported here so the helpers above remain importable when python-docx
    # is not installed.
    from docx import Document
    from docx.enum.text import WD_LINE_SPACING
    from docx.shared import Pt

    # WD_LINE_SPACING members:
    #   SINGLE         => single spacing (default)
    #   ONE_POINT_FIVE => 1.5 line spacing
    #   DOUBLE         => double spacing
    #   AT_LEAST       => minimum height
    #   EXACTLY        => fixed height
    #   MULTIPLE       => multiple of single spacing
    doc = Document()

    # Body text uses the 'Normal' style; configure its font and spacing.
    normal = doc.styles['Normal']
    normal.font.name = 'Times New Roman'
    normal.font.size = Pt(8)
    paragraph_format = normal.paragraph_format
    # Fixed 12.9 pt line height, intended to give 50 code lines per page.
    paragraph_format.line_spacing = Pt(12.9)
    # The rule lives on the paragraph format; assigning it to the Paragraph
    # object itself (as the code previously did) has no effect.
    paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY

    p = doc.add_paragraph('')  # the single paragraph that receives all code
    lines = 0
    for i, f in enumerate(flist):
        print('starting deal %d/%d' % (i + 1, len(flist)))
        codef, codelies = getcode(f)
        if lines + codelies > max_lines:
            # This file would exceed the limit: save what we have and stop.
            doc.save(savepath)
            return
        lines += codelies
        p.add_run(codef)
    doc.save(savepath)  # fewer than max_lines in total: save everything
    print('all done')


if __name__ == '__main__':
    path = r'E:\長河灣\2020-6-2軟著研究所\webmagic-module-master'
    # NOTE(review): python-docx writes the .docx format; consider using a
    # '.docx' extension here -- confirm the target application accepts '.doc'.
    savePath = r'E:\長河灣\2020-6-2軟著研究所\code.doc'
    fileList = getAllSub(path)[1]  # recursively collect every file
    fileList = getSuffile(fileList, '.java')  # keep only the .java files
    saveDocFile(fileList, savePath)

# 3. Click Run, or press the shortcut Ctrl+Shift+F10.