在做 HTML 的解析的時候遇到的需求,希望能用 Python 實現一下類似 bash 下 tree 命令輸出的效果,類似這樣:
.
├── a3c_demo.py
├── dqn-boat_mannual
│   ├── ICONS_Python
│   ├── Image
│   ├── __pycache__
│   ├── ddpg.py
│   ├── enviroment.py
│   ├── gameui.py
│   ├── ima1.py
│   ├── main.py
│   ├── manual_control.py
│   └── temp.py
├── dqn-boat_mannual.7z
├── envdemo.py
└── log
    └── events.out.tfevents.1582014010.ZYPLAPTOP
想了一下發現,也不難實現,主要就是用 DFS 遞歸打印,注意判斷尾部節點就行了。示例代碼放到這裏,說不定以後用得上:
# -*- coding: utf-8 -*-
import json
import re
from queue import Queue
from pprint import pprint
from lxml import etree
class Node:
    """A labelled tree node holding an ordered list of child nodes."""

    def __init__(self, text, level):
        # Label printed for this node.
        self.text = text
        # Depth of the node in the tree, as supplied by the creator.
        self.level = level
        # Child nodes, kept in insertion order.
        self.children = []

    def addChild(self, node):
        """Append *node* as the last child of this node."""
        self.children.append(node)

    def empty(self):
        """Return True when this node has no children."""
        return not self.children

    def __str__(self):
        # __str__ is required to return a str; coerce so that a non-string
        # label does not make str(node) raise TypeError.
        return str(self.text)

    def __repr__(self):
        return "{}({!r}, level={!r}, children={})".format(
            type(self).__name__, self.text, self.level, len(self.children)
        )
def printTree(node, indent: list, final_node=True):
    """Print *node* and all of its descendants as a ``tree``-style diagram.

    Parameters:
    ----------
    node: node to print; must expose ``text``, ``children`` and ``empty()``
    indent: prefix segments printed in front of the node's connector, one
        per ancestor level. The list is extended while descending and is
        restored to its incoming contents before this function returns.
    final_node: whether node is the last sibling (i.e. has no next sibling)
    """
    connector = '└──' if final_node else '├──'
    # Print the whole accumulated prefix rather than indexing it by
    # node.level, so a subtree can be printed with a fresh, empty indent.
    print(*indent, connector, node.text, sep='')

    if node.empty():
        return

    # Every child shares the same extra segment: blank below a last sibling,
    # a vertical rule otherwise. Both are 4 columns wide so that deeper
    # levels stay vertically aligned.
    indent.append('    ' if final_node else '│   ')
    try:
        last_index = len(node.children) - 1
        for position, child in enumerate(node.children):
            printTree(child, indent, position == last_index)
    finally:
        # Leave the caller's list exactly as it was handed in.
        indent.pop()
def inspectElem(elem: etree.Element, level=0):
    """Recursively build a Node tree mirroring the element tree under *elem*.

    Parameters:
    ----------
    elem: element whose tag becomes the label of the returned node
    level: depth recorded on the returned node; children get ``level + 1``

    Returns the Node created for *elem*, with one child Node per element
    yielded by ``elem.iterchildren()``.
    """
    raw_tag = elem.tag
    if isinstance(raw_tag, str):
        tag = raw_tag
    else:
        # Non-element nodes such as comments carry a callable instead of a
        # tag name; str() of it yields "<cyfunction Comment at 0x...>".
        # Use a short, stable label instead (e.g. "#comment").
        tag = "#" + getattr(raw_tag, "__name__", type(elem).__name__).lower()

    node = Node(tag, level)
    for child in elem.iterchildren():
        node.addChild(inspectElem(child, level + 1))
    return node
if __name__ == "__main__":
    # Demo: parse the sample page and dump its tag nesting, `tree`-style.
    sample_path = r"./sample.html"
    with open(sample_path, encoding='utf-8') as fh:
        markup = fh.read()

    # Convert the parsed document into Node objects starting at the root.
    root_node = inspectElem(etree.HTML(markup))

    # A leading "." line mimics the first line printed by `tree`.
    print('.')
    printTree(root_node, [], True)
這裏的案例是打印 HTML 標籤的嵌套結構,源文件內容如下:
<!DOCTYPE html>
<html lang="zh-cn">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Table佈局</title>
</head>
<!--去除邊緣,否則灰色區域旁邊會有白邊-->
<body marginheight="0px" marginwidth="0px">
<table width="100%" height="950px" style="background-color:darkgray;">
<tr>
<td colspan="2" width="100%" height="10%" style="background-color:cadetblue;">
這是頭部
</td>
</tr>
<tr>
<td width="30%" height="80%" style="background-color:darkkhaki;">
左菜單
</td>
<td width="70%" height="80%" style="background-color:cornflowerblue;">
右菜單
</td>
</tr>
<tr>
<td colspan="2" width="100%" height="10%" style="background-color: goldenrod;">
底部
</td>
</tr>
</table>
</body>
</html>
打印效果如下:
.
└──html
├──head
│ ├──meta
│ ├──meta
│ ├──meta
│ └──title
├──<cyfunction Comment at 0x00000258F44AE288>
└──body
└──table
├──tr
│ └──td
├──tr
│ ├──td
│ └──td
└──tr
└──td