腳本語言支持變量,通過使用字典作爲變量存儲區實現。
賦值語句中的等號後面可匹配表達式。
嘗試一個規則不成功之後,恢復到原樣,再去嘗試另外的規則,這個現象就叫做“回溯”。
simple_script.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from play_with_compiler.craft.simple_parser import SimpleParser
from play_with_compiler.craft.base_type import ASTNodeType
import sys
'''
* 一個簡單的腳本解釋器。
* 所支持的語法,請參見simple_parser.py
*
* 運行腳本:
* 在命令行下,鍵入:python simple_script.py
* 則進入一個REPL界面。你可以依次敲入命令。比如:
* > 2+3;
* > int age = 10;
* > int b;
* > b = 10*2;
* > age = age + b;
* > exit(); //退出REPL界面。
*
* 你還可以使用一個參數 -v,讓每次執行腳本的時候,都輸出AST和整個計算過程。
'''
class SimpleScript(object):
    """Tree-walking interpreter for the AST produced by SimpleParser.

    Variables are kept in a dict that maps a variable name to its integer
    value, or to None when the variable was declared without an initializer.
    """

    def __init__(self, verbose):
        """Create an interpreter with an empty variable store.

        verbose -- when true, evaluate() traces every node it visits and the
                   result computed for it.
        """
        self._variables = {}
        self._verbose = verbose

    def evaluate(self, node, indent):
        """Walk the AST rooted at *node* and return the value it computes.

        node   -- AST node offering get_type(), get_text() and get_children().
        indent -- prefix used for trace output; '' marks a top-level
                  statement and every nesting level appends one tab.

        Returns the integer computed for the node, or None when nothing was
        computed (for example a declaration without an initializer).

        Raises Exception when an identifier holds no value, or when an
        assignment targets a variable that was never declared.
        """
        result = None
        node_type = node.get_type()
        if self._verbose:
            print('%s Calcalationg: %s:' % (indent, node_type))

        if node_type == ASTNodeType.Programm:
            # The value of a program is the value of its last statement.
            for child in node.get_children():
                result = self.evaluate(child, indent)
        elif node_type == ASTNodeType.Additive:
            child1 = node.get_children()[0]
            value1 = self.evaluate(child1, indent + "\t")
            child2 = node.get_children()[1]
            value2 = self.evaluate(child2, indent + "\t")
            if node.get_text() == '+':
                result = int(value1) + int(value2)
            else:
                result = int(value1) - int(value2)
        elif node_type == ASTNodeType.Multiplicative:
            child1 = node.get_children()[0]
            value1 = self.evaluate(child1, indent + "\t")
            child2 = node.get_children()[1]
            value2 = self.evaluate(child2, indent + "\t")
            if node.get_text() == '*':
                result = int(value1) * int(value2)
            else:
                # Floor division keeps the result an integer on Python 3 and
                # is what "/" already does for two ints on Python 2.
                result = int(value1) // int(value2)
        elif node_type == ASTNodeType.IntLiteral:
            result = int(node.get_text())
        elif node_type == ASTNodeType.Identifier:
            var_name = node.get_text()
            value = self._variables.get(var_name)
            if value is not None:
                result = int(value)
            else:
                raise Exception('variavle ' + var_name + ' has not been set any value')
        elif node_type in (ASTNodeType.AssignmentStmt, ASTNodeType.IntDeclaration):
            var_name = node.get_text()
            # Only an assignment requires the variable to exist already.
            if (node_type == ASTNodeType.AssignmentStmt
                    and var_name not in self._variables):
                raise Exception('unknown variable: ' + var_name)
            # Assignment and declaration share the rest: evaluate the
            # optional right-hand side and store it. An elif chain cannot
            # fall through from one branch into the next, so both node types
            # are handled in this single branch.
            var_value = None
            children = node.get_children()
            if children:
                result = self.evaluate(children[0], indent + '\t')
                var_value = int(result)
            self._variables[var_name] = var_value

        if self._verbose:
            print('%sResult: %s' % (indent, result))
        elif indent == '':
            # Outside verbose mode only top-level statements echo a result.
            if node_type in (ASTNodeType.IntDeclaration, ASTNodeType.AssignmentStmt):
                print('%s: %s' % (node.get_text(), result))
            elif node_type != ASTNodeType.Programm:
                print(result)
        return result
'''
實現一個簡單的 REPL
'''
def play(args):
    """Run a simple read-eval-print loop on standard input.

    args -- command line arguments without the program name; a leading '-v'
            turns on verbose mode (AST dump plus evaluation trace).

    Lines are accumulated until one ends with ';', then the accumulated text
    is parsed and evaluated. Typing 'exit();' or reaching end of input leaves
    the loop. Errors raised while parsing or evaluating are printed and the
    pending statement is discarded.
    """
    verbose = len(args) > 0 and args[0] == '-v'
    if verbose:
        print('verbose mode')
    print('Simple script language!')

    # raw_input exists only on Python 2; Python 3 renamed it to input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    parser = SimpleParser()
    script = SimpleScript(verbose)
    script_text = ""
    while True:
        try:
            line = read_line(">")
        except EOFError:
            # End of input (Ctrl-D or a closed pipe). Treating it like any
            # other error would re-prompt and fail again forever.
            print("good bye!")
            break

        # Ignore surrounding whitespace when recognising commands.
        stripped = line.strip()
        if stripped == 'exit();':
            print("good bye!")
            break

        try:
            script_text += line + "\n"
            if stripped.endswith(";"):
                tree = parser.parse(script_text)
                if verbose:
                    parser.dump_AST(tree, "")
                script.evaluate(tree, "")
                script_text = ""
        except Exception as e:
            # Report the problem and start over with an empty statement.
            print('119: %s' % e)
            script_text = ''
# Start the REPL only when the file is executed as a script, so that
# importing the module does not block on interactive input.
if __name__ == '__main__':
    play(sys.argv[1:])
simple_parser.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from play_with_compiler.craft.simple_lexer import SimpleLexer
from play_with_compiler.craft.base_type import Token, TokenReader, ASTNodeType, TokenType
from play_with_compiler.craft.simple_calculator import SimpleASTNode
'''
* 一個簡單的語法解析器。
* 能夠解析簡單的表達式、變量聲明和初始化語句、賦值語句。
* 它支持的語法規則爲:
*
* programm -> ( int_declare | expressionStatement | assignmentStatement )*
* int_declare -> 'int' Id ( '=' additive )? ';'
* expressionStatement -> additive ';'
* assignmentStatement -> Id '=' additive ';'
* additive -> multiplicative ( (+ | -) multiplicative)*
* multiplicative -> primary ( (* | /) primary)*
* primary -> IntLiteral | Id | (additive)
'''
class SimpleParser(object):
    """Recursive-descent parser for a tiny scripting language.

    It parses expression statements, integer variable declarations with an
    optional initializer, and assignment statements. Each parsing method
    takes a token reader offering peek(), read(), unread(), get_position()
    and set_position(), and returns an AST node, or None when its rule does
    not match at the current position.
    """

    def parse(self, script):
        """Tokenize *script* and return the root node of its AST."""
        lexer = SimpleLexer()
        tokens = lexer.tokenize(script)
        return self.prog(tokens)

    def prog(self, tokens):
        """Parse a whole program: the root of the AST and the entry rule.

        Statements are tried as declaration, expression statement and
        assignment, in that order, until the tokens run out.

        Raises Exception('unknown statement') when no rule matches.
        """
        node = SimpleASTNode(ASTNodeType.Programm, 'pwc')
        while tokens.peek() is not None:
            child = self.int_declare(tokens)
            if child is None:
                child = self.expression_statement(tokens)
            if child is None:
                child = self.assignment_statement(tokens)
            if child is None:
                raise Exception('unknown statement')
            node.add_child(child)
        return node

    def expression_statement(self, tokens):
        """Parse an expression followed by a semicolon.

        When the expression is not followed by a semicolon the reader is
        rewound to where it started (backtracking) and None is returned, so
        that another rule can be tried on the same tokens.
        """
        pos = tokens.get_position()
        node = self.additive(tokens)
        if node is not None:
            token = tokens.peek()
            if token is not None and token.get_type() == TokenType.SemiColon:
                tokens.read()
            else:
                node = None
                tokens.set_position(pos)  # backtrack
        return node

    def assignment_statement(self, tokens):
        """Parse an assignment statement such as ``age = 10*2;``.

        Returns None when the next tokens are not an identifier followed by
        an equals sign. Raises Exception when the right-hand side is not a
        valid expression or the closing semicolon is missing.
        """
        token = tokens.peek()  # look ahead: is this an identifier?
        if token is None or token.get_type() != TokenType.Identifier:
            return None

        token = tokens.read()  # consume the identifier
        node = SimpleASTNode(ASTNodeType.AssignmentStmt, token.get_text())

        token = tokens.peek()  # look ahead: is this an equals sign?
        if token is None or token.get_type() != TokenType.Assignment:
            tokens.unread()  # backtrack: give the identifier back
            return None

        tokens.read()  # consume the equals sign
        child = self.additive(tokens)
        if child is None:
            # The right-hand side is not a valid expression.
            raise Exception('invalide assignment statement, expecting an expression')
        node.add_child(child)

        token = tokens.peek()  # look ahead: is this a semicolon?
        if token is not None and token.get_type() == TokenType.SemiColon:
            tokens.read()  # consume the semicolon
        else:
            raise Exception('invalid statement, expecting semicolon')
        return node

    def int_declare(self, tokens):
        """Parse an integer declaration such as ``int a;`` or ``int b = 2*3;``.

        Returns None when the next token is not the ``int`` keyword. Raises
        Exception when the variable name, the initializer expression or the
        closing semicolon is missing.
        """
        token = tokens.peek()
        if token is None or token.get_type() != TokenType.Int:
            return None

        tokens.read()  # consume 'int'
        token = tokens.peek()
        # The input may end right after 'int', so check for None as well.
        if token is None or token.get_type() != TokenType.Identifier:
            raise Exception('variable name expected')

        token = tokens.read()  # consume the variable name
        node = SimpleASTNode(ASTNodeType.IntDeclaration, token.get_text())

        token = tokens.peek()
        if token is not None and token.get_type() == TokenType.Assignment:
            tokens.read()  # consume the equals sign
            child = self.additive(tokens)
            if child is None:
                raise Exception('invlide variable initialization, expecting an expression')
            node.add_child(child)

        token = tokens.peek()
        if token is not None and token.get_type() == TokenType.SemiColon:
            tokens.read()
        else:
            raise Exception('invalid statemennt, expecting semicolon')
        return node

    def additive(self, tokens):
        """Parse ``multiplicative ((+ | -) multiplicative)*``.

        Operators are folded left to right, so the tree is left-associative.
        Raises Exception when an operator has no right operand.
        """
        child1 = self.multiplicative(tokens)
        node = child1
        if child1 is not None:
            while True:
                token = tokens.peek()
                if token is None or token.get_type() not in (TokenType.Plus, TokenType.Minus):
                    break
                token = tokens.read()  # consume the operator
                child2 = self.multiplicative(tokens)
                if child2 is None:
                    raise Exception('invlide additive expression, expecting the right part.')
                node = SimpleASTNode(ASTNodeType.Additive, token.get_text())
                node.add_child(child1)
                node.add_child(child2)
                child1 = node  # the new node is the left operand of the next one
        return node

    def multiplicative(self, tokens):
        """Parse ``primary ((* | /) primary)*``.

        Operators are folded left to right, so the tree is left-associative.
        Raises Exception when an operator has no right operand.
        """
        child1 = self.primary(tokens)
        node = child1
        # Without a left operand there is nothing to combine; returning None
        # lets the caller report the unexpected token instead of building a
        # node around a missing operand.
        if child1 is not None:
            while True:
                token = tokens.peek()
                if token is None or token.get_type() not in (TokenType.Star, TokenType.Slash):
                    break
                token = tokens.read()  # consume the operator
                child2 = self.primary(tokens)
                if child2 is None:
                    raise Exception('invalid multiplicative expression, expecting the right part.')
                node = SimpleASTNode(ASTNodeType.Multiplicative, token.get_text())
                node.add_child(child1)
                node.add_child(child2)
                child1 = node
        return node

    def primary(self, tokens):
        """Parse an integer literal, an identifier or a parenthesized expression.

        No dedicated "primary" node is built: the single child node is
        returned directly, which keeps the AST small.

        Raises Exception for an empty or unclosed parenthesis.
        """
        node = None
        token = tokens.peek()
        if token is not None:
            token_type = token.get_type()
            if token_type == TokenType.IntLiteral:
                token = tokens.read()
                node = SimpleASTNode(ASTNodeType.IntLiteral, token.get_text())
            elif token_type == TokenType.Identifier:
                token = tokens.read()
                node = SimpleASTNode(ASTNodeType.Identifier, token.get_text())
            elif token_type == TokenType.LeftParen:
                tokens.read()  # consume '('
                node = self.additive(tokens)
                if node is None:
                    raise Exception('expecting an additive expression inside parenthesis')
                token = tokens.peek()
                if token is not None and token.get_type() == TokenType.RightParen:
                    tokens.read()  # consume ')'
                else:
                    raise Exception('expecting right parenthesis')
        return node

    def dump_AST(self, node, indent):
        """Print the tree below *node*, one node per line.

        indent -- prefix for the current level; each level adds one tab.
        """
        if node is None:
            return
        print("%s%s %s" % (indent, node.node_type, node.text))
        for child in node.get_children():
            self.dump_AST(child, indent + "\t")
結果:
# python simple_script.py
Simple script language!
>2;
2
>2+3*5;
17
>age;
119: variavle age has not been set any value
>int a = 5;
a: 5
>int b = a + 2;
b: 7
>b;
7
>exit();
good bye!
課程:https://time.geekbang.org/column/article/125926
代碼:https://github.com/buyouran1/PlayWithCompiler