編譯原理之美 --- 05 | 語法分析(三):實現一門簡單的腳本語言

腳本語言支持變量,通過使用字典作爲變量存儲區實現。

賦值語句中的等號後面可匹配表達式。

嘗試一個規則不成功之後,恢復到原樣,再去嘗試另外的規則,這個現象就叫做“回溯”。

simple_script.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from play_with_compiler.craft.simple_parser import SimpleParser
from play_with_compiler.craft.base_type import ASTNodeType
import sys

'''
 * 一個簡單的腳本解釋器。
 * 所支持的語法,請參見simple_parser.py
 *
 * 運行腳本:
 * 在命令行下,鍵入:python simple_script.py
 * 則進入一個REPL界面。你可以依次敲入命令。比如:
 * > 2+3;
 * > int age = 10;
 * > int b;
 * > b = 10*2;
 * > age = age + b;
 * > exit();  //退出REPL界面。
 *
 * 你還可以使用一個參數 -v,讓每次執行腳本的時候,都輸出AST和整個計算過程。
 '''
class SimpleScript(object):
    """A minimal tree-walking interpreter for the AST produced by SimpleParser.

    Variables are kept in a plain dict mapping the variable name to its
    integer value. A variable declared without an initializer is stored
    as None until it is assigned.
    """

    def __init__(self, verbose):
        """Create an interpreter.

        verbose: when true, evaluate() prints every node it visits and
        every intermediate result.
        """
        self._variables = {}
        self._verbose = verbose

    def evaluate(self, node, indent):
        """Walk the AST rooted at ``node`` and compute its value.

        node:   AST node exposing get_type(), get_text() and get_children().
        indent: prefix used for verbose tracing; the empty string marks a
                top-level statement, whose result is echoed to the user.

        Returns the integer value of the node, or None for a declaration
        without an initializer (and for an empty program).

        Raises Exception when an identifier is read before it holds a value
        or when an assignment targets an undeclared variable.
        """
        result = None
        node_type = node.get_type()
        if self._verbose:
            print('%s Calcalationg: %s:' %(indent, node_type))

        if node_type == ASTNodeType.Programm:
            # Statements share the caller's indent so that each one is
            # treated as top-level; the value of the last one is returned.
            for child in node.get_children():
                result = self.evaluate(child, indent)
        elif node_type == ASTNodeType.Additive:
            left, right = self._evaluate_operands(node, indent)
            if node.get_text() == '+':
                result = left + right
            else:
                result = left - right
        elif node_type == ASTNodeType.Multiplicative:
            left, right = self._evaluate_operands(node, indent)
            if node.get_text() == '*':
                result = left * right
            else:
                # Integer division: the language only has ints. On Python 2
                # this equals the int "/" result; on Python 3 it avoids
                # silently producing a float.
                result = left // right
        elif node_type == ASTNodeType.IntLiteral:
            result = int(node.get_text())
        elif node_type == ASTNodeType.Identifier:
            var_name = node.get_text()
            value = self._variables.get(var_name)
            if value is not None:
                result = int(value)
            else:
                raise Exception('variavle ' + var_name + ' has not been set any value')
        elif node_type in (ASTNodeType.AssignmentStmt, ASTNodeType.IntDeclaration):
            var_name = node.get_text()
            # An assignment is only legal for an already declared variable.
            # After that check it stores its value exactly like a
            # declaration does (Python has no switch fall-through, so both
            # node types are handled in this single branch).
            if node_type == ASTNodeType.AssignmentStmt and var_name not in self._variables:
                raise Exception('unknown variable: ' + var_name)
            var_value = None
            children = node.get_children()
            if len(children) > 0:
                result = self.evaluate(children[0], indent + '\t')
                var_value = int(result)
            self._variables[var_name] = var_value

        if self._verbose:
            print('%sResult: %s' %(indent, result))
        elif indent == '':
            # Non-verbose mode: echo only the results of top-level statements.
            if node_type == ASTNodeType.IntDeclaration or node_type == ASTNodeType.AssignmentStmt:
                print('%s: %s' %(node.get_text(), result))
            elif node_type != ASTNodeType.Programm:
                print(result)
        return result

    def _evaluate_operands(self, node, indent):
        """Evaluate the two children of a binary node, left first, as ints."""
        children = node.get_children()
        left = self.evaluate(children[0], indent + "\t")
        right = self.evaluate(children[1], indent + "\t")
        return int(left), int(right)

def play(args):
    """Run a simple read-eval-print loop on standard input.

    args: command-line arguments without the program name; a leading '-v'
          switches on verbose mode (AST dump plus evaluation trace).

    Input lines are accumulated until one ends with ';', then the collected
    text is parsed and evaluated. Typing 'exit();' or closing standard
    input ends the loop. Parse and evaluation errors are reported and the
    pending statement is discarded, so the session keeps running.
    """
    verbose = False
    if (len(args) > 0 and args[0] == '-v'):
        verbose = True
        print('verbose mode')
    print('Simple script language!')

    # raw_input() only exists on Python 2; input() is its Python 3
    # equivalent. Resolve the reader once, outside the loop, so a missing
    # name can never be swallowed by the error handler below.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    parser = SimpleParser()
    script = SimpleScript(verbose)
    script_text = ""

    while True:
        try:
            line = read_line(">")
            if line == 'exit();':
                print("good bye!")
                break
            script_text += line + "\n"
            if line.endswith(";"):
                tree = parser.parse(script_text)
                if verbose:
                    parser.dump_AST(tree, "")
                script.evaluate(tree, "")
                script_text = ""
        except EOFError:
            # Standard input was closed (Ctrl-D or end of piped input).
            # Every further read would fail the same way, so leave the
            # loop instead of reporting the error forever.
            print("good bye!")
            break
        except Exception as e:
            print('119: %s' %e)
            script_text = ''

# Start the REPL only when this file is executed as a script, so that
# importing the module (e.g. to reuse SimpleScript) does not block on input.
if __name__ == '__main__':
    play(sys.argv[1:])

simple_parser.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from play_with_compiler.craft.simple_lexer import SimpleLexer
from play_with_compiler.craft.base_type import Token, TokenReader, ASTNodeType, TokenType
from play_with_compiler.craft.simple_calculator import SimpleASTNode

'''
 * 一個簡單的語法解析器。
 * 能夠解析簡單的表達式、變量聲明和初始化語句、賦值語句。
 * 它支持的語法規則爲:
 *
 * programm -> (int_declare | expressionStatement | assignmentStatement)*
 * int_declare -> 'int' Id ( = additive)? ';'
 * expressionStatement -> additive ';'
 * assignmentStatement -> Id = additive ';'
 * additive -> multiplicative ( (+ | -) multiplicative)*
 * multiplicative -> primary ( (* | /) primary)*
 * primary -> IntLiteral | Id | (additive)
'''
class SimpleParser(object):
    """Recursive-descent parser for the simple script language.

    It understands expressions, int variable declarations (with optional
    initializer) and assignment statements:

        programm            -> (int_declare | expressionStatement | assignmentStatement)*
        int_declare         -> 'int' Id ( '=' additive )? ';'
        expressionStatement -> additive ';'
        assignmentStatement -> Id '=' additive ';'
        additive            -> multiplicative ( ('+' | '-') multiplicative )*
        multiplicative      -> primary ( ('*' | '/') primary )*
        primary             -> IntLiteral | Id | '(' additive ')'

    Every rule method takes the token reader, returns an AST node on a
    match and returns None (with the reader position restored) when the
    rule does not apply. Input that starts a rule but cannot complete it
    raises Exception.
    """

    def parse(self, script):
        """Tokenize ``script`` with SimpleLexer and return the root AST node."""
        lexer = SimpleLexer()
        tokens = lexer.tokenize(script)
        return self.prog(tokens)

    def prog(self, tokens):
        """Parse a whole program: the entry point that builds the AST root.

        Statement rules are tried in order until one matches.
        Raises Exception('unknown statement') when none of them does.
        """
        node = SimpleASTNode(ASTNodeType.Programm, 'pwc')
        while tokens.peek() is not None:
            child = self.int_declare(tokens)

            if child is None:
                child = self.expression_statement(tokens)

            if child is None:
                child = self.assignment_statement(tokens)

            if child is None:
                # Fail before touching the tree so that the root never
                # receives a None child.
                raise Exception('unknown statement')

            node.add_child(child)

        return node

    def expression_statement(self, tokens):
        """Parse an expression statement: an expression followed by ';'.

        Returns the expression node, or None after backtracking when the
        expression is missing or not followed by a semicolon.
        """
        pos = tokens.get_position()
        node = self.additive(tokens)
        if node is not None:
            token = tokens.peek()
            if token is not None and token.get_type() == TokenType.SemiColon:
                tokens.read()
            else:
                node = None
                tokens.set_position(pos)  # backtrack
        return node

    def assignment_statement(self, tokens):
        """Parse an assignment statement such as ``age = 10*2;``.

        Returns an AssignmentStmt node named after the variable with the
        right-hand expression as its only child, or None when the input
        does not start with ``Id =``.
        Raises Exception when the expression or the closing ';' is missing.
        """
        token = tokens.peek()  # look ahead: is the next token an identifier?
        if token is None or token.get_type() != TokenType.Identifier:
            return None

        token = tokens.read()  # consume the identifier
        node = SimpleASTNode(ASTNodeType.AssignmentStmt, token.get_text())

        token = tokens.peek()  # look ahead: is the next token '='?
        if token is None or token.get_type() != TokenType.Assignment:
            tokens.unread()  # backtrack: give the identifier back
            return None

        tokens.read()  # consume '='
        child = self.additive(tokens)
        if child is None:
            # The right-hand side is not a valid expression.
            raise Exception('invalide assignment statement, expecting an expression')
        node.add_child(child)

        token = tokens.peek()  # look ahead: is the next token ';'?
        if token is None or token.get_type() != TokenType.SemiColon:
            raise Exception('invalid statement, expecting semicolon')
        tokens.read()  # consume ';'
        return node

    def int_declare(self, tokens):
        """Parse an int declaration such as ``int a;`` or ``int b = 2*3;``.

        Returns an IntDeclaration node named after the variable, with the
        initializer expression as its child when present, or None when the
        input does not start with the 'int' keyword.
        Raises Exception when the variable name, the initializer expression
        or the closing ';' is missing.
        """
        token = tokens.peek()
        if token is None or token.get_type() != TokenType.Int:
            return None
        tokens.read()  # consume 'int'

        # The input may end right after 'int', so the lookahead can be None.
        token = tokens.peek()
        if token is None or token.get_type() != TokenType.Identifier:
            raise Exception('variable name expected')
        token = tokens.read()
        node = SimpleASTNode(ASTNodeType.IntDeclaration, token.get_text())

        token = tokens.peek()
        if token is not None and token.get_type() == TokenType.Assignment:
            tokens.read()  # consume '='
            child = self.additive(tokens)
            if child is None:
                raise Exception('invlide variable initialization, expecting an expression')
            node.add_child(child)

        token = tokens.peek()
        if token is None or token.get_type() != TokenType.SemiColon:
            raise Exception('invalid statemennt, expecting semicolon')
        tokens.read()  # consume ';'
        return node

    def additive(self, tokens):
        """Parse an additive expression; '+' and '-' associate to the left.

        Returns the expression node, or None when no operand is found.
        Raises Exception when an operator has no right operand.
        """
        child1 = self.multiplicative(tokens)
        node = child1
        if child1 is not None:
            while True:
                token = tokens.peek()
                if token is None or token.get_type() not in (TokenType.Plus, TokenType.Minus):
                    break
                token = tokens.read()  # consume the operator
                child2 = self.multiplicative(tokens)
                if child2 is None:
                    raise Exception('invlide additive expression, expecting the right part.')
                node = SimpleASTNode(ASTNodeType.Additive, token.get_text())
                node.add_child(child1)
                node.add_child(child2)
                # The tree built so far becomes the left operand of the
                # next operator, which yields left associativity.
                child1 = node
        return node

    def multiplicative(self, tokens):
        """Parse a multiplicative expression; '*' and '/' associate to the left.

        Returns the expression node, or None when no operand is found.
        Raises Exception when an operator has no right operand.
        """
        child1 = self.primary(tokens)
        node = child1
        # Without a left operand there is no expression here; never consume
        # an operator in that case (mirrors the guard in additive()).
        if child1 is not None:
            while True:
                token = tokens.peek()
                if token is None or token.get_type() not in (TokenType.Star, TokenType.Slash):
                    break
                token = tokens.read()  # consume the operator
                child2 = self.primary(tokens)
                if child2 is None:
                    raise Exception('invalid multiplicative expression, expecting the right part.')
                node = SimpleASTNode(ASTNodeType.Multiplicative, token.get_text())
                node.add_child(child1)
                node.add_child(child2)
                child1 = node
        return node

    def primary(self, tokens):
        """Parse a primary expression: int literal, identifier or ``( additive )``.

        No dedicated "primary" node is created; the single child node is
        returned directly, which keeps the AST small.
        Returns None when the next token starts none of the alternatives.
        Raises Exception for an empty or unclosed parenthesis.
        """
        node = None
        token = tokens.peek()
        if token is not None:
            token_type = token.get_type()
            if token_type == TokenType.IntLiteral:
                token = tokens.read()
                node = SimpleASTNode(ASTNodeType.IntLiteral, token.get_text())
            elif token_type == TokenType.Identifier:
                token = tokens.read()
                node = SimpleASTNode(ASTNodeType.Identifier, token.get_text())
            elif token_type == TokenType.LeftParen:
                tokens.read()  # consume '('
                node = self.additive(tokens)
                if node is None:
                    raise Exception('expecting an additive expression inside parenthesis')
                token = tokens.peek()
                if token is None or token.get_type() != TokenType.RightParen:
                    raise Exception('expecting right parenthesis')
                tokens.read()  # consume ')'
        return node

    def dump_AST(self, node, indent):
        """Print the tree rooted at ``node``, one node per line.

        indent: prefix for the current level; each deeper level adds a tab.
        """
        if node is None:
            return
        print("%s%s %s" %(indent, node.node_type, node.text))
        for child in node.get_children():
            self.dump_AST(child, indent + "\t")

結果:

# python simple_script.py
Simple script language!
>2;
2
>2+3*5;
17
>age;
119: variavle age has not been set any value
>int a = 5;
a: 5
>int b = a + 2;
b: 7
>b;
7
>exit();
good bye!

課程:https://time.geekbang.org/column/article/125926
代碼:https://github.com/buyouran1/PlayWithCompiler

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章