詞法分析(Java實現)不用狀態機


老師要求不能使用自動機等知識,就是想讓我們手工拼湊代碼,體驗一下不用自動機的痛苦。

package analyse;

import java.io.*;
import java.util.*;

/**
 * Hand-written lexical analyser for Java source text that deliberately avoids
 * an explicit state machine.
 *
 * <p>The input is read one character at a time. The first character of each
 * token decides which of six scanners handles it: string literal, character
 * literal, comment, number, word (keyword or identifier) or sign (operator,
 * special symbol or illegal character). Every recognised token is printed to
 * {@code System.out} as {@code <text> :<category>}.
 *
 * <p>Each scanner returns the first character that follows its token, and
 * {@link #step0(char)} loops over those characters, so the stack depth does
 * not grow with the size of the input.
 */
public class Analyse {
	/** File scanned when no reader is supplied through the constructor. */
	private static final String DEFAULT_SOURCE = "src/testJava/HelloWord.java";

	/**
	 * Value returned by {@link #nextChar()} once the input is exhausted. A
	 * backtick may also occur in real input, so callers must test
	 * {@link #isEnd} rather than compare against this value.
	 */
	private static final char END_MARK = '`';

	FileReader fileReader = null;
	BufferedReader reader = null;
	StringBuffer token = null;// buffer holding the token currently being assembled

	char nowChar;// most recently read character

	/** Returns the most recently read character. */
	public char getNowChar() {
		return nowChar;
	}

	/** Overrides the remembered current character. */
	public void setNowChar(char nowChar) {
		this.nowChar = nowChar;
	}

	boolean isEnd = false;// set once the end of the input has been reached

	public static String keyWorkStr[] = {
		"abstract","assert","boolean",
		"break","byte","case",
		"catch","char","const",
		"class","continue","default",
		"do","double","else",
		"enum","extends","final",
		"finally","float","for",
		"goto","if","implements",
		"import","instanceof","int",
		"interface","long","native",
		"new","package","private",
		"protected","public","return",
		"short","static","strictfp",
		"super","switch","synchronized",
		"this","throw","throws",
		"transient","try","void",
		"volatile","while","true",
		"false"
	};// keyword strings

	// Sign scanning extends a token one character at a time, so every
	// multi-character operator needs each of its prefixes listed as well.
	// "|", "^" and ">>>" are present for that reason (and are operators in
	// their own right); without them "||", "|=", "^=" could never be matched.
	public static String operaStr[] = {
		"+","-","*",
		"/","++","--",
		"<<",">>","<",
		">",">=","<=",
		"==","=","*=",
		"+=","-=","/=",
		"%=","&=","|=",
		"^=","&","&&",
		"||","!=","~",
		"<<=",">>=","%",
		"|","^",">>>",
		">>>="
	};// operator strings
	public static String specialStr[] = {
		"(",")","[",
		"]","!",":",
		".",",","{",
		"}","#",";",
		"@","?"
	};// special symbol strings

	public static Set<String> keyWork = new HashSet<String>(Arrays.asList(keyWorkStr));// keyword set
	public static Set<String> special = new HashSet<String>(Arrays.asList(specialStr));// special symbol set
	public static Set<String> opera = new HashSet<String>(Arrays.asList(operaStr));// operator set

	/**
	 * Creates an analyser that lazily opens the default source file on the
	 * first call to {@link #nextChar()}.
	 */
	public Analyse() {
	}

	/**
	 * Creates an analyser that scans the given reader instead of the default
	 * file. The reader is closed when the analysis finishes.
	 *
	 * @param source character source to tokenize; must not be {@code null}
	 * @throws IllegalArgumentException if {@code source} is {@code null}
	 */
	public Analyse(Reader source) {
		if (source == null) {
			throw new IllegalArgumentException("source must not be null");
		}
		if (source instanceof BufferedReader) {
			reader = (BufferedReader) source;
		} else {
			reader = new BufferedReader(source);
		}
	}

	/**
	 * Program entry point: analyses the default source file.
	 */
	public static void main(String[] args) {
		Analyse an = new Analyse();
		an.start();
	}

	/**
	 * Starts the analysis from the first character of the input.
	 */
	public void start(){
		step0(nextChar());
	}

	/**
	 * Reads the next character and records it as the current character.
	 *
	 * <p>If no reader has been set yet, the default source file is opened
	 * first. When the input is exhausted, cannot be opened or fails to read,
	 * {@link #isEnd} is set and the end marker is returned; the problem is
	 * reported on {@code System.err}.
	 *
	 * @return the character read, or the end marker once the input has ended
	 */
	public char nextChar(){
		if (isEnd) {
			return END_MARK;
		}
		if (reader == null) {
			try {
				fileReader = new FileReader(DEFAULT_SOURCE);
				reader = new BufferedReader(fileReader);
			} catch (FileNotFoundException e) {
				System.err.println("Cannot open input file " + DEFAULT_SOURCE + ": " + e.getMessage());
				isEnd = true;
				return END_MARK;
			}
		}
		try {
			// read() == -1 is the only reliable end-of-stream signal; ready()
			// merely reports whether a read would block.
			int code = reader.read();
			if (code != -1) {
				setNowChar((char) code);
				return (char) code;
			}
		} catch (IOException e) {
			System.err.println("Read failed, treating as end of input: " + e);
		}
		isEnd = true;
		return END_MARK;
	}

	/**
	 * Tokenizes the rest of the input, starting with the given character.
	 *
	 * <p>Line feeds, carriage returns, spaces and tabs between tokens are
	 * skipped. The first significant character selects one of six scanners:
	 * <ol>
	 * <li>string: text enclosed in double quotes</li>
	 * <li>single character: text enclosed in single quotes</li>
	 * <li>comment: line and block comments</li>
	 * <li>number: natural numbers and decimals</li>
	 * <li>word: identifiers and keywords</li>
	 * <li>sign: operators, special symbols or illegal characters</li>
	 * </ol>
	 * When the input ends, the closing message is printed and the reader is
	 * closed.
	 *
	 * @param c the first character to examine
	 */
	public void step0(char c){
		while (true) {
			while (!isEnd && (c == '\n' || c == ' ' || c == '\t' || c == '\r')) {
				c = nextChar();
			}

			if (isEnd) {
				System.out.println("分析結束");
				closeInput();
				return;
			}

			if (c == '\"') {
				c = stringStep();
			} else if (c == '\'') {
				c = charStep();
			} else if (c == '/') {
				c = noteStep();
			} else if (Character.isDigit(c)) {
				c = digitStep(c);
			} else if (Character.isLetter(c) || c == '_') {
				c = letterStep(c);
			} else {
				c = signStepOrIllegal(c, nextChar());
			}
		}
	}

	/** Closes the input reader, if one was opened. */
	private void closeInput() {
		if (reader == null) {
			return;
		}
		try {
			// Closing the buffered reader also closes the wrapped reader.
			reader.close();
		} catch (IOException e) {
			System.err.println("Failed to close input: " + e);
		}
	}

	/**
	 * Scans a string literal whose opening double quote was just consumed.
	 *
	 * @return the first character after the literal
	 */
	private char stringStep() {
		return quotedStep('\"', " :字符串");
	}

	/**
	 * Scans a character literal whose opening single quote was just consumed.
	 *
	 * @return the first character after the literal
	 */
	private char charStep() {
		return quotedStep('\'', " :單字符");
	}

	/**
	 * Scans a quoted literal up to its closing quote and prints it.
	 *
	 * <p>A backslash makes the following character part of the literal, so an
	 * escaped quote does not end it. An unterminated literal ends at the end
	 * of the input.
	 *
	 * @param quote the quote character that opened the literal
	 * @param label the category suffix printed after the literal
	 * @return the first character after the literal
	 */
	private char quotedStep(char quote, String label) {
		token = new StringBuffer(String.valueOf(quote));
		boolean escaped = false;
		while (true) {
			char c = nextChar();
			if (isEnd) {
				break;
			}
			token.append(c);
			if (escaped) {
				escaped = false;
			} else if (c == '\\') {
				escaped = true;
			} else if (c == quote) {
				break;
			}
		}
		System.out.println(token + label);
		return nextChar();
	}

	/**
	 * Handles a token that starts with a slash, which was just consumed.
	 *
	 * <p>{@code //} starts a line comment and {@code /*} a block comment. Any
	 * other follower means the slash is an operator ({@code /} or {@code /=}),
	 * so scanning is handed to the sign scanner without losing the follower.
	 *
	 * @return the first character after the token
	 */
	private char noteStep() {
		char c = nextChar();

		if (c == '/') {// line comment
			token = new StringBuffer("//");
			token.append(getRemainLine());
		} else if (c == '*') {// block comment
			token = new StringBuffer("/*");
			token.append(getRemainBlock());
		} else {
			return signStepOrIllegal('/', c);
		}
		System.out.println(token + " :註釋");
		return nextChar();
	}

	/**
	 * Reads the remainder of a block comment, including the closing
	 * star-slash pair. Tab characters are left out of the returned text. An
	 * unterminated comment ends at the end of the input.
	 *
	 * @return the comment text after the opening marker
	 */
	private String getRemainBlock() {
		StringBuilder buffer = new StringBuffer().length() == 0 ? new StringBuilder() : new StringBuilder();
		boolean afterStar = false;
		while (true) {
			char c = nextChar();
			if (isEnd) {
				break;
			}
			if (c == '\t') {
				// A tab between '*' and '/' separates them, so no terminator.
				afterStar = false;
				continue;
			}
			buffer.append(c);
			if (afterStar && c == '/') {
				break;
			}
			// Remembering the star (rather than consuming a lookahead) lets
			// a run such as "**/" still close the comment.
			afterStar = (c == '*');
		}
		return buffer.toString();
	}

	/**
	 * Reads the remainder of a line comment up to, but not including, the
	 * line break (which is consumed) or the end of the input.
	 *
	 * @return the comment text after the opening slashes
	 */
	private String getRemainLine() {
		StringBuilder buffer = new StringBuilder();
		while (true) {
			char c = nextChar();
			if (isEnd || c == '\n' || c == '\r') {
				break;
			}
			buffer.append(c);
		}
		return buffer.toString();
	}

	/**
	 * Scans an unsigned number containing at most one decimal point.
	 *
	 * @param first the digit that starts the number
	 * @return the first character after the number
	 */
	private char digitStep(char first) {
		boolean dot = false;
		token = new StringBuffer(String.valueOf(first));

		char c = nextChar();
		while (!isEnd && (c == '.' || Character.isDigit(c))) {
			if (c == '.') {
				if (dot) {// a second point ends the number
					break;
				}
				dot = true;
			}
			token.append(c);
			c = nextChar();
		}
		System.out.println(token + " :數字");
		return c;
	}

	/**
	 * Scans a word made of letters, digits and underscores and prints it as a
	 * keyword or an identifier.
	 *
	 * @param first the letter or underscore that starts the word
	 * @return the first character after the word
	 */
	private char letterStep(char first) {
		token = new StringBuffer(String.valueOf(first));
		char c = nextChar();
		while (!isEnd && (c == '_' || Character.isDigit(c) || Character.isLetter(c))) {
			token.append(c);
			c = nextChar();
		}

		if (keyWork.contains(token.toString())) {
			System.out.println(token + " :關鍵字");
		} else {
			System.out.println(token + " :標識符");
		}
		return c;
	}

	/**
	 * Scans the longest operator or special symbol that starts with
	 * {@code first}, or reports {@code first} as an illegal character.
	 *
	 * <p>The lookahead is always returned, so an illegal character is skipped
	 * instead of being examined again.
	 *
	 * @param first the character that starts the sign
	 * @param next  the character that follows {@code first}, already read
	 * @return the first character after the sign
	 */
	private char signStepOrIllegal(char first, char next) {
		token = new StringBuffer();
		String current = String.valueOf(first);
		if (!isSign(current)) {
			System.out.println(first + " :非法字符");
			return next;
		}

		// Keep extending while the longer text is still a known sign.
		while (!isEnd && isSign(current + next)) {
			current = current + next;
			next = nextChar();
		}
		token.append(current);

		if (special.contains(current)) {
			System.out.println(token + " :特殊字符");
		} else {
			System.out.println(token + " :操作符");
		}
		return next;
	}

	/** Tells whether the text is a known operator or special symbol. */
	private static boolean isSign(String text) {
		return opera.contains(text) || special.contains(text);
	}
}


發佈了86 篇原創文章 · 獲贊 13 · 訪問量 12萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章