egret-基於TireTree(Trie 字典樹)的敏感字過濾

1.單例基類

//Base class for lazily created singletons. Every class that extends it gets
//its own single instance through the static Ins() accessor.
class BaseSingle {
	public constructor(...args: any[]) {

	}
	//Returns the single instance of the class Ins() is invoked on, creating it on
	//first use. `args` are forwarded to the constructor only when the instance is
	//created; later calls ignore them.
	public static Ins(...args: any[]): any {
		let cls: any = this;
		//Look for an OWN `_instance` only: static members are inherited along the
		//constructor chain, so a plain `cls._instance` read would hand a subclass
		//the instance that is already cached on one of its ancestors.
		if (!Object.prototype.hasOwnProperty.call(cls, "_instance") || !cls._instance) {
			cls._instance = new cls(...args);
		}
		return cls._instance;
	}
}

2.TireNode節點定義

//TireTree的Node節點
//One node of the trie. A node holds a single character; its children are kept
//in two unordered buckets chosen by comparing the child's char code with this
//node's char code.
class TireNode {
	public ids: number[] = [];//ids of every word whose path passes through this node
	public endId: number = -1;//id of the word that ends on this node, -1 when none does
	public char: string//character held by this node
	public leftNode: TireNode[] = []//children whose char code is <= this node's char code
	public rightNode: TireNode[] = []//children whose char code is > this node's char code
	public bisEnd: boolean = false;//true once any word has ended on this node
	public constructor() {
	}
	//Inserts `text` into the subtree rooted at this node. The first character of
	//`text` becomes this node's character, the rest is pushed down to children.
	//`id` identifies the word; it is typed loosely because TireMgr.init passes
	//the string keys of the word list.
	//Empty text is ignored (it used to throw after half-updating the node).
	public fill(id: any, text: string) {
		if (!text) {
			return;
		}
		if (this.ids.indexOf(id) === -1) {
			this.ids.push(id);
		}
		this.char = text.charAt(0);
		if (text.length === 1) {
			//A word ends here. The flag is never cleared again, so a longer word
			//inserted later through this node cannot erase the end marker.
			this.bisEnd = true;
			this.endId = id;
		} else {
			this.next(id, text.slice(1));
		}
	}

	//Returns the ids of all words that have `text` as a prefix, or [] when no
	//such path exists. text[0] is assumed to be this node's character (the
	//caller selects the node); only the following characters are matched.
	public findExitArray(text: string): number[] {
		const node = this.walk(text);
		return node ? node.ids : [];
	}
	//Returns the id of the word that is exactly `text`, or -1 when the path does
	//not exist or no word ends on it. Same assumption on text[0] as findExitArray.
	public findEnd(text: string): number {
		const node = this.walk(text);
		return node ? node.endId : -1;
	}
	//Follows text[1..] downwards starting at this node and returns the node
	//reached, or null when the text is empty or the path breaks.
	private walk(text: string): TireNode | null {
		if (!text) {
			return null;
		}
		let node: TireNode = this;
		for (let i = 1; i < text.length; i++) {
			const ch = text.charAt(i);
			const bucket = node.bucketFor(ch);
			const ind = node.bIsExit(bucket, ch);
			if (ind === -1) {
				return null;
			}
			node = bucket[ind];
		}
		return node;
	}
	//Pushes the remaining `text` into the matching child, creating it if needed.
	private next(id: any, text: string) {
		const ch = text.charAt(0);
		const bucket = this.bucketFor(ch);
		const ind = this.bIsExit(bucket, ch);
		if (ind !== -1) {
			bucket[ind].fill(id, text);
		} else {
			const node = new TireNode();
			node.fill(id, text);
			bucket.push(node);
		}
	}
	//Picks the child bucket for `ch`: right when its char code is greater than
	//this node's, left otherwise.
	private bucketFor(ch: string): TireNode[] {
		return ch.charCodeAt(0) > this.char.charCodeAt(0) ? this.rightNode : this.leftNode;
	}
	//Returns the index of the node holding `char` inside `arr`, or -1.
	private bIsExit(arr: TireNode[], char: string): number {
		for (let i = 0; i < arr.length; i++) {
			if (arr[i].char === char) {
				return i;
			}
		}
		return -1;
	}
}

3.TireTree的定義

//Singleton trie of words. Keeps the root-level nodes and delegates insertion
//and lookup to the TireNode that matches the first character.
class TireTree extends BaseSingle {
	public threeNode: TireNode[] = [];//root-level nodes, one per distinct first character

	public constructor() {
		super();
	}
	public static Ins(): TireTree {
		return super.Ins();
	}
	//Inserts the word `text` under identifier `id`.
	//bword: when true (default) the word is upper-cased first, which makes
	//matching case-insensitive as long as lookups use the same flag.
	//Empty or missing words are skipped: they can never match anything and used
	//to leave a broken root node behind before throwing.
	public fill(id: any, text: string, bword: boolean = true) {
		if (!text) {
			return;
		}
		if (bword) {
			text = text.toUpperCase();
		}
		const ind = this.bIsExit(text.charAt(0));
		if (ind !== -1) {
			this.threeNode[ind].fill(id, text);
		} else {
			const node = new TireNode();
			node.fill(id, text);
			this.threeNode.push(node);
		}
	}
	//Returns the id of the word that is exactly `text`, or -1 when there is none.
	public findEnd(text: string, bword: boolean = true): number {
		if (!text) {
			return -1;
		}
		if (bword) {
			text = text.toUpperCase();
		}
		const ind = this.bIsExit(text.charAt(0));
		return ind !== -1 ? this.threeNode[ind].findEnd(text) : -1;
	}
	//Returns the ids of all words that start with `text`, or [] when there are none.
	public findExitArray(text: string, bword: boolean = true): number[] {
		if (!text) {
			return [];
		}
		if (bword) {
			text = text.toUpperCase();
		}
		const ind = this.bIsExit(text.charAt(0));
		return ind !== -1 ? this.threeNode[ind].findExitArray(text) : [];
	}
	//Returns the index of the root node holding `char`, or -1.
	private bIsExit(char: string): number {
		for (let i = 0; i < this.threeNode.length; i++) {
			if (this.threeNode[i].char === char) {
				return i;
			}
		}
		return -1;
	}

}

4.使用:管理類的定義

//Singleton front end of the filter: loads the word list, feeds it into
//TireTree and masks the words found in a text.
class TireMgr extends BaseSingle {
	public masks: any;//word list read from the 'maskwords_json' resource; init() uses its keys as ids and its values as words
	public checkText: string;//text handled by the most recent getMask() call
	public splitChar = [" ", "_", "*", "^", "%", "$", "@"]//characters skipped while matching, so they cannot be used to break a word apart
	public constructor() {
		super();
		this.masks = RES.getRes('maskwords_json')//load the word list
	}
	public static Ins(): TireMgr {
		return super.Ins();
	}
	//Feeds every entry of the word list into the TireTree.
	public init() {
		for (let key in this.masks) {
			TireTree.Ins().fill(key, this.masks[key]);
		}
	}
	//Returns `text` with every listed word replaced by '*' characters.
	//At each position the longest word starting there is masked; the mask covers
	//everything from its first to its last character, including any ignored
	//characters in between. When no word starts at a position, exactly that one
	//character is copied and scanning resumes at the next one, so no character
	//is dropped or duplicated and no later start position is skipped.
	//A null/undefined text is treated as the empty string.
	public getMask(text): string {
		console.log("strat", egret.getTimer());
		this.checkText = text == null ? "" : text;
		let cache = ""
		for (let i = 0; i < this.checkText.length; i++) {
			const check = this.checkText[i];
			//An ignored character never starts a word.
			const endIndex = this.splitChar.indexOf(check) === -1 ? this.findMatchEnd(i) : -1;
			if (endIndex >= i) {
				for (let ind = i; ind <= endIndex; ind++) {
					cache += "*";
				}
				//Resume right after the masked span.
				i = endIndex;
			} else {
				cache += check;
			}
		}
		console.log("end", egret.getTimer());
		return cache;
	}
	//Returns the index of the last character of the longest word that starts at
	//`startIndex` in checkText, or -1 when no word starts there. The caller
	//guarantees that checkText[startIndex] is not an ignored character.
	private findMatchEnd(startIndex: number): number {
		const tree = TireTree.Ins();
		let candidate = "";
		let endIndex = -1;
		for (let j = startIndex; j < this.checkText.length; j++) {
			const ch = this.checkText[j];
			if (this.splitChar.indexOf(ch) !== -1) {
				continue;
			}
			candidate += ch;
			if (tree.findExitArray(candidate).length === 0) {
				//No word continues with this character: stop extending.
				break;
			}
			if (tree.findEnd(candidate) !== -1) {
				//A complete word ends here; keep going in case a longer one exists.
				endIndex = j;
			}
		}
		return endIndex;
	}

}

5.敏感字庫格式參考:

{
	"1":"測試",
	"2":"敏感字",
	"3":"測試下敏感字",
	"4":"敏",
	"5":"感"
}

說明

1.已經通過簡單的敏感字測試,複雜條件下的有效性待確定
2.可用作其他搜索匹配方案,但注意資源格式,必須保留id作爲鍵值
3.使用前必須先調用一次 TireMgr.Ins().init() 完成初始化
4.如有疑問或建議,歡迎留言探討
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章