最近在用富文本編輯器,查了好多防XSS攻擊的代碼,都感覺不怎麼好用。首先,這些方法都是過濾非法字符或者字符串,而標籤字符串千變萬化,難以過濾全面;其次,過濾後的代碼甚至會喪失正常功能。因此我考慮只取我們需要的部分。
摒棄了過濾法,我考慮使用標籤分析法。
參考了百度UEditor前端的過濾方法:它把允許使用的標籤(tag)及其屬性逐一列出並加以保留(即白名單)。因此我也考慮使用白名單法:
// Whitelist: tag name => attribute names that may be kept on that tag.
// An empty list means the tag itself is allowed but none of its attributes are.
$this->allowParams=array(
'a' =>array('target','href','title','class','style'),
'abbr' =>array('title','class','style'),
'address' =>array('class','style'),
'area' =>array('shape','coords','href','alt'),
'article' =>array(),
'aside' =>array(),
'audio' =>array('autoplay','controls','loop','preload','src','class','style'),
'b' =>array('class','style'),
'bdi' =>array('dir'),
'bdo' =>array('dir'),
'big' =>array(),
'blockquote' =>array('cite','class','style'),
'br' =>array(),
'caption' =>array('class','style'),
'center' =>array(),
'cite' =>array(),
'code' =>array('class','style'),
'col' =>array('align','valign','span','width','class','style'),
'colgroup' =>array('align','valign','span','width','class','style'),
'dd' =>array('class','style'),
'del' =>array('datetime'),
'details' =>array('open'),
'div' =>array('class','style'),
'dl' =>array('class','style'),
'dt' =>array('class','style'),
'em' =>array('class','style'),
'font' =>array('color','size','face'),
'footer' =>array(),
'h1' =>array('class','style'),
'h2' =>array('class','style'),
'h3' =>array('class','style'),
'h4' =>array('class','style'),
'h5' =>array('class','style'),
'h6' =>array('class','style'),
'header' =>array(),
'hr' =>array(),
'i' =>array('class','style'),
'img' =>array('src','alt','title','width','height','id','_src','loadingclass','class','data-latex'),
'ins' =>array('datetime'),
'li' =>array('class','style'),
'mark' =>array(),
'nav' =>array(),
'ol' =>array('class','style'),
'p' =>array('class','style'),
'pre' =>array('class','style'),
's' =>array(),
'section' =>array(),
'small' =>array(),
'span' =>array('class','style'),
'sub' =>array('class','style'),
'sup' =>array('class','style'),
'strong' =>array('class','style'),
'table' =>array('width','border','align','valign','class','style'),
'tbody' =>array('align','valign','class','style'),
'td' =>array('width','rowspan','colspan','align','valign','class','style'),
'tfoot' =>array('align','valign','class','style'),
'th' =>array('width','rowspan','colspan','align','valign','class','style'),
'thead' =>array('align','valign','class','style'),
'tr' =>array('rowspan','align','valign','class','style'),
'tt' =>array(),
'u' =>array(),
'text' =>array(),// simple_html_dom uses the tag name "text" for text nodes
'ul' =>array('class','style')
);
白名單有了之後就要開始處理了。既然要處理html就要有處理html的工具。這裏我選用simple_html_dom(百度可以查到,這裏就不發鏈接了)
下面分析內容的dom:
$uedata="<div>$html</div>";// wrap the fragment in one container element so the tree has a single top-level node under simple_html_dom's root
$dom=str_get_html($uedata);// parse the WRAPPED markup (not the raw $html) with simple_html_dom
這裏解析部分就完成了。由於simple_html_dom解析的是樹形結構,所以遍歷時需要用圖的遍歷方式,這裏我選用了dfs遍歷
// Clean the parsed tree in place, then serialise it and free the parser.
$topLevel=$dom->root->children;// element children of the root: the DFS starting points
$this->dfs($topLevel);// depth-first clean-up against the whitelist
$html=$dom->outertext;// read the cleaned markup back as a string
$dom->clear();// release the memory held by the parsed document
這裏遍歷處理很簡單了,使用dfs遞歸就可以了。逐個判斷tag和params,合格的留下,不合格的刪掉。這裏我的代碼是這樣的:
/**
 * Depth-first clean-up of a list of simple_html_dom nodes against
 * $this->allowParams (tag name => allowed attribute names).
 *
 * Nodes whose tag is not whitelisted are blanked out together with their
 * whole subtree; on whitelisted nodes every non-whitelisted attribute is
 * removed. The nodes are modified in place; nothing is returned.
 *
 * NOTE: only attribute NAMES are checked here. Values of URL-bearing
 * attributes (href, src, ...) are not inspected, so e.g. a "javascript:" URL
 * in a whitelisted href still has to be rejected separately.
 *
 * @param array $doms nodes to inspect
 */
function dfs($doms){
    foreach ($doms as $domitem) {
        // Direct key lookup instead of in_array(array_keys(...)).
        if (!isset($this->allowParams[$domitem->tag])) {
            $domitem->outertext = "";// tag is not whitelisted: drop the node
            continue;
        }
        foreach ($domitem->attr as $key => $value) {
            // Strict comparison so that a numeric attribute name can never
            // loosely match a whitelisted string.
            if (!in_array($key, $this->allowParams[$domitem->tag], true)) {
                $domitem->removeAttribute($key);// attribute is not whitelisted
            }
        }
        // Recurse whenever there are child NODES. Gating this on ->children
        // skipped subtrees made only of non-element nodes, so anything
        // disallowed in them was never visited.
        if (!empty($domitem->nodes)) {
            $this->dfs($domitem->nodes);
        }
    }
}
到此整個處理就結束了。整合後放到ThinkPHP擴展類,代碼如下:
<?php
namespace Org\Util;
require 'simple_html_dom.class.php';
/**
 * Whitelist-based HTML sanitiser for rich-text editor submissions.
 *
 * The markup is parsed with simple_html_dom and walked depth-first. Any node
 * whose tag is not whitelisted is dropped with its subtree, and any attribute
 * that is not whitelisted for its tag is removed. In addition, the values of
 * URL-bearing attributes and of `style` are checked, because a whitelisted
 * attribute name alone does not make `href="javascript:..."` safe.
 */
class UEditorXSSRejector{
    /**
     * Whitelist: tag name => attribute names that may be kept on that tag.
     * An empty list allows the tag but none of its attributes. "text" is the
     * tag name simple_html_dom gives to text nodes.
     *
     * Declared as a property default so that dfs() also works when it is
     * called without a preceding parse().
     *
     * @var array
     */
    private $allowParams = array(
        'a'          => array('target','href','title','class','style'),
        'abbr'       => array('title','class','style'),
        'address'    => array('class','style'),
        'area'       => array('shape','coords','href','alt'),
        'article'    => array(),
        'aside'      => array(),
        'audio'      => array('autoplay','controls','loop','preload','src','class','style'),
        'b'          => array('class','style'),
        'bdi'        => array('dir'),
        'bdo'        => array('dir'),
        'big'        => array(),
        'blockquote' => array('cite','class','style'),
        'br'         => array(),
        'caption'    => array('class','style'),
        'center'     => array(),
        'cite'       => array(),
        'code'       => array('class','style'),
        'col'        => array('align','valign','span','width','class','style'),
        'colgroup'   => array('align','valign','span','width','class','style'),
        'dd'         => array('class','style'),
        'del'        => array('datetime'),
        'details'    => array('open'),
        'div'        => array('class','style'),
        'dl'         => array('class','style'),
        'dt'         => array('class','style'),
        'em'         => array('class','style'),
        'font'       => array('color','size','face'),
        'footer'     => array(),
        'h1'         => array('class','style'),
        'h2'         => array('class','style'),
        'h3'         => array('class','style'),
        'h4'         => array('class','style'),
        'h5'         => array('class','style'),
        'h6'         => array('class','style'),
        'header'     => array(),
        'hr'         => array(),
        'i'          => array('class','style'),
        'img'        => array('src','alt','title','width','height','id','_src','loadingclass','class','data-latex'),
        'ins'        => array('datetime'),
        'li'         => array('class','style'),
        'mark'       => array(),
        'nav'        => array(),
        'ol'         => array('class','style'),
        'p'          => array('class','style'),
        'pre'        => array('class','style'),
        's'          => array(),
        'section'    => array(),
        'small'      => array(),
        'span'       => array('class','style'),
        'sub'        => array('class','style'),
        'sup'        => array('class','style'),
        'strong'     => array('class','style'),
        'table'      => array('width','border','align','valign','class','style'),
        'tbody'      => array('align','valign','class','style'),
        'td'         => array('width','rowspan','colspan','align','valign','class','style'),
        'tfoot'      => array('align','valign','class','style'),
        'th'         => array('width','rowspan','colspan','align','valign','class','style'),
        'thead'      => array('align','valign','class','style'),
        'tr'         => array('rowspan','align','valign','class','style'),
        'tt'         => array(),
        'u'          => array(),
        'text'       => array(),
        'ul'         => array('class','style')//,
        //'video'    => array('autoplay','controls','loop','preload','src','height','width','class','style')
    );

    /**
     * Whitelisted attributes whose value is a URL and therefore needs a
     * scheme check in addition to the name check.
     *
     * @var array
     */
    private $urlParams = array('href', 'src', '_src', 'cite');

    /**
     * Sanitise an HTML fragment.
     *
     * The fragment is wrapped in a <div> before parsing and the serialised
     * result is returned as produced by simple_html_dom (the wrapper is not
     * stripped again).
     *
     * @param string $uedata untrusted HTML fragment
     * @return string sanitised HTML, or '' when the parser rejects the input
     */
    public function parse($uedata){
        $uedata = "<div>$uedata</div>";
        $dom = str_get_html($uedata);
        // str_get_html() returns false when it refuses the input (for
        // example when it exceeds the library's size limit). Fail closed
        // instead of crashing on a non-object further down.
        if (!is_object($dom)) {
            return '';
        }
        $this->dfs($dom->root->children);
        $html = $dom->outertext;
        $dom->clear();// release the memory held by the parsed document
        return $html;
    }

    /**
     * Depth-first clean-up of a list of nodes, in place.
     *
     * @param array $doms simple_html_dom nodes to inspect
     * @return void
     */
    public function dfs($doms){
        if (empty($doms)) {
            return;
        }
        foreach ($doms as $domitem) {
            $tag = $domitem->tag;
            if (!isset($this->allowParams[$tag])) {
                $domitem->outertext = "";// tag is not whitelisted: drop node and subtree
                continue;
            }
            foreach ($domitem->attr as $key => $value) {
                // Strict name comparison, then a value check for the
                // attributes that can carry script (URLs and inline CSS).
                if (!in_array($key, $this->allowParams[$tag], true)
                    || !$this->isSafeValue($key, $value)) {
                    $domitem->removeAttribute($key);
                }
            }
            // Recurse whenever there are child NODES. Gating this on
            // ->children skipped subtrees made only of non-element nodes, so
            // anything disallowed in them was never visited.
            if (!empty($domitem->nodes)) {
                $this->dfs($domitem->nodes);
            }
        }
    }

    /**
     * Decide whether the value of an already whitelisted attribute may stay.
     *
     * @param string $name  attribute name
     * @param mixed  $value attribute value; non-string values are accepted as is
     * @return bool
     */
    private function isSafeValue($name, $value){
        if (!is_string($value)) {
            return true;
        }
        if (in_array($name, $this->urlParams, true)) {
            return $this->isSafeUrl($value);
        }
        if ($name === 'style') {
            return $this->isSafeStyle($value);
        }
        return true;
    }

    /**
     * Accept relative URLs, a fixed list of schemes, and base64 raster-image
     * data URIs; reject everything else.
     *
     * The value is the raw attribute text, so character references have not
     * been decoded. Rather than trying to decode them the way a browser
     * would, any "&" in the part where a scheme could appear is rejected.
     *
     * @param string $url raw attribute value
     * @return bool
     */
    private function isSafeUrl($url){
        // Whitespace and control characters are removed first so they cannot
        // be used to split up a scheme name.
        $url = preg_replace('/[\x00-\x20\x7f]+/', '', $url);
        // A scheme can only sit before the first "/", "?" or "#".
        $head = preg_replace('~[/?#].*$~s', '', $url);
        if (strpos($head, ':') === false && strpos($head, '&') === false) {
            return true;// no scheme and nothing that could decode into one
        }
        if (preg_match('~^(?:https?|ftp|mailto|tel):~i', $url)) {
            return true;
        }
        return (bool) preg_match('~^data:image/(?:png|gif|jpe?g|webp|bmp);base64,[a-z0-9+/=]*$~i', $url);
    }

    /**
     * Reject inline CSS containing constructs that legacy browsers are known
     * to execute, or that could hide them (escapes, comments, leftover
     * character references).
     *
     * @param string $css raw style attribute value
     * @return bool
     */
    private function isSafeStyle($css){
        $css = html_entity_decode($css, ENT_QUOTES, 'UTF-8');
        // Anything still containing "&" after one decoding pass is either
        // double-encoded or a reference PHP did not decode; CSS escapes and
        // comments can split a keyword. None of these are needed in ordinary
        // inline styles, so reject rather than try to normalise them.
        if (strpos($css, '&') !== false
            || strpos($css, '\\') !== false
            || strpos($css, '/*') !== false) {
            return false;
        }
        $css = preg_replace('/[\x00-\x20\x7f]+/', '', $css);
        return !preg_match(
            '~expression\(|javascript:|vbscript:|(?<![a-z-])behavior:|-moz-binding|@import~i',
            $css
        );
    }
}
?>
將simple_html_dom.php重命名爲simple_html_dom.class.php放到\Org\Util下,並在文件開頭增加命名空間聲明(namespace Org\Util;)。然後將上述代碼保存爲UEditorXSSRejector.class.php放在同目錄下。然後在function裏調用:
use Org\Util;
/**
 * Run a rich-text submission through the whitelist sanitiser.
 *
 * @param string $val HTML submitted by the editor
 * @return string the result of UEditorXSSRejector::parse()
 */
function remove_xss($val) {
    $rejector = new \Org\Util\UEditorXSSRejector();
    $cleaned = $rejector->parse($val);
    return $cleaned;
}
大功告成。拿wangEditor提交到Controller試了一下,不管是加粗、傾斜、有序列表、無序列表、鏈接還是圖片等等,都感覺沒什麼問題。