XSStrike源碼閱讀（3）——核心函數

wafDetector

代碼也不多，我就直接貼上來了，源碼如下:

def wafDetector(url, params, headers, GET, delay, timeout):
    """Guess which WAF, if any, answered a deliberately noisy request.

    A request carrying an obvious XSS payload is sent through ``requester``.
    If the response status is 400 or above, the body, status code and
    headers are matched against every entry of ``./db/wafSignatures.json``
    and the best-scoring entry wins.

    Args:
        url: target URL, forwarded to ``requester``.
        params: request parameters; a copy is extended with the probe, the
            caller's mapping is left untouched.
        headers: request headers, forwarded to ``requester``.
        GET: forwarded to ``requester`` unchanged.
        delay: forwarded to ``requester`` unchanged.
        timeout: forwarded to ``requester`` unchanged.

    Returns:
        The name (JSON key) of the best matching signature, or ``None`` when
        the status is below 400 or no signature scored above zero.
    """
    with open('./db/wafSignatures.json', 'r') as file:
        wafSignatures = json.load(file)
    # a payload which is noisy enough to provoke the WAF
    noise = '<script>alert("XSS")</script>'
    # Inject the probe into a copy so the caller's params are not polluted.
    probeParams = dict(params)
    probeParams['xss'] = noise
    # Opens the noise injected payload
    response = requester(url, probeParams, headers, GET, delay, timeout)
    page = response.text
    code = str(response.status_code)
    # Kept separate from the ``headers`` argument (the request headers).
    responseHeaders = str(response.headers)
    if int(code) < 400:
        return None

    bestScore, bestName = 0, None
    for wafName, wafSignature in wafSignatures.items():
        pageSign = wafSignature['page']
        codeSign = wafSignature['code']
        headersSign = wafSignature['headers']
        score = 0
        if pageSign and re.search(pageSign, page, re.I):
            score += 1
        if codeSign and re.search(codeSign, code, re.I):
            # smaller weight because http codes aren't strong indicators
            score += 0.5
        if headersSign and re.search(headersSign, responseHeaders, re.I):
            score += 1
        # Strict '>' keeps the earliest signature when scores tie.
        if score > bestScore:
            bestScore, bestName = score, wafName
    return bestName

其實waf探測這一功能的實現在接下來要提到的幾個函數中算是邏輯最簡單的一個，但也是不可或缺的一個，我之前彷彿在作者的註釋中看到這是從sqlmap那邊借鑑過來的，總體思路還是很清晰的，就是發送足夠惡意的惡意請求（233），以此來故意觸發waf，然後根據返回信息中的page,code,headers這三個指標來辨識waf。至於那幾個數字，其實就是給這三個特徵加了不同的權重，這是爲了能夠在特徵匹配到多個waf時取匹配度最高的那一個作爲最終結果。
下面貼上已知的waf特徵庫(wafSignatures.json)：

{
	"360 Web Application Firewall (360)" : {
		"code" : "493",
		"page" : "/wzws-waf-cgi/",
		"headers" : "X-Powered-By-360wzb"
	},
	"aeSecure" : {
		"code" : "",
		"page" : "aesecure_denied.png",
		"headers" : "aeSecure-code"
	},
	"Airlock (Phion/Ergon)" : {
		"code" : "",
		"page" : "",
		"headers" : "AL[_-]?(SESS|LB)"
	},
	"Anquanbao Web Application Firewall (Anquanbao)" : {
		"code" : "405",
		"page" : "/aqb_cc/error/|hidden_intercept_time",
		"headers" : "X-Powered-By-Anquanbao"
	},
	"Armor Protection (Armor Defense)" : {
		"code" : "",
		"page" : "This request has been blocked by website protection from Armor",
		"headers" : ""
	},
	"Application Security Manager (F5 Networks)" : {
		"code" : "",
		"page" : "The requested URL was rejected\\. Please consult with your administrator\\.",
		"headers" : ""
	},
	"Amazon Web Services Web Application Firewall (Amazon)" : {
		"code" : "403",
		"page" : "",
		"headers" : "\\bAWS"
	},
	"Yunjiasu Web Application Firewall (Baidu)" : {
		"code" : "",
		"page" : "",
		"headers" : "yunjiasu-nginx"
	},
	"Barracuda Web Application Firewall (Barracuda Networks)" : {
		"code" : "",
		"page" : "",
		"headers" : "barra_counter_session=|(|\\b)barracuda_"
	},
	"BIG-IP Application Security Manager (F5 Networks)" : {
		"code" : "",
		"page" : "",
		"headers" : "BigIP|F5\\Z|\\bTS[0-9a-f]+=|X-WA-Info:|X-Cnection:"
	},
	"BinarySEC Web Application Firewall (BinarySEC)" : {
		"code" : "",
		"page" : "",
		"headers" : "binarysec"
	},
	"BlockDoS" : {
		"code" : "",
		"page" : "",
		"headers" : "BlockDos\\.net"
	},
	"ChinaCache (ChinaCache Networks)" : {
		"code" : "400",
		"page" : "",
		"headers" : "Powered-By-ChinaCache"
	},
	"Cisco ACE XML Gateway (Cisco Systems)" : {
		"code" : "",
		"page" : "",
		"headers" : "ACE XML Gateway"
	},
	"Cloudbric Web Application Firewall (Cloudbric)" : {
		"code" : "",
		"page" : "Cloudbric|Malicious Code Detected",
		"headers" : ""
	},
	"CloudFlare Web Application Firewall (CloudFlare)" : {
		"code" : "",
		"page" : "Attention Required! \\| Cloudflare|CloudFlare Ray ID:|var CloudFlare=|CLOUDFLARE_ERROR_500S_BOX",
		"headers" : "cloudflare|__cfduid=|cf-ray"
	},
	"CloudFront (Amazon)" : {
		"code" : "",
		"page" : "",
		"headers" : "Error from cloudfront"
	},
	"Comodo Web Application Firewall (Comodo)" : {
		"code" : "",
		"page" : "",
		"headers" : "Protected by COMODO WAF"
	},
	"CrawlProtect (Jean-Denis Brun)" : {
		"code" : "",
		"page" : "This site is protected by CrawlProtect",
		"headers" : ""
	},
	"IBM WebSphere DataPower (IBM)" : {
		"code" : "",
		"page" : "",
		"headers" : "X-Backside-Transport.*?(OK|FAIL)"
	},
	"Deny All Web Application Firewall (DenyAll)" : {
		"code" : "",
		"page" : "Condition Intercepted",
		"headers" : "sessioncookie"
	},
	"Distil Web Application Firewall Security (Distil Networks)" : {
		"code" : "",
		"page" : "",
		"headers" : "x-distil-cs"
	},
	"DOSarrest (DOSarrest Internet Security)" : {
		"code" : "",
		"page" : "",
		"headers" : "DOSarrest|X-DIS-Request-ID"
	},
	"dotDefender (Applicure Technologies)" : {
		"code" : "",
		"page" : "dotDefender Blocked Your Request|<meta name=\\.description\\. content=\\.Applicure is the leading provider of web application security|Please contact the site administrator, and provide the following Reference ID:",
		"headers" : "X-dotDefender-denied"
	},
	"EdgeCast Web Application Firewall (Verizon)" : {
		"code" : "400",
		"page" : "",
		"headers" : "SERVER.*?ECDF"
	},
	"ExpressionEngine (EllisLab)" : {
		"code" : "",
		"page" : "Invalid (GET|POST) Data",
		"headers" : ""
	},
	"FortiWeb Web Application Firewall (Fortinet)" : {
		"code" : "",
		"page" : "\\.fgd_icon|\\.blocked|\\.authenticate",
		"headers" : "FORTIWAFSID=|cookiesession1="
	},
	"Hyperguard Web Application Firewall (art of defence)" : {
		"code" : "",
		"page" : "",
		"headers" : "ODSESSION="
	},
	"Incapsula Web Application Firewall (Incapsula/Imperva)" : {
		"code" : "",
		"page" : "Incapsula incident ID|_Incapsula_Resource|subject=WAF Block Page|If you feel you have been blocked in error, please contact Customer Support",
		"headers" : "X-Iinfo|incap_ses|visid_incap"
	},
	"ISA Server (Microsoft)" : {
		"code" : "",
		"page" : "The server denied the specified Uniform Resource Locator (URL)\\. Contact the server administrator\\.",
		"headers" : ""
	},
	"Jiasule Web Application Firewall (Jiasule)" : {
		"code" : "403",
		"page" : "static\\.jiasule\\.com/static/js/http_error\\.js|notice-jiasule",
		"headers" : "jiasule-WAF|__jsluid=|jsl_tracking"
	},
	"KS-WAF (Knownsec)" : {
		"code" : "",
		"page" : "ks-waf-error\\.png'",
		"headers" : ""
	},
	"KONA Security Solutions (Akamai Technologies)" : {
		"code" : "400|403|501",
		"page" : "",
		"headers" : "AkamaiGHost"
	},
	"ModSecurity: Open Source Web Application Firewall (Trustwave)" : {
		"code" : "",
		"page" : "This error was generated by Mod_Security|One or more things in your request were suspicious|rules of the mod_security module",
		"headers" : "Mod_Security|NOYB"
	},
	"NAXSI (NBS System)" : {
		"code" : "",
		"page" : "",
		"headers" : "naxsi/waf"
	},
	"NetContinuum Web Application Firewall (NetContinuum/Barracuda Networks)" : {
		"code" : "",
		"page" : "",
		"headers" : "NCI__SessionId="
	},
	"NetScaler (Citrix Systems)" : {
		"code" : "",
		"page" : "",
		"headers" : "ns_af=|citrix_ns_id|NSC_|NS-CACHE"
	},
	"Newdefend Web Application Firewall (Newdefend)" : {
		"code" : "",
		"page" : "",
		"headers" : "newdefend"
	},
	"NSFOCUS Web Application Firewall (NSFOCUS)" : {
		"code" : "",
		"page" : "",
		"headers" : "NSFocus"
	},
	"Palo Alto Firewall (Palo Alto Networks)" : {
		"code" : "",
		"page" : "has been blocked in accordance with company policy",
		"headers" : ""
	},
	"Profense Web Application Firewall (Armorlogic)" : {
		"code" : "",
		"page" : "",
		"headers" : "PLBSID=|Profense"
	},
	"AppWall (Radware)" : {
		"code" : "",
		"page" : "Unauthorized Activity Has Been Detected.*?Case Number:",
		"headers" : "X-SL-CompState"
	},
	"Reblaze Web Application Firewall (Reblaze)" : {
		"code" : "",
		"page" : "",
		"headers" : "rbzid=|Reblaze Secure Web Gateway"
	},
	"ASP.NET RequestValidationMode (Microsoft)" : {
		"code" : "500",
		"page" : "ASP\\.NET has detected data in the request that is potentially dangerous|Request Validation has detected a potentially dangerous client input value|HttpRequestValidationException",
		"headers" : ""
	},
	"Safe3 Web Application Firewall" : {
		"code" : "",
		"page" : "",
		"headers" : "Safe3"
	},
	"Safedog Web Application Firewall (Safedog)" : {
		"code" : "",
		"page" : "",
		"headers" : "WAF/2\\.0|safedog"
	},
	"SecureIIS Web Server Security (BeyondTrust)" : {
		"code" : "",
		"page" : "SecureIIS.*?Web Server Protection|http://www\\.eeye\\.com/SecureIIS/|\\?subject=[^>]*SecureIIS Error",
		"headers" : ""
	},
	"SEnginx (Neusoft Corporation)" : {
		"code" : "",
		"page" : "SENGINX-ROBOT-MITIGATION",
		"headers" : ""
	},
	"TrueShield Web Application Firewall (SiteLock)" : {
		"code" : "",
		"page" : "SiteLock Incident ID|sitelock-site-verification|sitelock_shield_logo",
		"headers" : ""
	},
	"SonicWALL (Dell)" : {
		"code" : "",
		"page" : "This request is blocked by the SonicWALL|#shd|#nsa_banner|Web Site Blocked.*?\\bnsa_banner",
		"headers" : "SonicWALL"
	},
	"UTM Web Protection (Sophos)" : {
		"code" : "",
		"page" : "Powered by UTM Web Protection",
		"headers" : ""
	},
	"Stingray Application Firewall (Riverbed / Brocade)" : {
		"code" : "403|500",
		"page" : "",
		"headers" : "X-Mapping-"
	},
	"CloudProxy WebSite Firewall (Sucuri)" : {
		"code" : "403",
		"page" : "Access Denied.*?Sucuri Website Firewall|Sucuri WebSite Firewall.*?Access Denied|Questions\\?.*?cloudproxy@sucuri\\.net",
		"headers" : "Sucuri/Cloudproxy|X-Sucuri"
	},
	"Tencent Cloud Web Application Firewall (Tencent Cloud Computing)" : {
		"code" : "405",
		"page" : "waf\\.tencent-cloud\\.com",
		"headers" : ""
	},
	"Teros/Citrix Application Firewall Enterprise (Teros/Citrix Systems)" : {
		"code" : "",
		"page" : "",
		"headers" : "st8(id|_wat|_wlf)"
	},
	"TrafficShield (F5 Networks)" : {
		"code" : "",
		"page" : "",
		"headers" : "F5-TrafficShield|ASINFO="
	},
	"UrlScan (Microsoft)" : {
		"code" : "",
		"page" : "Rejected-By-UrlScan",
		"headers" : "Rejected-By-UrlScan"
	},
	"USP Secure Entry Server (United Security Providers)" : {
		"code" : "",
		"page" : "",
		"headers" : "Secure Entry Server"
	},
	"Varnish FireWall (OWASP)" : {
		"code" : "404",
		"page" : "Request rejected by xVarnish-WAF|\\bXID: \\d+",
		"headers" : ""
	},
	"Wallarm Web Application Firewall (Wallarm)" : {
		"code" : "",
		"page" : "",
		"headers" : "nginx-wallarm"
	},
	"WatchGuard (WatchGuard Technologies)" : {
		"code" : "",
		"page" : "",
		"headers" : "WatchGuard"
	},
	"WebKnight Application Firewall (AQTRONIX)" : {
		"code" : "999",
		"page" : "WebKnight Application Firewall Alert|AQTRONIX WebKnight",
		"headers" : "WebKnight"
	},
	"Wordfence (Feedjit)" : {
		"code" : "",
		"page" : "This response was generated by Wordfence|Your access to this site has been limited",
		"headers" : ""
	},
	"Zenedge Web Application Firewall (Zenedge)" : {
		"code" : "",
		"page" : "zenedge/assets/",
		"headers" : "ZENEDGE"
	},
	"Yundun Web Application Firewall (Yundun)" : {
		"code" : "",
		"page" : "",
		"headers" : "YUNDUN"
	},
	"Yunsuo Web Application Firewall (Yunsuo)" : {
		"code" : "",
		"page" : "<img class=.yunsuologo.",
		"headers" : "yunsuo_session"
	}
}

htmlParser

這個函數應該是整個程序裏最複雜的一個了，複雜不是在程序本身，而是複雜在html環境，要徹底讀懂這個函數你需要對xss輸出位置與html文檔足夠熟悉。否則很難看懂作者的邏輯。下面我會在代碼註釋中一步步講解，如果我解釋的有所錯誤，歡迎指出

def htmlParser(response, encoding):
    """Locate every reflection of ``xsschecker`` in a response and describe its context.

    Args:
        response: object exposing ``.text`` and ``.headers`` (per the inline
            comment, the raw response returned by requests).
        encoding: falsy, or a callable applied to ``xsschecker``; when given,
            the encoded probe found in the body is replaced by the plain
            probe before parsing.

    Returns:
        A two-element list ``[occurences, positions]``. ``positions`` holds
        the start offset of each match of ``xsschecker`` in the body.
        ``occurences`` maps an index to
        ``{'position': offset, 'context': [location, environment, tag, attribute]}``.

    Relies on the module-level names ``xsschecker`` and ``badTags``, which
    are defined outside this block.
    """
    rawResponse = response  # raw response returned by requests
    response = response.text  # response content
    if encoding:  # if the user has specified an encoding, encode the probe in that
        response = response.replace(encoding(xsschecker), xsschecker)
    tags = []  # tags in which the input is reflected
    locations = []  # contexts in which the input is reflected
    attributes = []  # attribute names
    environments = []  # strings needed to break out of the context (double quote, single quote, etc.)
    positions = []  # postions of all the reflections of the xsschecker
    for match in re.finditer(xsschecker, response):
        positions.append(match.start())

    #  It finds the contexts of the reflections
    # One parameter may be reflected in several places, so the document is
    # split apart and the context of each reflection is worked out in turn.
    parts = response.split(xsschecker)
    # remove first element since it doesn't contain xsschecker
    parts.remove(parts[0])
    # add xsschecker in front of all elements
    parts = [xsschecker + s for s in parts]
    # Worked example: suppose the body is  <div>xsschecker</div>
    # After the steps above, parts is  ['xsschecker</div>']
    # There is a single reflection, so parts has a single element.
    for part in parts:  # iterate over the parts

        # Split the part on '>'. If the first chunk contains '</script' the
        # reflection is treated as sitting between <script> tags; if it
        # contains '</' it is treated as sitting between ordinary tags,
        # i.e. plain html context.
        deep = part.split('>')

        if '</script' in deep[0]:
            location = 'script'
        elif '</' in deep[0]:
            location = 'html'
        # Besides the two cases above, the reflection may also be inside a
        # tag attribute or an html comment; those are checked below.
        else:
            num = 0
            for i in deep:
                # chunk ends with '--' (i.e. '-->' before the split) and no
                # comment opener appears in the chunks seen so far
                if i[-2:] == '--':
                    if '<!--' not in ''.join(deep[:num + 1]):
                        location = 'comment'
                        break
                        # NOTE(review): unreachable, it directly follows the break above
                        continue
                # Not a comment: tentatively assume script context, since
                # script bodies (DOM code, comparisons) may contain '>'.
                # The attribute case is still possible, so this is only a
                # provisional value.
                location = 'script'
                for char in part:
                    # the only way to find out if it's attribute context is to see if '<' is present.
                    # This test is only meaningful given the cases already
                    # ruled out by the branches above.
                    if char == '<':
                        location = 'attribute'  # no, it doesn't match '<script>'
                        break
                num += 1
        # Body contains no '<' at all but is served as text/html: treat it as html context.
        # NOTE(review): the header lookup raises KeyError if Content-Type is absent,
        # and the comparison is an exact match (a charset suffix would not match).
        if '<' not in response:
            if rawResponse.headers['Content-Type'] == 'text/html':
                location = 'html'
        locations.append(location)  # add location to locations list

    #  Finds the "environment" of reflections. is it within double quotes? Which tag contains the reflection?

    num = 0  # dummy value to keep record of occurence being processed
    # find xsschecker in response and return matches
    # NOTE(review): this search is case-insensitive while the one filling
    # ``positions`` above is not.
    for occ in re.finditer(xsschecker, response, re.IGNORECASE):
        # convert "xsschecker to EOF" into a list
        toLook = list(response[occ.end():])
        for loc in range(len(toLook)):  # iterate over the chars
            # Simply look for the first quote character after the reflection.
            # Unquoted attribute values are not recognised by this test.
            if toLook[loc] in ('\'', '"', '`'):  # if the char is a quote
                environments.append(toLook[loc])  # add it to enviornemts list
                # Next, narrow down which tag holds the reflection by
                # splitting the document into progressively smaller pieces.
                tokens = response.split('<')
                goodTokens = []  # tokens which contain xsschecker
                for token in tokens:  # iterate over tokens
                    if xsschecker in token:  # if xsschecker is in token
                        goodTokens.append(token)  # add it to goodTokens list
                        # attributes and their values are generally seperated with space so...
                        attrs = token.split(' ')
                        for attr in attrs:  # iterate over the attribute
                            if xsschecker in attr:  # is xsschecker in this attribute?
                                # alright, this is the one we need
                                attributeName = attr.split('=')[0]
                                attributeValue = ''.join(attr.split('=')[1:])
                                # drop the first and last character when the value starts with a quote
                                if attributeValue.startswith('\'') or attributeValue.startswith('"'):
                                    attributeValue = attributeValue[1:-1]
                                attributes.append({attributeName:attributeValue})
                                break
                try:
                    # finds the tag "inside" which input is refelcted
                    tag = re.search(r'\w+', goodTokens[num]).group()
                except IndexError:
                    try:
                        # finds the tag "inside" which input is refelcted
                        tag = re.search(r'\w+', goodTokens[num - 1]).group()
                    except IndexError:
                        tag = 'null'
                tags.append(tag)  # add the tag to the tags list
                break
            # The branch above handles a reflection inside an attribute;
            # this one handles a reflection between tags.
            elif toLook[loc] == '<':  # if we encounter a closing angular brackt
                # check if the next character to it is a / to make sure its a closing tag
                # NOTE(review): raises IndexError when '<' is the last character of the body
                if toLook[loc + 1] == '/':
                    tag = ''.join(toLook).split('</')[1].split('>')[0]
                    # badTags: per the comment below, tags whose content is a
                    # non-executable context (defined outside this block)
                    if tag in badTags:  # if the tag is a non-executable context e.g. noscript, textarea
                        # add it to environments because we need to break out of it
                        environments.append('</' + tag + '/>')  # used later to close the tag
                    else:
                        environments.append('')
                    tags.append(tag)  # add the tag to tags list
                    # since it's a closing tag, it can't have any attributes
                    attributes.append('')
                break
            # NOTE(review): no effect, the for statement rebinds loc on every iteration
            loc += 1
        num += 1
    occurences = {}  # a dict to store all the collected information about the reflections
    # zip stops at the shortest of the lists, so reflections for which one of
    # them received no entry are dropped from the result
    for i, loc, env, tag, attr, position in zip(range(len(locations)), locations, environments, tags, attributes, positions):
        occurences[i] = {}
        occurences[i]['position'] = position
        if loc == 'comment':  # if context is html comment
            env = '-->'  # add --> as environment as we need this to break out (needed to close the comment)
        occurences[i]['context'] = [loc, env, tag, attr]
    return [occurences, positions]

filterChecker與checker

之所以將他們放在一起講，是因爲他們是爲了實現同一功能的整體
filterChecker顧名思義，就是檢查過濾情況，畢竟我們想要成功利用xss,是會用到一些特殊字符的，最常見的：",',<,>,/等等
而這兩個函數的功能就相當於是給這些字符打分，分數越高則說明越可能沒有被過濾。filterChecker在調用checker之前只是做了一些預處理工作，我就不再細說了，主要提一下checker函數，依舊採用註釋的方式：

def checker(url, params, headers, GET, delay, payload, positions, timeout, encoding):
    """Score how faithfully ``payload`` is reflected at each reflection point.

    The payload is wrapped between the markers ``st4r7s`` and ``3nd``,
    substituted for ``xsschecker`` in the parameters and sent with
    ``requester``. Each reflection in the lower-cased response is compared
    to the sent string with ``fuzz.partial_ratio``.

    Args:
        url, headers, GET, delay, timeout: forwarded to ``requester``.
        params: request parameters in which ``xsschecker`` is replaced.
        payload: the string whose filtering is being tested.
        positions: previously recorded reflection offsets, passed to
            ``fillHoles`` together with the offsets found in this response.
        encoding: falsy, or a callable applied to the check string.

    Returns:
        A list of the non-zero efficiency scores, one per surviving
        reflection.
    """
    # Wrap the payload in fixed markers so it is easy to find in the response.
    checkString = 'st4r7s' + payload + '3nd'
    if encoding:
        checkString = encoding(unquote(checkString))
    injectedParams = replaceValue(params, xsschecker, checkString, copy.deepcopy)
    body = requester(url, injectedParams, headers, GET, delay, timeout).text.lower()
    markerOffsets = [found.start() for found in re.finditer('st4r7s', body)]
    # fillHoles reconciles the expected positions with the ones actually
    # reflected; presumably it inserts placeholders where a reflection was
    # dropped entirely -- verify against its definition.
    filledPositions = fillHoles(positions, markerOffsets)

    scores = []
    for index, position in enumerate(filledPositions):
        candidates = []
        try:
            begin = markerOffsets[index]
            snippet = body[begin:begin + len(checkString)]
            # Similarity between what came back and what was sent.
            candidates.append(fuzz.partial_ratio(snippet, checkString.lower()))
        except IndexError:
            # Fewer markers were reflected than positions were expected.
            pass
        if not position:
            scores.append(0)
            continue
        snippet = body[position:position + len(checkString)]
        if encoding:
            # NOTE(review): this re-applies encoding on every iteration, so the
            # check string keeps changing across reflections -- confirm intent.
            checkString = encoding(checkString.lower())
        efficiency = fuzz.partial_ratio(snippet, checkString)
        strippedPayload = checkString.replace('st4r7s', '').replace('3nd', '')
        # Reflection equals the payload prefixed by a backslash: fixed score.
        if snippet[:-2] == ('\\%s' % strippedPayload):
            efficiency = 90
        candidates.append(efficiency)
        scores.append(max(candidates))
    return [score for score in scores if score]

未完待續>>>
XSStrike源碼閱讀（1）——目錄結構與大體架構
 XSStrike源碼閱讀（2）——四種模式

XSStrike源碼閱讀（3）——核心函數

wafDetector

htmlParser

filterChecker與checker

web安全崗面試題收集

xss基礎認證釣魚代碼收集

xss漏洞掃描器開發隨想

python正則表達式原始字符串之坑

XSStrike源碼閱讀（3）——核心函數

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結