【VBA研究】用XMLHTTP的Post功能抓取數據

作者:iamlaosong

我前一陣子用VBA做了個工具,用XMLHTTP的Get功能抓取城市間距離。現在我想用用XMLHTTP的Post功能抓取郵件軌跡。抓取數據是用Get還是Post,取決於網站提交參數的方法。

1、通過分析(用fiddler),郵件軌跡查詢網站是用post提交參數的。如下圖:

上圖中“Entity”內容用於設置包頭,點擊“TextView”可以看到傳輸的參數內容,郵件號碼,如下圖:

抓取數據的代碼如下:

Sub tt()
    Dim HttpReq As Object
    Dim pdata, http As String
    
    Set HttpReq = CreateObject("MSXML2.XMLHTTP.6.0")
    '軌跡頭部數據,網址用xxx屏蔽
    http = "http://10.xxx.xxx.xxx/querypush-traln/qps/qpswaybilltraceinternal/queryTraceByTrace_no/"
    pdata = "trace_no=1044905987232"
    
    HttpReq.Open "Post", http, False
    
    HttpReq.setRequestHeader "Content-Length", Len(pdata)
    'HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded"
    HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded; charset=utf-8"

    HttpReq.send pdata    '"trace_nos=1194359346482"
    
    Do Until HttpReq.readyState = 4
        DoEvents
    Loop
    
    If HttpReq.Status = 200 Then
        Debug.Print HttpReq.responseText
    End If

End Sub

2、返回內容是個json結構的數據,可以用fiddler的查看返回內容,點擊“TextView”:

點擊“JSON”可以看到數據解析結果:

但vba解析的時候內容爲空,對比以前返回json結構的數據,發現這個數據少了個名稱,因爲定義JS函數時,指定了一個json結構數據的名稱,即jscode = "function json(s,i) { return eval('(' + s + ').traces[' + i + ']'); }"中的traces名稱。

加上後就可以解析了,即:

            '返回數據要成爲標準的json結構,還需要在外面加一層數據名稱
            buf = "{""traces"":" & HttpReq.responseText & "}"
            kk = get_trace(buf)
get_trace函數就是用來解析json數據的,代碼如下:

Function get_trace(mystring As String) As Integer
    Dim objJSx As Object, objJSy As Object
    Dim m1, m2, n, j As Integer
    Dim source, level, kind, sm As String
    
    On Error Resume Next
    Set objJSx = CreateObject("ScriptControl")        '調用MSScriptControl.ScriptControl對象將提取的變量文本運算形成對象集合
    objJSx.Language = "JavaScript"                    '測試發現JavaScript、javascript、JScript都可以表示JavaScript語言
    
    '定義一個JS函數,通過計算表達式的方式引入JSON數據並解析
    jscode = "function json(s,i) { return eval('(' + s + ').traces[' + i + ']'); }"
    objJSx.AddCode jscode
    TT = "否"
    For n = 1 To 100
        If objJSx.Run("json", mystring, n - 1) = "" Then Exit For
        Set objJSy = objJSx.Run("json", mystring, n - 1)
        For j = 1 To 11
            TraceInfo(n, j) = ""
        Next j
        
        TraceInfo(n, 1) = objJSy.traceNo
        TraceInfo(n, 2) = objJSy.opCode
        TraceInfo(n, 3) = objJSy.opTime
        TraceInfo(n, 4) = objJSy.opName
        TraceInfo(n, 6) = objJSy.opOrgCode
        TraceInfo(n, 7) = objJSy.opOrgSimpleName
        TraceInfo(n, 8) = objJSy.operatorNo
        TraceInfo(n, 9) = objJSy.operatorName
        TraceInfo(n, 10) = objJSy.level
        TraceInfo(n, 11) = objJSy.source
        sm = objJSy.desc
        '剔除數據中的HTML部分
        Do While InStr(sm, "<") > 0
            m1 = InStr(sm, "<")
            m2 = InStr(sm, ">")
            If m2 > 0 Then
                If Mid(sm, m1, 3) = "/br" Then
                    sm = Left(sm, m1 - 1) & " " & Right(sm, Len(sm) - m2)
                Else
                    sm = Left(sm, m1 - 1) & Right(sm, Len(sm) - m2)
                End If
            Else
                Exit Do
            End If
        Loop
        TraceInfo(n, 5) = sm
        If objJSy.opCode = "704" Then TT = "是"
    Next n
    
    get_trace = n - 1
End Function

3、實際使用的代碼

Public Sub get_data()
    '根據工作表中的查詢語句讀取數據
    Dim HttpReq As Object
    Dim i, k, kk, lineno, row1 As Long
    Dim Mail, pdata, tbhead As String, buf As String
    Dim arr_head
    
    lineno = [A65536].End(xlUp).Row      '行數,也是郵件號碼數量
    Range("B2:B" & lineno).ClearContents
    'lineno = ActiveSheet.UsedRange.Rows.Count
    Set HttpReq = CreateObject("MSXML2.XMLHTTP.6.0")
    '軌跡頭部數據
    'http = "http://10.xxx.xxx.xxx/querypush-traln/qps/qpswaybilltraceinternal/queryCurrentTraceByTrace_nos/"
    'pdata = "trace_nos="
    '軌跡數據
    http = "http://10.xxx.xxx.xxx/querypush-traln/qps/qpswaybilltraceinternal/queryTraceByTrace_no/"
    'pdata = "trace_no="
    
    row1 = 2
    maxrow = Sheets("查詢結果").UsedRange.Rows.Count
    If maxrow >= 1 Then
        Sheets("查詢結果").Range("A1:L" & maxrow).ClearContents
    End If
    tbhead = "郵件號碼 操作碼 操作時間 處理動作 詳細說明 機構代碼 機構名稱 操作員代碼 操作員姓名 級別 來源"
    arr_head = Split(tbhead, " ")    '下標從0開始
    Sheets("查詢結果").Cells(1, 1).Resize(1, UBound(arr_head) + 1) = arr_head
    
    For i = 2 To lineno
        Mail = Trim(Sheets("郵件號碼").Cells(i, 1))
        If Mail = "" Then Exit For
        If Len(Mail) = 13 Then
            
            HttpReq.Open "Post", http, False
            
            pdata = "trace_no=" & Mail
            HttpReq.setRequestHeader "Content-Length", Len(pdata)
            HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded"
            
            HttpReq.send pdata    'pdata = "trace_no=1194359346482"
            
            Do Until HttpReq.readyState = 4
                DoEvents
            Loop
            'MsgBox HttpReq.getAllResponseHeaders
            'Debug.Print HttpReq.responseText
            
            '返回數據要成爲標準的json結構,還需要在外面加一層數據名稱
            buf = "{""traces"":" & HttpReq.responseText & "}"

            kk = get_trace(buf)
            Sheets("郵件號碼").Cells(i, 2) = TT
            
            If kk > 0 Then
                For k = kk To 1 Step -1
                    If CInt(TraceInfo(k, 10)) <= Range("E1") Then
                        For j = 1 To 11
                            Sheets("查詢結果").Cells(row1, j) = TraceInfo(k, j)
                        Next j
                        row1 = row1 + 1
                    End If
                Next k
            Else
                Sheets("郵件號碼").Cells(i, 2) = "Err"
                
                Sheets("查詢結果").Cells(row1, 1) = Mail
                Sheets("查詢結果").Cells(row1, 2) = "Err"
                Sheets("查詢結果").Cells(row1, 4) = HttpReq.responseText
                row1 = row1 + 1
                delay (9 * Rnd + 1)   '出錯了,說明你還是幹快了,隨機後延時1-10秒,看運氣了。
            End If
            'If CInt(i / 10) * 10 = i Then
                Application.StatusBar = "完成:" & Round(i * 100 / lineno, 2) & "%"
                DoEvents
            'End If
            delay (Rnd + 0.25)  '總部領導說了,接口是大家用的,你一個人不能用太多,此處延時0.5秒,降降速度。
        Else
            Sheets("郵件號碼").Cells(i, 2) = "異常"
        End If
    
    Next i
    
    Application.StatusBar = "就緒"
    Sheets("查詢結果").Activate
    msg = MsgBox("郵件批量查詢完畢,共查詢" & i - 2 & "個郵件!", vbOKOnly, "AHEMS:iamlaosong")

End Sub

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章