作者:iamlaosong
我前一陣子用VBA做了個工具,用XMLHTTP的Get功能抓取城市間距離。現在我想用用XMLHTTP的Post功能抓取郵件軌跡。抓取數據是用Get還是Post,取決於網站提交參數的方法。
1、通過分析(用fiddler),郵件軌跡查詢網站是用post提交參數的。如下圖:
上圖中“Entity”內容用於設置包頭,點擊“TextView”可以看到傳輸的參數內容,郵件號碼,如下圖:
抓取數據的代碼如下:
Sub tt()
Dim HttpReq As Object
Dim pdata, http As String
Set HttpReq = CreateObject("MSXML2.XMLHTTP.6.0")
'軌跡頭部數據,網址用xxx屏蔽
http = "http://10.xxx.xxx.xxx/querypush-traln/qps/qpswaybilltraceinternal/queryTraceByTrace_no/"
pdata = "trace_no=1044905987232"
HttpReq.Open "Post", http, False
HttpReq.setRequestHeader "Content-Length", Len(pdata)
'HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded"
HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded; charset=utf-8"
HttpReq.send pdata '"trace_nos=1194359346482"
Do Until HttpReq.readyState = 4
DoEvents
Loop
If HttpReq.Status = 200 Then
Debug.Print HttpReq.responseText
End If
End Sub
2、返回內容是個json結構的數據,可以用fiddler的查看返回內容,點擊“TextView”:
點擊“JSON”可以看到數據解析結果:
但vba解析的時候內容爲空,對比以前返回json結構的數據,發現這個數據少了個名稱,因爲定義JS函數時,指定了一個json結構數據的名稱,即jscode = "function json(s,i) { return eval('(' + s + ').traces[' + i + ']'); }"中的traces名稱。
加上後就可以解析了,即:
'返回數據要成爲標準的json結構,還需要在外面加一層數據名稱
buf = "{""traces"":" & HttpReq.responseText & "}"
kk = get_trace(buf)
get_trace函數就是用來解析json數據的,代碼如下:
Function get_trace(mystring As String) As Integer
Dim objJSx As Object, objJSy As Object
Dim m1, m2, n, j As Integer
Dim source, level, kind, sm As String
On Error Resume Next
Set objJSx = CreateObject("ScriptControl") '調用MSScriptControl.ScriptControl對象將提取的變量文本運算形成對象集合
objJSx.Language = "JavaScript" '測試發現JavaScript、javascript、JScript都可以表示JavaScript語言
'定義一個JS函數,通過計算表達式的方式引入JSON數據並解析
jscode = "function json(s,i) { return eval('(' + s + ').traces[' + i + ']'); }"
objJSx.AddCode jscode
TT = "否"
For n = 1 To 100
If objJSx.Run("json", mystring, n - 1) = "" Then Exit For
Set objJSy = objJSx.Run("json", mystring, n - 1)
For j = 1 To 11
TraceInfo(n, j) = ""
Next j
TraceInfo(n, 1) = objJSy.traceNo
TraceInfo(n, 2) = objJSy.opCode
TraceInfo(n, 3) = objJSy.opTime
TraceInfo(n, 4) = objJSy.opName
TraceInfo(n, 6) = objJSy.opOrgCode
TraceInfo(n, 7) = objJSy.opOrgSimpleName
TraceInfo(n, 8) = objJSy.operatorNo
TraceInfo(n, 9) = objJSy.operatorName
TraceInfo(n, 10) = objJSy.level
TraceInfo(n, 11) = objJSy.source
sm = objJSy.desc
'剔除數據中的HTML部分
Do While InStr(sm, "<") > 0
m1 = InStr(sm, "<")
m2 = InStr(sm, ">")
If m2 > 0 Then
If Mid(sm, m1, 3) = "/br" Then
sm = Left(sm, m1 - 1) & " " & Right(sm, Len(sm) - m2)
Else
sm = Left(sm, m1 - 1) & Right(sm, Len(sm) - m2)
End If
Else
Exit Do
End If
Loop
TraceInfo(n, 5) = sm
If objJSy.opCode = "704" Then TT = "是"
Next n
get_trace = n - 1
End Function
3、實際使用的代碼
Public Sub get_data()
'根據工作表中的查詢語句讀取數據
Dim HttpReq As Object
Dim i, k, kk, lineno, row1 As Long
Dim Mail, pdata, tbhead As String, buf As String
Dim arr_head
lineno = [A65536].End(xlUp).Row '行數,也是郵件號碼數量
Range("B2:B" & lineno).ClearContents
'lineno = ActiveSheet.UsedRange.Rows.Count
Set HttpReq = CreateObject("MSXML2.XMLHTTP.6.0")
'軌跡頭部數據
'http = "http://10.xxx.xxx.xxx/querypush-traln/qps/qpswaybilltraceinternal/queryCurrentTraceByTrace_nos/"
'pdata = "trace_nos="
'軌跡數據
http = "http://10.xxx.xxx.xxx/querypush-traln/qps/qpswaybilltraceinternal/queryTraceByTrace_no/"
'pdata = "trace_no="
row1 = 2
maxrow = Sheets("查詢結果").UsedRange.Rows.Count
If maxrow >= 1 Then
Sheets("查詢結果").Range("A1:L" & maxrow).ClearContents
End If
tbhead = "郵件號碼 操作碼 操作時間 處理動作 詳細說明 機構代碼 機構名稱 操作員代碼 操作員姓名 級別 來源"
arr_head = Split(tbhead, " ") '下標從0開始
Sheets("查詢結果").Cells(1, 1).Resize(1, UBound(arr_head) + 1) = arr_head
For i = 2 To lineno
Mail = Trim(Sheets("郵件號碼").Cells(i, 1))
If Mail = "" Then Exit For
If Len(Mail) = 13 Then
HttpReq.Open "Post", http, False
pdata = "trace_no=" & Mail
HttpReq.setRequestHeader "Content-Length", Len(pdata)
HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded"
HttpReq.send pdata 'pdata = "trace_no=1194359346482"
Do Until HttpReq.readyState = 4
DoEvents
Loop
'MsgBox HttpReq.getAllResponseHeaders
'Debug.Print HttpReq.responseText
'返回數據要成爲標準的json結構,還需要在外面加一層數據名稱
buf = "{""traces"":" & HttpReq.responseText & "}"
kk = get_trace(buf)
Sheets("郵件號碼").Cells(i, 2) = TT
If kk > 0 Then
For k = kk To 1 Step -1
If CInt(TraceInfo(k, 10)) <= Range("E1") Then
For j = 1 To 11
Sheets("查詢結果").Cells(row1, j) = TraceInfo(k, j)
Next j
row1 = row1 + 1
End If
Next k
Else
Sheets("郵件號碼").Cells(i, 2) = "Err"
Sheets("查詢結果").Cells(row1, 1) = Mail
Sheets("查詢結果").Cells(row1, 2) = "Err"
Sheets("查詢結果").Cells(row1, 4) = HttpReq.responseText
row1 = row1 + 1
delay (9 * Rnd + 1) '出錯了,說明你還是幹快了,隨機後延時1-10秒,看運氣了。
End If
'If CInt(i / 10) * 10 = i Then
Application.StatusBar = "完成:" & Round(i * 100 / lineno, 2) & "%"
DoEvents
'End If
delay (Rnd + 0.25) '總部領導說了,接口是大家用的,你一個人不能用太多,此處延時0.5秒,降降速度。
Else
Sheets("郵件號碼").Cells(i, 2) = "異常"
End If
Next i
Application.StatusBar = "就緒"
Sheets("查詢結果").Activate
msg = MsgBox("郵件批量查詢完畢,共查詢" & i - 2 & "個郵件!", vbOKOnly, "AHEMS:iamlaosong")
End Sub