搜索引擎接口測試

搜索重要指標:召回率、準確率。

核心指標:relevance相關性、freshness時效性、quality質量、ctr點擊率、confidence權威度、cold_start冷啓動。最終的排序依賴這幾個字段計算分數

 

 

業務接口返回6000-9000個字段

搜索接口查了7個搜索鏈路

PM需求:

選取某一天的人物類query,標識出同時召回type=12(人物)及type=3(自頻道)的query,計算兩種類型同時出現的比例,並把這些同時召回的query列出來,供分析使用

query選取:人物top1000 、隨機1000


引擎接口傳參加上調試信息cmd=xx後的接口返回43846個字段

 

引擎接口不加調試信息cmd=xx的接口返回23944個字段

 

不能從業務接口去請求,因爲業務接口過濾掉了引擎返回的大量字段

所以只能測引擎接口

難點:如何在上萬個返回字段裏定位到這個“特殊的字段”。查了很多個query,返回的json過大,肉眼很難閱讀,放到在線json解析工具裏也直接無響應。。。

最終:遍歷resultList這個jsonArray下的每個jsonObject元素,取其中的doc_source字段即可

 


QA測試設計:

數據源準備:去FBI撈取或去ODPS讀取top1000個人物query、任意1000個人物query

讀取引擎接口(對內),判斷接口返回,取標誌性字段(type類型),for循環遍歷JSONObject下的value。分支判斷,組合各種場景實現需求要的數據統計

 


發現開發代碼的bug:引擎接口召回不穩定,同一個query請求2次,時而返回3 和12類型,時而僅返回12類型

 

寫給自己的bug:跑完數據才發現,召回比率應該改爲百分比

 

修改後:

 



代碼實現:

模塊劃分:

 

1. HTTPCommonMethod爲拼接http請求的工具類

package com.xx.searchRecall.utils;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.util.EncodingUtil;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * Helper for sending HTTP GET requests with commons-httpclient.
 */
public class HTTPCommonMethod {

    /**
     * Full URL (base URL plus the encoded query string) of the most recent
     * {@link #doGet} call that was given a non-null base URL.
     */
    public static String requestURL;

    /**
     * Sends a GET request; only the varying parameters need to be passed in {@code params}.
     *
     * @param url_pre base URL; it may already carry a query string, in which case the
     *                encoded parameters are appended with {@code &}. When {@code null}
     *                no request is sent.
     * @param params  parameters to URL-encode (UTF-8) and append; may be {@code null} or empty
     * @param count   index of the request, used only in the error log line
     * @return the response body decoded as UTF-8; an empty string when {@code url_pre} is
     *         {@code null}, the status code is not 200, or the response has no body;
     *         {@code null} when an exception occurred (its stack trace is printed)
     */
    public static String doGet(String url_pre, Map<String, String> params, int count) {
        String response = "";
        if (url_pre == null) {
            return response;
        }

        GetMethod getMethod = null;
        try {
            List<NameValuePair> qparams_pre = getParamsList_pre(params);
            if (!qparams_pre.isEmpty()) {
                String formatParams = EncodingUtil.formUrlEncode(
                        qparams_pre.toArray(new NameValuePair[qparams_pre.size()]), "utf-8");
                url_pre = url_pre.indexOf("?") < 0 ? url_pre + "?" + formatParams : url_pre + "&" + formatParams;
            }
            requestURL = url_pre;

            getMethod = new GetMethod(url_pre);
            getMethod.addRequestHeader(new Header("Content-type", "application/json"));

            HttpClient httpClient = new HttpClient();
            int statusCode = httpClient.executeMethod(getMethod);
            // Report the failing status code and hand back the empty response.
            if (statusCode != 200) {
                System.out.println("第【" + count + "】條日誌,預發環境請求出錯,錯誤碼爲=======" + statusCode);
                return response;
            }

            byte[] body = getMethod.getResponseBody();
            // A response without a body is treated as an empty response instead of failing.
            if (body != null) {
                response = new String(body, "utf-8");
            }
            return response;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            // Always give the connection back, otherwise every request leaks one.
            if (getMethod != null) {
                getMethod.releaseConnection();
            }
        }
    }

    /**
     * Converts the parameter map into the name/value pairs expected by
     * {@code EncodingUtil.formUrlEncode}.
     *
     * @param paramsMap parameters; may be {@code null}
     * @return one pair per map entry (null keys/values become the text "null");
     *         an empty list when the map is {@code null} or empty
     */
    private static List<NameValuePair> getParamsList_pre(Map<String, String> paramsMap) {
        List<NameValuePair> params = new ArrayList<NameValuePair>();
        if (paramsMap == null) {
            return params;
        }
        for (Map.Entry<String, String> entry : paramsMap.entrySet()) {
            params.add(new NameValuePair(String.valueOf(entry.getKey()), String.valueOf(entry.getValue())));
        }
        return params;
    }
}


2. OdpsUtil爲連接數據庫的工具類

package com.xx.searchRecall.utils;

import com.aliyun.odps.Instance;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.account.Account;
import com.aliyun.odps.account.AliyunAccount;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.task.SQLTask;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Helper for running SQL against ODPS and reading back the result rows.
 */
public class OdpsUtil {
    // Connection settings; the values are placeholders in this listing.
    private static String accessId = "xx";
    private static String accessKey = "xx";

    private static String odpsUrl = "http://xx.com/api";
    private static String project = "xx";

    /**
     * Runs {@code sql} with the credentials configured in this class.
     *
     * @param sql statement to execute
     * @return the result rows; an empty list when an {@code OdpsException} occurred
     */
    public static List<Record> getSQLResult(String sql) {
        return execute(new AliyunAccount(accessId, accessKey), sql);
    }

    /**
     * Runs {@code sql} with caller-supplied credentials.
     *
     * @param sql           statement to execute
     * @param accessSelfId  access id to authenticate with
     * @param accessSelfKey access key to authenticate with
     * @return the result rows; an empty list when an {@code OdpsException} occurred
     */
    public static List<Record> getSQLResult(String sql, String accessSelfId, String accessSelfKey) {
        return execute(new AliyunAccount(accessSelfId, accessSelfKey), sql);
    }

    /**
     * Copies the first column of every record into a list of strings.
     *
     * @param list records to read; may be {@code null}
     * @return the first-column values in record order; empty for a {@code null} or empty input
     */
    public static List<String> record2wordList(List<Record> list) {
        List<String> words = new ArrayList<>();
        if (list == null) {
            return words;
        }
        for (Record row : list) {
            words.add(row.getString(0));
        }
        return words;
    }

    /** Submits the SQL task, waits for it to succeed and fetches its result. */
    private static List<Record> execute(Account account, String sql) {
        Odps client = new Odps(account);
        client.setEndpoint(odpsUrl);
        client.setDefaultProject(project);
        try {
            Instance task = SQLTask.run(client, sql);
            task.waitForSuccess();
            return SQLTask.getResult(task);
        } catch (OdpsException e) {
            e.printStackTrace();
            return new ArrayList<>();
        }
    }
}


3. RunProcess爲程序入口方法,傳入要查詢的SQL,調用odps工具類按行讀取數據源(top1000的query)

package com.xx.searchRecall;

import com.xx.searchRecall.utils.OdpsUtil;
import com.xx.searchRecall.utils.TimeTransfer;
import com.xx.searchRecall.utils.logOnlineReadODPS;
import com.xx.odps.data.Record;

import java.text.SimpleDateFormat;
import java.util.List;


/**
 * Program entry point: queries ODPS for person-card queries and feeds the rows to
 * {@code logOnlineReadODPS.startSearch}.
 */
public class RunProcess {
    private static String accessId = "xx";
    private static String accessKey = "xx";

    /**
     * Runs the SQL, hands the rows to the recall check and prints start time, end time,
     * elapsed seconds and the number of rows read.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Return value is not used; the call is kept as in the original flow.
        TimeTransfer.getCurrentTime();

        final String sql = personCardQuerySql();
        final SimpleDateFormat stampFormat = new SimpleDateFormat("yyyy年MM月dd日 HH:mm:ss");

        final long startedAt = System.currentTimeMillis();
        System.out.println("===============查詢客戶端傳參日誌SQL開始執行了,startTimeRun爲=================【"
                + stampFormat.format(startedAt) + "】");

        final List<Record> rows = OdpsUtil.getSQLResult(sql, accessId, accessKey);
        logOnlineReadODPS.startSearch(rows);

        final long finishedAt = System.currentTimeMillis();
        System.out.println("==========讀取odps當前日期的傳參日誌完畢,endTimeRun爲===========【"
                + stampFormat.format(finishedAt) + "】");
        System.out.println("==========從連接到讀取數據庫日誌的時長,ReadTime爲===========【"
                + (finishedAt - startedAt) / 1000 + "】秒");

        System.err.println("list.size=" + rows.size());
    }

    /**
     * Builds the SQL that selects the {@code query} column (aliased {@code f1}) of
     * person-card rows in the fixed date window, ordered by {@code ds} descending and
     * limited to 1000 rows.
     */
    private static String personCardQuerySql() {
        return "SELECT t0t.query AS f1 FROM(  \n" +
                "\n" +
                "SELECT ftbl_1t.type AS type\n" +
                "  , ftbl_1t.xx AS xx\n" +
                "  , ftbl_1t.xx AS xx\n" +
                "  , ftbl_1t.ctr AS ctr\n" +
                "  , ftbl_1t.xx AS xx\n" +
                "  , ftbl_1t.xx AS xx\n" +
                "  , ftbl_1t.xx AS xx\n" +
                "  , ftbl_1t.vv AS vv\n" +
                "  , ftbl_1t.ts AS ts\n" +
                "  , ftbl_1t.xx AS xx\n" +
                "  , ftbl_1t.query AS query\n" +
                "  , ftbl_1t.ds AS ds\n" +
                "FROM xx.xx ftbl_1t\n" +
                "\n" +
                "\n" +
                "  )t0t WHERE ((t0t.ds  >=  '20200419')  AND(t0t.ds  <  '20200519'))  AND(t0t.type  =  '人物卡片') ORDER BY TO_DATE(t0t.ds,'yyyymmdd')  DESC LIMIT 1000;";
    }

}

 

4. ReadFiles爲讀取本地數據的方法(百度搜到的一段代碼),本地文件格式:多個query之間以英文逗號","分隔,例如:

趙露思,周星馳,陳芊芊,林正英,迪麗熱巴,楊爍,劉德華,吳亦凡
package com.alibaba.searchRecall.utils;

import java.io.*;
import java.util.Arrays;

/**
 * Reads a local UTF-8 text file of comma-separated keywords.
 */
public class ReadFiles {

    /**
     * Reads every line of the file, echoes it to standard output and splits the
     * content on commas; a line break also separates keywords.
     *
     * @param filePath path of the text file to read
     * @return the keywords in file order; an empty array when the path is {@code null},
     *         the file does not exist, or it contains no keywords. If reading fails
     *         part-way, the keywords read so far are returned.
     */
    public static String[] readTxt(String filePath) {
        File file = filePath == null ? null : new File(filePath);
        if (file == null || !file.isFile()) {
            System.out.println("文件不存在!");
            return new String[0];
        }

        StringBuilder builder = new StringBuilder();
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), "utf-8"))) {
            String lineTxt;
            while ((lineTxt = br.readLine()) != null) {
                System.out.println(lineTxt);
                builder.append(lineTxt).append(",");
            }
        } catch (IOException e) {
            System.out.println("文件讀取錯誤!");
            e.printStackTrace();
        }

        // "".split(",") would yield one empty keyword, so return a truly empty array instead.
        if (builder.length() == 0) {
            return new String[0];
        }
        return builder.toString().split(",");
    }

    /**
     * Manual check: prints the keywords read from a fixed local file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String filePath = "/Users/xx/searchRecall/utils/person.txt";
        System.out.println(filePath);
        String[] strings = readTxt(filePath);
        System.out.println("strings:" + Arrays.toString(strings));
    }
}

 

5. logOnlineReadODPS:從ODPS數據庫取到源數據(query)後,逐條請求引擎接口,並解析接口返回

package com.xx.searchRecall.utils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.aliyun.odps.data.Record;

import java.util.*;


/**
 * Requests the search interface once per query read from ODPS and reports how often
 * doc_source 3 and doc_source 12 are recalled alone, together, or not at all.
 */
public class logOnlineReadODPS {

    /** Search endpoint with its fixed parameters; the keyword is appended per request. */
    private static final String SEARCH_URL = "http://xx/query?noqc=0&xx=xx&pg=1&nocache=1&sdkver=xx";

    public static void main(String[] args) {
//        startSearch();
    }

    // Kept for compatibility; not used by the methods of this class.
    public static List<String> list = new ArrayList<>();

    /**
     * Sends one request per record, classifies each response with
     * {@link #responseToParse} and prints the counts, percentages and query lists
     * for the four outcomes.
     *
     * @param list records whose {@code f1} column holds the search keyword; a
     *             {@code null} list is treated as empty
     */
    public static void startSearch(List<Record> list) {

        int only3 = 0;
        StringBuilder query3 = new StringBuilder();

        int only12 = 0;
        StringBuilder query12 = new StringBuilder();

        int both3_12 = 0;
        StringBuilder query3_12 = new StringBuilder();

        int no3_12 = 0;
        StringBuilder queryNO3_12 = new StringBuilder();

        int totalCount = list == null ? 0 : list.size();

        for (int i = 0; i < totalCount; i++) {
            // The keyword is the f1 column of the SQL result row.
            Record record = list.get(i);
            String keywords = record.getString("f1");

            Map<String, String> query = new HashMap<>();
            query.put("keyword", keywords);

            System.out.println("第" + (i + 1) + "條數據==" + query);
            String response = HTTPCommonMethod.doGet(SEARCH_URL, query, i);

            int type = responseToParse(i, keywords, parseResponse(i, keywords, response));

            if (type == 1) {
                only3++;
                query3.append(keywords).append(",");
            } else if (type == 2) {
                only12++;
                query12.append(keywords).append(",");
            } else if (type == 3) {
                both3_12++;
                query3_12.append(keywords).append(",");
            } else {
                no3_12++;
                queryNO3_12.append(keywords).append(",");
            }
        }

        System.out.println("totalCount==" + totalCount);

        String rate3 = toPercent(only3, totalCount);
        String rate12 = toPercent(only12, totalCount);
        String rate3_12 = toPercent(both3_12, totalCount);
        String rateNO3_12 = toPercent(no3_12, totalCount);

        System.out.println("------------------------------------------------------------------------------------------------");
        System.out.println("------------------------------------------------------------------------------------------------g");
        System.out.println("only3---只召回自頻道==【" + only3 + "】---比率爲==【" + rate3 + "】---query3==【" + query3 + "】");
        System.out.println("only12---只召回人物==【" + only12 + "】---比率爲==【" + rate12 + "】---query12==【" + query12 + "】");
        System.out.println("both3-12---同時召回自頻道和人物==【" + both3_12 + "】---比率爲==【" + rate3_12 + "】---query3_12==【" + query3_12 + "】");
        System.out.println("no3-12---均未召回自頻道和人物==【" + no3_12 + "】---比率爲==【" + rateNO3_12 + "】---queryNO3_12==【" + queryNO3_12 + "】");

    }


    /**
     * Classifies one response by the {@code doc_source} values found in its
     * {@code resultList} array. Items without a readable {@code doc_source} are skipped,
     * so they cannot hide values found in other items.
     *
     * @param count    index of the request, used only in log output
     * @param query    the search keyword, used only in log output
     * @param response parsed response; {@code null} or empty is reported and counted as 0
     * @return 1: only doc_source 3 is present; 2: only doc_source 12 is present;
     *         3: both 3 and 12 are present; 0: neither is present
     */
    public static int responseToParse(int count, String query, JSONObject response) {
        if (response == null || response.isEmpty()) {
            System.err.println("第【" + count + "】條日誌,搜索query爲==【" + query + "】,接口返回爲空");
            return 0;
        }

        boolean docSource3 = false;
        boolean docSource12 = false;
        try {
            JSONArray jsonArray = response.getJSONArray("resultList");
            int size = jsonArray == null ? 0 : jsonArray.size();
            // Stop as soon as both types have been seen; further items cannot change the result.
            for (int i = 0; i < size && !(docSource3 && docSource12); i++) {
                Integer docSource = readDocSource(jsonArray, i);
                if (docSource == null) {
                    continue;
                }
                if (docSource.intValue() == 3) {
                    docSource3 = true;
                } else if (docSource.intValue() == 12) {
                    docSource12 = true;
                }
            }
        } catch (RuntimeException e) {
            // e.g. resultList is not an array; classify with whatever was found so far.
            e.printStackTrace();
        }

        if (docSource3 && docSource12) {
            return 3;
        }
        if (docSource3) {
            return 1;
        }
        if (docSource12) {
            return 2;
        }
        return 0;
    }

    /** Parses the raw response text; returns null for a null, empty or malformed body. */
    private static JSONObject parseResponse(int count, String query, String response) {
        if (response == null || response.isEmpty()) {
            return null;
        }
        try {
            return JSONObject.parseObject(response);
        } catch (RuntimeException e) {
            // One malformed body must not abort the remaining queries.
            System.err.println("第【" + count + "】條日誌,搜索query爲==【" + query + "】,接口返回不是合法的JSON");
            e.printStackTrace();
            return null;
        }
    }

    /** Reads doc_source of the item at index; returns null when it is missing or unreadable. */
    private static Integer readDocSource(JSONArray results, int index) {
        try {
            JSONObject item = results.getJSONObject(index);
            return item == null ? null : item.getInteger("doc_source");
        } catch (RuntimeException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Formats part/total as a percentage string; 0.0% when total is 0. */
    private static String toPercent(int part, int total) {
        if (total == 0) {
            return "0.0%";
        }
        return ((float) part / (float) total) * 100 + "%";
    }

    // Kept for compatibility; not used by the methods of this class.
    public static JSONObject jsonObject = new JSONObject();

}

 

6. logOnlineReadFiles:從本地文件讀取數據源(query),逐條請求引擎接口,並解析接口返回

package com.alibaba.searchRecall.utils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

import java.util.*;

import static com.alibaba.searchRecall.utils.ReadFiles.readTxt;

/**
 * Requests the search interface once per keyword read from a local file and reports how
 * often doc_source 3 and doc_source 12 are recalled alone, together, or not at all.
 */
public class logOnlineReadFiles {

    /** Search endpoint with its fixed parameters; the keyword is appended per request. */
    private static final String SEARCH_URL = "http://xx/query?noqc=0&pg=1&nocache=1&xx=308";

    public static void main(String[] args) {
        startSearch();
    }

    // Kept for compatibility; not used by the methods of this class.
    public static List<String> list = new ArrayList<>();

    /**
     * Reads the keywords from the local file, sends one request per non-blank keyword,
     * classifies each response with {@link #responseToParse} and prints the counts,
     * percentages and query lists for the four outcomes.
     */
    public static void startSearch() {

        // The keywords were exported from ODPS into this local comma-separated file.
        String filePath = "/Users/lishan/Desktop/xx/xx/src/main/java/com/xx/searchRecall/person.txt";
        System.out.println(filePath);
        String[] keywords = readTxt(filePath);
        System.out.println("strings:" + Arrays.toString(keywords));

        int only3 = 0;
        StringBuilder query3 = new StringBuilder();

        int only12 = 0;
        StringBuilder query12 = new StringBuilder();

        int both3_12 = 0;
        StringBuilder query3_12 = new StringBuilder();

        int no3_12 = 0;
        StringBuilder queryNO3_12 = new StringBuilder();

        // Counts only the keywords that were actually requested.
        int totalCount = 0;

        for (int i = 0; i < keywords.length; i++) {
            String keyword = keywords[i];
            // A blank entry (empty file, doubled comma) is not a query; do not request or count it.
            if (keyword == null || keyword.trim().isEmpty()) {
                continue;
            }

            Map<String, String> query = new HashMap<>();
            query.put("keyword", keyword);

            System.out.println("第" + (i + 1) + "條數據==" + query);
            String response = HTTPCommonMethod.doGet(SEARCH_URL, query, i);

            int type = responseToParse(i, keyword, parseResponse(i, keyword, response));

            if (type == 1) {
                only3++;
                query3.append(keyword).append(",");
            } else if (type == 2) {
                only12++;
                query12.append(keyword).append(",");
            } else if (type == 3) {
                both3_12++;
                query3_12.append(keyword).append(",");
            } else {
                no3_12++;
                queryNO3_12.append(keyword).append(",");
            }

            totalCount++;
        }

        System.out.println("totalCount==" + totalCount);

        // Rates are reported as percentages.
        String rate3 = toPercent(only3, totalCount);
        String rate12 = toPercent(only12, totalCount);
        String rate3_12 = toPercent(both3_12, totalCount);
        String rateNO3_12 = toPercent(no3_12, totalCount);

        System.out.println("------------------------------------------------------------------------------------------------");
        System.out.println("------------------------------------------------------------------------------------------------g");
        System.out.println("only3---只召回自頻道==【" + only3 + "】---比率爲==【" + rate3 + "】---query3==【" + query3 + "】");
        System.out.println("only12---只召回人物==【" + only12 + "】---比率爲==【" + rate12 + "】---query12==【" + query12 + "】");
        System.out.println("both3-12---同時召回自頻道和人物==【" + both3_12 + "】---比率爲==【" + rate3_12 + "】---query3_12==【" + query3_12 + "】");
        System.out.println("no3-12---均未召回自頻道和人物==【" + no3_12 + "】---比率爲==【" + rateNO3_12 + "】---queryNO3_12==【" + queryNO3_12 + "】");

    }


    /**
     * Classifies one response by the {@code doc_source} values found in its
     * {@code resultList} array. Items without a readable {@code doc_source} are skipped,
     * so they cannot hide values found in other items.
     *
     * @param count    index of the request, used only in log output
     * @param query    the search keyword, used only in log output
     * @param response parsed response; {@code null} or empty is reported and counted as 0
     * @return 1: only doc_source 3 is present; 2: only doc_source 12 is present;
     *         3: both 3 and 12 are present; 0: neither is present
     */
    public static int responseToParse(int count, String query, JSONObject response) {
        if (response == null || response.isEmpty()) {
            System.err.println("第【" + count + "】條日誌,搜索query爲==【" + query + "】,接口返回爲空");
            return 0;
        }

        boolean docSource3 = false;
        boolean docSource12 = false;
        try {
            JSONArray jsonArray = response.getJSONArray("resultList");
            int size = jsonArray == null ? 0 : jsonArray.size();
            // Stop as soon as both types have been seen; further items cannot change the result.
            for (int i = 0; i < size && !(docSource3 && docSource12); i++) {
                Integer docSource = readDocSource(jsonArray, i);
                if (docSource == null) {
                    continue;
                }
                if (docSource.intValue() == 3) {
                    docSource3 = true;
                } else if (docSource.intValue() == 12) {
                    docSource12 = true;
                }
            }
        } catch (RuntimeException e) {
            // e.g. resultList is not an array; classify with whatever was found so far.
            e.printStackTrace();
        }

        if (docSource3 && docSource12) {
            return 3;
        }
        if (docSource3) {
            return 1;
        }
        if (docSource12) {
            return 2;
        }
        return 0;
    }

    /** Parses the raw response text; returns null for a null, empty or malformed body. */
    private static JSONObject parseResponse(int count, String query, String response) {
        if (response == null || response.isEmpty()) {
            return null;
        }
        try {
            return JSONObject.parseObject(response);
        } catch (RuntimeException e) {
            // One malformed body must not abort the remaining queries.
            System.err.println("第【" + count + "】條日誌,搜索query爲==【" + query + "】,接口返回不是合法的JSON");
            e.printStackTrace();
            return null;
        }
    }

    /** Reads doc_source of the item at index; returns null when it is missing or unreadable. */
    private static Integer readDocSource(JSONArray results, int index) {
        try {
            JSONObject item = results.getJSONObject(index);
            return item == null ? null : item.getInteger("doc_source");
        } catch (RuntimeException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Formats part/total as a percentage string; 0.0% when total is 0. */
    private static String toPercent(int part, int total) {
        if (total == 0) {
            return "0.0%";
        }
        return ((float) part / (float) total) * 100 + "%";
    }

    // Kept for compatibility; not used by the methods of this class.
    public static JSONObject jsonObject = new JSONObject();

}

 

。。。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章