Java登錄網站多個賬號整理鬥米簡歷(一)

這是對上個程序的補充,同時登錄多個賬號採集所有信息,分別存在不同的表中,修改的部分代碼下面會粘貼出來,其他代碼部分參考上篇博客~~~
每個賬號是有區別的,所以調用方法時會額外傳入一個標記(賬號下標)作爲參數,全局變量及方法調用的變化如下:

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;


    // Per-account cookie strings, indexed by account number. GetCookie appends each
    // "name=value;" pair it receives; GetHTML and CheckTelphone send the result back
    // as the Cookie request header. Three slots, matching the three logins in main.
    public static String responseCookie[] = {"","",""};
    // Per-account counters, indexed by account number. The methods below also use
    // them as the next spreadsheet row to write (row 0 is the header row).
    public static int i[]={0,0,0},x[]={0,0,0};//i records the number of posted positions, x records the number of applicants

    /**
     * Entry point: for every configured account, logs in, collects all posted
     * positions into a "DoumiCareer" spreadsheet and then collects the applicants
     * of those positions via {@link #GetAllEnrollInfo(int)}.
     *
     * <p>The position workbook is closed in a {@code finally} block so the file
     * handle is released even when login, page fetching or parsing fails.
     *
     * @param args unused
     * @throws IOException on network or file errors
     * @throws RowsExceededException if a sheet runs out of rows
     * @throws WriteException if a cell or workbook cannot be written
     * @throws BiffException if the position workbook cannot be read back
     */
    public static void main(String[] args) throws IOException, RowsExceededException, WriteException, BiffException {
        Calendar c = Calendar.getInstance();
        int year = c.get(Calendar.YEAR);
        int month = c.get(Calendar.MONTH)+1; // Calendar.MONTH is zero-based
        int date = c.get(Calendar.DATE);

        // One form-encoded login body per account; the array index is the account
        // number passed to every helper so each account keeps its own cookie and counters.
        String ss[]={"phone_login=123&passwd_login=123",
                "phone_login=456&passwd_login=456",
                "phone_login=789&passwd_login=789"};

        for(int j=0;j<ss.length;j++){
            // File name carries the account number and today's date.
            String path="C:\\resume\\DoumiCareer("+j+")"+year+"."+month+"."+date+".xlsx";
            WritableWorkbook wb = Workbook.createWorkbook(new File(path));
            try {
                WritableSheet ws = wb.createSheet("Sheet1", 0);

                // Header row.
                ws.addCell(new Label(0,0,"職位名"));
                ws.addCell(new Label(1,0,"職位ID"));
                ws.addCell(new Label(2,0,"發佈城市"));
                ws.addCell(new Label(3,0,"報名鏈接"));
                i[j]++; // next free row is now 1

                // Log in (stores the account's cookie) and parse the first page of positions.
                StringBuffer firstPage=GetCookie(ss[j],j);
                String hrefs=LookAllCareer(firstPage,ws,wb,j);

                // Follow "next page" links until the pager offers none.
                while(hrefs!=null){
                    // The link is taken from HTML, so "&amp;" has to become "&".
                    String href=hrefs.replaceAll("amp;","");
                    System.out.println(href);
                    hrefs=LookAllCareer(GetHTML(href,j),ws,wb,j);
                }

                wb.write();
            } finally {
                wb.close();
            }

            // Reads the workbook written above and collects the applicants of every position.
            GetAllEnrollInfo(j);
        }
    }

    //獲取該鏈接下的網頁內容,結合Cookie進行跳轉
    //根據Cookie及URL得到網頁內容
    /**
     * Downloads the page at {@code href}, sending the cookie string stored for
     * account {@code r}, and returns its text decoded as UTF-8.
     *
     * <p>Each line of the response is followed by the platform line separator.
     * The reader is closed before returning, also when reading fails.
     *
     * @param href absolute URL of the page to fetch
     * @param r account index into {@code responseCookie}
     * @return the page content
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws IOException if the URL is malformed or the request fails
     */
    public static StringBuffer GetHTML(String href,int r) throws UnsupportedEncodingException, IOException{
        URL url = new URL(href);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        // Send the login cookie so the server treats the request as authenticated.
        connection.setRequestProperty("Cookie", responseCookie[r]);

        String eol = System.getProperty("line.separator");
        StringBuffer content = new StringBuffer();
        BufferedReader br = new BufferedReader(new InputStreamReader(connection.getInputStream(),"UTF-8"));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                content.append(line).append(eol);
            }
        } finally {
            br.close();
        }
        return content;
    }

    //獲取全局Cookie值,並返回企業版首頁的網頁內容
    //得到全局變量cookie值
    /**
     * Logs in with the given form body, stores the cookies returned by the server
     * for account {@code r} and returns the content of the management-center page.
     *
     * <p>Every {@code Set-Cookie} header contributes its leading {@code name=value}
     * part, followed by {@code ";"}, to {@code responseCookie[r]}. A cookie header
     * without any attributes (no {@code ';'}) is stored whole instead of causing a
     * {@code StringIndexOutOfBoundsException}.
     *
     * @param str form-encoded login body, e.g. {@code phone_login=...&passwd_login=...}
     * @param r account index into {@code responseCookie}
     * @return the management-center page, each line followed by the platform line separator
     * @throws IOException if either request fails
     */
    public static StringBuffer GetCookie(String str,int r) throws IOException{
        URL url = new URL("https://vip.doumi.com/employer/user/ajaxlogin");
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();

        connection.setDoOutput(true); // the login data is sent in the request body
        connection.setRequestMethod("POST");

        // Browser-like request headers.
        connection.setRequestProperty("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        connection.setRequestProperty("Accept-Encoding", "gzip, deflate, sdch, br");
        connection.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.8");
        connection.setRequestProperty("Cache-Control", "max-age=0");
        connection.setRequestProperty("Connection", "keep-alive");
        connection.setRequestProperty("Host", "vip.doumi.com");
        connection.setRequestProperty("Referer", "http://www.doumi.com/wh/");
        connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36");

        OutputStream os = connection.getOutputStream();
        try {
            os.write(str.getBytes("UTF-8"));
            os.flush();
        } finally {
            os.close();
        }

        // Collect the cookies. Header index 0 is skipped, as in the original loop;
        // the scan stops at the first index that has no key.
        String key;
        for(int idx = 1; (key = connection.getHeaderFieldKey(idx)) != null; idx++){
            if(key.equalsIgnoreCase("set-cookie")){
                String cookieVal = connection.getHeaderField(idx);
                int end = cookieVal.indexOf(";");
                if(end >= 0){
                    // Keep only "name=value"; drop attributes such as path or expiry.
                    cookieVal = cookieVal.substring(0, end);
                }
                responseCookie[r] = responseCookie[r] + cookieVal + ";";
            }
        }

        // Status-code read kept from the original.
        // NOTE(review): presumably this is what surfaces a failed login request as an
        // IOException, since the header getters above do not declare one - confirm.
        connection.getResponseCode();

        // Fetch the management center with the cookies gathered above.
        URL url1 = new URL("https://vip.doumi.com/managecenter");
        HttpURLConnection connection1 = (HttpURLConnection) url1.openConnection();
        connection1.setRequestProperty("Cookie", responseCookie[r]);

        String eol = System.getProperty("line.separator");
        StringBuffer content1 = new StringBuffer();
        BufferedReader br1 = new BufferedReader(new InputStreamReader(connection1.getInputStream(),"UTF-8"));
        try {
            String line;
            while ((line = br1.readLine()) != null) {
                content1.append(line).append(eol);
            }
        } finally {
            br1.close();
        }
        return content1;
    }

    //獲取該網頁下的所有職位的相關信息,創建單元格寫入DoumiCareer表中,返回下一頁鏈接的字符串
    //匹配網頁的內容,返回下一頁的鏈接地址,重複使用該函數收集發佈職位的信息
    /**
     * Scans one page of the position list, appends a row per position to
     * {@code sheet} and reports where the next page is.
     *
     * <p>For each position the title, the second captured value of the first
     * {@code <li><span>} item, the second captured value of the first
     * {@code <li><span>..<em>} item and the href of the first link in the
     * operation-button box are written to columns 0-3 of row {@code i[ll]},
     * after which {@code i[ll]} is incremented.
     *
     * @param strb HTML of the page to scan
     * @param sheet sheet that receives the rows
     * @param wb workbook owning {@code sheet}; not used by this method
     * @param ll account index into the counter array {@code i}
     * @return the href of the pager item that follows the active one, or
     *         {@code null} when the page has no pager or no following link
     */
    public static String LookAllCareer(StringBuffer strb,WritableSheet sheet,WritableWorkbook wb,int ll) throws RowsExceededException, WriteException, IOException{
        // Page-level patterns: title box, detail list and operation buttons of each position.
        Pattern titleBox=Pattern.compile("<div class=\"bList-item-t\">[\\s]*?<a.*?>([^\n]*)</a>");
        Pattern detailBox=Pattern.compile("<div class=\"bList-item-mid\">[\\s]*?<ul.*?>[\\s]*?<li>[\\s\\S]*?</li>[\\s]*?<li>[\\s\\S]*?</li>[\\s\\S]*?<li class=\"pubTime\">[\\s\\S]*?</li>[\\s]*?<li>[\\s\\S]*?</li>[\\s]*?<li>[\\s\\S]*?</li>[\\s]*?</ul>[\\s]*?</div>");
        Pattern buttonBox=Pattern.compile("<div class=\"bList-item-opBtn\">[\\s]*?<a.*?>([^\n]*)</a>[\\s]*?<a.*?>([^\n]*)</a>[\\s]*?</div>");

        // Patterns applied inside a single position's detail list / button box.
        Pattern spanItem=Pattern.compile("<li>[\\s]*?<span>([^\n]*)</span>([\\s\\S]*?)[\\s]*?</li>");
        Pattern spanEmItem=Pattern.compile("<li>[\\s]*?<span>([^\n]*)</span>[\\s]*?<em.*?>([^\n]*)</em>[\\s]*?</li>");
        Pattern anchorHref=Pattern.compile("<a" + "[^<>]*?\\s" + "href" + "=['\"]?(.*?)['\"]?(\\s.*?)?>");

        Matcher titles = titleBox.matcher(strb);
        Matcher details = detailBox.matcher(strb);
        Matcher buttons = buttonBox.matcher(strb);

        // The three matchers advance in lock-step, one position per iteration.
        while(titles.find() && details.find() && buttons.find()){
            String detailHtml = details.group(0);
            Matcher first = spanItem.matcher(detailHtml);
            Matcher second = spanEmItem.matcher(detailHtml);
            Matcher link = anchorHref.matcher(buttons.group(0));

            // Skip positions whose markup does not yield all three values.
            if(!first.find() || !second.find() || !link.find()){
                continue;
            }

            String[] values = {titles.group(1), first.group(2), second.group(2), link.group(1)};
            for(int col=0;col<values.length;col++){
                sheet.addCell(new Label(col,i[ll],values[col]));
            }
            i[ll]++;
        }

        // Locate the pager and, inside it, the link that follows the active item.
        Pattern pagerBox=Pattern.compile("<div class=\"pageBox\">[\\s]*?<ul class=\"pagination\">[\\s\\S]*?</ul>[\\s]*?</div>");
        Matcher pager = pagerBox.matcher(strb);
        if(!pager.find()){
            return null;
        }

        Pattern followingLink=Pattern.compile("<li class=\"active\">[\\s\\S]*?</li><li><a href"+"=['\"]?(.*?)['\"]?(\\s.*?)?>([\\s\\S])*?</a></li>");
        Matcher following = followingLink.matcher(pager.group(0));
        if(following.find()){
            return following.group(1);
        }
        return null;
    }

    //讀取DoumiCareer表中的信息,獲取所有職位的報名人員的信息,並寫入DoumiResume表中
    //讀取Excel表中每個職位的的報名管理鏈接,跳轉到報名管理的頁面,得到所有頁面的報名人員信息
    /**
     * Reads today's "DoumiCareer" workbook of account {@code y}, visits the
     * enrollment page of every listed position and writes the matching applicants
     * (name, gender, age, phone) to today's "DoumiResume" workbook.
     *
     * <p>Rows 1 to {@code i[y]-1} of the position sheet are processed, so
     * {@code i[y]} must still hold the row count produced while that sheet was
     * written. The input stream and both workbooks are released in
     * {@code finally} blocks, so a failing page fetch no longer leaks file handles.
     *
     * <p>NOTE(review): the file name is rebuilt from the current date; if the date
     * changes between writing and reading, the position workbook will not be found.
     *
     * @param y account index into {@code i}, {@code x} and {@code responseCookie}
     * @throws BiffException if the position workbook cannot be parsed
     * @throws IOException on file or network errors
     * @throws RowsExceededException if the output sheet runs out of rows
     * @throws WriteException if a cell or workbook cannot be written
     */
    public static void GetAllEnrollInfo(int y) throws BiffException, IOException, RowsExceededException, WriteException{
        Calendar c = Calendar.getInstance();
        int year = c.get(Calendar.YEAR);
        int month = c.get(Calendar.MONTH)+1; // Calendar.MONTH is zero-based
        int date = c.get(Calendar.DATE);

        String path="C:\\resume\\DoumiCareer("+y+")"+year+"."+month+"."+date+".xlsx";
        InputStream readfile = new FileInputStream(path);
        try {
            Workbook rexcel = Workbook.getWorkbook(readfile);
            try {
                Sheet st = rexcel.getSheet(0); // first sheet, addressed by index

                // Output workbook for the applicants of this account.
                String setpath="C:\\resume\\DoumiResume("+y+")"+year+"."+month+"."+date+".xlsx";
                WritableWorkbook wb = Workbook.createWorkbook(new File(setpath));
                try {
                    WritableSheet ws = wb.createSheet("Sheet1", 0);

                    // Header row.
                    ws.addCell(new Label(0,0,"姓名"));
                    ws.addCell(new Label(1,0,"性別"));
                    ws.addCell(new Label(2,0,"年齡"));
                    ws.addCell(new Label(3,0,"聯繫電話"));
                    x[y]++; // next free row is now 1

                    // Row 0 is the header, so positions start at row 1.
                    for(int j=1;j<i[y];j++){
                        Cell cell1=st.getCell(1, j); // column 1: position id (pid)
                        Cell cell3=st.getCell(3, j); // column 3: enrollment page link
                        System.out.println(cell3.getContents());
                        StringBuffer content=GetHTML(cell3.getContents(),y);
                        GetSingleEnrollInfo(cell1.getContents(),content,wb,ws,y);
                    }

                    wb.write();
                } finally {
                    wb.close();
                }
            } finally {
                rexcel.close();
            }
        } finally {
            readfile.close();
        }
    }

    //根據報名管理的網頁內容,匹配到所有報名人員的信息
    /**
     * Extracts the applicants from one enrollment page and appends those whose
     * sign-up time contains "1天前" (one day ago) to {@code sheet}.
     *
     * <p>Columns 0-2 receive the three values captured from the applicant's name
     * block (per the original comments: name, gender, age); column 3 receives the
     * phone number. When the phone cell only shows the "查看電話" (view phone)
     * link, the number is requested through {@link #CheckTelphone(String, String, int)}.
     * Rows are written at {@code x[y]}, which is incremented per written applicant.
     * Only the given page is processed; pager links are not followed.
     *
     * @param pid id of the position the page belongs to
     * @param content HTML of the enrollment page
     * @param wb workbook owning {@code sheet}; not used by this method
     * @param sheet sheet that receives the rows
     * @param y account index into {@code x} and {@code responseCookie}
     * @throws IOException if a phone lookup fails
     * @throws RowsExceededException if the sheet runs out of rows
     * @throws WriteException if a cell cannot be written
     */
    public static void GetSingleEnrollInfo(String pid,StringBuffer content,WritableWorkbook wb,WritableSheet sheet,int y) throws IOException, RowsExceededException, WriteException{
        // Name block: one <em><span> value followed by two "mr5" spans.
        Pattern q1=Pattern.compile("<em.*?>[\\s]*?<span.*?>[\\s]*?([\\S]*?)[\\s]*?</span>[\\s]*?</em>[\\s]*?<span.*?></span>[\\s]*?<span class=\"mr5\">([^\n]*?)</span>[\\s]*?<span class=\"mr5\">([^\n]*?)</span>");
        // Phone cell (its class suffix is the aid) followed by the sign-up time cell.
        Pattern q2=Pattern.compile("<td class=\"read-phone-"+"([\\S]*?)\">[\\s]*([\\s\\S]*?)[\\s]*</td>[\\s]*?<td>[\\s]*([\\s\\S]*?)[\\s]*</td>");
        // Sign-up time when the cell also contains a "b-ico-time" div; compiled once, not per applicant.
        Pattern q22=Pattern.compile("([^\n]*?)[\\s]*<div class=\"b-ico-time\">([\\s\\S]*?)</div>");

        Matcher m1=q1.matcher(content);
        Matcher m2=q2.matcher(content);

        // Both matchers advance in lock-step, one applicant per iteration.
        while(m1.find() && m2.find()){
            // Work out the sign-up time text.
            String timeCell=m2.group(3);
            String bmtime;
            if(timeCell.indexOf("div")>-1){
                Matcher m22=q22.matcher(timeCell);
                bmtime=m22.find() ? m22.group(1) : "";
            }
            else{
                bmtime=timeCell;
            }

            // Keep only applicants whose sign-up time reads "1天前".
            if(bmtime.indexOf("1天前")<0){
                continue;
            }

            Label label0 = new Label(0,x[y],m1.group(1));
            Label label1 = new Label(1,x[y],m1.group(2));
            Label label2 = new Label(2,x[y],m1.group(3));
            sheet.addCell(label0);
            sheet.addCell(label1);
            sheet.addCell(label2);

            String phoneCell=m2.group(2);
            String tel;
            if(phoneCell.indexOf("查看電話")>-1){
                // Number is hidden behind the link: ask the server, using aid and pid.
                tel=CheckTelphone(pid,m2.group(1),y);
            }
            else{
                tel=phoneCell;
            }
            sheet.addCell(new Label(3,x[y],tel));

            x[y]++;
        }
    }

    //對pid和aid進行處理獲得手機號
    /**
     * Asks the server for the phone number of applicant {@code aid} on position
     * {@code pid}, using the cookie stored for account {@code y}.
     *
     * <p>The response is searched for {@code "data":{"<aid>":"<value>"}}. Compared
     * with the previous version, an empty response no longer causes a
     * {@code NullPointerException}, the whole body is searched instead of only its
     * first line, {@code aid} is matched literally rather than as regex syntax, and
     * the request writer and response reader are always closed.
     *
     * @param pid position id
     * @param aid applicant id
     * @param y account index into {@code responseCookie}
     * @return the captured value, or {@code null} when the response does not contain it
     * @throws IOException if the request fails
     */
    public static String CheckTelphone(String pid,String aid,int y) throws IOException{
        URL url=new URL("https://vip.doumi.com/employer/manage/readphone");
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();

        connection.setRequestProperty("Cookie", responseCookie[y]); // login cookie of this account
        connection.setDoOutput(true); // the ids are sent in the request body

        String str="aid="+aid+"&pid="+pid;
        OutputStreamWriter out = new OutputStreamWriter(connection.getOutputStream(), "UTF-8");
        try {
            out.write(str);
            out.flush();
        } finally {
            out.close();
        }

        // Read the complete response; lines stay separated so a match cannot span two lines.
        StringBuffer body=new StringBuffer();
        BufferedReader br = new BufferedReader(new InputStreamReader(connection.getInputStream(),"UTF-8"));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                body.append(line).append('\n');
            }
        } finally {
            br.close();
        }

        // '{' must be escaped in a regex; aid is quoted so it is matched verbatim.
        Pattern p=Pattern.compile("\"data\":\\{\""+Pattern.quote(aid)+"\":\"([\\S]*?)\"}}");
        Matcher m=p.matcher(body);
        if(m.find()){
            return m.group(1);
        }
        return null;
    }
發佈了51 篇原創文章 · 獲贊 6 · 訪問量 3萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章