Java登錄QQ郵箱,整理郵件中的58同城簡歷
實習期間給公司做的第一個小工具,只需下載jxl.jar和mail.jar這兩個第三方類庫,就可以着手敲代碼了:
- 1、此次是使用POP3協議登錄的郵箱,使用了手機獲取的賬號配置碼
- 2、遍歷收件箱的所有郵件,判斷郵件類型,得到郵件的內容
- 3、解析每一封郵件的內容,parseMessage(Message …messages)
- 4、找到符合主題的郵件,如包含(58.com)……,getSubject(MimeMessage msg)
- 5、針對特定郵件解析出相應內容,並創建excel表,一條條的存入其中
- 6、所有簡歷的部分信息先放在Excel表中集合,關閉表
登錄郵箱
網上普遍可以找到遵循POP3協議及IMAP協議的登錄方式。下面我用的是POP3協議,缺陷是不能給郵件做標記,只能讀,不可改變郵件的狀態及屬性;IMAP協議的登錄方式,後面會另寫博客進行補充……
另外敲代碼前,先到郵箱的設置裏,進入“賬戶”設置,開啓POP3服務,並用手機發送短信獲取登錄碼,很方便,可避免因修改了密碼或郵箱設置了獨立密碼等層層關卡帶來的麻煩。我把登錄過程寫成了一個方法,代碼如下:
/**
 * Logs in to the QQ mailbox over POP3, prints the number of messages in the
 * inbox and passes all of them to {@code parseMessage}.
 *
 * <p>Failures while fetching or parsing the messages are printed and swallowed
 * here; failures while connecting, opening the folder or counting messages
 * propagate to the caller. The folder and the store are always closed, even
 * when one of those steps throws.
 *
 * @throws Exception if connecting to the server, opening the inbox or reading
 *         the message count fails
 */
public static void receive() throws Exception {
    // Session properties used to reach the POP3 server.
    Properties props = new Properties();
    //props.setProperty("mail.store.protocol", "pop3"); // protocol
    props.setProperty("mail.pop3.port", "110"); // port
    props.setProperty("mail.pop3.host", "pop.qq.com"); // POP3 server
    // SSL connection parameters.
    props.setProperty("mail.pop3.socketFactory.class", "javax.net.ssl.SSLSocketFactory");
    props.setProperty("mail.pop3.socketFactory.fallback", "true");
    props.setProperty("mail.pop3.socketFactory.port", "995");

    Session session = Session.getInstance(props);
    Store store = session.getStore("pop3");
    // First the mailbox address, then the login code issued by the mail provider.
    // NOTE(review): credentials are hard-coded; consider loading them from configuration.
    store.connect("pop.qq.com","[email protected]", "......");
    try {
        Folder folder = store.getFolder("INBOX");
        // Folder.READ_ONLY  : read-only access
        // Folder.READ_WRITE : read/write access (message state may be modified)
        folder.open(Folder.READ_WRITE);
        try {
            // POP3 cannot report per-message state, so only the total count is meaningful.
            System.out.println("郵件總數: " + folder.getMessageCount());
            try {
                Message[] messages = folder.getMessages();
                parseMessage(messages);
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("receive內部異常");
            }
        } finally {
            // Release the folder even when counting or parsing failed.
            try {
                folder.close(true);
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("receive內部異常");
            }
        }
    } finally {
        // Release the server connection no matter what happened above.
        try {
            store.close();
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("receive內部異常");
        }
    }
}
解析郵件
判斷主題裏是否有58.com,並判斷該郵件是否是當天的,整理當天的郵件信息存入Excel表中去:
/**
 * Scans the given messages for 58.com resume mails that fall inside the
 * window accepted by {@code judgeDate} and writes one spreadsheet row per
 * matching mail.
 *
 * <p>Columns written per row: 0 = first capture of the {@code <h3>} pattern,
 * 1 and 2 = characters [1,2) and [3,5) of its second capture, 3 = second
 * capture of the nested-span {@code <label>} pattern, 4 = second {@code <li>}
 * of the {@code <ul>} pattern, 5 = second capture of the plain {@code <label>}
 * pattern. Presumably these are name, gender, age, phone, e-mail and
 * experience - verify against a real 58.com mail.
 *
 * <p>I/O, mail and spreadsheet errors raised while processing are printed and
 * swallowed; the workbook is closed in every case.
 *
 * @param messages the messages to inspect; every element is cast to {@code MimeMessage}
 * @throws MessagingException if {@code messages} is null or empty
 * @throws IOException declared for callers; I/O errors inside the method are caught
 */
public static void parseMessage(Message ...messages) throws MessagingException, IOException {
    if (messages == null || messages.length < 1) {
        throw new MessagingException("未找到要解析的郵件!");
    }

    // Regular expressions that pull the resume fields out of the 58.com HTML body.
    Pattern p1 = Pattern.compile("<h3.*?>([\\s\\S]*)<span.*?>([\\s\\S]*)</span></h3>");
    Pattern p2 = Pattern.compile("<label.*?>([\\s\\S]*)<span.*?><span.*?>([\\s\\S]*)</span></span></label>");
    Pattern p3 = Pattern.compile("<ul.*?>[\\s]*?<li.*?>([^\n]*)</li>[\\s]*?<li.*?>([^\n]*)</li>[\\s]*?<li.*?>([^\n]*)</li>[\\s]*?<li.*?>([^\n]*)</li>[\\s]*?</ul>");
    Pattern p4 = Pattern.compile("<label.*?>([\\s\\S]*)<span.*?>([\\s\\S]*)</span></label>");

    // 24-hour clock (HH), matching the format produced by getSentDate.
    String nowDate = new SimpleDateFormat("yyyy.MM.dd_HH:mm:ss").format(new Date());
    System.out.println(nowDate);

    Calendar c = Calendar.getInstance();
    int year = c.get(Calendar.YEAR);
    int month = c.get(Calendar.MONTH) + 1; // Calendar months are zero-based
    int date = c.get(Calendar.DATE);
    // NOTE(review): jxl appears to write the legacy .xls format, so the .xlsx
    // extension may not open cleanly in Excel - confirm before relying on it.
    String path="C:\\resume\\58Resume"+year+"."+month+"."+date+".xlsx";

    try {
        WritableWorkbook wb = Workbook.createWorkbook(new File(path));
        try {
            WritableSheet ws = wb.createSheet("Sheet1", 0);
            int row = 0;
            // Walk every message; the date check below does the filtering.
            for (int i = 0; i < messages.length; i++) {
                MimeMessage msg = (MimeMessage) messages[i];
                String subject = getSubject(msg);
                // Skip mails without a usable subject.
                if (subject == null || "".equals(subject) || "null".equals(subject)) {
                    continue;
                }
                System.out.println("第"+i+"封郵件主題是: " + subject);
                String sentTime = getSentDate(msg, null);
                System.out.println("------發送時間:" + sentTime);
                if (!subject.contains("58.com") || !judgeDate(nowDate, sentTime)) {
                    continue;
                }

                StringBuffer content = new StringBuffer(300);
                getMailTextContent(msg, content);

                StringBuilder sb = new StringBuilder();
                Matcher m = p1.matcher(content);
                while (m.find()) {
                    String head = m.group(1);
                    String detail = m.group(2);
                    sb.append(head+","+detail);
                    ws.addCell(new Label(0, row, head));
                    // Guarded slices: a shorter-than-expected capture yields an
                    // empty cell instead of aborting the whole export.
                    ws.addCell(new Label(1, row, sliceOrEmpty(detail, 1, 2)));
                    ws.addCell(new Label(2, row, sliceOrEmpty(detail, 3, 5)));
                    sb.append(",");
                }
                Matcher n = p2.matcher(content);
                while (n.find()) {
                    sb.append(n.group(2));
                    ws.addCell(new Label(3, row, n.group(2)));
                    sb.append(",");
                }
                Matcher p = p3.matcher(content);
                while (p.find()) {
                    sb.append(p.group(2));
                    ws.addCell(new Label(4, row, p.group(2)));
                    sb.append(",");
                }
                Matcher q = p4.matcher(content);
                while (q.find()) {
                    sb.append(q.group(2));
                    ws.addCell(new Label(5, row, q.group(2)));
                    sb.append(",");
                }
                row++;
                System.out.println(sb);
            }
            wb.write();
        } finally {
            // Always release the output file, even when a message failed to parse.
            wb.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        System.out.println("parseMessage內部1");
    } catch (MessagingException e) {
        e.printStackTrace();
        System.out.println("parseMessage內部2");
    } catch (RowsExceededException e) {
        e.printStackTrace();
    } catch (WriteException e) {
        e.printStackTrace();
    }
}

/** Returns {@code text.substring(begin, end)}, or an empty string when that range does not fit inside {@code text}. */
private static String sliceOrEmpty(String text, int begin, int end) {
    if (text == null || begin < 0 || begin > end || end > text.length()) {
        return "";
    }
    return text.substring(begin, end);
}
/**
 * Reads the subject of a mail and decodes any encoded words in it.
 *
 * @param msg the mail to read; may be null
 * @return the decoded subject, or null when the mail is null, has no subject,
 *         or the subject cannot be read or decoded
 */
public static String getSubject(MimeMessage msg) throws UnsupportedEncodingException, MessagingException {
    if (msg == null) {
        return null;
    }
    try {
        String rawSubject = msg.getSubject();
        return rawSubject == null ? null : MimeUtility.decodeText(rawSubject);
    } catch (UnsupportedEncodingException e) {
        // Undecodable subject is reported to the caller as "no subject".
        return null;
    } catch (MessagingException e) {
        // Unreadable header is reported to the caller as "no subject".
        return null;
    }
}
/**
 * Formats the sent date of a mail.
 *
 * @param msg the mail to read
 * @param pattern a {@code SimpleDateFormat} pattern; when null or empty,
 *        {@code yyyy.MM.dd_HH:mm:ss} is used
 * @return the formatted sent date, an empty string when the mail carries no
 *         sent date, or null when the date could not be read
 * @throws MessagingException declared for callers; read failures are caught
 *         here and reported as a null result
 */
public static String getSentDate(MimeMessage msg, String pattern) throws MessagingException {
    Date sentDate;
    try {
        sentDate = msg.getSentDate();
    } catch (MessagingException e) {
        System.out.println("獲取時間內部異常");
        return null;
    }
    if (sentDate == null) {
        return "";
    }
    boolean useDefault = pattern == null || "".equals(pattern);
    String effectivePattern = useDefault ? "yyyy.MM.dd_HH:mm:ss" : pattern;
    return new SimpleDateFormat(effectivePattern).format(sentDate);
}
/**
 * Tells whether a mail timestamp lies inside the collection window that runs
 * from 12:00:00 yesterday to 11:59:59 today, both ends included, in the
 * default time zone (for example 2017-05-01 12:00:00 to 2017-05-02 11:59:59).
 *
 * <p>Both boundary seconds are accepted so that consecutive daily windows
 * cover every second between them.
 *
 * @param s1 the current time as formatted by the caller; not used by the check
 * @param resume_time_str the mail timestamp in {@code yyyy.MM.dd_HH:mm:ss} form
 * @return true when the timestamp is inside the window; false when it is
 *         outside, null, empty or not parseable
 */
public static boolean judgeDate(String s1,String resume_time_str){
    // End of the window: today 11:59:59.
    Calendar windowEnd = Calendar.getInstance();
    windowEnd.set(Calendar.HOUR_OF_DAY, 11);
    windowEnd.set(Calendar.MINUTE, 59);
    windowEnd.set(Calendar.SECOND, 59);
    windowEnd.set(Calendar.MILLISECOND, 0);

    // Start of the window: yesterday 12:00:00. Calendar arithmetic is used
    // instead of subtracting 24h of milliseconds so that a day with a clock
    // change still resolves to the previous calendar day.
    Calendar windowStart = (Calendar) windowEnd.clone();
    windowStart.add(Calendar.DATE, -1);
    windowStart.set(Calendar.HOUR_OF_DAY, 12);
    windowStart.set(Calendar.MINUTE, 0);
    windowStart.set(Calendar.SECOND, 0);

    SimpleDateFormat sdf_hms = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    System.out.println("格式化後的日期:" + sdf_hms.format(windowStart.getTime()));
    System.out.println("格式化後的日期:" + sdf_hms.format(windowEnd.getTime()));

    if (resume_time_str == null || "".equals(resume_time_str)) {
        return false;
    }
    try {
        long resumeTime = new SimpleDateFormat("yyyy.MM.dd_HH:mm:ss").parse(resume_time_str).getTime();
        return windowStart.getTimeInMillis() <= resumeTime
                && resumeTime <= windowEnd.getTimeInMillis();
    } catch (java.text.ParseException e) {
        System.out.println("get date error!!");
        return false;
    }
}
/**
 * Appends the text of a mail part to {@code content}, descending into
 * multipart containers and nested {@code message/rfc822} parts.
 *
 * <p>Text parts whose content type mentions {@code name} are treated as text
 * attachments and skipped. Inside a multipart container that holds a
 * {@code text/html} part, the {@code text/plain} siblings are skipped so the
 * same body is not collected twice. Text parts whose content is not delivered
 * as a {@code String} are ignored.
 *
 * @param part the mail part to read
 * @param content buffer that receives the collected text
 * @throws MessagingException if the part cannot be inspected
 * @throws IOException if the part content cannot be read
 */
public static void getMailTextContent(Part part,StringBuffer content) throws MessagingException, IOException {
    // getContentType() may return null when the type cannot be determined.
    String contentType = part.getContentType();
    boolean looksLikeAttachment = contentType != null && contentType.indexOf("name") != -1;

    boolean isText = part.isMimeType("text/html") || part.isMimeType("text/plain");
    if (isText && !looksLikeAttachment) {
        Object body = part.getContent();
        // The content of a text part is not guaranteed to be a String; skip
        // anything else rather than failing with a ClassCastException.
        if (body instanceof String) {
            content.append((String) body);
        }
    } else if (part.isMimeType("multipart/*")) {
        Multipart multipart = (Multipart) part.getContent();
        int count = multipart.getCount();
        boolean hasHtml = checkHasHtml(multipart);
        for (int index = 0; index < count; index++) {
            Part child = multipart.getBodyPart(index);
            // When an HTML alternative exists, the plain-text copy is not collected.
            if (child.isMimeType("text/plain") && hasHtml) {
                continue;
            }
            getMailTextContent(child, content);
        }
    } else if (part.isMimeType("message/rfc822")) {
        getMailTextContent((Part) part.getContent(), content);
    }
}
/**
 * Reports whether any direct body part of the given multipart is of type
 * {@code text/html}. Nested multiparts are not searched.
 *
 * @param part the multipart container to inspect
 * @return true as soon as a {@code text/html} body part is found, false otherwise
 * @throws MessagingException if a body part cannot be inspected
 * @throws IOException declared for callers
 */
public static boolean checkHasHtml(Multipart part) throws MessagingException, IOException{
    int total = part.getCount();
    for (int idx = 0; idx < total; idx++) {
        if (part.getBodyPart(idx).isMimeType("text/html")) {
            return true;
        }
    }
    return false;
}
主函數的調用
前面的異常處理裏都添加了輸出語句,可要可不要,另外對於程序中提示要添加的類,一定要看仔細了,經常會出現jar包名字不一樣卻有相同名字的類,添加錯了,程序就出了問題,之前浪費了我不少的時間找bug。。。以下是我的程序裏需要添加的類及主函數:
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.mail.Address;
import javax.mail.BodyPart;
import javax.mail.Flags;
import javax.mail.Folder;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.Store;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMultipart;
import javax.mail.internet.MimeUtility;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;
/**
 * Program entry point: runs {@code receive()} once and prints any failure
 * instead of letting it terminate the JVM with an uncaught exception.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) throws Exception {
    try {
        receive();
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("receive異常");
    }
}
這個Java程序的名字自己隨機取得,只是一個外殼,對內部程序沒有影響。
有什麼問題希望大家可以留言一起交流哈,集思廣益,加油加油再加油,勤能補拙,希望自己的技能日益精進!
下面是jar包的下載的鏈接:
http://download.csdn.net/detail/yixiesuifeng/9831640