背景
公司需求爲導出在線用戶信息,其中第一步便是取到在線用戶。本文選擇爬取ejabberd管理網頁中的在線用戶頁面,獲得返回報文後進行截取,得到用戶賬號,方便後續從緩存中獲取必要的數據,爲導出excel做準備。
主要代碼
連接ejabberd,獲取返回報文
/**
 * Requests a page of the ejabberd cluster web admin and returns the response body.
 *
 * <p>No request body is written; the method only sets headers (including HTTP Basic
 * authentication) and reads the response. Response lines are concatenated WITHOUT line
 * separators, so callers can search the returned text for markup that spans several lines.
 *
 * @param urlAddress     full URL of the admin page to request
 * @param userNameAndPwd Base64-encoded {@code user:password} pair, placed after
 *                       {@code "Basic "} in the Authorization header
 * @return the response body with line breaks removed; an empty string (or whatever was
 *         read so far) if the request fails — failures are logged, never thrown
 */
public static String sendPost(String urlAddress, String userNameAndPwd) {
    StringBuilder response = new StringBuilder();
    BufferedReader in = null;
    try {
        URL url = new URL(urlAddress);
        URLConnection connection = url.openConnection();
        logger.debug("與ejabberd建立連接的地址爲:"+urlAddress);
        connection.setDoOutput(true);
        // The header name must not contain ':'; "Pragma:" is not a valid field name
        // and is rejected by the JDK HTTP handler.
        connection.setRequestProperty("Pragma", "no-cache");
        connection.setRequestProperty("Accept",
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        connection.setRequestProperty("Cache-Control", "no-cache");
        connection.setRequestProperty("Content-Type", "text/xml");
        connection.setRequestProperty("User-Agent",
                "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36");
        connection.setRequestProperty("Authorization", "Basic " + userNameAndPwd);
        connection.connect();
        // The admin page declares charset=utf-8, so decode explicitly instead of
        // relying on the platform default charset.
        in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8"));
        String line;
        while ((line = in.readLine()) != null) {
            response.append(line);
        }
        logger.info("請求回執:" + response);
    } catch (Exception e) {
        logger.error(e);
    } finally {
        // Always release the stream (and the underlying connection resources).
        if (in != null) {
            try {
                in.close();
            } catch (Exception closeError) {
                logger.error(closeError);
            }
        }
    }
    return response.toString();
}
返回報文樣例
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>
<head>
<title>ejabberd Web Admin</title>
<meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>
<script src='/admin/server/sihua1.com//additions.js' type='text/javascript'> </script>
<link href='/admin/server/sihua1.com/favicon.ico' type='image/x-icon' rel='shortcut icon'/>
<link href='/admin/server/sihua1.com/style.css' type='text/css' rel='stylesheet'/>
</head>
<body>
<div id='container'>
<div id='header'>
<h1>
<a href='/admin/'>ejabberd Web Admin</a>
</h1>
</div>
<div id='navigation'>
<ul>
<li>
<div id='navitem'>
<a href='/admin/acls/'>Access Control Lists</a>
</div>
</li>
<li>
<div id='navitem'>
<a href='/admin/access/'>Access Rules</a>
</div>
</li>
<li>
<div id='navitem'>
<a href='/admin/vhosts/'>Virtual Hosts</a>
</div>
</li>
<li>
<div id='navheadsub'>
<a href='/admin/server/sihua1.com/'>sihua1.com</a>
</div>
</li>
<li>
<div id='navitemsub'>
<a href='/admin/server/sihua1.com/acls/'>Access Control Lists</a>
</div>
</li>
<li>
<div id='navitemsub'>
<a href='/admin/server/sihua1.com/access/'>Access Rules</a>
</div>
</li>
<li>
<div id='navitemsub'>
<a href='/admin/server/sihua1.com/users/'>Users</a>
</div>
</li>
<li>
<div id='navitemsub'>
<a href='/admin/server/sihua1.com/online-users/'>Online Users</a>
</div>
</li>
<li>
<div id='navitemsub'>
<a href='/admin/server/sihua1.com/last-activity/'>Last Activity</a>
</div>
</li>
<li>
<div id='navitemsub'>
<a href='/admin/server/sihua1.com/nodes/'>Nodes</a>
</div>
</li>
<li>
<div id='navitemsub'>
<a href='/admin/server/sihua1.com/stats/'>Statistics</a>
</div>
</li>
<li>
<div id='navitemsub'>
<a href='/admin/server/sihua1.com/shared-roster/'>Shared Roster Groups</a>
</div>
</li>
<li>
<div id='navitemsub'>
<a href='/admin/server/sihua1.com/muc/'>Multi-User Chat</a>
</div>
</li>
<li>
<div id='navitem'>
<a href='/admin/nodes/'>Nodes</a>
</div>
</li>
<li>
<div id='navitem'>
<a href='/admin/stats/'>Statistics</a>
</div>
</li>
<li>
<div id='navitem'>
<a href='/admin/muc/'>Multi-User Chat</a>
</div>
</li>
</ul>
</div>
<div id='content'>
<h1>Online Users</h1>
<a href='../user/wxin/'>wxin@sihua1.com</a>
<br/>
</div>
<div id='clearcopyright'></div>
</div>
<div id='copyrightouter'>
<div id='copyright'>
<p>
<a href='https://www.ejabberd.im/'>ejabberd</a> (c) 2002-2017
<a href='https://www.process-one.net/'>ProcessOne, leader in messaging and push solutions</a>
</p>
</div>
</div>
</body>
</html>
報文爲頁面html,分爲導航欄、內容等部分,我們關注內容部分即可;下面a標籤內爲我們想要的在線用戶賬號(多個用戶則爲多個a標籤)
<div id='content'>
<h1>Online Users</h1>
<a href='../user/wxin/'>wxin@sihua1.com</a>
<br/>
</div>
截取返回報文,獲得在線用戶賬號Jid
package com.onewaveinc.utils;
//省略了import部分
...
/**
 * Originally written to export an Excel sheet of online XMPP users; collecting the
 * online user accounts is one step of that job.
 * This class requests the ejabberd web admin "online users" page and extracts the
 * user accounts (JIDs) from the returned HTML.
 *
 * @author wxin
 */
public class ExportXMPPUserInfo {
    /** Heading that immediately precedes the list of user links in the admin page. */
    private static final String ONLINE_USERS_HEADING = "<h1>Online Users</h1>";
    /** Markup that closes the last user link and the content div. */
    private static final String USER_LIST_END = "</a><br/></div><div id='clearcopyright'></div>";
    /** Markup between two consecutive user links. */
    private static final String USER_SEPARATOR = "</a><br/>";
    /** End of the opening anchor tag ({@code <a href='../user/NAME/'>}); the JID follows it. */
    private static final String ANCHOR_OPEN_END = "/'>";

    private UserManager userManager;
    private UserChannelLoginManager userChannelLoginManager;
    private String path;
    // private final static String CONNECTED_USERS ="connected_users";
    /**
     * Admin credentials per cluster, held in memory:
     * cluster name -> Base64-encoded "account:password".
     */
    private Map<String, String> ejabberedUserMap=new HashMap<String, String>();
    /**
     * Admin page base URL per cluster: cluster name -> URL.
     */
    private Map<String, String> ejabberedUrlMap=new HashMap<String, String>();
    private String xmppManageListStr;
    private ChannelManager channelManager;
    private MemcachedFactory memcachedFactory;
    private static final Logger logger = Logger.getInstance(ExportXMPPUserInfo.class);
    private String zipPath;

    /**
     * Collects the online user JIDs of every XMPP cluster (one cluster per machine room).
     *
     * <p>This is the first step of the scheduled export, which is meant to cover: user
     * account, MAC address, login IP, login domain, set-top-box model and version, IP of
     * the node the user is logged in on, login time and login duration (now minus login
     * time). Only the JID collection is implemented here; the JID list of each cluster is
     * where the later export steps would continue.
     *
     * <p>Failures are logged and never propagated to the caller.
     */
    public void getEjabberdJid() {
        try {
            List<Channel> channelList = null;
            try {
                channelList = channelManager.findChannelIdList();
            } catch (Exception e) {
                logger.error("Failed to load the channel (cluster) list", e);
            }
            if (channelList == null) {
                return;
            }
            for (Channel channel : channelList) {
                String serverName = channel.getName();
                String serverDomain = channel.getHost();
                logger.info("此次處理的集羣名稱爲:" + serverName);

                String baseUrl = ejabberedUrlMap.get(serverName);
                String userNameAndPwd = ejabberedUserMap.get(serverName);
                if (baseUrl == null || userNameAndPwd == null) {
                    // Without this guard the request URL would start with the text "null".
                    logger.info("No ejabberd admin URL/credentials configured for cluster "
                            + serverName + ", skipped");
                    continue;
                }

                // Fetch all online user accounts of this cluster.
                String urlAddress = baseUrl + "server/" + serverDomain + "/online-users";
                String respStr = HttpUtil.sendPost(urlAddress, userNameAndPwd);
                List<String> jidList = getJidList(respStr);
                logger.debug("Cluster " + serverName + " online user count: " + jidList.size());
            }
        } catch (Exception e) {
            logger.error("Failed to collect online users from ejabberd", e);
        }
    }

    /**
     * Extracts the user JIDs from the HTML of the ejabberd "online users" admin page.
     *
     * <p>The text is expected to contain no line breaks between tags, i.e. the user list
     * looks like
     * {@code <h1>Online Users</h1><a href='../user/a/'>a@host</a><br/>...</div><div id='clearcopyright'></div>}.
     *
     * @param respStr response body of the online-users page; may be {@code null} or empty
     * @return the JIDs in page order; an empty list when the page has no user list or
     *         does not have the expected layout
     */
    public List<String> getJidList(String respStr) {
        List<String> jidList = new ArrayList<String>();
        if (respStr == null) {
            return jidList;
        }
        // Test for -1 BEFORE adding the heading length, otherwise a missing heading
        // can never be detected.
        int headingIndex = respStr.indexOf(ONLINE_USERS_HEADING);
        if (headingIndex == -1) {
            return jidList;
        }
        int listStart = headingIndex + ONLINE_USERS_HEADING.length();
        int listEnd = respStr.indexOf(USER_LIST_END, listStart);
        if (listEnd <= listStart) {
            return jidList;
        }

        String usersStr = respStr.substring(listStart, listEnd);
        logger.debug("在線用戶列表爲:"+ usersStr);
        String[] infoArr = usersStr.split(USER_SEPARATOR);
        logger.debug("按照</a>進行拆分用戶列表" + Arrays.toString(infoArr));
        for (String userInfo : infoArr) {
            int anchorEnd = userInfo.indexOf(ANCHOR_OPEN_END);
            if (anchorEnd == -1) {
                // Not a user link; skip it instead of cutting the text at a bogus offset.
                continue;
            }
            String jid = userInfo.substring(anchorEnd + ANCHOR_OPEN_END.length());
            logger.debug("正在處理用戶JID爲"+jid);
            jidList.add(jid);
        }
        return jidList;
    }

    /**
     * Loads the admin URL and credentials of every ejabberd cluster into memory.
     * Runs at initialisation (init-method="queryEjabberedNodes").
     *
     * <p>{@code xmppManageListStr} format: entries separated by an ASCII semicolon, each
     * entry being {@code clusterName,adminAccount,adminPassword=adminBaseUrl}, where
     * clusterName is the cluster name used in the imoss system, e.g.
     * {@code xmpp,admin@example.com,secret=http://10.223.138.137:5280/admin/}.
     *
     * <p>A malformed entry is logged and skipped; the remaining entries are still loaded.
     */
    public void queryEjabberedNodes(){
        if (!StringUtils.isNotBlank(xmppManageListStr)) {
            return;
        }
        String[] entries = xmppManageListStr.split(";");
        for (int i = 0; i < entries.length; i++) {
            try {
                String[] arr = entries[i].split("=");
                String[] credentials = arr[0].split(",");
                if (arr.length < 2 || credentials.length < 3) {
                    // Log only the position: the entry text itself contains the password.
                    logger.info("Skipped malformed xmpp.management.url.list entry #" + (i + 1));
                    continue;
                }
                String clusterName = credentials[0];
                String account = credentials[1];
                String password = credentials[2];
                String adminUrl = arr[1];
                // The password is deliberately kept out of the log.
                logger.info("cluster=" + clusterName + ",account=" + account + ",url=" + adminUrl);
                ejabberedUrlMap.put(clusterName, adminUrl);
                String encodeUserAndPwd = Base64Uitl.encode((account + ":" + password).getBytes());
                ejabberedUserMap.put(clusterName, encodeUserAndPwd);
            } catch (Exception e) {
                logger.error("處理配置項xmpp.management.url.list產生異常!",e);
            }
        }
    }
    // getters and setters omitted
}
補充說明
ExportXMPPUserInfo.queryEjabberedNodes()在配置中被配置爲init-method,在初始化階段就會運行:將各集羣的請求url、賬號密碼提前寫入map中。
配置
<bean id="contbiz.imoss.exportXMPPUserInfo"
class="com.onewaveinc.utils.ExportXMPPUserInfo" init-method="queryEjabberedNodes">
...
</bean>