先說一下需求:由於前置機每天產生數百 GB 的日誌報文,這些日誌只有在現場反映出現問題之後,纔會被用來查找具體原因,因此需要將前置機上的大量日誌轉存到 DataHub 上(DataHub 會自動向 ODPS(MaxCompute)轉存)。本來打算把寫入 DataHub 的代碼直接融入前置機,讓前置機實時往 DataHub 寫入,但由於一些歷史原因不能這樣做;考慮到業務對實時性要求不強,最後自己通過 Java 程序實現了日誌到 DataHub 的轉存——這個程序運行數月之後,現在已經停用,目前日誌都是通過 ELK 實現實時保存了,不過在 DataHub 到 ELK 的過渡期中,這段程序圓滿地完成了它的工作!
程序的結構圖如下:
/**
* 前置日誌入dataHub入口程序
* 參數爲日誌所在文件夾目錄(不以“/”結尾),參數可爲多個日誌目錄,空格分開
* @author yangcheng
* 2018年5月8日
*/
public class GetDataV2 {
public static ConcurrentHashMap<String, Integer> localCache = new ConcurrentHashMap<String, Integer>();
public static void main(String[] args) {
System.out.println("服務器可用線程數:"+Runtime.getRuntime().availableProcessors());
// DateFormat format = new SimpleDateFormat("HH");
DateFormat format2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
int threadNum = 16;//Runtime.getRuntime().availableProcessors();
ThreadPoolExecutor executor =
new ThreadPoolExecutor(threadNum, 100, 1, TimeUnit.MINUTES, new LinkedBlockingQueue<Runnable>());
LinkedBlockingQueue<String> fileNameQueue = new LinkedBlockingQueue<String>();
executor.execute(new DealWithFailure(args[0], fileNameQueue));
while(true){
//打印當前時間
System.out.println("開始執行時間:"+format2.format(new Date()));
//計數器-計算每天0點30分到1點30分之間報文總數
AtomicLong counter = new AtomicLong(0);
DatahubClient client = DataHubUtil.getClient();
CountDownLatch cuCountDownLatch =new CountDownLatch(threadNum);
for (String path : args) {
String filePath = path;//路徑
System.out.println("開始執行。。。。。。。。。。。。。。。。。。"+filePath+"——————啓用線程數:"+threadNum);
//獲取客戶端 生產數據
try {
File file = new File(filePath);
String[] files = file.list();
//執行更名操作
for (String fileName : files) {
if(fileName.endsWith(".log")){
System.out.println(".log: "+fileName);
continue;
}
if(!isQz(fileName)){
System.out.println("isQz: "+fileName);
continue;
}
String pathName = filePath+"/"+fileName;
String newPathName = filePath+"/back_"+fileName;
String[] arg = {"/bin/sh","-c","mv "+pathName+" "+ newPathName};
Runtime.getRuntime().exec(arg);
//更名之後的文件名
fileNameQueue.put(newPathName); //newPathName
}
} catch (Exception e) {
e.printStackTrace();
}
}
for(int i = 0; i < threadNum ; i++){
executor.execute(new Consumer(cuCountDownLatch,fileNameQueue, client,localCache));
}
try {
cuCountDownLatch.await();
getNum(localCache);
System.out.println("執行結束................");
} catch (InterruptedException e1) {
e1.printStackTrace();
}finally{
//關閉資源
DataHubUtil.closeAll(client);
}
try {
Thread.sleep(1000*60*30);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
private static void getNum(ConcurrentHashMap<String, Integer> localCache2) {
Set<String> keySet = localCache2.keySet();
for(String key : keySet){
Integer value = localCache2.get(key);
System.out.println("在 "+key+"時間點段"+"=============有"+value+"條");
}
}
public static boolean isQz(String fileName){
if(fileName.startsWith("confirm") ||fileName.startsWith("event") ){
return true;
}
return false;
}
/**
 * Utility for writing front-end log records into DataHub.
 * @author yangcheng
 * 2018-05-08
 */
public class DataHubUtil {
private static final String endpoint = "";
private static final String project = "";
private static final String topic = ""; // e.g. qz_698_day_real_monitor
private static final String accessId = "";
private static final String accessKey = "";
/**
 * Builds a DataHub client.
 * Callers must release it with {@link #closeAll(DatahubClient...)} when done.
 * @return a new DatahubClient
 */
public static DatahubClient getClient() {
	AliyunAccount account = new AliyunAccount(accessId, accessKey);
	DatahubConfiguration conf = new DatahubConfiguration(account, endpoint);
	return new DatahubClient(conf);
}
/** Maps one Datas bean onto a DataHub RecordEntry bound to the given shard. */
private static RecordEntry toRecordEntry(RecordSchema schema, Datas data, String shardId) {
	RecordEntry entry = new RecordEntry(schema);
	entry.setString("area_code", data.getArea_code());
	entry.setString("terminal_addr", data.getTerminal_addr());
	entry.setString("afn", data.getAfn());
	entry.setString("fn", data.getFn());
	entry.setString("content", data.getContent());
	entry.setString("m_type", data.getP_Type());
	entry.setTimeStampInDate("insert_time", data.getInsert_time());
	entry.setString("data_date", data.getDataDate());
	entry.setShardId(shardId);
	return entry;
}
/**
 * Uploads a batch of records to one shard, retrying up to 3 times on partial failure.
 * @param client  DataHub client
 * @param shardId target shard for the whole batch
 * @param dataList records to upload
 * @throws RuntimeException with message "retryFailure" when all retries fail
 */
public static void add2DataHub(DatahubClient client, String shardId, List<Datas> dataList) {
	String threadName = Thread.currentThread().getName();
	RecordSchema schema = client.getTopic(project, topic).getRecordSchema();
	List<RecordEntry> recordEntries = new ArrayList<RecordEntry>(dataList.size());
	for (Datas data : dataList) {
		recordEntries.add(toRecordEntry(schema, data, shardId));
	}
	try {
		Thread.sleep(1000); // deliberate one-batch-per-second throttle
	} catch (InterruptedException e) {
		Thread.currentThread().interrupt(); // restore the interrupt flag
		e.printStackTrace();
	}
	// putRecords does NOT throw on failure — it reports a failed-record count.
	int i = client.putRecords(project, topic, recordEntries).getFailedRecordCount();
	if (i > 0) {
		System.out.println(threadName+"首次上傳失敗,失敗數爲=="+i+"兩秒之後重複上傳");
		try {
			Thread.sleep(2000);
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
			e.printStackTrace();
		}
		retry(client, recordEntries, 3);
	}
}
/**
 * Retry loop for a failed putRecords call.
 * @param client        DataHub client
 * @param recordEntries records to re-send
 * @param retryTimes    maximum number of attempts
 * @throws RuntimeException with the exact message "retryFailure" when every
 *         attempt fails — Consumer matches that message to keep the source file
 */
public static void retry(DatahubClient client, List<RecordEntry> recordEntries, int retryTimes) {
	boolean suc = false;
	String threadName = Thread.currentThread().getName();
	while (retryTimes != 0) {
		retryTimes = retryTimes - 1;
		int resulFNum = client.putRecords(project, topic, recordEntries).getFailedRecordCount();
		if (resulFNum > 0) {
			System.out.println(threadName+"重複上傳至dataHub......");
			continue;
		}
		suc = true;
		System.out.println(threadName+"重複上傳至dataHub成功......");
		break;
	}
	if (!suc) {
		System.out.println(threadName+"重複上傳至dataHub失敗......");
		throw new RuntimeException("retryFailure");
	}
}
/**
 * Reads records back from DataHub and prints them as JSON.
 * @param client    DataHub client
 * @param shardId   shard to read from
 * @param timeStart log start time (epoch millis) used to position the cursor
 * @param limitNum  maximum number of records to fetch
 */
public static void getRecord(DatahubClient client, String shardId, Long timeStart, int limitNum) {
	RecordSchema schema = client.getTopic(project, topic).getRecordSchema();
	GetCursorResult cursorResult = client.getCursor(project, topic, shardId, timeStart);
	GetRecordsResult cursorResul = client.getRecords(project, topic, shardId, cursorResult.getCursor(), limitNum, schema);
	for (RecordEntry element : cursorResul.getRecords()) {
		System.out.println(element.toJsonNode());
	}
}
/**
 * Derives a shard id from the area code and terminal address.
 * @param area_code     administrative area code
 * @param terminal_addr terminal address
 * @return shard id in the range "0".."9"
 */
public static String getShardId(String area_code, String terminal_addr) {
	// floorMod keeps the result in [0, 9] even when hashCode() is negative;
	// the original plain "% 10" could yield e.g. "-3", an invalid shard id.
	int id = Math.floorMod((area_code + terminal_addr).hashCode(), 10);
	return String.valueOf(id);
}
/**
 * Best-effort close of DataHub clients.
 * @param client clients to close; null entries are skipped
 */
public static void closeAll(DatahubClient... client) {
	for (DatahubClient cl : client) {
		if (cl == null) {
			// Null-check must happen BEFORE close(); the original checked
			// inside the catch block, after the NPE had already been thrown.
			continue;
		}
		try {
			cl.close();
		} catch (Exception ignored) {
			// Best-effort close; nothing useful to do on failure.
		}
	}
}
/**
 * Best-effort close of readers.
 * @param brs readers to close; null entries are skipped
 */
public static void closeFileStream(BufferedReader... brs) {
	for (BufferedReader bufferedReader : brs) {
		if (bufferedReader == null) {
			continue; // check before use, not inside the catch
		}
		try {
			bufferedReader.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}
/**
 * Bulk upload; each Datas carries its own shard id.
 * Logs the failed-record count instead of silently discarding it —
 * discarding the count was exactly how data loss originally went unnoticed.
 * @param client   DataHub client
 * @param dataList records to upload
 */
public static void addBacthData(DatahubClient client, List<Datas> dataList) {
	RecordSchema schema = client.getTopic(project, topic).getRecordSchema();
	List<RecordEntry> recordEntries = new ArrayList<RecordEntry>(dataList.size());
	for (Datas data : dataList) {
		recordEntries.add(toRecordEntry(schema, data, data.getShardId()));
	}
	int failed = client.putRecords(project, topic, recordEntries).getFailedRecordCount();
	if (failed > 0) {
		System.out.println(Thread.currentThread().getName() + "批量上傳失敗數==" + failed);
	}
}
}
/**
 * Background thread that rescues failed files: scans the log directory for
 * files whose name contains "_failed" (renamed there by Consumer after an
 * upload failure) and puts their full paths back onto the work queue.
 * @author yangcheng
 * @date 2018-11-20
 * @version V1.0
 */
public class DealWithFailure implements Runnable {
/** Directory to scan for "*_failed*" files. */
String filePath = null;
/** Queue the Consumer worker threads take file names from. */
LinkedBlockingQueue<String> fileNameQueue = null;
public DealWithFailure(String filePath, LinkedBlockingQueue<String> fileNameQueue) {
	System.out.println("守護線程啓動");
	this.fileNameQueue = fileNameQueue;
	this.filePath = filePath;
}
@Override
public void run() {
	String surThreadName = Thread.currentThread().getName();
	while (true) {
		try {
			String[] files = new File(filePath).list();
			if (files == null) {
				// Directory missing or unreadable: skip this round instead of
				// throwing an NPE on every cycle.
				files = new String[0];
			}
			for (String fileName : files) {
				if (fileName.contains("_failed")) {
					// Re-queue every file that previously failed to upload.
					fileNameQueue.put(filePath + "/" + fileName);
					System.out.println(surThreadName+"獲取到並重新添加到隊列中的的執行失敗的文件=="+filePath+"/"+fileName);
				}
			}
			// Sleep slightly longer than the main loop's 30-minute cycle so this
			// thread scans at most once per round.
			Thread.sleep(1000 * 60 * 31);
		} catch (InterruptedException e) {
			// Restore the flag and exit — the original broad catch swallowed
			// interrupts, which made this thread impossible to stop.
			Thread.currentThread().interrupt();
			return;
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}
}
/**
 * Worker thread that drains renamed log files from the queue, turns each line
 * into a Datas record, and uploads batches of 1000 to DataHub.
 * Fully uploaded files are deleted; files whose upload exhausted the retries
 * are renamed to "<name>_failed<n>" so DealWithFailure re-queues them.
 * @author yangcheng
 * 2018-05-17
 */
public class Consumer implements Runnable {
LinkedBlockingQueue<String> fileNameQueue = null;
DatahubClient client = null;
CountDownLatch cuCountDownLatch = null;
// Counter used by the alternate constructor to total records in a time window.
AtomicLong counter = null;
// Shared cache (owned by GetDataV2); null when counting is disabled.
ConcurrentHashMap<String, Integer> localCache = null;
// Per-instance tally: only this worker's run() touches it; merged into
// localCache once, in the finally block.
Map<String, Integer> hashMap = Collections.synchronizedMap(new HashMap<String, Integer>());
public Consumer(CountDownLatch cuCountDownLatch, LinkedBlockingQueue<String> fileNameQueue, DatahubClient client, AtomicLong counter) {
	this.fileNameQueue = fileNameQueue;
	this.client = client;
	this.cuCountDownLatch = cuCountDownLatch;
	this.counter = counter;
}
/**
 * Variant that caches execution counts per timestamp.
 * @param cuCountDownLatch signalled once when this worker finishes
 * @param fileNameQueue    queue of full paths of files to process
 * @param client           shared DataHub client
 * @param localCache       shared timestamp -> count cache
 */
public Consumer(CountDownLatch cuCountDownLatch, LinkedBlockingQueue<String> fileNameQueue, DatahubClient client, ConcurrentHashMap<String, Integer> localCache) {
	this.fileNameQueue = fileNameQueue;
	this.client = client;
	this.cuCountDownLatch = cuCountDownLatch;
	this.localCache = localCache;
}
@Override
public void run() {
	String filePathName = null;
	long upFailedNum = 0; // suffix keeping "_failed" names unique within this worker
	try {
		while ((filePathName = this.fileNameQueue.poll()) != null) {
			System.out.println(Thread.currentThread().getName()+"開始讀取fileName======================="+filePathName);
			Date beginDate = new Date();
			BufferedReader br;
			try {
				// NOTE(review): FileReader uses the platform charset — assumed to
				// match the log encoding on the deployment host; confirm if ported.
				br = new BufferedReader(new FileReader(filePathName));
			} catch (FileNotFoundException e1) {
				e1.printStackTrace();
				continue; // file vanished between queueing and reading
			}
			List<Datas> dataList = new ArrayList<Datas>();
			String dataDate = new SimpleDateFormat("yyyyMMdd").format(beginDate);
			// Pick a shard in [0, 9]. The original RandomUtils.nextInt(0, 9) had an
			// EXCLUSIVE upper bound, so shard 9 could never be selected; this also
			// drops the commons-lang dependency in favour of the JDK.
			String shardId = String.valueOf(java.util.concurrent.ThreadLocalRandom.current().nextInt(10));
			boolean isCanBeRmove = true;
			String exceptionMsg = null;
			try {
				String line;
				while ((line = br.readLine()) != null) {
					// When counting is enabled, tally AFN=13/FN=161 frames by the
					// timestamp embedded at positions [1, 14) of the line.
					if (this.localCache != null && line.contains("AFN=13;FN=161")) {
						methodYQ(line.substring(1, 14));
					}
					Datas data = new Datas();
					data.setContent(line);
					data.setDataDate(dataDate);
					dataList.add(data);
					if (dataList.size() % 1000 == 0) {
						// Ship a full batch of 1000 records.
						DataHubUtil.add2DataHub(client, shardId, dataList);
						dataList = new ArrayList<Datas>();
					}
				}
				// Flush the final partial batch (< 1000 records).
				if (!dataList.isEmpty()) {
					DataHubUtil.add2DataHub(client, shardId, dataList);
				}
			} catch (Exception e) {
				exceptionMsg = e.getMessage();
				String threadName = Thread.currentThread().getName();
				System.out.println(threadName+"捕獲到dataHub上傳異常==="+exceptionMsg);
				isCanBeRmove = false; // keep the file so it can be retried later
				e.printStackTrace();
			} finally {
				DataHubUtil.closeFileStream(br);
			}
			Date endDate = new Date();
			System.out.println(Thread.currentThread().getName()+"已讀取文件名稱:"+filePathName+" ; 開始時間:"+beginDate+"結束時間:"+endDate+"..................");
			String pathName = filePathName;
			if (isCanBeRmove && exceptionMsg == null) {
				// Fully uploaded: delete via the File API instead of forking
				// "/bin/sh -c rm ..." — no shell-injection risk, handles any name.
				if (!new File(pathName).delete()) {
					System.out.println(Thread.currentThread().getName()+"刪除失敗:"+pathName);
				}
				System.out.println(Thread.currentThread().getName()+"刪除文件:"+pathName);
			} else {
				long thisNum = upFailedNum++; // keep renamed files distinct
				System.out.println(Thread.currentThread().getName()+"未刪除文件:"+pathName+";已更名爲:"+pathName+"_failed"+thisNum);
				if ("retryFailure".equals(exceptionMsg)) {
					// Retries exhausted: tag the file so DealWithFailure re-queues it.
					if (!new File(pathName).renameTo(new File(pathName + "_failed" + thisNum))) {
						System.out.println(Thread.currentThread().getName()+"更名失敗:"+pathName);
					}
				}
			}
		}
	} finally {
		// Merge this worker's tallies into the shared cache, then signal done.
		methodYQ2(hashMap);
		cuCountDownLatch.countDown();
	}
}
/**
 * Folds this worker's per-timestamp counts into the shared cache.
 * Uses an atomic putIfAbsent/replace loop — the original containsKey/put pair
 * was a read-modify-write race: two workers finishing together could overwrite
 * each other's increments and lose counts.
 * @param hashMap2 this worker's timestamp -> count tally
 */
private void methodYQ2(Map<String, Integer> hashMap2) {
	if (localCache != null) {
		for (Map.Entry<String, Integer> entry : hashMap2.entrySet()) {
			String key = entry.getKey();
			int add = entry.getValue();
			while (true) {
				Integer prev = localCache.putIfAbsent(key, add);
				if (prev == null || localCache.replace(key, prev, prev + add)) {
					break; // either inserted fresh, or CAS-updated successfully
				}
				// Another worker changed the value concurrently — retry.
			}
		}
	}
}
/**
 * Tests whether a "yyyy-MM-dd HH:mm:ss" string falls strictly between
 * today 00:30:00 and today 01:30:00.
 * @param str timestamp string to test
 * @return true when str lies inside today's 00:30-01:30 window
 */
private boolean strDog(String str) {
	String today = new SimpleDateFormat("yyyy-MM-dd").format(new Date());
	boolean afterStart = (today + " 00:30:00").compareTo(str) < 0;
	boolean beforeEnd = str.compareTo(today + " 01:30:00") < 0;
	return str.contains(today) && afterStart && beforeEnd;
}
/**
 * Increments the count for one timestamp in this worker's private tally.
 * @param timeStr timestamp key extracted from the log line
 */
private void methodYQ(String timeStr) {
	if (hashMap != null) {
		Integer i = hashMap.get(timeStr);
		hashMap.put(timeStr, i == null ? 1 : i + 1);
	}
}
}
程序編寫過程中的感想:
一開始覺得 DataHub 是大數據環境,尤其是阿里開發的,所以對它的吞吐量和性能嚴重高估,覺得 DataHub 多大的併發都能扛住。事實證明,實際使用過程中會有很多不確定性,DataHub 也沒有想象中那麼能扛。我這個程序當時部署了 20 個,且每個程序開 16 個線程,運行了一段時間沒有發現什麼問題,程序一直不報錯,結果統計數據庫中的日誌丟了大量數據。一開始懷疑是程序讀取日誌部分有錯,後來通過增加計數器發現,固定時間段內程序從 log 中讀取的報文數量是對的,但入 DataHub 之後就不對了。最後通過查詢 API 才發現,putRecords 有一個失敗數量返回值——這個方法執行失敗是不報錯的,而是返回當前執行失敗的數量,於是我加上了重試機制!程序再次發佈之後,通過日誌發現,基本上百分之八十以上的執行過程都會走到重試機制——後續通過配置 shardId 以及 DataHub 的參數,情況有所好轉。