Batch-loading logs into a DataHub environment (non-real-time)

First, the requirements. The front-end processors generate several hundred GB of log messages every day, and these logs are only consulted after a problem is reported in the field, so the bulk of them needed to be moved off the front-end machines into DataHub (DataHub then automatically archives them to ODPS/MaxCompute). The original plan was to embed the DataHub client in the front-end processor itself so it could write to DataHub in real time, but for historical reasons that was not an option. Since the business had no strong real-time requirement, I implemented the log-to-DataHub transfer as a standalone Java program. That program ran for several months and has since been retired; the logs are now shipped in real time through ELK. During the transition from DataHub to ELK, though, it did its job well.

The program has three moving parts: an entry class (GetDataV2) that renames new log files and queues them, worker threads (Consumer) that read the queued files and push records to DataHub through the DataHubUtil helper, and a recovery thread (DealWithFailure) that re-queues files whose upload failed. The entry program:

/**
 * Entry program for loading front-end processor logs into DataHub.
 * Arguments: one or more log directories (no trailing "/"), separated by spaces.
 * @author yangcheng
 * 2018-05-08
 */
public class GetDataV2 {
	public static ConcurrentHashMap<String, Integer> localCache = new ConcurrentHashMap<String, Integer>();
	
	public static void main(String[] args) {
		System.out.println("服務器可用線程數:"+Runtime.getRuntime().availableProcessors());
//		DateFormat format = new SimpleDateFormat("HH");
		DateFormat format2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
		int threadNum = 16;//Runtime.getRuntime().availableProcessors();
		ThreadPoolExecutor executor = 
				new ThreadPoolExecutor(threadNum, 100, 1, TimeUnit.MINUTES, new LinkedBlockingQueue<Runnable>());
		LinkedBlockingQueue<String> fileNameQueue = new LinkedBlockingQueue<String>();
		//recovery thread: watches the first log directory for files marked _failed
		executor.execute(new DealWithFailure(args[0], fileNameQueue));
		while(true){
			//print the current time
			System.out.println("Sweep started at: "+format2.format(new Date()));
			//counter: total messages between 00:30 and 01:30 each day (unused here; see localCache)
			AtomicLong counter = new AtomicLong(0);
			
			DatahubClient client = DataHubUtil.getClient();
			CountDownLatch cuCountDownLatch =new CountDownLatch(threadNum);
			for (String path : args) {
				String filePath = path;//log directory
				System.out.println("Processing directory: "+filePath+" ------ threads in use: "+threadNum);
				//get a client and produce the data
				try {
					File file = new File(filePath);
					String[] files = file.list();
					//rename each eligible file (back_ prefix) so the next sweep does not pick it up again
					for (String fileName : files) {
						if(fileName.endsWith(".log")){
							System.out.println(".log: "+fileName);
							continue;
						}
						if(!isQz(fileName)){
							System.out.println("isQz: "+fileName);
							continue;
						}
						String pathName = filePath+"/"+fileName;
						String newPathName = filePath+"/back_"+fileName;
					    String[] arg = {"/bin/sh","-c","mv "+pathName+" "+ newPathName};
					    //wait for the rename to finish before queueing the new name
					    Runtime.getRuntime().exec(arg).waitFor();
					    fileNameQueue.put(newPathName);
					}
				} catch (Exception e) {
					e.printStackTrace();
				}
			}
			for(int i = 0; i < threadNum ; i++){
				executor.execute(new Consumer(cuCountDownLatch,fileNameQueue, client,localCache));
			}
			try {
				cuCountDownLatch.await();
				getNum(localCache);
				System.out.println("Sweep finished................");
			} catch (InterruptedException e1) {
				e1.printStackTrace();
			}finally{
				//release resources
				DataHubUtil.closeAll(client);
			}

			try {
				Thread.sleep(1000*60*30);//wait 30 minutes before the next sweep
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
		}
		
	}
	
	private static void getNum(ConcurrentHashMap<String, Integer> localCache2) {
		Set<String> keySet = localCache2.keySet();
		for(String key : keySet){
			Integer value = localCache2.get(key);
			System.out.println("在 "+key+"時間點段"+"=============有"+value+"條");
		}
	}

	public static boolean isQz(String fileName){
		if(fileName.startsWith("confirm") ||fileName.startsWith("event") ){
			return true;
		}
		return false;
	}
}
/**
 * Utility class for loading front-end logs into DataHub.
 * @author yangcheng
 * 2018-05-08
 */
public class DataHubUtil {
	private static final String endpoint="";
	private static final String project="";
	private static final String topic= "";  //e.g. qz_698_day_real_monitor
	private static final String accessId="";
	private static final String accessKey="";
	
	/**
	 * Get a DataHub client.
	 * Callers must release it with closeAll() once finished.
	 * @return DatahubClient
	 */
	public static DatahubClient getClient(){
		AliyunAccount account = new AliyunAccount(accessId, accessKey);
		DatahubConfiguration conf = new DatahubConfiguration(account, endpoint);
		//client
		DatahubClient client = new DatahubClient(conf);
		return client;
	}

	
	/**
	 * Append a batch of records to DataHub.
	 * If any records fail, the whole batch is re-sent after 2 seconds via retry().
	 */
	public static void add2DataHub(DatahubClient client, String shardId ,List<Datas> dataList){
		//record
		String threadName = Thread.currentThread().getName();
		RecordSchema schema = client.getTopic(project, topic).getRecordSchema();
		List<RecordEntry> recordEntries = new ArrayList<RecordEntry>();
		for (Datas data : dataList) {
			RecordEntry entry = new RecordEntry(schema);
			entry.setString("area_code", data.getArea_code());
			entry.setString("terminal_addr", data.getTerminal_addr());
			entry.setString("afn", data.getAfn());
			entry.setString("fn", data.getFn());
			entry.setString("content", data.getContent());
			entry.setString("m_type", data.getP_Type());
			entry.setTimeStampInDate("insert_time", data.getInsert_time());
			entry.setString("data_date", data.getDataDate());
			entry.setShardId(shardId);
			recordEntries.add(entry);
		}
		try {
			Thread.sleep(1000);//throttle: at most one batch per second
		} catch (InterruptedException e) {
			e.printStackTrace();
		}
		int i = client.putRecords(project, topic, recordEntries).getFailedRecordCount();
		
		if(i>0){
			System.out.println(threadName+"首次上傳失敗,失敗數爲=="+i+"兩秒之後重複上傳");
			try {
				Thread.sleep(2000);
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
			retry(client,recordEntries,3);
			
		}
	}
	/**
	 * Retry mechanism used after a failed upload.
	 * Note: it re-sends the whole batch, so already-accepted records are written again.
	 * @param recordEntries records to re-send
	 */
	public static void retry(DatahubClient client,List<RecordEntry> recordEntries,int retryTimes){
		boolean suc = false;
		String threadName = Thread.currentThread().getName();
		while(retryTimes != 0){
			retryTimes = retryTimes - 1 ;
			int resulFNum = client.putRecords(project, topic, recordEntries).getFailedRecordCount();
			if(resulFNum > 0){
				System.out.println(threadName+"重複上傳至dataHub......");
				continue;
			}
			suc = true;
			System.out.println(threadName+"重複上傳至dataHub成功......");
			break;
		}
		//all retries failed
		if(!suc){
			System.out.println(threadName+" re-upload to DataHub failed");
			throw new RuntimeException("retryFailure");
		}
		
	}
	
	
	/**
	 * Fetch records from DataHub and print them.
	 * @param client client
	 * @param shardId shard id
	 * @param timeStart start time of the logs
	 * @param limitNum number of records to fetch
	 */
	public static void getRecord(DatahubClient client , String shardId,Long timeStart,int limitNum){

		RecordSchema schema = client.getTopic(project, topic).getRecordSchema();
		
		GetCursorResult cursorResult=client.getCursor(project, topic, shardId, timeStart);
		GetRecordsResult cursorResul=client.getRecords(project, topic, shardId, cursorResult.getCursor(), limitNum, schema);
		for (RecordEntry element : cursorResul.getRecords()) {
			System.out.println(element.toJsonNode());
		}
		
	}
	/**
	 * Derive a shard id from the area code and terminal address.
	 * @param area_code
	 * @param terminal_addr
	 * @return shard id
	 */
	public static String getShardId(String area_code,String terminal_addr){
		//floorMod keeps the id in 0..9 even when hashCode() is negative
		int id = Math.floorMod((area_code+terminal_addr).hashCode(), 10);
		return String.valueOf(id);
	}
	
	/**
	 * Release DataHub resources.
	 * @param client the DatahubClient instances to close
	 */
	public static void closeAll(DatahubClient... client){
		for (DatahubClient cl : client) {
			try {
				cl.close();
			} catch (Exception e) {
				//ignore: the client is being discarded anyway
			}
		}
	}
	public static void closeFileStream(BufferedReader... brs){
		for (BufferedReader bufferedReader : brs) {
			try {
				if(bufferedReader != null){
					bufferedReader.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}
	
	/**
	 * Batch insert into DataHub using each record's own shard id.
	 * @param client 
	 * @param dataList records to insert
	 */
	public static void addBacthData(DatahubClient client, List<Datas> dataList) {
		//record
		RecordSchema schema = client.getTopic(project, topic).getRecordSchema();
		List<RecordEntry> recordEntries = new ArrayList<RecordEntry>();
		for (Datas data : dataList) {
			RecordEntry entry = new RecordEntry(schema);
			entry.setString("area_code", data.getArea_code());
			entry.setString("terminal_addr", data.getTerminal_addr());
			entry.setString("afn", data.getAfn());
			entry.setString("fn", data.getFn());
			entry.setString("content", data.getContent());
			entry.setString("m_type", data.getP_Type());
			entry.setTimeStampInDate("insert_time", data.getInsert_time());
			entry.setString("data_date", data.getDataDate());
			entry.setShardId(data.getShardId());
			recordEntries.add(entry);
		}
		
		//note: the failed-record count is returned but ignored here
		client.putRecords(project, topic, recordEntries).getFailedRecordCount();
		
	}
}
/**
 * Recovery thread: re-queues files whose upload previously failed.
 * @author yangcheng  
 * @date 2018-11-20  
 * @version V1.0
 */
public class DealWithFailure implements Runnable{
	String filePath = null;
	//queue from which the Consumer threads take file names
	LinkedBlockingQueue<String> fileNameQueue = null;
	public DealWithFailure(String filePath,LinkedBlockingQueue<String> fileNameQueue) {
		System.out.println("recovery thread started");
		this.fileNameQueue = fileNameQueue;
		this.filePath = filePath;
	}

	@Override
	public void run() {
		String surThreadName = Thread.currentThread().getName();
		while(true){
			try {
				File file = new File(filePath);
				String[] files = file.list();
				//scan for files whose upload previously failed
				for (String fileName : files) {
					if(fileName.contains("_failed")){
						//push the full path of each failed file back onto the queue
						fileNameQueue.put(filePath+"/"+fileName);
						System.out.println(surThreadName+" re-queued previously failed file: "+filePath+"/"+fileName);
					}
				}
				Thread.sleep(1000*60*31);//31-minute sleep: at most one scan per 30-minute worker cycle
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
	}
}
/**
 * Worker thread.
 * Reads the renamed log files and loads them into DataHub concurrently.
 * @author yangcheng
 * 2018-05-17
 */
public class Consumer implements Runnable{
	LinkedBlockingQueue<String> fileNameQueue = null;
	DatahubClient client = null;
	CountDownLatch cuCountDownLatch = null;
	//counter for tallying messages in a fixed time window; it would otherwise need clearing between runs
	AtomicLong counter = null;
	ConcurrentHashMap<String, Integer> localCache = null;
	Map<String,Integer> hashMap = Collections.synchronizedMap(new HashMap<String,Integer>());
	
	public Consumer(CountDownLatch cuCountDownLatch, LinkedBlockingQueue<String> fileNameQueue,DatahubClient client,AtomicLong counter) {
		
		this.fileNameQueue = fileNameQueue;
		this.client = client;
		this.cuCountDownLatch = cuCountDownLatch;
		this.counter = counter;
	}
	/**
	 * Variant that buffers per-hour counts locally.
	 * @param cuCountDownLatch
	 * @param fileNameQueue
	 * @param client
	 * @param localCache
	 */
	public Consumer(CountDownLatch cuCountDownLatch, LinkedBlockingQueue<String> fileNameQueue,DatahubClient client,ConcurrentHashMap<String, Integer> localCache ) {
		
		this.fileNameQueue = fileNameQueue;
		this.client = client;
		this.cuCountDownLatch = cuCountDownLatch;
		this.localCache = localCache;
	}
	@Override
	public void run() {
		String filePathName = null;
		AtomicLong upFailedNum = new AtomicLong();
		try{
			while((filePathName=this.fileNameQueue.poll()) != null){
				
				System.out.println(Thread.currentThread().getName()+"開始讀取fileName======================="+filePathName);
				Date beginDate =new Date();
				FileReader fr = null;
				try {
					fr = new FileReader(filePathName);
				} catch (FileNotFoundException e1) {
					e1.printStackTrace();
					continue;
				}
				BufferedReader br = new BufferedReader(fr);
			    String line = null;
			    
			    List<Datas> dataList = new ArrayList<Datas>();
			    DateFormat dataDateFormat = new SimpleDateFormat("yyyyMMdd");
		    	String dataDate = dataDateFormat.format(beginDate);
		    	//pick a random shard id in 0..9 (the topic has 10 shards; see getShardId)
				String shardId=String.valueOf(RandomUtils.nextInt(0,10));
				
				boolean canBeRemoved = true;
				String exceptionMsg = null;
				try {
					while((line = br.readLine()) != null ){
						
						if(this.localCache != null ){
							//count AFN=13;FN=161 messages by the time field at the start of the line
							if(line.contains("AFN=13;FN=161")){
								String timeStr = line.substring(1, 14);
								methodYQ(timeStr);
							}
						}
						Datas data = new Datas();
						data.setContent(line);
						data.setDataDate(dataDate);
						dataList.add(data);
				    	if(dataList.size() % 1000 == 0){
				    		//flush a full batch of 1000 records
				    		DataHubUtil.add2DataHub(client, shardId ,dataList);
							dataList = new ArrayList<Datas>();
				    	}
				    }
					//flush whatever is left (fewer than 1000 records) in one batch
					if(dataList.size() != 0){
				    	DataHubUtil.add2DataHub(client, shardId ,dataList);
				    }
				} catch (Exception e) {
					exceptionMsg = e.getMessage();
					String threadName = Thread.currentThread().getName();
					System.out.println(threadName+" caught DataHub upload exception: "+exceptionMsg);
					canBeRemoved = false;
					e.printStackTrace();
				}finally{
					DataHubUtil.closeFileStream(br);
				}
			    //delete the log file once it has been fully uploaded
			    Date endDate =new Date();
			    System.out.println(Thread.currentThread().getName()+" finished file: "+filePathName+"  ;  started: "+beginDate+", ended: "+endDate);
			    String pathName = filePathName;
			    String[] arg = {"/bin/sh","-c","rm "+pathName};
			    if(canBeRemoved && exceptionMsg == null){
			    	try {
						Runtime.getRuntime().exec(arg);
					} catch (Exception e) {
						e.printStackTrace();
					}
			    	System.out.println(Thread.currentThread().getName()+" deleted file: "+pathName);
			    }else{
			    	long thisNum = upFailedNum.getAndIncrement();//suffix counter, avoids name collisions
			    	System.out.println(Thread.currentThread().getName()+" kept file: "+pathName);
			    	if("retryFailure".equals(exceptionMsg)){
			    		String[] args = {"/bin/sh","-c","mv "+pathName+" "+ pathName+"_failed"+thisNum};
					    try {
							//wait for the rename so DealWithFailure sees the _failed suffix
							Runtime.getRuntime().exec(args).waitFor();
						} catch (Exception e) {
							e.printStackTrace();
						}
			    		System.out.println(Thread.currentThread().getName()+" renamed it to: "+pathName+"_failed"+thisNum);
			    	}
			    }
			}
		}finally{
			//merge this worker's counts into the main program's cache
			methodYQ2(hashMap);
			cuCountDownLatch.countDown();
		}
	}
	/**
	 * Merge this worker's counts into the main program's cache.
	 * @param hashMap2 local per-bucket counts
	 */
	private void methodYQ2(Map<String, Integer> hashMap2) {
		
		if(localCache!=null){
			Set<String> keySet = hashMap2.keySet();
			for (String key : keySet) {
				if(localCache.containsKey(key)){
					Integer ii = hashMap2.get(key);
					localCache.put(key, ii+localCache.get(key));
				}else{
					localCache.put(key, hashMap2.get(key));
				}
			}
		}
	}
	/**
	 * Check whether a time string falls between 00:30 and 01:30 of the current day.
	 * @return true if the string is inside the window
	 */
	private boolean strDog(String str){
		String s = new SimpleDateFormat("yyyy-MM-dd").format(new Date());
		String start = " 00:30:00";
		String end = " 01:30:00";
		int a = (s+start).compareTo(str);
		int b = str.compareTo(s+end);
		
		return str.contains(s) && a<0 && b<0;
	}

	/**
	 * Increment the count for the given time bucket.
	 */
	private void methodYQ(String timeStr){
		if(hashMap!=null){
			if(hashMap.containsKey(timeStr)){
			  Integer i = hashMap.get(timeStr);
			  hashMap.put(timeStr, i+1);
			}else{
			  hashMap.put(timeStr,1);
			}
		}
	}
}

Some reflections from writing this program:

At first I assumed that DataHub, as a big-data service built by Alibaba no less, could absorb any level of concurrency, so I badly overestimated its throughput and performance. Real use proved there is plenty of uncertainty, and DataHub is not as unbreakable as I imagined. I deployed 20 instances of this program, each running 16 threads. Everything ran for a while with no errors in the program's output, yet the statistics database showed a large amount of log data missing. I first suspected the log-reading code, but after adding counters I found that the number of messages read from the logs in a fixed time window was correct; the counts only went wrong after the data was sent to DataHub. Digging into the API, I found what I had initially missed: putRecords does not throw when records fail, it returns the number of failed records. So I added the retry mechanism. After redeploying, the logs showed that over 80% of the runs hit the retry path; configuring the shard ids and tuning the DataHub parameters later improved the situation.
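In hindsight, the retry() method above carries a flaw that follows directly from this lesson: it re-sends the entire batch, so records DataHub had already accepted get written a second time. Below is a minimal sketch of a failed-records-only retry, written as if it lived in DataHubUtil. It assumes the SDK's PutRecordsResult exposes getFailedRecords() alongside getFailedRecordCount(); putWithRetry and MAX_RETRIES are illustrative names, not part of the original program.

	/**
	 * Sketch: re-send only the records DataHub rejected.
	 */
	public static void putWithRetry(DatahubClient client, List<RecordEntry> entries) {
		final int MAX_RETRIES = 3; //illustrative constant
		List<RecordEntry> pending = entries;
		for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
			PutRecordsResult result = client.putRecords(project, topic, pending);
			if (result.getFailedRecordCount() == 0) {
				return; //every record accepted
			}
			//keep only the rejected records, so accepted ones are not duplicated
			pending = result.getFailedRecords();
			try {
				Thread.sleep(2000L * (attempt + 1)); //simple linear back-off
			} catch (InterruptedException e) {
				Thread.currentThread().interrupt();
				break;
			}
		}
		throw new RuntimeException("retryFailure"); //same sentinel the workers check for
	}

Since duplicates written into the topic flow straight on to MaxCompute, retrying only the failed records keeps a partial failure from turning into duplicate rows downstream.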
