Flume自定義Sink數據到MySQL中

Flume 官方提供了可以自定義 Sink 的接口:Flume#Sink
根據官方說明自定義 Sink 需要繼承 AbstractSink 類並實現 Configurable 接口。

  • 首先添加需要的Jar包依賴
  <!-- Maven dependencies required to compile the custom sink. -->
  <dependencies>
        <!-- Flume modules; all three are pinned to the same 1.7.0 release. -->
        <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-sdk</artifactId>
            <version>1.7.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-core</artifactId>
            <version>1.7.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-configuration</artifactId>
            <version>1.7.0</version>
        </dependency>
        <!-- MySQL JDBC driver; supplies the com.mysql.jdbc.Driver class that the sink loads by name. -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.44</version>
        </dependency>
    </dependencies>
  • 繼承 AbstractSink 類並實現 Configurable 接口,只要在裏面重寫對應的方法進行實現即可
package com.yours.flume.sink;

import org.apache.commons.lang.StringUtils;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;
import java.sql.CallableStatement;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

public class MySQLSink extends AbstractSink implements Configurable {
    private Logger logger= LoggerFactory.getLogger("MySQLSink");
    private String url;
    private String user;
    private String password;
    private String database;
    private String targetTable;
    private int batchSize;
    private String driver="com.mysql.jdbc.Driver";
    private Connection connection;
    private PreparedStatement prepareStatement;
    private CallableStatement callableStatement;

    /*
    * 需要的配置都在這裏定義
    */
    public void configure(Context context) {
        try{
            url = context.getString("sink.url");
            user=context.getString("sink.user");
            password=context.getString("sink.password");
            database=context.getString("sink.database");
            targetTable=context.getString("sink.targetTable");
            batchSize=context.getInteger("sink.batchSize");
        }catch (Exception e){
            e.printStackTrace();
        }
    }
    /**
	*  假設傳入的日誌格式爲date_time&user_id&request_name&request_string&&response_string
	*  2020-05-05 09:00:00&page_load&xxxx&yyyyy
	*/
    public Sink.Status process() throws EventDeliveryException {
        Channel channel = getChannel();
        Transaction transaction = channel.getTransaction();
        Event event = null;
        Sink.Status result = Sink.Status.READY;   
        String content;
        List<RequestEntity> entities =new ArrayList<RequestEntity>();
        try{
            transaction.begin();
            for(int i=0;i<batchSize;i++){
                event=channel.take();
                if(event!=null){
                    content=new String(event.getBody()); 
					try{
						String[] strs=content.split("&")
						RequestEntity entity=new RequestEntity();
						entity.setDateTime(strs[0]);
						entity.setRequestName(strs[1]);
						entity.setRequestString(strs[2]);
						entity.setResponseString(strs[3]);
						entities.add(entity)
					}catch (Exception e){
						//雖然發生異常。但是數據流還再繼續。要繼續解析
						logger.error("Parser log fail:", e);
						continue;
					} 
                }else {
                    // No events found, request back-off semantics from runner
                    result = Sink.Status.BACKOFF;
                    break;
                }
            }
            prepareStatement.clearBatch();
            prepareStatement.clearBatch();
            if(channels.size()>0){
                for (entity temp : entities) {
					/**
					*  prepareStatement 
					*/
                    prepareStatement.addBatch();
                }
                prepareStatement.executeBatch();
                connection.commit();
            }
			//保留不變提交事務
            transaction.commit();
            return Sink.Status.READY;
        }catch (Throwable ex){
            try{
                transaction.rollback();
            } catch (Exception e2) {
                logger.error("Exception in rollback. Rollback might not have been successful." , e2.getMessage());
            }
            logger.error("Failed to commit transaction. Transaction rolled back.",ex.getMessage());
            if(ex instanceof Error || ex instanceof RuntimeException){
                logger.error("Failed to commit transaction.Transaction rolled back.", ex.getMessage());
                //Throwables.propagate(ex);
            } else {
                logger.error("Failed to commit transaction.Transaction rolled back.", ex.getMessage());
                throw new EventDeliveryException("Failed to commit transaction. Transaction rolled back.", ex);
            }
            //throw new EventDeliveryException("Failed to process transaction", ex);
        }finally {
            transaction.close();
        }
        return result;
    }
    public void start(){
		/**
		* 初始化連接和prepareStatement
		*/
        super.start();
        try{
            Class.forName(driver);
            connection= DriverManager.getConnection(url,user,password);
            connection.setAutoCommit(false);
            prepareStatement=connection.prepareStatement("insert into "+database+"."+targetTable+"(colums1,...) values(?,?,...)");
        }catch (Exception e){
            e.printStackTrace();
        }
    }
    public void stop(){
        super.stop();
		/**
		* 停止連接
		*/
        if (prepareStatement != null) {
            try {
                prepareStatement.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

    }
}
  • 最後編譯打包成自定義 jar,同時同步到 Flume 的 lib 路徑下,即可運行
cp your-flume-sink.jar /opt/cloudera/parcels/CDH.../jars/
ln -s /opt/cloudera/parcels/.../jars/your-flume-sink.jar /opt/cloudera/parcels/CDH-5.12.2-1.cdh5.12.2.p0.4/lib/flume-ng/lib/
另外需要把 MySQL 的驅動包也放入 Flume 的 lib 路徑
ln -s /opt/cloudera/parcels/.../jars/mysql-connector-java-5.1.44.jar /opt/cloudera/parcels/CDH.../lib/flume-ng/lib/
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章