Using Flink with MySQL, Kafka, and HBase

Main program:

package com.streaming.flink;


import java.util.Properties;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.alibaba.fastjson.JSONObject;


public class WordCount {
	private static final Logger logger = LoggerFactory
			.getLogger(WordCount.class);

	
	public static void main(String[] args) throws Exception {
		
		// Batch environment for the DataSet word-count example
		final ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();

		// Streaming environment for the Kafka -> HBase pipeline
		StreamExecutionEnvironment tenv = StreamExecutionEnvironment
				.getExecutionEnvironment();

		// Read data from the custom JDBC source (see JdbcRead below)
//		DataStream<String> dataStream = tenv.addSource(new JdbcRead());
//		tenv.execute();

		// get input data
		DataSet<String> text = env.fromElements(
				"To be, or not to be,--that is the question:--",
				"Whether 'tis nobler in the mind to suffer",
				"The slings and arrows of outrageous fortune",
				"Or to take arms against a sea of troubles,");

		// Count words by grouping on the word (field 0) and summing the count (field 1)
		DataSet<Tuple2<String, Integer>> counts =
				text.flatMap(new LineSplitter()).groupBy(0).sum(1);

		// The same aggregation written with an explicit ReduceFunction:
		// group by the tuple field "0" and sum up tuple field "1"
		DataSet<Tuple2<String, Integer>> sum =
				text.flatMap(new LineSplitter())
						.groupBy(0)
						.reduce(new ReduceFunction<Tuple2<String, Integer>>() {
							@Override
							public Tuple2<String, Integer> reduce(
									Tuple2<String, Integer> a, Tuple2<String, Integer> b) {
								return new Tuple2<String, Integer>(a.f0, a.f1 + b.f1);
							}
						});

		FlinkKafkaConsumer08<String> kafkaSource = createKafka(tenv);
		String hbase_zk="namenode1.xxx.com";
		String hbase_port="2015";
		String hbase_table="ns:table1";
		tenv.addSource(kafkaSource).map(new MapFunction<String, String>() {
			public String map(String line) {
				logger.error("收到消息:{}", line);
				return line;
			}
		}).process(new HbaseSink(hbase_zk,hbase_port,hbase_table));

		
		tenv.execute();
		// To run the batch word-count part instead, print the result
		// (DataSet#print() triggers execution on its own):
		// counts.print();

	}


	/**
	 * Implements the string tokenizer that splits sentences into words as a
	 * user-defined FlatMapFunction. The function takes a line (String) and
	 * splits it into multiple pairs in the form of "(word,1)"
	 * (Tuple2&lt;String, Integer&gt;).
	 */
	public static final class LineSplitter implements
			FlatMapFunction<String, Tuple2<String, Integer>> {

		@Override
		public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
			String[] tokens = value.toLowerCase().split("\\W+");

			for (String token : tokens) {
				if (token.length() > 0) {
					out.collect(new Tuple2<String, Integer>(token, 1));
				}
			}
		}
	}

	public static FlinkKafkaConsumer08<String> createKafka(
			StreamExecutionEnvironment tenv) {
		String zkConnc = "kafkazk01.xxx.com:2181";
		String kafkaServer = "kafkabroker01.xxx.com:9092";

		Properties kafkaProps = new Properties();
		kafkaProps.setProperty("zookeeper.connect", zkConnc);
		kafkaProps.setProperty("bootstrap.servers", kafkaServer);
		kafkaProps.setProperty("group.id", "groupId_1");
		kafkaProps.setProperty("auto.offset.reset", "smallest");
		kafkaProps.setProperty("auto.commit.interval.ms", "30000");
		kafkaProps.setProperty("enable.auto.commit", "true");
		FlinkKafkaConsumer08<String> appSource = new FlinkKafkaConsumer08<String>(
				"topicName", new SimpleStringSchema(), kafkaProps);

		return appSource;
	}

}
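
The FlinkKafkaConsumer08 used above targets Kafka 0.8 and tracks offsets through ZooKeeper. If the cluster runs Kafka 0.10 or newer, the flink-connector-kafka-0.10 connector can be used instead; a minimal sketch of an equivalent helper, assuming the flink-connector-kafka-0.10_${scala.binary.version} dependency is added to the pom:

// Sketch only: Kafka 0.10+ consumer. The new consumer talks to the brokers directly,
// so zookeeper.connect is no longer needed, and "earliest" replaces the old
// "smallest" offset-reset value.
// Needs: import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
public static FlinkKafkaConsumer010<String> createKafka010() {
	Properties kafkaProps = new Properties();
	kafkaProps.setProperty("bootstrap.servers", "kafkabroker01.xxx.com:9092");
	kafkaProps.setProperty("group.id", "groupId_1");
	kafkaProps.setProperty("auto.offset.reset", "earliest");
	return new FlinkKafkaConsumer010<String>(
			"topicName", new SimpleStringSchema(), kafkaProps);
}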

Writing to HBase:

package com.streaming.flink;


import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


public class HbaseSink extends ProcessFunction<String,String> {
	private static final long serialVersionUID = 1L;

	private static final Logger LOGGER = LoggerFactory.getLogger(HbaseSink.class);

    private String _zookeeper;
    private String _port;
    private String _tableName;
    private HTableInterface _table;

    public HbaseSink(String zookeeper, String port, String tableName) {
        _zookeeper = zookeeper;
        _port = port;
        _tableName = tableName;
    }

    @Override
    public void open(org.apache.flink.configuration.Configuration parameters) throws Exception {
        try {
            Configuration conf = HBaseConfiguration.create();
            conf.set(HConstants.ZOOKEEPER_QUORUM, _zookeeper);
            conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, _port);
            conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/hbase");

            User user = User.create(UserGroupInformation.createRemoteUser("bmps"));
            HConnection connection = HConnectionManager.createConnection(conf,user);
            _table = connection.getTable(_tableName);

            LOGGER.error("[HbaseSink] : open HbaseSink finished");
        } catch (Exception e) {
            LOGGER.error("[HbaseSink] : open HbaseSink faild {}", e);
        }
    }

    @Override
    public void close() throws Exception {
        _table.close();
    }

    @Override
    public void processElement(String  value, Context ctx, Collector<String > out)
            throws Exception {
    	LOGGER.error("process String {}",value);
        String rowKey = new StringBuffer().append("1").toString();
        Put put = new Put(Bytes.toBytes(rowKey));
        put.setDurability(Durability.ASYNC_WAL);
        put.add(Bytes.toBytes("info"), Bytes.toBytes("flink"), Bytes.toBytes(value));
        _table.put(put);
        LOGGER.error("[HbaseSink] : put rowKey:{}, value:{} to hbase", rowKey, value);
    }



}
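
HConnection, HConnectionManager and HTableInterface match the 0.98 hbase-client declared in the pom, but they were deprecated in HBase 1.0 and removed later. Against a 1.x+ client, the connection setup in open() would look roughly like the sketch below (with the _table field declared as org.apache.hadoop.hbase.client.Table); in processElement, put.add(...) likewise becomes put.addColumn(family, qualifier, value).

// Sketch only, for an HBase 1.x+ client: ConnectionFactory/Connection/Table replace
// HConnectionManager/HConnection/HTableInterface.
// Needs: import org.apache.hadoop.hbase.TableName;
//        import org.apache.hadoop.hbase.client.Connection;
//        import org.apache.hadoop.hbase.client.ConnectionFactory;
//        import org.apache.hadoop.hbase.client.Table;
@Override
public void open(org.apache.flink.configuration.Configuration parameters) throws Exception {
	Configuration conf = HBaseConfiguration.create();
	conf.set(HConstants.ZOOKEEPER_QUORUM, _zookeeper);
	conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, _port);
	conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/hbase");

	// ConnectionFactory is the 1.x+ entry point for client connections
	Connection connection = ConnectionFactory.createConnection(conf);
	_table = connection.getTable(TableName.valueOf(_tableName));
}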

Reading from MySQL:

package com.streaming.flink;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class JdbcRead extends RichSourceFunction<String> {
	private static final long serialVersionUID = 1L;
	
	
	private static final Logger logger = LoggerFactory.getLogger(JdbcRead.class);

	private Connection connection = null;
	private PreparedStatement ps = null;

	/**
	 * 1. Create the connection in open(), so it does not have to be created
	 * and released on every invocation.
	 */
	@Override
	public void open(Configuration parameters) throws Exception {
		super.open(parameters);
		String driver = "com.mysql.jdbc.Driver";
		String url = "jdbc:mysql://127.0.0.1:3309/zkMonitor?characterEncoding=utf8&useSSL=true";
		String username = "root";
		String password = "123456";
		// 1. Load the JDBC driver
		Class.forName(driver);
		// 2. Create the connection
		connection = DriverManager.getConnection(url, username, password);
		// 3. Prepare the query statement
		String sql = "select name from t_stock_pin;";
		ps = connection.prepareStatement(sql);
	}

	/**
	 * 2. The DataStream calls run() once to produce the data.
	 */
	@Override
	public void run(SourceContext<String> sourceContext) {
		try {
			// 4. Execute the query and emit each row
			ResultSet resultSet = ps.executeQuery();
			while (resultSet.next()) {
				String name = resultSet.getString("name");
				logger.error("readJDBC name:{}", name);
				sourceContext.collect(name);
			}
		} catch (Exception e) {
			logger.error("runException:{}", e);
		}
	}

	@Override
	public void cancel() {
	}

	/**
	 * 3. When the job has finished, close the connection and release resources.
	 */
	@Override
	public void close() throws Exception {
		// 5. Close the statement and connection to release resources
		super.close();
		if (ps != null) {
			ps.close();
		}
		if (connection != null) {
			connection.close();
		}
	}

}
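
For reference, wiring JdbcRead into a streaming job (the lines commented out in the main program) looks roughly like this minimal sketch:

// Minimal usage sketch for JdbcRead.
// Needs: import org.apache.flink.streaming.api.datastream.DataStream;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> names = env.addSource(new JdbcRead());
names.print();
env.execute("read-mysql");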

Finally, the pom configuration:

<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>com.streaming.flink</groupId>
	<artifactId>flink-training</artifactId>
	<version>1.0-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>Flink Quickstart Job</name>
	<url>http://www.myorganization.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<flink.version>1.4.1</flink.version>
		<slf4j.version>1.7.7</slf4j.version>
		<log4j.version>1.2.17</log4j.version>
		<scala.binary.version>2.11</scala.binary.version>
	</properties>

	<repositories>
		<repository>
			<id>apache.snapshots</id>
			<name>Apache Development Snapshot Repository</name>
			<url>https://repository.apache.org/content/repositories/snapshots/</url>
			<releases>
				<enabled>false</enabled>
			</releases>
			<snapshots>
				<enabled>true</enabled>
			</snapshots>
		</repository>
	</repositories>

	<dependencies>
		
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.6</version>
		</dependency>
			
		<!-- Apache Flink dependencies -->
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-core</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-java</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<!-- This dependency is required to actually execute jobs. It is currently pulled in by
				flink-streaming-java, but we explicitly depend on it to safeguard against future changes. -->
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-clients_${scala.binary.version}</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
			<version>${flink.version}</version>
		</dependency>
		
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-connector-kafka-0.8_${scala.binary.version}</artifactId>
			<version>${flink.version}</version>
		</dependency>

		<dependency>
			<groupId>org.apache.hbase</groupId>
			<artifactId>hbase-client</artifactId>
			<version>0.98.8-hadoop2</version>
			<exclusions>
				<exclusion>
					<artifactId>jdk.tools</artifactId>
					<groupId>jdk.tools</groupId>
				</exclusion>
			</exclusions>
		</dependency>

		<!-- explicitly add a standard logging framework, as Flink does not have
			a hard dependency on one specific framework by default -->
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>${slf4j.version}</version>
		</dependency>
		<dependency>
			<groupId>log4j</groupId>
			<artifactId>log4j</artifactId>
			<version>${log4j.version}</version>
		</dependency>
	</dependencies>

	<profiles>
		<profile>
			<!-- Profile for packaging correct JAR files -->
			<id>build-jar</id>

			<activation>
				<activeByDefault>false</activeByDefault>
			</activation>

			<dependencies>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-core</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-java</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-clients_${scala.binary.version}</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.slf4j</groupId>
					<artifactId>slf4j-log4j12</artifactId>
					<version>${slf4j.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>log4j</groupId>
					<artifactId>log4j</artifactId>
					<version>${log4j.version}</version>
					<scope>provided</scope>
				</dependency>
			</dependencies>

			<build>
				<plugins>
					<!-- We use the maven-shade plugin to create a fat jar that contains all dependencies
						except flink and its transitive dependencies. The resulting fat-jar can be executed
						on a cluster. Change the value of Program-Class if your program entry point changes. -->
					<plugin>
						<groupId>org.apache.maven.plugins</groupId>
						<artifactId>maven-shade-plugin</artifactId>
						<version>2.4.1</version>
						<executions>
							<!-- Run shade goal on package phase -->
							<execution>
								<phase>package</phase>
								<goals>
									<goal>shade</goal>
								</goals>
								<configuration>
									<artifactSet>
										<excludes>
											<exclude>org.apache.flink:force-shading</exclude>
											<exclude>com.google.code.findbugs:jsr305</exclude>
											<exclude>org.slf4j:*</exclude>
										</excludes>
									</artifactSet>
									<filters>
										<filter>
											<!-- Do not copy the signatures in the META-INF folder.
                                            Otherwise, this might cause SecurityExceptions when using the JAR. -->
											<artifact>*:*</artifact>
											<excludes>
												<exclude>META-INF/*.SF</exclude>
												<exclude>META-INF/*.DSA</exclude>
												<exclude>META-INF/*.RSA</exclude>
											</excludes>
										</filter>
									</filters>
									<!-- If you want to use ./bin/flink run <quickstart jar> uncomment the following lines.
										This will add a Main-Class entry to the manifest file -->
									<!--
									<transformers>
										<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
										<mainClass>com.streaming.flink.StreamingJob</mainClass>
										</transformer>
									</transformers>
									-->
								</configuration>
							</execution>
						</executions>
					</plugin>
				</plugins>
			</build>
		</profile>
	</profiles>

	<build>
		<plugins>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<version>3.1</version>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
		</plugins>

		
	</build>
</project>

 
