Using Flink with MySQL, Kafka, and HBase

Main program:

package com.streaming.flink;


import java.util.Properties;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.alibaba.fastjson.JSONObject;


public class WordCount {
	private static final Logger logger = LoggerFactory
			.getLogger(WordCount.class);

	
	public static void main(String[] args) throws Exception {
		
		// Batch environment for the DataSet word-count example
		final ExecutionEnvironment env = ExecutionEnvironment
				.getExecutionEnvironment();

		// Streaming environment for the Kafka -> HBase pipeline
		StreamExecutionEnvironment tenv = StreamExecutionEnvironment
				.getExecutionEnvironment();

		// Read data from the custom JDBC source (see JdbcRead below)
//		DataStream<String> dataStream = tenv.addSource(new JdbcRead());
//		tenv.execute();

		// get input data
		DataSet<String> text = env.fromElements(
				"To be, or not to be,--that is the question:--",
				"Whether 'tis nobler in the mind to suffer",
				"The slings and arrows of outrageous fortune",
				"Or to take arms against a sea of troubles,");

		// Count words by grouping on the word (field 0) and summing the count (field 1)
		DataSet<Tuple2<String, Integer>> counts =
				text.flatMap(new LineSplitter()).groupBy(0).sum(1);

		// The same aggregation written with an explicit ReduceFunction:
		// group by the tuple field "0" and sum up tuple field "1"
		DataSet<Tuple2<String, Integer>> sum =
				text.flatMap(new LineSplitter())
						.groupBy(0)
						.reduce(new ReduceFunction<Tuple2<String, Integer>>() {
							@Override
							public Tuple2<String, Integer> reduce(
									Tuple2<String, Integer> a, Tuple2<String, Integer> b) {
								return new Tuple2<String, Integer>(a.f0, a.f1 + b.f1);
							}
						});

		FlinkKafkaConsumer08<String> kafkaSource = createKafka(tenv);
		String hbase_zk="namenode1.xxx.com";
		String hbase_port="2015";
		String hbase_table="ns:table1";
		tenv.addSource(kafkaSource).map(new MapFunction<String, String>() {
			public String map(String line) {
				logger.error("收到消息:{}", line);
				return line;
			}
		}).process(new HbaseSink(hbase_zk,hbase_port,hbase_table));

		
		tenv.execute();
		// To run the batch word-count part instead, print the result
		// (DataSet#print() triggers execution on its own):
		// counts.print();

	}


	/**
	 * Implements the string tokenizer that splits sentences into words as a
	 * user-defined FlatMapFunction. The function takes a line (String) and
	 * splits it into multiple pairs in the form of "(word,1)"
	 * (Tuple2&lt;String, Integer&gt;).
	 */
	public static final class LineSplitter implements
			FlatMapFunction<String, Tuple2<String, Integer>> {

		@Override
		public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
			String[] tokens = value.toLowerCase().split("\\W+");

			for (String token : tokens) {
				if (token.length() > 0) {
					out.collect(new Tuple2<String, Integer>(token, 1));
				}
			}
		}
	}

	public static FlinkKafkaConsumer08<String> createKafka(
			StreamExecutionEnvironment tenv) {
		String zkConnc = "kafkazk01.xxx.com:2181";
		String kafkaServer = "kafkabroker01.xxx.com:9092";

		Properties kafkaProps = new Properties();
		kafkaProps.setProperty("zookeeper.connect", zkConnc);
		kafkaProps.setProperty("bootstrap.servers", kafkaServer);
		kafkaProps.setProperty("group.id", "groupId_1");
		kafkaProps.setProperty("auto.offset.reset", "smallest");
		kafkaProps.setProperty("auto.commit.interval.ms", "30000");
		kafkaProps.setProperty("enable.auto.commit", "true");
		FlinkKafkaConsumer08<String> appSource = new FlinkKafkaConsumer08<String>(
				"topicName", new SimpleStringSchema(), kafkaProps);

		return appSource;
	}

}
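
The FlinkKafkaConsumer08 used above targets Kafka 0.8 and tracks offsets through ZooKeeper. If the cluster runs Kafka 0.10 or newer, the flink-connector-kafka-0.10 connector can be used instead; a minimal sketch of an equivalent helper, assuming the flink-connector-kafka-0.10_${scala.binary.version} dependency is added to the pom:

// Sketch only: Kafka 0.10+ consumer. The new consumer talks to the brokers directly,
// so zookeeper.connect is no longer needed, and "earliest" replaces the old
// "smallest" offset-reset value.
// Needs: import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
public static FlinkKafkaConsumer010<String> createKafka010() {
	Properties kafkaProps = new Properties();
	kafkaProps.setProperty("bootstrap.servers", "kafkabroker01.xxx.com:9092");
	kafkaProps.setProperty("group.id", "groupId_1");
	kafkaProps.setProperty("auto.offset.reset", "earliest");
	return new FlinkKafkaConsumer010<String>(
			"topicName", new SimpleStringSchema(), kafkaProps);
}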

Writing to HBase:

package com.streaming.flink;


import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


public class HbaseSink extends ProcessFunction<String,String> {
	private static final long serialVersionUID = 1L;

	private static final Logger LOGGER = LoggerFactory.getLogger(HbaseSink.class);

    private String _zookeeper;
    private String _port;
    private String _tableName;
    private HTableInterface _table;

    public HbaseSink(String zookeeper, String port, String tableName) {
        _zookeeper = zookeeper;
        _port = port;
        _tableName = tableName;
    }

    @Override
    public void open(org.apache.flink.configuration.Configuration parameters) throws Exception {
        try {
            Configuration conf = HBaseConfiguration.create();
            conf.set(HConstants.ZOOKEEPER_QUORUM, _zookeeper);
            conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, _port);
            conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/hbase");

            User user = User.create(UserGroupInformation.createRemoteUser("bmps"));
            HConnection connection = HConnectionManager.createConnection(conf,user);
            _table = connection.getTable(_tableName);

            LOGGER.error("[HbaseSink] : open HbaseSink finished");
        } catch (Exception e) {
            LOGGER.error("[HbaseSink] : open HbaseSink faild {}", e);
        }
    }

    @Override
    public void close() throws Exception {
        _table.close();
    }

    @Override
    public void processElement(String  value, Context ctx, Collector<String > out)
            throws Exception {
    	LOGGER.error("process String {}",value);
        String rowKey = new StringBuffer().append("1").toString();
        Put put = new Put(Bytes.toBytes(rowKey));
        put.setDurability(Durability.ASYNC_WAL);
        put.add(Bytes.toBytes("info"), Bytes.toBytes("flink"), Bytes.toBytes(value));
        _table.put(put);
        LOGGER.error("[HbaseSink] : put rowKey:{}, value:{} to hbase", rowKey, value);
    }



}
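
HConnection, HConnectionManager and HTableInterface match the 0.98 hbase-client declared in the pom, but they were deprecated in HBase 1.0 and removed later. Against a 1.x+ client, the connection setup in open() would look roughly like the sketch below (with the _table field declared as org.apache.hadoop.hbase.client.Table); in processElement, put.add(...) likewise becomes put.addColumn(family, qualifier, value).

// Sketch only, for an HBase 1.x+ client: ConnectionFactory/Connection/Table replace
// HConnectionManager/HConnection/HTableInterface.
// Needs: import org.apache.hadoop.hbase.TableName;
//        import org.apache.hadoop.hbase.client.Connection;
//        import org.apache.hadoop.hbase.client.ConnectionFactory;
//        import org.apache.hadoop.hbase.client.Table;
@Override
public void open(org.apache.flink.configuration.Configuration parameters) throws Exception {
	Configuration conf = HBaseConfiguration.create();
	conf.set(HConstants.ZOOKEEPER_QUORUM, _zookeeper);
	conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, _port);
	conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/hbase");

	// ConnectionFactory is the 1.x+ entry point for client connections
	Connection connection = ConnectionFactory.createConnection(conf);
	_table = connection.getTable(TableName.valueOf(_tableName));
}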

Reading from MySQL:

package com.streaming.flink;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class JdbcRead extends RichSourceFunction<String> {
	private static final long serialVersionUID = 1L;
	
	
	private static final Logger logger = LoggerFactory.getLogger(JdbcRead.class);

	private Connection connection = null;
	private PreparedStatement ps = null;

	/**
	 * 1. Create the connection in open(), so it does not have to be created
	 * and released on every invocation.
	 */
	@Override
	public void open(Configuration parameters) throws Exception {
		super.open(parameters);
		String driver = "com.mysql.jdbc.Driver";
		String url = "jdbc:mysql://127.0.0.1:3309/zkMonitor?characterEncoding=utf8&useSSL=true";
		String username = "root";
		String password = "123456";
		// 1. Load the JDBC driver
		Class.forName(driver);
		// 2. Create the connection
		connection = DriverManager.getConnection(url, username, password);
		// 3. Prepare the query statement
		String sql = "select name from t_stock_pin;";
		ps = connection.prepareStatement(sql);
	}

	/**
	 * 2. The DataStream calls run() once to produce the data.
	 */
	@Override
	public void run(SourceContext<String> sourceContext) {
		try {
			// 4. Execute the query and emit each row
			ResultSet resultSet = ps.executeQuery();
			while (resultSet.next()) {
				String name = resultSet.getString("name");
				logger.error("readJDBC name:{}", name);
				sourceContext.collect(name);
			}
		} catch (Exception e) {
			logger.error("runException:{}", e);
		}
	}

	@Override
	public void cancel() {
	}

	/**
	 * 3. When the job has finished, close the connection and release resources.
	 */
	@Override
	public void close() throws Exception {
		// 5. Close the statement and connection to release resources
		super.close();
		if (ps != null) {
			ps.close();
		}
		if (connection != null) {
			connection.close();
		}
	}

}
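
For reference, wiring JdbcRead into a streaming job (the lines commented out in the main program) looks roughly like this minimal sketch:

// Minimal usage sketch for JdbcRead.
// Needs: import org.apache.flink.streaming.api.datastream.DataStream;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> names = env.addSource(new JdbcRead());
names.print();
env.execute("read-mysql");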

Finally, the pom configuration:

<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>com.streaming.flink</groupId>
	<artifactId>flink-training</artifactId>
	<version>1.0-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>Flink Quickstart Job</name>
	<url>http://www.myorganization.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<flink.version>1.4.1</flink.version>
		<slf4j.version>1.7.7</slf4j.version>
		<log4j.version>1.2.17</log4j.version>
		<scala.binary.version>2.11</scala.binary.version>
	</properties>

	<repositories>
		<repository>
			<id>apache.snapshots</id>
			<name>Apache Development Snapshot Repository</name>
			<url>https://repository.apache.org/content/repositories/snapshots/</url>
			<releases>
				<enabled>false</enabled>
			</releases>
			<snapshots>
				<enabled>true</enabled>
			</snapshots>
		</repository>
	</repositories>

	<dependencies>
		
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.6</version>
		</dependency>
			
		<!-- Apache Flink dependencies -->
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-core</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-java</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<!-- This dependency is required to actually execute jobs. It is currently pulled in by
				flink-streaming-java, but we explicitly depend on it to safeguard against future changes. -->
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-clients_${scala.binary.version}</artifactId>
			<version>${flink.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
			<version>${flink.version}</version>
		</dependency>
		
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-connector-kafka-0.8_${scala.binary.version}</artifactId>
			<version>${flink.version}</version>
		</dependency>

		<dependency>
			<groupId>org.apache.hbase</groupId>
			<artifactId>hbase-client</artifactId>
			<version>0.98.8-hadoop2</version>
			<exclusions>
				<exclusion>
					<artifactId>jdk.tools</artifactId>
					<groupId>jdk.tools</groupId>
				</exclusion>
			</exclusions>
		</dependency>

		<!-- explicitly add a standard logging framework, as Flink does not have
			a hard dependency on one specific framework by default -->
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>${slf4j.version}</version>
		</dependency>
		<dependency>
			<groupId>log4j</groupId>
			<artifactId>log4j</artifactId>
			<version>${log4j.version}</version>
		</dependency>
	</dependencies>

	<profiles>
		<profile>
			<!-- Profile for packaging correct JAR files -->
			<id>build-jar</id>

			<activation>
				<activeByDefault>false</activeByDefault>
			</activation>

			<dependencies>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-core</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-java</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-clients_${scala.binary.version}</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
					<version>${flink.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>org.slf4j</groupId>
					<artifactId>slf4j-log4j12</artifactId>
					<version>${slf4j.version}</version>
					<scope>provided</scope>
				</dependency>
				<dependency>
					<groupId>log4j</groupId>
					<artifactId>log4j</artifactId>
					<version>${log4j.version}</version>
					<scope>provided</scope>
				</dependency>
			</dependencies>

			<build>
				<plugins>
					<!-- We use the maven-shade plugin to create a fat jar that contains all dependencies
						except flink and its transitive dependencies. The resulting fat-jar can be executed
						on a cluster. Change the value of Program-Class if your program entry point changes. -->
					<plugin>
						<groupId>org.apache.maven.plugins</groupId>
						<artifactId>maven-shade-plugin</artifactId>
						<version>2.4.1</version>
						<executions>
							<!-- Run shade goal on package phase -->
							<execution>
								<phase>package</phase>
								<goals>
									<goal>shade</goal>
								</goals>
								<configuration>
									<artifactSet>
										<excludes>
											<exclude>org.apache.flink:force-shading</exclude>
											<exclude>com.google.code.findbugs:jsr305</exclude>
											<exclude>org.slf4j:*</exclude>
										</excludes>
									</artifactSet>
									<filters>
										<filter>
											<!-- Do not copy the signatures in the META-INF folder.
                                            Otherwise, this might cause SecurityExceptions when using the JAR. -->
											<artifact>*:*</artifact>
											<excludes>
												<exclude>META-INF/*.SF</exclude>
												<exclude>META-INF/*.DSA</exclude>
												<exclude>META-INF/*.RSA</exclude>
											</excludes>
										</filter>
									</filters>
									<!-- If you want to use ./bin/flink run <quickstart jar> uncomment the following lines.
										This will add a Main-Class entry to the manifest file -->
									<!--
									<transformers>
										<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
										<mainClass>com.streaming.flink.StreamingJob</mainClass>
										</transformer>
									</transformers>
									-->
								</configuration>
							</execution>
						</executions>
					</plugin>
				</plugins>
			</build>
		</profile>
	</profiles>

	<build>
		<plugins>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<version>3.1</version>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
		</plugins>

		
	</build>
</project>

 
