一、準備環境
下載apache-flume-1.8.0-bin.tar.gz、kafka_2.11-0.11.0.3.tgz、flink-1.6.1-bin-hadoop26-scala_2.11.tgz、zookeeper-3.4.13.tar.gz
二、配置環境
1、安裝flume
cd /usr/local
tar -zxvf apache-flume-1.8.0-bin.tar.gz && mv apache-flume-1.8.0-bin apache-flume-1.8.0
vim apache-flume-1.8.0/conf/kafka.properties
agent.sources = s1
agent.channels = c1
agent.sinks = k1
# 從指定文件讀取數據
agent.sources.s1.type = exec
agent.sources.s1.command = tail -f /usr/local/test.log
agent.sources.s1.channels = c1
# 配置傳輸通道
agent.channels.c1.type = memory
agent.channels.c1.capacity = 10000
agent.channels.c1.transactionCapacity = 100
# 配置kafka接收數據
agent.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
agent.sinks.k1.brokerList = 192.168.56.101:9092
agent.sinks.k1.topic = test
agent.sinks.k1.serializer.class = kafka.serializer.StringEncoder
agent.sinks.k1.channel = c1
2、安裝kafka
cd /usr/local
tar -zxvf kafka_2.11-0.11.0.3.tgz
vim kafka_2.11-0.11.0.3/config/server.properties
broker.id=1
advertised.listeners=PLAINTEXT://192.168.56.101:9092
zookeeper.connect=192.168.56.101:2181
3、安裝flink
cd /usr/local
tar -zxvf flink-1.6.1-bin-hadoop26-scala_2.11.tgz
4、安裝zookeeper
cd /usr/local
tar -zxvf zookeeper-3.4.13.tar.gz
vim zookeeper-3.4.13/conf/zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/tmp/zookeeper
clientPort=2181
三、啓動
1、啓動zk
cd /usr/local/zookeeper-3.4.13/bin
./zkServer.sh start &
2、啓動kafka
cd /usr/local/kafka_2.11-0.11.0.3
./bin/kafka-server-start.sh config/server.properties &
./bin/kafka-console-producer.sh --broker-list 192.168.56.101:9092 --topic test
3、啓動 flume
cd /usr/local/apache-flume-1.8.0
./bin/flume-ng agent --conf-file ./conf/kafka.properties -c conf/ --name agent -Dflume.root.logger=DEBUG,console
4、啓動flink
cd /usr/local/flink-1.6.1
./bin/start-cluster.sh &
在瀏覽器中 輸入 http://192.168.56.101:8081 可看到界面
5、IDEA 開發程序 新建maven 工程
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the Flink/Kafka demo job. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates -->
    <groupId>com.tonytaotao</groupId>
    <artifactId>flink</artifactId>
    <version>1.0-SNAPSHOT</version>
    <description>flink 實戰</description>

    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.6.1</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>1.6.1</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.6.1</version>
            <!-- <scope>provided</scope>-->
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka-0.11 -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
            <version>1.6.1</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/log4j/log4j -->
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Compile with Java 8 source/target levels. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.2</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>

            <!-- Copy all dependency jars into target/classes/lib during the test phase. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <phase>test</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory> target/classes/lib </outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

            <!-- Write the main class and a lib/-prefixed Class-Path into the jar manifest. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <mainClass> com.tonytaotao.flink.FlinkKafka </mainClass>
                            <classpathPrefix>lib/</classpathPrefix>
                        </manifest>
                        <manifestEntries>
                            <Class-Path>.</Class-Path>
                        </manifestEntries>
                    </archive>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
FlinkKafka.java
package com.tonytaotao.flink;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;

import javax.annotation.Nullable;
import java.util.Properties;

/**
 * Demo Flink streaming job: reads string records from the Kafka topic "test"
 * and prints every record to the task's standard output.
 */
public class FlinkKafka {

    /**
     * Builds the Kafka source, attaches the timestamp/watermark assigner and runs the job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        // Obtain the Flink StreamExecutionEnvironment.
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Enable checkpointing with an interval of 1000 (milliseconds per the Flink API).
        env.enableCheckpointing(1000);

        // Kafka and ZooKeeper host:port settings.
        // NOTE(review): "zookeeper.connect" is presumably only needed by the old 0.8 consumer — confirm.
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "192.168.56.101:9092");
        properties.setProperty("zookeeper.connect", "192.168.56.101:2181");
        properties.setProperty("group.id", "test");

        // Create the consumer for topic "test" with the settings above.
        FlinkKafkaConsumer011<String> consumer =
                new FlinkKafkaConsumer011<String>("test", new SimpleStringSchema(), properties);
        consumer.assignTimestampsAndWatermarks(new LineSplitter());

        // Turn the Kafka records into a Flink DataStream and print them.
        DataStream<String> stream = env.addSource(consumer);
        stream.print();

        env.execute("WordCount from kafka data");
    }

    /**
     * Derives event timestamps and watermarks from records shaped like
     * {@code "<epochLong>,<rest>"}: the text before the first comma is the timestamp.
     *
     * <p>Records without a comma, or whose prefix is not a valid long, are tolerated
     * instead of throwing, so a single malformed line cannot fail the source task.
     */
    public static final class LineSplitter implements AssignerWithPunctuatedWatermarks<String> {

        private static final long serialVersionUID = 1L;

        /**
         * @return a watermark at the record's leading timestamp, or {@code null}
         *         (no watermark) when the record carries no parsable timestamp
         */
        @Nullable
        @Override
        public Watermark checkAndGetNextWatermark(String arg0, long arg1) {
            Long timestamp = parseLeadingTimestamp(arg0);
            return timestamp == null ? null : new Watermark(timestamp);
        }

        /**
         * @return the record's leading timestamp, or 0 when the record carries no
         *         parsable timestamp
         */
        @Override
        public long extractTimestamp(String arg0, long arg1) {
            Long timestamp = parseLeadingTimestamp(arg0);
            return timestamp == null ? 0 : timestamp;
        }

        /**
         * Parses the text before the first comma as a long.
         *
         * @return the parsed value, or {@code null} if the record is null, has no comma,
         *         or the prefix is empty / not a number
         */
        @Nullable
        private static Long parseLeadingTimestamp(String record) {
            if (record == null) {
                return null;
            }
            int comma = record.indexOf(',');
            if (comma < 0) {
                return null;
            }
            try {
                // substring (not split) so that "," or ",x" yields "" rather than a missing array element.
                return Long.parseLong(record.substring(0, comma));
            } catch (NumberFormatException e) {
                return null;
            }
        }
    }
}
6、打包程序爲jar包,上傳到服務器
cd /usr/local/flink-1.6.1
./bin/flink run -c com.tonytaotao.flink.FlinkKafka flink-1.0-SNAPSHOT.jar
7、在/usr/local/ 下新建test.log 文件,同時新建腳本 kafkaoutput.sh 文件,腳本內容爲
#!/usr/bin/env bash
# kafkaoutput.sh: append 1001 sample lines (kafka_test-0 .. kafka_test-1000)
# to /usr/local/test.log.
# The bash shebang is required: the C-style for loop is not POSIX sh.
for ((i = 0; i <= 1000; i++)); do
  # Shell has no "+" string operator; format the counter directly.
  printf 'kafka_test-%d\n' "$i"
done >>/usr/local/test.log # open the log once for the whole loop
然後授權 chmod +x kafkaoutput.sh
8、執行腳本,如果沒有問題,則會在 /usr/local/flink-1.6.1/log/flink-root-taskexecutor-0-master.out 看到腳本生成的內容