The test environment consists of three virtual machines: one master and two slaves. Flink, Kafka, ZooKeeper, and Redis are already installed on the cluster. Flink is not running in high-availability mode, so Hadoop is not required. Note that Redis is not installed in cluster mode; instead, a standalone instance runs on each machine. As a result, the Redis instance on master is never used, and once the job starts, whether the instance on slave1 or slave2 receives the data depends on which node Flink schedules the task to.
Start the services in order: ZooKeeper -> Kafka -> Flink.
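As a rough sketch, assuming the scripts are on the PATH (the Kafka config path below is an assumption that depends on your install location), the startup commands look like:
# on every node: start ZooKeeper first
zkServer.sh start
# on every node: start the Kafka broker
kafka-server-start.sh -daemon /opt/kafka/config/server.properties
# on master only: start the Flink standalone cluster
start-cluster.sh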
Processes on master:
Processes on the slaves:
1. Writing the code
The code consists of two parts:
- A producer that writes data into a Kafka topic; sample record: T0,A7598,2019-08-07 18:23:20:038
- A Flink job that reads the Kafka data and stores it in Redis
Part 1: the Kafka producer
Project layout:
pom.xml:
<dependencies>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>0.11.0.3</version>
    </dependency>
</dependencies>
Code:
package com;

import org.apache.kafka.clients.producer.*;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;
import java.util.Random;

/**
 * Connects to Kafka and writes generated records to a topic,
 * while also saving a local copy to data.txt.
 */
public class KafkaProducerTest {
    public static void main(String[] args) throws IOException {
        // Kafka producer configuration
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.37.140:9092,192.168.37.141:9092,192.168.37.142:9092");
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        Producer<String, String> producer = new KafkaProducer<>(props);

        String fileName = "/home/hadoop/Test/data.txt"; // local path on Linux
        File out = new File(fileName);
        FileWriter fw = null;
        try {
            if (!out.exists()) {
                out.createNewFile();
            }
            System.out.println("Start writing");
            fw = new FileWriter(out);
            int len = 1000;
            for (int i = 0; i < len; i++) {
                // name
                String name = "T" + i;
                // content
                String content = getContent();
                // current time
                String time = getDatetime();
                // send the record to the Kafka topic "test"
                String value = String.format("%s,%s,%s", name, content, time);
                producer.send(new ProducerRecord<String, String>("test", value), new Callback() {
                    @Override
                    public void onCompletion(RecordMetadata metadata, Exception exception) {
                        if (exception != null) {
                            System.out.println("Failed to send message with exception " + exception);
                        }
                    }
                });
                // also write the record to the local file
                fw.write(name + "," + content + "," + time + "\r\n");
                fw.flush();
                Thread.sleep(100L);
            }
            System.out.println("Finished writing");
        } catch (InterruptedException | IOException e) {
            e.printStackTrace();
        } finally {
            if (fw != null) {
                fw.close();
            }
        }
        producer.close();
    }

    // generate random content such as "A7598"
    public static String getContent() {
        Random random = new Random();
        return "A" + random.nextInt(10000);
    }

    // get the current time as a formatted string
    public static String getDatetime() {
        Date date = new Date();
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS");
        return dateFormat.format(date);
    }
}
Part 2: the Flink job
Project layout:
pom.xml:
<dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.11</artifactId>
        <version>1.4.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.11</artifactId>
        <version>1.4.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-java -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>1.4.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka-0.9 -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.9_2.11</artifactId>
        <version>1.4.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-redis_2.10</artifactId>
        <version>1.1.5</version>
    </dependency>
</dependencies>
Code:
package com.scn;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;

import java.util.Properties;

/**
 * Flink job that connects to Kafka, reads records from the topic,
 * and stores them in Redis.
 */
public class FilnkCostKafka {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000);

        System.out.println("===============> Start reading data from Kafka ===============>");
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "master:9092,slave1:9092,slave2:9092");
        properties.setProperty("zookeeper.connect", "master:2181,slave1:2181,slave2:2181");
        properties.setProperty("group.id", "kafka_to_redis");

        // connect Flink to the Kafka topic "test"
        FlinkKafkaConsumer09<String> myConsumer = new FlinkKafkaConsumer09<String>("test", new SimpleStringSchema(), properties);
        DataStreamSource<String> kafkaData = env.addSource(myConsumer);

        // parse the raw Kafka stream into a stream of (name, content, time) tuples
        SingleOutputStreamOperator<Tuple3<String, String, String>> userData = kafkaData.map(new MapFunction<String, Tuple3<String, String, String>>() {
            @Override
            public Tuple3<String, String, String> map(String s) throws Exception {
                // records look like "T0,A7598,2019-08-07 18:23:20:038"
                String[] split = s.split(",");
                String name = split[0];
                String content = split[1];
                String time = split[2];
                return new Tuple3<String, String, String>(name, content, time);
            }
        });

        // configure the Flink-Redis connector; 127.0.0.1 means each task
        // writes to the Redis instance on the node it runs on
        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost("127.0.0.1").build();
        // attach a RedisSink so the parsed stream is written to Redis
        userData.addSink(new RedisSink<Tuple3<String, String, String>>(conf, new RedisExampleMapper()));

        env.execute("WordCount from Kafka data");
    }

    // RedisMapper implementation that defines the key and value to store
    public static final class RedisExampleMapper implements RedisMapper<Tuple3<String, String, String>> {
        // use the Redis SET command; the additional key is unused for SET
        @Override
        public RedisCommandDescription getCommandDescription() {
            return new RedisCommandDescription(RedisCommand.SET, "flink");
        }

        // every record is stored under the fixed key "flink",
        // so each new record overwrites the previous one
        @Override
        public String getKeyFromData(Tuple3<String, String, String> data) {
            return "flink";
        }

        // the value is the comma-joined record
        @Override
        public String getValueFromData(Tuple3<String, String, String> data) {
            return data.f0 + "," + data.f1 + "," + data.f2;
        }
    }
}
2. Use IDEA to package the code into jar files and copy them to the virtual machines.
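Both projects are ordinary Maven projects, so the jars can also be built from the command line. Note that the pom files above declare no shade plugin, so the resulting jars contain only the project classes; the dependencies must be supplied on the classpath at run time:
mvn clean package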
3. Create the Kafka topic.
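A minimal sketch of creating the topic "test" used by the code, assuming Kafka's bin directory is on the PATH (the partition and replication-factor values here are assumptions):
kafka-topics.sh --create --zookeeper master:2181,slave1:2181,slave2:2181 --replication-factor 2 --partitions 3 --topic test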
4. Start the Flink job from the command line, running flink.jar:
flink run -c com.scn.FilnkCostKafka flink.jar
In the Flink web UI you can see that slave1 is executing the task.
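The same check can be done on master from the command line with the standard Flink CLI, which lists running jobs:
flink list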
5. Run the producer from kafka.jar with the java command to write data into Kafka (this form assumes kafka.jar and its dependency jars sit in the current directory):
java -cp kafka.jar:* com.KafkaProducerTest
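To confirm records are actually arriving in the topic, you can tail it with Kafka's console consumer (assuming the script is on the PATH):
kafka-console-consumer.sh --bootstrap-server master:9092 --topic test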
6. Connect to Redis on the corresponding slave and check the data.
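Because the job stores every record with the Redis SET command under the fixed key "flink", the key holds the most recently processed record:
# on slave1 or slave2, whichever node is running the task
redis-cli GET flink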