Flink: reading Kafka data and saving it to Redis

The test setup uses three virtual machines, one master and two slaves, with Flink, Kafka, ZooKeeper, and Redis already installed on the cluster. Flink is not running in high-availability mode, so Hadoop is not needed. Note that Redis is not set up in cluster mode; it is installed standalone on each machine. As a result the Redis instance on master is never used, and once the job starts, whether the Redis on slave1 or slave2 receives the data depends on which node Flink schedules the task to (the sink connects to 127.0.0.1).
Start the services in the order ZooKeeper -> Kafka -> Flink.
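A minimal start sequence (a sketch; script locations and config paths are assumptions that depend on your install):

    zkServer.sh start                                         # on every node
    kafka-server-start.sh -daemon config/server.properties    # on every node
    start-cluster.sh                                          # on master, from Flink's bin directory
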
master processes: (screenshot)
slave processes: (screenshot)
1. Writing the code
The code has two parts:

  1. A producer that writes records into a Kafka topic; sample record: T0,A7598,2019-08-07 18:23:20:038
  2. A Flink job that reads the Kafka data and stores it in Redis

Part 1, the Kafka producer:
Project layout: (screenshot)
pom file

    <dependencies>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.11.0.3</version>
        </dependency>
    </dependencies>

Code

package com;

import org.apache.kafka.clients.producer.*;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Random;
import java.util.Properties;

/**
 * Connects to Kafka and writes records to a topic,
 * also saving a copy locally to data.txt.
 */

public class KafkaProducerTest {

    public static void main(String[] args) throws IOException {
        // Kafka producer configuration
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.37.140:9092,192.168.37.141:9092,192.168.37.142:9092");
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        Producer<String, String> producer = new KafkaProducer<>(props);

        String fileName = "/home/hadoop/Test/data.txt"; // local path on the Linux host
        File out = new File(fileName);
        FileWriter fw = null;
        try {
            if (!out.exists()){
                out.createNewFile();
            }
            System.out.println("開始寫入");
            fw = new FileWriter(out);
            int len = 1000;
            for (int i = 0; i < len; i++) {
                // record name, e.g. "T0"
                String name = "T" + i;
                // random content, e.g. "A7598"
                String content = getContent();
                // current timestamp
                String time = getDatetime();
                // assemble the record and send it to the Kafka topic "test"
                String value = String.format("%s,%s,%s", name, content, time);
                producer.send(new ProducerRecord<String, String>("test", value), new Callback() {
                    @Override
                    public void onCompletion(RecordMetadata metadata, Exception exception) {
                        if (exception != null) {
                            System.out.println("Failed to send message with exception " + exception);
                        }
                    }
                });
                // also write the record to the local file
                fw.write(name + "," + content + "," + time + "\r\n");
                fw.flush();
                Thread.sleep(100L);
            }
            System.out.println("Finished writing");
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (fw != null) {   // guard against an exception before fw was assigned
                fw.close();
            }
        }
        producer.close();
    }
    // random content of the form "A<number>"
    public static String getContent(){
        Random random=new Random();
        String content = "A"+random.nextInt(10000);
        return content;
    }
    // current time formatted as yyyy-MM-dd HH:mm:ss:SSS
    public static String getDatetime(){
        Date date = new Date();
        SimpleDateFormat dateFormat= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS");
        return dateFormat.format(date);
    }
}

--------------------------------------------------------------------------------------------------------------------------------------
Part 2, the Flink job:
Project layout: (screenshot)
pom file

    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>1.4.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.4.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.4.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka-0.9 -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.9_2.11</artifactId>
            <version>1.4.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-redis_2.10</artifactId>
            <version>1.1.5</version>
        </dependency>
    </dependencies>

Code

package com.scn;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;

import java.util.Properties;

/**
 * Flink connects to Kafka, reads records from the topic, and stores them in Redis.
 */
public class FilnkCostKafka {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000);

        System.out.println("===============》 開始讀取kafka中的數據  ==============》");

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "master:9092,slave1:9092,slave2:9092");
        properties.setProperty("zookeeper.connect", "master:2181,slave1:2181,slave2:2181");
        properties.setProperty("group.id", "kafka_to_redis");

        // Kafka consumer source; the zookeeper.connect property above is only
        // needed by the 0.8 consumer and is ignored by FlinkKafkaConsumer09
        FlinkKafkaConsumer09<String> myConsumer = new FlinkKafkaConsumer09<String>("test", new SimpleStringSchema(), properties);
        DataStreamSource<String> kafkaData = env.addSource(myConsumer);

        // parse each raw line "name,content,time" into a Tuple3
        SingleOutputStreamOperator<Tuple3<String, String, String>> userData = kafkaData.map(new MapFunction<String, Tuple3<String, String, String>>() {
            @Override
            public Tuple3<String, String, String> map(String s) throws Exception {
                String[] split = s.split(",");
                String name = split[0];
                String content = split[1];
                String time = split[2];
                return new Tuple3<String, String, String>(name, content, time);
            }
        });

        // Redis connection config; 127.0.0.1 points at the Redis instance local
        // to whichever TaskManager ends up running the sink
        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost("127.0.0.1").build();
        // add a RedisSink that writes each tuple to Redis
        userData.addSink(new RedisSink<Tuple3<String, String, String>>(conf, new RedisExampleMapper()));
        env.execute("WordCount from Kafka data");
    }

    // RedisMapper implementation: defines the Redis command, key, and value
    public static final class RedisExampleMapper implements RedisMapper<Tuple3<String, String, String>> {
        // use the Redis SET command (a plain string key/value); the additional
        // key given here is only consulted by hash and sorted-set commands
        public RedisCommandDescription getCommandDescription() {
            return new RedisCommandDescription(RedisCommand.SET, "flink");
        }

        // the key is the constant "flink", so each record overwrites the previous one
        public String getKeyFromData(Tuple3<String, String, String> data) {
            return "flink";
        }

        // the value is the tuple re-joined as a comma-separated string
        public String getValueFromData(Tuple3<String, String, String> data) {
            return data.f0 + "," + data.f1 + "," + data.f2;
        }
    }

}
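Because getKeyFromData returns the constant "flink", the string key only ever holds the most recent record. If you want to keep every record instead, a small variation (a sketch, not part of the job above, meant to sit next to RedisExampleMapper inside FilnkCostKafka and reuse the same imports) is to write into a Redis hash with HSET, using the record name as the hash field:

public static final class RedisHashMapper implements RedisMapper<Tuple3<String, String, String>> {
    // HSET stores fields inside a hash; the additional key names the hash
    public RedisCommandDescription getCommandDescription() {
        return new RedisCommandDescription(RedisCommand.HSET, "flink");
    }

    // hash field: the record name, e.g. "T0"
    public String getKeyFromData(Tuple3<String, String, String> data) {
        return data.f0;
    }

    // hash value: the remaining fields
    public String getValueFromData(Tuple3<String, String, String> data) {
        return data.f1 + "," + data.f2;
    }
}

Swap it into the addSink call and the stored records can be listed with redis-cli hgetall flink.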

2. Use IDEA to package the code into jar files and copy them to the virtual machines
(screenshot)
3. Create the Kafka topic
(screenshot)
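The job above consumes the topic "test". A typical creation command for this Kafka/ZooKeeper setup (a sketch; the partition count and replication factor are assumptions):

    kafka-topics.sh --create --zookeeper master:2181,slave1:2181,slave2:2181 --replication-factor 2 --partitions 3 --topic test
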
4. Start the Flink job by running flink.jar:

flink run -c com.scn.FilnkCostKafka flink.jar

In the web UI you can see that slave1 is executing the task.
(screenshot)
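The running job can also be confirmed from the command line with Flink's CLI:

    flink list
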
5. Run the producer jar with the java command to write data into Kafka:

java -cp *: com.KafkaProducerTest kafka.jar

(screenshot)
6. Go into the Redis instance on the corresponding slave and check the data.
(screenshot)
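Since the sink uses RedisCommand.SET with the constant key "flink", the most recent record can be read back with redis-cli (assuming the default local instance):

    redis-cli get flink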
