Kafka Code Implementation

1. Consumer

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;

public class Consumer {
    public static void main(String[] args){
        HashMap<String, Object> config = new HashMap<>();
        config.put("bootstrap.servers","hadoop01:9092,hadoop02:9092,hadoop03:9092");
        config.put("key.deserializer",StringDeserializer.class.getName());
        config.put("value.deserializer",StringDeserializer.class.getName());
        config.put("group.id","g000001");
        /**
         * From which position to start fetching data
         * [latest, earliest, none]
         */
        config.put("auto.offset.reset","earliest");
        //Whether to commit offsets automatically
        //config.put("enable.auto.commit","false");

        config.put("enable.auto.commit","true");

        config.put("auto.commit.interval.ms","500"); //auto-commit interval in ms (assumed key; it was left blank)

        //Create a consumer client instance
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(config);
        consumer.subscribe(Arrays.asList("test"));

        while (true) {
            //Pull data; this fetches records from all partitions assigned to this consumer
            ConsumerRecords<String, String> records = consumer.poll(2000);
            Iterator<ConsumerRecord<String, String>> iterator = records.iterator();
            while (iterator.hasNext()) {
                ConsumerRecord<String, String> record = iterator.next();
                System.out.println("record = " + record);
            }
        }

        //Release the connection (unreachable while the loop above runs forever)
        // consumer.close();
    }
}
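The config above keeps auto commit enabled. If you instead set enable.auto.commit to "false" (the commented-out line), the offsets must be committed by hand. Below is a minimal sketch of that variant, reusing the same brokers, topic and group as above; the class name ManualCommitConsumer is only illustrative. It calls commitSync() after each polled batch has been processed, so an offset is committed only once its records have been handled.

import java.util.Arrays;
import java.util.HashMap;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

public class ManualCommitConsumer {
    public static void main(String[] args) {
        HashMap<String, Object> config = new HashMap<>();
        config.put("bootstrap.servers", "hadoop01:9092,hadoop02:9092,hadoop03:9092");
        config.put("key.deserializer", StringDeserializer.class.getName());
        config.put("value.deserializer", StringDeserializer.class.getName());
        config.put("group.id", "g000001");
        config.put("auto.offset.reset", "earliest");
        config.put("enable.auto.commit", "false");   //turn off auto commit

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(config);
        consumer.subscribe(Arrays.asList("test"));
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(2000);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println("record = " + record);   //process the batch first
            }
            consumer.commitSync();   //then explicitly commit the offsets returned by this poll
        }
    }
}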

2. Producer

package day12;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class Producer {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers","hadoop01:9092,hadoop02:9092,hadoop03:9092");
        props.setProperty("key.serializer",StringSerializer.class.getName());
        props.setProperty("value.serializer","org.apache.kafka.common.serialization.StringSerializer");
        //Acknowledgement level when sending data; the default is 1
        //props.setProperty("acks","1");
        //Custom partitioner; the default is
        //org.apache.kafka.clients.producer.internals.DefaultPartitioner

        //props.setProperty("partitioner.class","org.apache.kafka.clients.producer.internals.DefaultPartitioner");

        //Create a producer instance
        KafkaProducer<String,String> kafkaProducer = new KafkaProducer<>(props);
        int count = 0;
        while(count < 10000000){
           // int partitionNum = count%3;
            ProducerRecord<String, String> record = new ProducerRecord<>("test", 0, "", "NO:" + count);
            kafkaProducer.send(record);
            count++;
            Thread.sleep(1000);
        }

        kafkaProducer.close();
    }
}
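The commented-out acks and partitioner.class settings above are never exercised. As an illustration, here is a minimal sketch of a producer that sets acks, sends keyed records (so the default partitioner chooses the partition from the key hash instead of the hard-coded partition 0), and attaches a send callback that reports where each record landed. The class name ProducerWithCallback is illustrative; the brokers and topic are reused from the example above.

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public class ProducerWithCallback {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "hadoop01:9092,hadoop02:9092,hadoop03:9092");
        props.setProperty("key.serializer", StringSerializer.class.getName());
        props.setProperty("value.serializer", StringSerializer.class.getName());
        props.setProperty("acks", "all");   //wait until all in-sync replicas acknowledge the write

        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        for (int i = 0; i < 10; i++) {
            //Keyed record: the default partitioner hashes the key to pick the partition
            ProducerRecord<String, String> record = new ProducerRecord<>("test", "key-" + i, "NO:" + i);
            producer.send(record, (metadata, exception) -> {
                if (exception != null) {
                    exception.printStackTrace();   //delivery failed
                } else {
                    System.out.println("sent to partition " + metadata.partition()
                            + " at offset " + metadata.offset());
                }
            });
        }
        producer.close();
    }
}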

3. Kafka Integration with Spark Streaming

package day12

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.kafka.common.serialization.StringDeserializer

/**
  * Spark Streaming integration with Kafka
  */
object SparkStreaming_kafka {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
    val conf = new SparkConf().setMaster("local[2]").setAppName(s"${this.getClass.getSimpleName}")
    val ssc = new StreamingContext(conf,Seconds(5))


    /**
      * Kafka parameter map
      */
    val kafkaParams = Map[String,Object](
      "bootstrap.servers" -> "hadoop01:9092,hadoop02:9092,hadoop03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "day12_005",
      "auto.offset.reset" -> "earliest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )


    //Topics to subscribe to
    val topics = Array("test")

    /**
      * Create the Kafka direct stream
      */
    val stream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
    )

    /* val maped: DStream[(String, String)] = stream.map(record => (record.key, record.value))
    maped.foreachRDD(rdd => {
      //Processing logic
      rdd.foreach(println)
    })*/

    stream.foreachRDD(rdd => {
      val offsetRange = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      val maped: RDD[(String, String)] = rdd.map(record => (record.key,record.value))
      //Processing logic
      maped.foreach(println)
      //Print each partition's offset range
      for(o <- offsetRange){
        println(s"${o.topic}  ${o.partition} ${o.fromOffset} ${o.untilOffset}")
      }
    })

    //Start the streaming application
    ssc.start()

    //Wait for termination
    ssc.awaitTermination()
  }

}
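The foreachRDD above only prints the offset ranges; section 4 stores them in Zookeeper. The spark-streaming-kafka-0-10 integration also lets you commit the ranges back to Kafka itself through the CanCommitOffsets.commitAsync API. Below is a minimal sketch of that pattern, written against the Java API to match the earlier client examples; the class name CommitToKafkaSketch is illustrative, enable.auto.commit stays false so that Spark controls when offsets are committed, and the brokers, topic and group id are reused from the code above.

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.CanCommitOffsets;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.HasOffsetRanges;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import org.apache.spark.streaming.kafka010.OffsetRange;

public class CommitToKafkaSketch {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("CommitToKafkaSketch");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "hadoop01:9092,hadoop02:9092,hadoop03:9092");
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", "day12_005");
        kafkaParams.put("auto.offset.reset", "earliest");
        kafkaParams.put("enable.auto.commit", false);   //Spark decides when offsets are committed

        JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(
                jssc,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<String, String>Subscribe(Arrays.asList("test"), kafkaParams));

        stream.foreachRDD(rdd -> {
            //Grab the offset ranges of this batch before any shuffle or repartition
            OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();

            //Processing logic
            rdd.foreach(record -> System.out.println(record.value()));

            //After the output has succeeded, commit the ranges back to Kafka asynchronously
            ((CanCommitOffsets) stream.inputDStream()).commitAsync(offsetRanges);
        });

        jssc.start();
        jssc.awaitTermination();
    }
}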

4. Managing Kafka Offsets with Zookeeper

package day12

import kafka.utils.{ZKGroupTopicDirs, ZkUtils}
import org.I0Itec.zkclient.ZkClient
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, HasOffsetRanges, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * Manage Kafka offsets in Zookeeper
  * Created by zhangjingcun on 2018/10/11 8:49.
  */
object SSCDirectKafka010_ZK_Offset {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)

    val conf = new SparkConf().setMaster("local[*]").setAppName(s"${this.getClass.getSimpleName}")
    // Batch interval of 2 seconds
    val ssc = new StreamingContext(conf, Seconds(2))

    val groupId = "day13_001"

    /**
      * Kafka parameter map
      */
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "bigdata01:9092,bigdata02:9092,bigdata03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupId,
      "auto.offset.reset" -> "earliest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topic = "testTopic"
    val topics = Array(topic)

    /**
      * If we maintain the offsets ourselves:
      *   1: On the very first run, where should consumption start? earliest
      *   2: If it is not the first run, where should consumption resume? From the offset we stored last time, e.g. a stored offset=88
      *
      * The class below mainly builds the Zookeeper path strings
      */
    val zKGroupTopicDirs: ZKGroupTopicDirs = new ZKGroupTopicDirs(groupId, topic)

    /**
      * The resulting Zookeeper path:
      * /consumers/day13_001/offsets/testTopic
      */
    val offsetDir = zKGroupTopicDirs.consumerOffsetDir

    //Zookeeper connection string
    val zkGroups = "hadoop01:2181,hadoop02:2181,hadoop03:2181"

    /**
      * Create a ZkClient connection and check whether /consumers/day13_001/offsets/testTopic has child nodes:
      * if it does, offsets were stored before; if not, this is the program's first run
      */
    val zkClient = new ZkClient(zkGroups)
    val childrenCount = zkClient.countChildren(offsetDir)

    val stream = if (childrenCount > 0) { //not the first run
      println("---------- started before ------------")
      //Holds the offsets read back from Zookeeper
      var fromOffsets = Map[TopicPartition, Long]()
      //consumers/day13_001/offsets/testTopic/0
      //consumers/day13_001/offsets/testTopic/1
      //consumers/day13_001/offsets/testTopic/2
      (0  until childrenCount).foreach(partitionId => {
        val offset = zkClient.readData[String](offsetDir+s"/${partitionId}")
        fromOffsets += (new TopicPartition(topic, partitionId) -> offset.toLong)
      })
      KafkaUtils.createDirectStream(ssc, LocationStrategies.PreferConsistent, ConsumerStrategies.Assign[String, String](fromOffsets.keys.toList, kafkaParams, fromOffsets))
    } else { //first run
      println("------------- first run -----------")
      KafkaUtils.createDirectStream(ssc, LocationStrategies.PreferConsistent, ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))
    }

    stream.foreachRDD(rdd=>{
      val offsetRange = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      val maped: RDD[(String, String)] = rdd.map(record => (record.key, record.value))
      //Processing logic
      maped.foreach(println)

      //Store and manage the offsets ourselves
      for (o <- offsetRange) {
        //println(s"${o.topic} ${o.partition} ${o.fromOffset} ${o.untilOffset}")
        //Write this partition's ending offset to Zookeeper
        ZkUtils(zkClient, false).updatePersistentPath(offsetDir+"/"+o.partition, o.untilOffset.toString)
      }
    })

    ssc.start()

    ssc.awaitTermination()
  }
}