Flink 廣播流 跟 MapState 案例

package com.coder.flink.core.aaa_Use_demo;
 
 
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.state.*;
 
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.memory.SeekableDataOutputView;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.util.Collector;
 
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
 
import java.util.concurrent.TimeUnit;
 
public class BroadCastWordCountExample2 {
    public static void main(String[] args) throws Exception {
 
 
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String value1 = "{'name':'xiao_wang','age':'10','id':'1','info':'進入'}";
        String value2 = "{'name':'xiao_wang','age':'10','id':'1','info':'退出'}";
        String value3 = "{'name':'xiao_wang','age':'10','id':'1','info':'購物'}";
        String value4 = "{'name':'xiao_wang','age':'10','id':'1','info':'收藏'}";
 
 
        String value5 = "{'name':'xiao_sang','age':'20','id':'2','info':'進入'}";
        String value6 = "{'name':'xiao_sang','age':'20','id':'2','info':'退出'}";
        String value7 = "{'name':'xiao_sang','age':'20','id':'2','info':'退出'}";
 
        String value8 = "{'name':'xiao_hai','age':'30','id':'3','info':'進入'}";
        String value9 = "{'name':'xiao_hai','age':'30','id':'3','info':'購物'}";
        String value10 = "{'name':'xiao_hai','age':'30','id':'3','info':'購物'}";
 
 
        JSONObject jsonObject1 = JSON.parseObject(value1);
        JSONObject jsonObject2 = JSON.parseObject(value2);
        JSONObject jsonObject3 = JSON.parseObject(value3);
        JSONObject jsonObject4 = JSON.parseObject(value4);
 
        JSONObject jsonObject5 = JSON.parseObject(value5);
        JSONObject jsonObject6 = JSON.parseObject(value6);
        JSONObject jsonObject7 = JSON.parseObject(value7);
 
        JSONObject jsonObject8 = JSON.parseObject(value8);
        JSONObject jsonObject9 = JSON.parseObject(value9);
        JSONObject jsonObject10 = JSON.parseObject(value10);
 
        List<JSONObject> list = new ArrayList<>();
        list.add(jsonObject1);
        list.add(jsonObject2);
        list.add(jsonObject3);
        list.add(jsonObject4);
        list.add(jsonObject5);
        list.add(jsonObject6);
        list.add(jsonObject7);
        list.add(jsonObject8);
        list.add(jsonObject9);
        list.add(jsonObject10);
        String broadCast1 = "{'condition_id':'1','firstAction':'進入','secondAction':'購物','topic':'進入+購物'}";
        String broadCast2 = "{'condition_id':'2','firstAction':'進入','secondAction':'退出','topic':'進入+退出'}";
        JSONObject broadCastJson1 = JSON.parseObject(broadCast1);
        JSONObject broadCastJson2 = JSON.parseObject(broadCast2);
 
        List<JSONObject> list2 = new ArrayList<>();
        list2.add(broadCastJson1);
        list2.add(broadCastJson2);
 
 
        final MapStateDescriptor<String, JSONObject> mapStateDes = new MapStateDescriptor<>(
                "state",
                String.class,
                JSONObject.class);
 
        // 自定義廣播流(單例)
        BroadcastStream<JSONObject> broadcastStream = env.addSource(new RichSourceFunction<JSONObject>() {
 
            private volatile boolean isRunning = true;
 
            /**
             * 數據源:模擬每30秒隨機更新一次攔截的關鍵字
             * @param ctx
             * @throws Exception
             */
            @Override
            public void run(SourceContext<JSONObject> ctx) throws Exception {
                System.out.println("list2 = " + list2);
                while (isRunning) {
                    TimeUnit.SECONDS.sleep(1);
                    Random random = new Random();
 
                    //todo 定時刷新,睡眠6秒
 
                    int i = random.nextInt(2);
 
                    ctx.collect(list2.get(i));
                    System.out.println("發送的字符串:" + list.get(i));
 
                }
 
 
            }
 
            @Override
            public void cancel() {
                isRunning = false;
            }
        }).broadcast(mapStateDes);
 
        // 自定義數據流(單例)
        DataStream<JSONObject> dataStream = env.addSource(new RichSourceFunction<JSONObject>() {
 
            private volatile boolean isRunning = true;
 
 
 
            /**
             * 模擬每3秒隨機產生1條消息
             * @param ctx
             * @throws Exception
             */
            @Override
            public void run(SourceContext<JSONObject> ctx) throws Exception {
 
                while (isRunning) {
                    Random random = new Random();
                    TimeUnit.SECONDS.sleep(3);
                    int i = random.nextInt(10);
                    ctx.collect(list.get(i));
//                    System.out.println("kafka接收數據:" + dataSet[seed]);
                }
            }
 
            @Override
            public void cancel() {
                isRunning = false;
            }
 
        }).setParallelism(1);
        dataStream.keyBy(new KeySelector<JSONObject, String>() {
 
            @Override
            public String getKey(JSONObject value) throws Exception {
                return value.getString("id");
            }
        }).connect(broadcastStream).process(new KeyedBroadcastProcessFunction<String, JSONObject, JSONObject, String>() {
 
            private ValueState<String> prevActionState;
 
            //todo 狀態
            private transient MapState<String, JSONObject> infoState;
 
            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                prevActionState = getRuntimeContext().getState(
                        new ValueStateDescriptor<>(
                                "lastAction",
                                String.class));
 
 
                infoState = getRuntimeContext().getMapState(
                        new MapStateDescriptor<String, JSONObject>(
                                "infoState",
                                String.class,
                                JSONObject.class));
            }
 
 
            @Override
            public void processElement(JSONObject value, ReadOnlyContext ctx, Collector<String> out) throws Exception {
                JSONObject getBroad = ctx.getBroadcastState(mapStateDes).get("a");
                String currentKey = ctx.getCurrentKey();
 
                String prevAction = prevActionState.value();
                if (prevAction!=null ){
                    System.out.println("prevAction = " + prevAction);
                }
                if (infoState !=null){
                    System.out.println("currentKey:"+currentKey+",infoState = " + infoState.get("bbb"));
                }
 
                System.out.println("getBroad = " + getBroad);
 
                System.out.println("接收日誌:" + value);
                infoState.put("bbb",getBroad);
                prevActionState.update(value.toString());
 
            }
 
            @Override
            public void processBroadcastElement(JSONObject value, Context ctx, Collector<String> out) throws Exception {
                BroadcastState<String, JSONObject> broadcastState = ctx.getBroadcastState(mapStateDes);
 
                broadcastState.put("a", value);
//                System.out.println("進入的廣播變量:value = " + value);
 
 
            }
 
 
        });
 
 
        env.execute("BroadCastWordCountExample");
 
 
    }
}
說明:上面的案例沒有寫輸出算子(sink),運行後直接觀察控制台日誌即可。注意:使用 keyed state(如 ValueState、MapState)之前一定要先 keyBy!

 

第二個案例:
 

 

 package aliyun.product.customer_analysis_system

import org.apache.flink.api.common.functions.{FlatMapFunction, RichFlatMapFunction}
import org.apache.flink.api.common.state.{ListState, ListStateDescriptor, ValueState, ValueStateDescriptor}
import org.apache.flink.api.scala.createTypeInformation
import org.apache.flink.configuration.Configuration
import org.apache.flink.runtime.state.{FunctionInitializationContext, FunctionSnapshotContext}
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.util.Collector
/**
 * Examples of Flink managed state in Scala: a function mixing keyed and operator
 * state (`CheckpointCount`), two keyed counters (`ByKeyCount`, `CountWindow`), and a
 * small job in `main` that chains the two counters.
 */
object SumUserCount {
  /*class  CheckpointCountDemo extends FlatMapFunction[(String,Long),(String,Long)] with ListCheckpointed[(String, Long)]{


    // number of records received
    private  var numberCount :Long = 0L


   // counting method
    override def flatMap(value: (String, Long), out: Collector[(String, Long)]): Unit = {
      numberCount += 1
      out.collect(value._1,numberCount)
    }

    override def snapshotState(checkpointId: Long, timestamp: Long): util.List[Long] = {
      Collections.singletonList(numberCount)
    }
    // reset
    override def restoreState(stateList: util.List[Long]): Unit = {
      numberCount  =0L
//      for (count <-  stateList){
//        numberCount += count
//      }
    }
  }
*/

  /**
   * Counts records both per key (keyed `ValueState`) and per parallel operator
   * instance (operator `ListState`), emitting `(key, perKeyCount, perOperatorCount)`.
   *
   * @param numElement not used by this class; kept so existing callers still compile
   */
  class CheckpointCount(val numElement: Int) extends FlatMapFunction[(Int, Long), (Int, Long, Long)] with CheckpointedFunction {

    // Number of records seen by this operator instance.
    private var operatorCount: Long = _

    // Per-key record count.
    private var keyedState: ValueState[Long] = _

    // Checkpointed copy of operatorCount.
    private var operatorState: ListState[Long] = _

    /** Increments both counters and emits the key together with the two counts. */
    override def flatMap(value: (Int, Long), out: Collector[(Int, Long, Long)]): Unit = {
      val perKeyCount = keyedState.value() + 1
      keyedState.update(perKeyCount)
      // Update the operator-local counter.
      operatorCount += 1

      out.collect((value._1, perKeyCount, operatorCount))
    }

    /** On snapshot, replaces the operator state content with the current counter. */
    override def snapshotState(context: FunctionSnapshotContext): Unit = {
      operatorState.clear()
      operatorState.add(operatorCount)
    }

    /** Registers both state handles and, after a restore, rebuilds `operatorCount`. */
    override def initializeState(context: FunctionInitializationContext): Unit = {
      keyedState = context.getKeyedStateStore
        .getState(new ValueStateDescriptor("keyedState", createTypeInformation[Long]))
      // NOTE(review): the operator state reuses the name "keyedState"; it is kept
      // unchanged here so that state written by earlier checkpoints is still found.
      operatorState = context.getOperatorStateStore
        .getListState(new ListStateDescriptor("keyedState", createTypeInformation[Long]))
      if (context.isRestored) {
        // Sum every restored entry: after rescaling, one instance may receive several.
        operatorCount = 0L
        val restored = operatorState.get()
        if (restored != null) {
          val entries = restored.iterator()
          while (entries.hasNext) {
            operatorCount += entries.next()
          }
        }
      }
    }
  }

  /**
   * Keyed counter over `(key, (innerKey, innerCount))` records. The stored count is
   * incremented only when `innerCount` is not greater than 1; otherwise the stored
   * count is re-emitted unchanged. Emits `(key, count)` for every input record.
   */
  class CountWindow extends RichFlatMapFunction[(String, (String, Long)), (String, Long)] {

    private var count: ValueState[(String, Long)] = _

    override def flatMap(in: (String, (String, Long)), out: Collector[(String, Long)]): Unit = {
      val stored = count.value()
      // No state yet for this key: start from zero.
      val current = if (stored != null) stored else (in._1, 0L)

      val innerCount = in._2._2
      val updated =
        if (innerCount > 1) (in._1, current._2)
        else (in._1, current._2 + 1)
      count.update(updated)
      out.collect(updated)
    }

    /** Registers the keyed state handle. */
    override def open(parameters: Configuration): Unit = {
      count = getRuntimeContext.getState(
        new ValueStateDescriptor[(String, Long)]("count", createTypeInformation[(String, Long)])
      )
    }
  }

  /**
   * Keyed counter that emits `(key, runningCount)` for every input record, where
   * `runningCount` is the number of records seen so far for that key.
   */
  class ByKeyCount extends RichFlatMapFunction[(String, String), (String, Long)] {

    private var count: ValueState[(String, Long)] = _

    override def flatMap(in: (String, String), out: Collector[(String, Long)]): Unit = {
      val stored = count.value()
      // No state yet for this key: start from zero.
      val current = if (stored != null) stored else (in._1, 0L)
      val updated = (in._1, current._2 + 1)
      count.update(updated)
      out.collect(updated)
    }

    /** Registers the keyed state handle. */
    override def open(parameters: Configuration): Unit = {
      count = getRuntimeContext.getState(
        new ValueStateDescriptor[(String, Long)]("count", createTypeInformation[(String, Long)])
      )
    }
  }

  /** Builds and runs the sample job on six in-memory records. */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    env.fromElements(
      ("A", "test1"),
      ("A", "test2"),
      ("C", "test3"),
      ("D", "2"),
      ("A", "1"),
      ("C", "1")
    ).keyBy(_._1)
      .flatMap(new ByKeyCount())
      .map(f => ("sum", f))
      .keyBy(_._1)
      .flatMap(new CountWindow())
      .print()
    // ByKeyCount emits a running count per key; CountWindow, under the single key
    // "sum", increments only for records whose running count is 1. The printed
    // ("sum", n) values therefore follow the number of distinct keys seen so far,
    // reaching ("sum", 3) for the keys A, C and D. Interleaving across keys is
    // presumably dependent on the configured parallelism.

    env.execute("ExampleManagedState")
  }

}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章