package com.coder.flink.core.aaa_Use_demo;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.state.*;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.memory.SeekableDataOutputView;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.util.Collector;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.TimeUnit;
public class BroadCastWordCountExample2 {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
String value1 = "{'name':'xiao_wang','age':'10','id':'1','info':'進入'}";
String value2 = "{'name':'xiao_wang','age':'10','id':'1','info':'退出'}";
String value3 = "{'name':'xiao_wang','age':'10','id':'1','info':'購物'}";
String value4 = "{'name':'xiao_wang','age':'10','id':'1','info':'收藏'}";
String value5 = "{'name':'xiao_sang','age':'20','id':'2','info':'進入'}";
String value6 = "{'name':'xiao_sang','age':'20','id':'2','info':'退出'}";
String value7 = "{'name':'xiao_sang','age':'20','id':'2','info':'退出'}";
String value8 = "{'name':'xiao_hai','age':'30','id':'3','info':'進入'}";
String value9 = "{'name':'xiao_hai','age':'30','id':'3','info':'購物'}";
String value10 = "{'name':'xiao_hai','age':'30','id':'3','info':'購物'}";
JSONObject jsonObject1 = JSON.parseObject(value1);
JSONObject jsonObject2 = JSON.parseObject(value2);
JSONObject jsonObject3 = JSON.parseObject(value3);
JSONObject jsonObject4 = JSON.parseObject(value4);
JSONObject jsonObject5 = JSON.parseObject(value5);
JSONObject jsonObject6 = JSON.parseObject(value6);
JSONObject jsonObject7 = JSON.parseObject(value7);
JSONObject jsonObject8 = JSON.parseObject(value8);
JSONObject jsonObject9 = JSON.parseObject(value9);
JSONObject jsonObject10 = JSON.parseObject(value10);
List<JSONObject> list = new ArrayList<>();
list.add(jsonObject1);
list.add(jsonObject2);
list.add(jsonObject3);
list.add(jsonObject4);
list.add(jsonObject5);
list.add(jsonObject6);
list.add(jsonObject7);
list.add(jsonObject8);
list.add(jsonObject9);
list.add(jsonObject10);
String broadCast1 = "{'condition_id':'1','firstAction':'進入','secondAction':'購物','topic':'進入+購物'}";
String broadCast2 = "{'condition_id':'2','firstAction':'進入','secondAction':'退出','topic':'進入+退出'}";
JSONObject broadCastJson1 = JSON.parseObject(broadCast1);
JSONObject broadCastJson2 = JSON.parseObject(broadCast2);
List<JSONObject> list2 = new ArrayList<>();
list2.add(broadCastJson1);
list2.add(broadCastJson2);
final MapStateDescriptor<String, JSONObject> mapStateDes = new MapStateDescriptor<>(
"state",
String.class,
JSONObject.class);
// 自定義廣播流(單例)
BroadcastStream<JSONObject> broadcastStream = env.addSource(new RichSourceFunction<JSONObject>() {
private volatile boolean isRunning = true;
/**
* 數據源:模擬每30秒隨機更新一次攔截的關鍵字
* @param ctx
* @throws Exception
*/
@Override
public void run(SourceContext<JSONObject> ctx) throws Exception {
System.out.println("list2 = " + list2);
while (isRunning) {
TimeUnit.SECONDS.sleep(1);
Random random = new Random();
//todo 定時刷新,睡眠6秒
int i = random.nextInt(2);
ctx.collect(list2.get(i));
System.out.println("發送的字符串:" + list.get(i));
}
}
@Override
public void cancel() {
isRunning = false;
}
}).broadcast(mapStateDes);
// 自定義數據流(單例)
DataStream<JSONObject> dataStream = env.addSource(new RichSourceFunction<JSONObject>() {
private volatile boolean isRunning = true;
/**
* 模擬每3秒隨機產生1條消息
* @param ctx
* @throws Exception
*/
@Override
public void run(SourceContext<JSONObject> ctx) throws Exception {
while (isRunning) {
Random random = new Random();
TimeUnit.SECONDS.sleep(3);
int i = random.nextInt(10);
ctx.collect(list.get(i));
// System.out.println("kafka接收數據:" + dataSet[seed]);
}
}
@Override
public void cancel() {
isRunning = false;
}
}).setParallelism(1);
dataStream.keyBy(new KeySelector<JSONObject, String>() {
@Override
public String getKey(JSONObject value) throws Exception {
return value.getString("id");
}
}).connect(broadcastStream).process(new KeyedBroadcastProcessFunction<String, JSONObject, JSONObject, String>() {
private ValueState<String> prevActionState;
//todo 狀態
private transient MapState<String, JSONObject> infoState;
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
prevActionState = getRuntimeContext().getState(
new ValueStateDescriptor<>(
"lastAction",
String.class));
infoState = getRuntimeContext().getMapState(
new MapStateDescriptor<String, JSONObject>(
"infoState",
String.class,
JSONObject.class));
}
@Override
public void processElement(JSONObject value, ReadOnlyContext ctx, Collector<String> out) throws Exception {
JSONObject getBroad = ctx.getBroadcastState(mapStateDes).get("a");
String currentKey = ctx.getCurrentKey();
String prevAction = prevActionState.value();
if (prevAction!=null ){
System.out.println("prevAction = " + prevAction);
}
if (infoState !=null){
System.out.println("currentKey:"+currentKey+",infoState = " + infoState.get("bbb"));
}
System.out.println("getBroad = " + getBroad);
System.out.println("接收日誌:" + value);
infoState.put("bbb",getBroad);
prevActionState.update(value.toString());
}
@Override
public void processBroadcastElement(JSONObject value, Context ctx, Collector<String> out) throws Exception {
BroadcastState<String, JSONObject> broadcastState = ctx.getBroadcastState(mapStateDes);
broadcastState.put("a", value);
// System.out.println("進入的廣播變量:value = " + value);
}
});
env.execute("BroadCastWordCountExample");
}
}
說明:上面的案例沒有寫輸出(sink),運行後直接觀察控制台打印的日誌即可。注意:使用鍵控狀態(keyed state)之前一定要先 keyBy!
第二個案例:
package aliyun.product.customer_analysis_system
import org.apache.flink.api.common.functions.{FlatMapFunction, RichFlatMapFunction}
import org.apache.flink.api.common.state.{ListState, ListStateDescriptor, ValueState, ValueStateDescriptor}
import org.apache.flink.api.scala.createTypeInformation
import org.apache.flink.configuration.Configuration
import org.apache.flink.runtime.state.{FunctionInitializationContext, FunctionSnapshotContext}
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.util.Collector
object SumUserCount {
/*class CheckpointCountDemo extends FlatMapFunction[(String,Long),(String,Long)] with ListCheckpointed[(String, Long)]{
//接入的數據量
private var numberCount :Long = 0L
// 統計方法
override def flatMap(value: (String, Long), out: Collector[(String, Long)]): Unit = {
numberCount += 1
out.collect(value._1,numberCount)
}
override def snapshotState(checkpointId: Long, timestamp: Long): util.List[Long] = {
Collections.singletonList(numberCount)
}
//重置
override def restoreState(stateList: util.List[Long]): Unit = {
numberCount =0L
// for (count <- stateList){
// numberCount += count
// }
}
}
*/
/**
 * Flat-map function that counts elements twice: per key (keyed state) and per operator
 * instance (operator list state), emitting `(key, perKeyCount, operatorCount)` for every input.
 *
 * @param numElement constructor argument kept for callers; it is not read by this class
 */
class CheckpointCount(val numElement: Int) extends FlatMapFunction[(Int, Long), (Int, Long, Long)] with CheckpointedFunction {
  // Number of elements seen by this operator instance.
  private var operatorCount: Long = _
  // Per-key element count.
  private var keyedState: ValueState[Long] = _
  // Checkpointed copy of operatorCount.
  private var operatorState: ListState[Long] = _

  /**
   * Increments both counters and emits the updated values.
   *
   * @param value input element; only its first field is forwarded
   * @param out   collector receiving `(value._1, perKeyCount, operatorCount)`
   */
  override def flatMap(value: (Int, Long), out: Collector[(Int, Long, Long)]): Unit = {
    // NOTE(review): for a key without state this relies on Scala unboxing a null
    // value() to 0L - confirm if the state type ever changes.
    val count = keyedState.value() + 1
    keyedState.update(count)
    // Update the operator-local counter.
    operatorCount = operatorCount + 1
    // Emit the result.
    out.collect((value._1, count, operatorCount))
  }

  /** On snapshot, replaces the list state content with the current operator count. */
  override def snapshotState(context: FunctionSnapshotContext): Unit = {
    operatorState.clear()
    operatorState.add(operatorCount)
  }

  /**
   * Registers both state handles and, when the function is restored from a snapshot,
   * rebuilds `operatorCount` from the restored list state.
   */
  override def initializeState(context: FunctionInitializationContext): Unit = {
    keyedState = context.getKeyedStateStore
      .getState(new ValueStateDescriptor[Long]("keyedState", createTypeInformation[Long]))
    // NOTE(review): the operator list state reuses the name "keyedState"; it is kept
    // unchanged here so previously written snapshots stay readable - consider renaming.
    operatorState = context.getOperatorStateStore
      .getListState(new ListStateDescriptor[Long]("keyedState", createTypeInformation[Long]))
    if (context.isRestored) {
      // Previously the restore step was commented out, so the counter restarted at 0
      // after recovery even though it had been snapshotted.
      var restored = 0L
      val restoredValues = operatorState.get()
      if (restoredValues != null) {
        val it = restoredValues.iterator()
        while (it.hasNext) {
          restored += it.next()
        }
      }
      operatorCount = restored
    }
  }
}
/**
 * Keyed flat-map that keeps a running `(key, total)` pair in value state.
 *
 * For every input `(key, (innerKey, innerCount))` the total is increased by one unless
 * `innerCount` is greater than 1, in which case it is left unchanged. The updated pair is
 * stored and emitted on every input.
 */
class CountWindow extends RichFlatMapFunction[(String, (String, Long)), (String, Long)] {
  private var count: ValueState[(String, Long)] = _

  /** Registers the "count" value state. */
  override def open(parameters: Configuration): Unit = {
    val descriptor =
      new ValueStateDescriptor[(String, Long)]("count", createTypeInformation[(String, Long)])
    count = getRuntimeContext.getState(descriptor)
  }

  /**
   * Updates the stored total for the current key and emits it.
   *
   * @param in  input pair `(key, (innerKey, innerCount))`
   * @param out collector receiving the updated `(key, total)`
   */
  override def flatMap(in: (String, (String, Long)), out: Collector[(String, Long)]): Unit = {
    // No stored value yet: start from zero for this key.
    val previous: (String, Long) = Option(count.value()).getOrElse((in._1, 0L))
    // Inner counts above 1 do not contribute to the total.
    val step = if (in._2._2 > 1) 0L else 1L
    val updated = (in._1, previous._2 + step)
    count.update(updated)
    out.collect(updated)
  }
}
/**
 * Keyed flat-map that counts how many elements have been seen for the current key.
 *
 * Each input `(key, payload)` increases the stored count by one; the updated
 * `(key, count)` pair is stored and emitted. The payload is not read.
 */
class ByKeyCount extends RichFlatMapFunction[(String, String), (String, Long)] {
  private var count: ValueState[(String, Long)] = _

  /** Registers the "count" value state. */
  override def open(parameters: Configuration): Unit = {
    val descriptor =
      new ValueStateDescriptor[(String, Long)]("count", createTypeInformation[(String, Long)])
    count = getRuntimeContext.getState(descriptor)
  }

  /**
   * Increments and emits the per-key count.
   *
   * @param in  input pair `(key, payload)`
   * @param out collector receiving the updated `(key, count)`
   */
  override def flatMap(in: (String, String), out: Collector[(String, Long)]): Unit = {
    // A missing state value means this is the first element for the key.
    val seenSoFar: Long = count.value() match {
      case null   => 0L
      case stored => stored._2
    }
    val updated = (in._1, seenSoFar + 1)
    count.update(updated)
    out.collect(updated)
  }
}
/**
 * Runs a small pipeline over six hard-coded `(key, payload)` pairs.
 *
 * Stage one (`ByKeyCount`) emits a running count per key. Stage two re-keys everything
 * under the constant key "sum" and feeds it to `CountWindow`, which only increments for
 * inputs whose per-key count is not greater than 1. The printed values are therefore
 * `("sum", n)` pairs whose `n` grows once per distinct key (up to 3 for keys A, C, D);
 * the order of the printed lines presumably depends on the job's parallelism.
 *
 * @param args command-line arguments (not read)
 */
def main(args: Array[String]): Unit = {
  val env = StreamExecutionEnvironment.getExecutionEnvironment

  val samples = env.fromElements(
    ("A", "test1"),
    ("A", "test2"),
    ("C", "test3"),
    ("D", "2"),
    ("A", "1"),
    ("C", "1")
  )

  // Running count per original key.
  val perKeyCounts = samples
    .keyBy(_._1)
    .flatMap(new ByKeyCount())

  // Route every per-key result to the single key "sum".
  val tagged = perKeyCounts.map(perKey => ("sum", perKey))

  tagged
    .keyBy(_._1)
    .flatMap(new CountWindow())
    .print()

  env.execute("ExampleManagedState")
}
}