1 水位線(WaterMark)是一個時間戳,等於當前到達的消息最大時間戳減去配置的延遲時間,水位線是單調遞增的,如果有晚到達的早消息也不會更新水位線,因爲消息最大時間戳沒變
2 新消息到達時,才計算新的水位線,如果水位線大於等於窗口的endTime(左閉右開)則觸發窗口計算,反之繼續接收後續消息;消息的EventTime大於等於窗口beginTime則保留,反之被丟棄
3 消息的EventTime小於水位線時不一定被丟棄;只有當消息所屬的窗口已經觸發計算(水位線大於等於該窗口的endTime,且未設置allowedLateness)時,消息纔會被丟棄,此時消息的EventTime必然小於當前尚未觸發窗口的beginTime
4 與window一起使用,可以對亂序到達的消息排序後再處理
5 引入水位線機制的目的是延遲窗口觸發計算的時間,使晚到達的早的消息儘可能也能被保留,用於窗口計算,提高數據準確性
版本:flink1.9.2,java1.8
1 滾動窗口,延遲5s,window內不排序 :
package WaterMark;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import java.util.Date;
/**
 * Demo of event-time tumbling windows with a periodic watermark that lags the largest
 * event time seen so far by a fixed delay.
 *
 * <p>A finite in-memory source emits out-of-order records of the form
 * {@code "key,eventTimeMillis"}; the job keys them by the first field, groups them into
 * 5-second tumbling event-time windows and prints each window's contents in arrival order.
 *
 * @Author you guess
 * @Date 2020/6/20 15:55
 * @Version 1.0
 */
public class WaterMarkTest {

    /** Records emitted by the demo source, in arrival order ("key,eventTimeMillis"). */
    private static final String[] EVENTS = {
        "hello,1553503185000",
        "hello,1553503186000",
        "hello,1553503187000",
        "hello,1553503188000",
        "hello,1553503189000",
        "hello,1553503190000",
        "hello,1553503191000",
        "hello,1553503186000",
        "hello,1553503187000",
        "hello,1553503185000",
        "hello,1553503184000", // dropped (arrives too late)
        "hello,1553503183000", // dropped (arrives too late)
        "hello,1553503190000",
        "hello,1553503192000",
        "hello,1553503193000",
        "hello,1553503194000",
        "hello,1553503195000",
        "hello,1553503196000",
        "hello,1553503197000",
        "hello,1553503198000",
        "hello,1553503199000",
        "hello,1553503200000",
        "hello,1553503201000",
        "hello,1553503202000",
    };

    /** Pause between two consecutive records, in milliseconds. */
    private static final long EMIT_INTERVAL_MILLIS = 1000L;

    /**
     * Builds and runs the demo job in a local Flink environment.
     *
     * @param args unused
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        DataStream<String> dataStream = env.addSource(new SourceFunction<String>() {
            /** Cleared by {@link #cancel()} so that {@link #run} stops emitting. */
            private volatile boolean running = true;

            /**
             * Emits {@link #EVENTS} one by one, pausing between records (no pause after
             * the last one), and returns early once the source has been cancelled.
             */
            @Override
            public void run(SourceContext<String> ctx) throws Exception {
                for (int i = 0; i < EVENTS.length; i++) {
                    if (i > 0) {
                        Thread.sleep(EMIT_INTERVAL_MILLIS);
                    }
                    if (!running) {
                        return;
                    }
                    ctx.collect(EVENTS[i]);
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        }, "source1").assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<String>() { // periodic watermark
            /** Largest event time seen so far; never decreases. */
            long currentTimeStamp = 0L;
            /** How far the watermark lags behind {@code currentTimeStamp}: 5 s. */
            long maxDelayAllowed = 5000L;
            /** Watermark computed by the most recent {@link #getCurrentWatermark()} call. */
            long currentWaterMark;

            /**
             * Invoked periodically by Flink (every 200 ms by default according to the
             * original author's note), independently of record arrival.
             *
             * @return the largest event time seen so far minus the configured delay
             */
            @Override
            public Watermark getCurrentWatermark() {
                currentWaterMark = currentTimeStamp - maxDelayAllowed;
                System.out.println("當前水位線:" + currentWaterMark);
                return new Watermark(currentWaterMark);
            }

            /**
             * Invoked once per record. Parses the event time from the second
             * comma-separated field and raises {@code currentTimeStamp} if the record is
             * newer than anything seen before; an older (late) record leaves it unchanged,
             * which keeps the watermark monotonic.
             *
             * @param s raw record, {@code "key,eventTimeMillis"}
             * @param previousElementTimestamp timestamp previously attached to the record; unused
             * @return the record's event time in milliseconds
             */
            @Override
            public long extractTimestamp(String s, long previousElementTimestamp) {
                String[] arr = s.split(",");
                long eventTime = Long.parseLong(arr[1]);
                currentTimeStamp = Math.max(eventTime, currentTimeStamp);
                System.out.println("Key:" + arr[0] + ",EventTime:" + eventTime + ",currentTimeStamp:" + currentTimeStamp);
                return eventTime;
            }
        });

        dataStream.map(new MapFunction<String, Tuple2<String, String>>() {
            /** Splits {@code "key,eventTimeMillis"} into a (key, eventTimeMillis) pair. */
            @Override
            public Tuple2<String, String> map(String s) throws Exception {
                String[] parts = s.split(","); // split once, reuse both fields
                return new Tuple2<>(parts[0], parts[1]);
            }
        }).keyBy(0)
                // .timeWindow(Time.seconds(5)) could be used instead of .window(...) below;
                // the window start and end are available either way.
                .window(TumblingEventTimeWindows.of(Time.seconds(5)))
                .apply(new WindowFunction<Tuple2<String, String>, String, Tuple, TimeWindow>() {
                    /**
                     * Prints the window bounds, then forwards every element of the window
                     * in arrival order (no sorting).
                     */
                    @Override
                    public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<String, String>> input, Collector<String> out) throws Exception {
                        System.out.println("當前窗口開始時間[" + window.getStart() + ",結束時間" + window.getEnd() + ")");
                        for (Tuple2<String, String> element : input) {
                            out.collect(" - " + element.f1);
                        }
                    }
                }).print();

        env.execute("Flink WaterMark Test1");
    }
}
輸出:
Key:hello,EventTime:1553503185000,currentTimeStamp:1553503185000
當前水位線:1553503180000
當前水位線:1553503180000
當前水位線:1553503180000
當前水位線:1553503180000
Key:hello,EventTime:1553503186000,currentTimeStamp:1553503186000
當前水位線:1553503181000
當前水位線:1553503181000
當前水位線:1553503181000
當前水位線:1553503181000
當前水位線:1553503181000
Key:hello,EventTime:1553503187000,currentTimeStamp:1553503187000
當前水位線:1553503182000
當前水位線:1553503182000
當前水位線:1553503182000
當前水位線:1553503182000
當前水位線:1553503182000
Key:hello,EventTime:1553503188000,currentTimeStamp:1553503188000
當前水位線:1553503183000
當前水位線:1553503183000
當前水位線:1553503183000
當前水位線:1553503183000
當前水位線:1553503183000
Key:hello,EventTime:1553503189000,currentTimeStamp:1553503189000
當前水位線:1553503184000
當前水位線:1553503184000
當前水位線:1553503184000
當前水位線:1553503184000
當前水位線:1553503184000
Key:hello,EventTime:1553503190000,currentTimeStamp:1553503190000
當前水位線:1553503185000
當前水位線:1553503185000
當前水位線:1553503185000
當前水位線:1553503185000
當前水位線:1553503185000
Key:hello,EventTime:1553503191000,currentTimeStamp:1553503191000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
Key:hello,EventTime:1553503186000,currentTimeStamp:1553503191000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
Key:hello,EventTime:1553503187000,currentTimeStamp:1553503191000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
Key:hello,EventTime:1553503185000,currentTimeStamp:1553503191000 --消息EventTime小於水位線,消息EventTime大於等於窗口beginTime,保留
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
Key:hello,EventTime:1553503184000,currentTimeStamp:1553503191000 --消息EventTime小於窗口beginTime,被丟棄
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
Key:hello,EventTime:1553503183000,currentTimeStamp:1553503191000 --消息EventTime小於窗口beginTime,被丟棄
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
Key:hello,EventTime:1553503190000,currentTimeStamp:1553503191000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
Key:hello,EventTime:1553503192000,currentTimeStamp:1553503192000
當前水位線:1553503187000
當前水位線:1553503187000
當前水位線:1553503187000
當前水位線:1553503187000
當前水位線:1553503187000
Key:hello,EventTime:1553503193000,currentTimeStamp:1553503193000
當前水位線:1553503188000
當前水位線:1553503188000
當前水位線:1553503188000
當前水位線:1553503188000
當前水位線:1553503188000
Key:hello,EventTime:1553503194000,currentTimeStamp:1553503194000
當前水位線:1553503189000
當前水位線:1553503189000
當前水位線:1553503189000
當前水位線:1553503189000
當前水位線:1553503189000
Key:hello,EventTime:1553503195000,currentTimeStamp:1553503195000
當前水位線:1553503190000
當前水位線:1553503190000
當前窗口開始時間[1553503185000,結束時間1553503190000) -- 當前水位線:1553503190000 >= 窗口結束時間1553503190000 才觸發計算
4> - 1553503185000
4> - 1553503186000
4> - 1553503187000
4> - 1553503188000
4> - 1553503189000
4> - 1553503186000 --放在第一個窗口裏
4> - 1553503187000 --放在第一個窗口裏
4> - 1553503185000 --放在第一個窗口裏,消息1553503184000,消息1553503183000被丟棄
當前水位線:1553503190000
當前水位線:1553503190000
當前水位線:1553503190000
Key:hello,EventTime:1553503196000,currentTimeStamp:1553503196000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
Key:hello,EventTime:1553503197000,currentTimeStamp:1553503197000
當前水位線:1553503192000
當前水位線:1553503192000
當前水位線:1553503192000
當前水位線:1553503192000
當前水位線:1553503192000
Key:hello,EventTime:1553503198000,currentTimeStamp:1553503198000
當前水位線:1553503193000
當前水位線:1553503193000
當前水位線:1553503193000
當前水位線:1553503193000
當前水位線:1553503193000
Key:hello,EventTime:1553503199000,currentTimeStamp:1553503199000
當前水位線:1553503194000
當前水位線:1553503194000
當前水位線:1553503194000
當前水位線:1553503194000
當前水位線:1553503194000
Key:hello,EventTime:1553503200000,currentTimeStamp:1553503200000
當前水位線:1553503195000
當前窗口開始時間[1553503190000,結束時間1553503195000) -- 當前水位線:1553503195000 >= 窗口結束時間1553503195000 才觸發計算
4> - 1553503190000
4> - 1553503191000
4> - 1553503190000
4> - 1553503192000
4> - 1553503193000
4> - 1553503194000
當前水位線:1553503195000
當前水位線:1553503195000
當前水位線:1553503195000
當前水位線:1553503195000
Key:hello,EventTime:1553503201000,currentTimeStamp:1553503201000
當前水位線:1553503196000
當前水位線:1553503196000
當前水位線:1553503196000
當前水位線:1553503196000
當前水位線:1553503196000
Key:hello,EventTime:1553503202000,currentTimeStamp:1553503202000 --末尾元素入窗後數據源結束,Flink會發出值爲Long.MAX_VALUE的最終水位線,因此剩餘的所有窗口(即使水位線1553503197000尚未達到窗口endTime)也會觸發計算
當前水位線:1553503197000
當前窗口開始時間[1553503195000,結束時間1553503200000)
4> - 1553503195000
4> - 1553503196000
4> - 1553503197000
4> - 1553503198000
4> - 1553503199000
當前窗口開始時間[1553503200000,結束時間1553503205000)
4> - 1553503200000
4> - 1553503201000
4> - 1553503202000
2 修改1中的代碼,在window中加入排序:
dataStream.map(new MapFunction<String, Tuple2<String, String>>() {
    /** Splits "key,eventTimeMillis" into a (key, eventTimeMillis) pair. */
    @Override
    public Tuple2<String, String> map(String s) throws Exception {
        String[] parts = s.split(","); // split once, reuse both fields
        return new Tuple2<>(parts[0], parts[1]);
    }
}).keyBy(0)
        // .timeWindow(Time.seconds(5)) could be used instead of .window(...) below;
        // the window start and end are available either way.
        .window(TumblingEventTimeWindows.of(Time.seconds(5)))
        .apply(new WindowFunction<Tuple2<String, String>, String, Tuple, TimeWindow>() {
            /**
             * Prints the window bounds, then emits the window's elements sorted by
             * ascending event time.
             */
            @Override
            public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<String, String>> input, Collector<String> out) throws Exception {
                System.out.println("當前窗口開始時間[" + window.getStart() + ",結束時間" + window.getEnd() + ")");
                List<Tuple2<String, String>> list = new ArrayList<>();
                input.forEach(list::add);
                // Compare the timestamps as numbers: comparing the raw strings is
                // lexicographic and only agrees with time order while every value has
                // the same number of digits.
                list.sort((o1, o2) -> Long.compare(Long.parseLong(o1.f1), Long.parseLong(o2.f1)));
                list.forEach(o -> out.collect(" - " + o.f1));
            }
        }).print();
env.execute("Flink WaterMark Test2");
輸出:
............
Key:hello,EventTime:1553503195000,currentTimeStamp:1553503195000
當前水位線:1553503190000
當前窗口開始時間[1553503185000,結束時間1553503190000) -- 可見已對亂序到達的消息做了升序排序和處理
4> - 1553503185000
4> - 1553503185000
4> - 1553503186000
4> - 1553503186000
4> - 1553503187000
4> - 1553503187000
4> - 1553503188000
4> - 1553503189000
當前水位線:1553503190000
當前水位線:1553503190000
當前水位線:1553503190000
當前水位線:1553503190000
Key:hello,EventTime:1553503196000,currentTimeStamp:1553503196000
當前水位線:1553503191000
............
Key:hello,EventTime:1553503200000,currentTimeStamp:1553503200000
當前水位線:1553503195000
當前窗口開始時間[1553503190000,結束時間1553503195000) -- 可見已對亂序到達的消息做了升序排序和處理
4> - 1553503190000
4> - 1553503190000
4> - 1553503191000
4> - 1553503192000
4> - 1553503193000
4> - 1553503194000
當前水位線:1553503195000
3 修改1的代碼,不延遲,其他代碼不動:
long maxDelayAllowed = 0L; // no delay (uppercase L suffix so the literal is not misread as 01)
輸出:
Key:hello,EventTime:1553503185000,currentTimeStamp:1553503185000
當前水位線:1553503185000
當前水位線:1553503185000
當前水位線:1553503185000
當前水位線:1553503185000
Key:hello,EventTime:1553503186000,currentTimeStamp:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
當前水位線:1553503186000
Key:hello,EventTime:1553503187000,currentTimeStamp:1553503187000
當前水位線:1553503187000
當前水位線:1553503187000
當前水位線:1553503187000
當前水位線:1553503187000
當前水位線:1553503187000
Key:hello,EventTime:1553503188000,currentTimeStamp:1553503188000
當前水位線:1553503188000
當前水位線:1553503188000
當前水位線:1553503188000
當前水位線:1553503188000
當前水位線:1553503188000
Key:hello,EventTime:1553503189000,currentTimeStamp:1553503189000
當前水位線:1553503189000
當前水位線:1553503189000
當前水位線:1553503189000
當前水位線:1553503189000
當前水位線:1553503189000
Key:hello,EventTime:1553503190000,currentTimeStamp:1553503190000
當前水位線:1553503190000
當前窗口開始時間[1553503185000,結束時間1553503190000) -- 當前水位線:1553503190000 >= 窗口結束時間1553503190000 才觸發計算
4> - 1553503185000
4> - 1553503186000
4> - 1553503187000
4> - 1553503188000
4> - 1553503189000
當前水位線:1553503190000
當前水位線:1553503190000
當前水位線:1553503190000
當前水位線:1553503190000
Key:hello,EventTime:1553503191000,currentTimeStamp:1553503191000 -- 在第二個窗口
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
Key:hello,EventTime:1553503186000,currentTimeStamp:1553503191000 -- 在第二個窗口被丟棄,因爲EventTime小於第二個窗口的beginTime(1553503190000),而其所屬的第一個窗口已經觸發計算
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
Key:hello,EventTime:1553503187000,currentTimeStamp:1553503191000 -- 在第二個窗口被丟棄
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
Key:hello,EventTime:1553503185000,currentTimeStamp:1553503191000 -- 在第二個窗口被丟棄
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
Key:hello,EventTime:1553503184000,currentTimeStamp:1553503191000 -- 在第二個窗口被丟棄
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
Key:hello,EventTime:1553503183000,currentTimeStamp:1553503191000 -- 在第二個窗口被丟棄
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
Key:hello,EventTime:1553503190000,currentTimeStamp:1553503191000 -- EventTime大於等於窗口beginTime,保留
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
當前水位線:1553503191000
Key:hello,EventTime:1553503192000,currentTimeStamp:1553503192000
當前水位線:1553503192000
當前水位線:1553503192000
當前水位線:1553503192000
當前水位線:1553503192000
當前水位線:1553503192000
Key:hello,EventTime:1553503193000,currentTimeStamp:1553503193000
當前水位線:1553503193000
當前水位線:1553503193000
當前水位線:1553503193000
當前水位線:1553503193000
當前水位線:1553503193000
Key:hello,EventTime:1553503194000,currentTimeStamp:1553503194000
當前水位線:1553503194000
當前水位線:1553503194000
當前水位線:1553503194000
當前水位線:1553503194000
當前水位線:1553503194000
Key:hello,EventTime:1553503195000,currentTimeStamp:1553503195000
當前水位線:1553503195000
當前窗口開始時間[1553503190000,結束時間1553503195000)
4> - 1553503190000
4> - 1553503191000
4> - 1553503190000
4> - 1553503192000
4> - 1553503193000
4> - 1553503194000
當前水位線:1553503195000
當前水位線:1553503195000
當前水位線:1553503195000
當前水位線:1553503195000
Key:hello,EventTime:1553503196000,currentTimeStamp:1553503196000
當前水位線:1553503196000
當前水位線:1553503196000
當前水位線:1553503196000
當前水位線:1553503196000
當前水位線:1553503196000
Key:hello,EventTime:1553503197000,currentTimeStamp:1553503197000
當前水位線:1553503197000
當前水位線:1553503197000
當前水位線:1553503197000
當前水位線:1553503197000
當前水位線:1553503197000
Key:hello,EventTime:1553503198000,currentTimeStamp:1553503198000
當前水位線:1553503198000
當前水位線:1553503198000
當前水位線:1553503198000
當前水位線:1553503198000
當前水位線:1553503198000
Key:hello,EventTime:1553503199000,currentTimeStamp:1553503199000
當前水位線:1553503199000
當前水位線:1553503199000
當前水位線:1553503199000
當前水位線:1553503199000
當前水位線:1553503199000
Key:hello,EventTime:1553503200000,currentTimeStamp:1553503200000
當前水位線:1553503200000
當前窗口開始時間[1553503195000,結束時間1553503200000)
4> - 1553503195000
4> - 1553503196000
4> - 1553503197000
4> - 1553503198000
4> - 1553503199000
當前水位線:1553503200000
當前水位線:1553503200000
當前水位線:1553503200000
當前水位線:1553503200000
Key:hello,EventTime:1553503201000,currentTimeStamp:1553503201000
當前水位線:1553503201000
當前水位線:1553503201000
當前水位線:1553503201000
當前水位線:1553503201000
當前水位線:1553503201000
Key:hello,EventTime:1553503202000,currentTimeStamp:1553503202000
當前水位線:1553503202000
當前窗口開始時間[1553503200000,結束時間1553503205000)
4> - 1553503200000
4> - 1553503201000
4> - 1553503202000
參考:
https://blog.csdn.net/xorxos/article/details/80715113