版本:Flink 1.9.2,Java 1.8
package DistributedCache;
import org.apache.commons.io.FileUtils;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
 * Demonstrates Flink's distributed cache in a streaming job: a local file is registered
 * under an alias, loaded once per map-function instance in {@code open}, and its lines are
 * appended to the elements of a small integer stream before printing.
 *
 * <p>Author: you guess. Created: 2020/6/24 20:20. Version: 1.0.
 */
public class DistributedCacheTest1 {

    /**
     * Builds and runs the demo job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job construction or execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Works, but the lookup below would then have to be getFile("00ds1.txt"):
        // env.registerCachedFile("/Users/abc/downloads/00ds1.txt", "00ds1.txt");

        // Works: the second argument is the alias used for the lookup in open().
        env.registerCachedFile("/Users/abc/downloads/00ds1.txt", "test.txt");

        // Fails: the path has to point at the file itself, not just its directory.
        // env.registerCachedFile("/Users/abc/downloads/", "00ds1.txt");

        // Finite source that emits three integers and then returns.
        DataStreamSource<Integer> source1 = env.addSource(new SourceFunction<Integer>() {
            @Override
            public void run(SourceContext<Integer> ctx) throws Exception {
                ctx.collect(100);
                ctx.collect(200);
                ctx.collect(300);
            }

            @Override
            public void cancel() {
                // Nothing to interrupt: run() returns right after emitting its three values.
            }
        });

        source1.map(new RichMapFunction<Integer, Object>() {
            /** Lines of the cached file; replaced in open(). */
            private List<String> lines = new ArrayList<>();
            /** Index of the next line to append; private to this function instance. */
            private int i = 0;

            @Override
            public void open(Configuration parameters) throws Exception {
                // The lookup must use the alias given at registration time; the alias may
                // or may not equal the real file name. Using any other name fails with
                // IllegalArgumentException: File with name '00ds1.txt' is not available.
                // Did you forget to register the file?
                File cache1 = getRuntimeContext().getDistributedCache().getFile("test.txt");
                // Pin the charset so decoding does not depend on each machine's default.
                lines = FileUtils.readLines(cache1, "UTF-8");
                //lines.forEach(System.out::println);
                //System.out.println("-----------------");
            }

            @Override
            public Object map(Integer value) throws Exception {
                if (lines.isEmpty()) {
                    throw new IllegalStateException(
                            "Cached file registered as 'test.txt' contains no lines");
                }
                // Cycle through the lines so that an instance receiving more elements than
                // the file has lines does not run off the end of the list.
                String line = lines.get(i);
                i = (i + 1) % lines.size();
                return value + line;
            }
        }).print();

        env.execute("Flink DistributedCacheTest1 ");
    } // main
}
/*
輸出:
2> 200abc
3> 300abc
1> 100abc
*/
源碼:registerCachedFile 實際上是把 (name, DistributedCacheEntry) 包成 Tuple2,存入 cacheFile 列表
// NOTE: abridged excerpt -- the "......" lines appear to stand for members omitted from this listing.
@Public
public abstract class StreamExecutionEnvironment {
......
// Registered cache files as (name, entry) pairs; registerCachedFile below appends to this list.
protected final List<Tuple2<String, DistributedCache.DistributedCacheEntry>> cacheFile = new ArrayList<>();
......
/**
* Registers a file at the distributed cache under the given name. The file will be accessible
* from any user-defined function in the (distributed) runtime under a local path. Files
* may be local files (which will be distributed via BlobServer), or files in a distributed file system.
* The runtime will copy the files temporarily to a local cache, if needed.
*
* <p>The {@link org.apache.flink.api.common.functions.RuntimeContext} can be obtained inside UDFs via
* {@link org.apache.flink.api.common.functions.RichFunction#getRuntimeContext()} and provides access
* to the {@link org.apache.flink.api.common.cache.DistributedCache} via
* {@link org.apache.flink.api.common.functions.RuntimeContext#getDistributedCache()}.
*
* @param filePath The path of the file, as a URI (e.g. "file:///some/path" or "hdfs://host:port/and/path")
* @param name The name under which the file is registered.
* @param executable flag indicating whether the file should be executable
*/
public void registerCachedFile(String filePath, String name, boolean executable) {
// Wrap the path and executable flag in a DistributedCacheEntry and store it keyed by the given name.
this.cacheFile.add(new Tuple2<>(name, new DistributedCache.DistributedCacheEntry(filePath, executable)));
}
}