Flink 分佈式緩存 實例

版本:Flink 1.9.2,Java 1.8

package DistributedCache;

import org.apache.commons.io.FileUtils;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
 * Demonstrates Flink's distributed cache in a streaming job.
 *
 * <p>A local file is registered with the execution environment under an alias, looked up
 * by that alias inside {@code RichMapFunction#open}, and its lines are appended to the
 * elements emitted by a small bounded source.
 *
 * <p>Created 2020/6/24 20:20.
 *
 * @author you guess
 * @version 1.0
 */
public class DistributedCacheTest1 {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Also valid: register the file under its own name. The lookup in open() would then
        // have to be getFile("00ds1.txt").
        //env.registerCachedFile("/Users/abc/downloads/00ds1.txt", "00ds1.txt");

        // Valid: the second argument is only an alias and need not match the file name.
        env.registerCachedFile("/Users/abc/downloads/00ds1.txt", "test.txt");

        // Fails: the path must point at the file itself, not just at its directory.
        //env.registerCachedFile("/Users/abc/downloads/", "00ds1.txt");

        DataStreamSource<Integer> source1 = env.addSource(new SourceFunction<Integer>() {
            @Override
            public void run(SourceContext<Integer> ctx) throws Exception {
                ctx.collect(100);
                ctx.collect(200);
                ctx.collect(300);
            }

            @Override
            public void cancel() {
                // Nothing to interrupt: run() emits three elements and returns on its own.
            }
        });

        source1.map(new RichMapFunction<Integer, String>() {
            /** Lines of the cached file, loaded once per function instance in open(). */
            private List<String> lines = new ArrayList<>();

            /** Index of the next cached line this function instance will consume. */
            private int i = 0;

            @Override
            public void open(Configuration parameters) throws Exception {
                // The lookup key must be the alias given to registerCachedFile(); it may or
                // may not equal the real file name. Asking for an unregistered name fails with
                // IllegalArgumentException: File with name '00ds1.txt' is not available.
                // Did you forget to register the file?
                File cache1 = getRuntimeContext().getDistributedCache().getFile("test.txt");

                // Decode explicitly as UTF-8 so the result does not depend on the platform
                // default charset of whichever machine runs this task.
                lines = FileUtils.readLines(cache1, "UTF-8");
                //lines.forEach(System.out::println);
                //System.out.println("-----------------");
            }

            /**
             * Appends the next unread cached line to {@code value}.
             *
             * @param value the element received from the source
             * @return {@code value} followed by the next cached line, or {@code value} alone
             *         once this function instance has consumed every cached line
             */
            @Override
            public String map(Integer value) throws Exception {
                // Guard the index: the stream may deliver more elements to this instance than
                // the file has lines (or the file may be empty), which previously ended the
                // job with an IndexOutOfBoundsException.
                if (i >= lines.size()) {
                    return String.valueOf(value);
                }
                return value + lines.get(i++);
            }
        }).print();

        env.execute("Flink  DistributedCacheTest1 ");

    }//main
}

/*
輸出(前綴 N> 為並行子任務編號,各行的先後順序不固定):
2> 200abc
3> 300abc
1> 100abc
*/


源碼:registerCachedFile 實際上是把 (name, DistributedCacheEntry) 包成 Tuple2,加入 cacheFile 列表

@Public
public abstract class StreamExecutionEnvironment {
......
	protected final List<Tuple2<String, DistributedCache.DistributedCacheEntry>> cacheFile = new ArrayList<>();

......
	/**
	 * Records a file for the distributed cache under the given name.
	 *
	 * <p>Once registered, the file can be reached from any user-defined function in the
	 * (distributed) runtime through a local path. The file may be a local file (distributed
	 * via BlobServer) or a file in a distributed file system; the runtime copies it
	 * temporarily to a local cache if needed.
	 *
	 * <p>Inside a UDF, obtain the {@link org.apache.flink.api.common.functions.RuntimeContext}
	 * through {@link org.apache.flink.api.common.functions.RichFunction#getRuntimeContext()},
	 * then reach the {@link org.apache.flink.api.common.cache.DistributedCache} through
	 * {@link org.apache.flink.api.common.functions.RuntimeContext#getDistributedCache()}.
	 *
	 * @param filePath path of the file as a URI (e.g. "file:///some/path" or "hdfs://host:port/and/path")
	 * @param name name under which the file is registered
	 * @param executable whether the file should be executable
	 */
	public void registerCachedFile(String filePath, String name, boolean executable) {
		// Build the cache entry first, then pair it with its registration name.
		final DistributedCache.DistributedCacheEntry entry =
			new DistributedCache.DistributedCacheEntry(filePath, executable);
		final Tuple2<String, DistributedCache.DistributedCacheEntry> registration =
			new Tuple2<>(name, entry);
		cacheFile.add(registration);
	}
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章