Shuffle分區及排序

原創

2019-07-30 02:59

所有的思路都在思維導圖上，這裏直接實戰：按 value 的前三位（138/135/151/其他）分成四個分區，並在每個分區內按總流量由大到小排序。

//編寫Bean對象
package flow1;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowBean implements WritableComparable<FlowBean>{

    private int sumFlow;//總流量

    public FlowBean(){}

    public FlowBean(int sumFlow) {
        this.sumFlow = sumFlow;
    }

    //比較
    public int compareTo(FlowBean a) {
        if(sumFlow>a.getSumFlow()){
            return -1;
        }else if(sumFlow<a.getSumFlow()){
            return 1;
        }else {
            return 0;
        }
    }

    //序列化
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(sumFlow);
    }

    //反序列化
    public void readFields(DataInput dataInput) throws IOException {
        sumFlow=dataInput.readInt();
    }

    public int getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(int sumFlow) {
        this.sumFlow = sumFlow;
    }

    @Override
    public String toString() {
        return ""+sumFlow;
    }
}

//編寫Mapper
package flow1;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Parses each input line of the form {@code <sumFlow> <text>} and emits the
 * flow as a {@link FlowBean} key with the second field as the value.
 *
 * <p>Lines that are blank, have fewer than two fields, or whose first field is
 * not an integer are skipped and counted instead of failing the task.
 */
public class FlowMapper extends Mapper<LongWritable,Text,FlowBean,Text>{

    /** Field separator: one or more whitespace characters, compiled once. */
    private static final java.util.regex.Pattern WHITESPACE = java.util.regex.Pattern.compile("\\s+");

    /** Counter group and name used to report skipped lines. */
    private static final String COUNTER_GROUP = "FlowMapper";
    private static final String MALFORMED_LINES = "MALFORMED_LINES";

    // Output objects are reused across map() calls to avoid per-record allocation.
    private final FlowBean k=new FlowBean();
    private final Text v=new Text();

    /**
     * @param key     input key supplied by the input format (unused)
     * @param value   one line of input text
     * @param context task context used to emit the output pair
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // Blank lines (e.g. a trailing newline at end of file) carry no record.
            return;
        }

        String[] fields = WHITESPACE.split(line);
        if (fields.length < 2) {
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
            return;
        }

        int flow;
        try {
            flow = Integer.parseInt(fields[0]);
        } catch (NumberFormatException e) {
            // A non-numeric flow column is bad data, not a reason to fail the whole job.
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
            return;
        }

        k.setSumFlow(flow);
        v.set(fields[1]);
        context.write(k,v);
    }
}

//編寫Reduce
package flow1;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Writes every value of a key group back out, paired with the key's string
 * form, so each output line is {@code <sumFlow> <value>}.
 */
public class FlowReduce extends Reducer<FlowBean,Text,Text,Text> {

    /** Output key, reused across records in the same way the mapper reuses its output objects. */
    private final Text outKey = new Text();

    /**
     * @param key     the flow key shared by this group
     * @param values  all values emitted with that key
     * @param context task context used to emit the output pairs
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(FlowBean key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        for (Text v : values) {
            // Re-read the key on every record, as the original did, rather than hoisting it out of the loop.
            outKey.set(key.toString());
            context.write(outKey, v);
        }
    }
}

//編寫分區相關類
package flow1;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class FlowPartition extends Partitioner<FlowBean,Text> {

    public int getPartition(FlowBean flowBean, Text text, int i) {
        String s = text.toString();
        System.out.println(s.substring(0,2));
        if(s.substring(0,3).equals("138")){
            return 0;//分區0
        }else if(s.substring(0,3).equals("135")){
            return 1;
        }else if(s.substring(0,3).equals("151")){
            return 2;
        }else {
            return 3;
        }
    }
}

//編寫驅動類
package flow1;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Configures and runs the flow job: {@link FlowMapper} -> {@link FlowPartition}
 * -> {@link FlowReduce}, with four reduce tasks.
 *
 * <p>Usage: {@code FlowDriver [<input path> <output path>]}. When fewer than
 * two arguments are given, the built-in default paths are used.
 */
public class FlowDriver {

    /** Paths used when none are supplied on the command line. */
    private static final String DEFAULT_INPUT = "F:/Test/num.txt";
    private static final String DEFAULT_OUTPUT = "F:/Test/output";

    /** One reduce task per outcome of {@link FlowPartition} (partitions 0-3). */
    private static final int NUM_PARTITIONS = 4;

    /**
     * @param args optional input path and output path
     * @throws IOException            if the job cannot be configured or submitted
     * @throws ClassNotFoundException if a job class cannot be loaded
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Honor command-line paths; fall back to the defaults only when they are absent.
        boolean hasPaths = args != null && args.length >= 2;
        String inputPath = hasPaths ? args[0] : DEFAULT_INPUT;
        String outputPath = hasPaths ? args[1] : DEFAULT_OUTPUT;

        // Create the configuration and obtain the Job object
        Configuration con=new Configuration();
        Job job=Job.getInstance(con);

        // Jar containing the job classes
        job.setJarByClass(FlowDriver.class);

        // Mapper and reducer implementations
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReduce.class);

        // Map output types
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);

        // Partitioning: number of reduce tasks and the partitioner class
        job.setNumReduceTasks(NUM_PARTITIONS);
        job.setPartitionerClass(FlowPartition.class);

        // Final output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Input and output locations
        FileInputFormat.setInputPaths(job,new Path(inputPath));
        FileOutputFormat.setOutputPath(job,new Path(outputPath));

        // Run the job, wait for it to finish, and report the result via the exit code.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

Shuffle分區及排序

微服務實踐k8s&dapr開發部署實驗（2）狀態管理

Win10 LTSC 2019 安裝後的一些步驟

Python 潮流週刊#52：Python 處理 Excel 的資源

SpringBoot開發中實現對前端返回數據的一致及錯誤異常統一處理

使用mybatis-generator自動生成Mybatis

SSM整合(Maven)筆記

com.alibaba.druid.pool.DruidDataSource : create connection error, url: jdbc:mysql:// java.sql.SQLE

SpringBoot引入durid時報錯

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結