MapReduce--11--學生成績(基礎版)--需求2

對於剛入門MapReduce的同學來說,學會mapreduce的基本編程套路,懂得mapreduce是如何對於大批量數據集做分佈式運算的是非常關鍵的。

這裏有一個需求,增強各位對mapreduce編程的理解

首先看數據:

computer,huangxiaoming,85
computer,xuzheng,54
computer,huangbo,86
computer,liutao,85
computer,huanglei,99
computer,liujialing,85
computer,liuyifei,75
computer,huangdatou,48
computer,huangjiaju,88
computer,huangzitao,85
english,zhaobenshan,57
english,liuyifei,85
english,liuyifei,76
english,huangdatou,48
english,zhouqi,85
english,huangbo,85
english,huangxiaoming,96
english,huanglei,85
english,liujialing,75
algorithm,liuyifei,75
algorithm,huanglei,76
algorithm,huangjiaju,85
algorithm,liutao,85
algorithm,huangdou,42
algorithm,huangzitao,81
math,wangbaoqiang,85
math,huanglei,76
math,huangjiaju,85
math,liutao,48
math,xuzheng,54
math,huangxiaoming,85
math,liujialing,85

以上所有的是數據,該數據每行有三個字段值,分別是course,name,score

現在求需求2:求該成績表每門課程當中被多名學生取得的相同分數,以及該分數出現的次數和取得該分數的學生名單

返回結果的格式:
科目    分數    次數    該分數的人
例子:
computer    85    4    liutao,huangzitao,liujialing,huangxiaoming

 

解題思路:

對於mapper階段,輸出的key-value分別是:

key: 課程,分數

value: 名字

對於reducer階段,reduce方法接收的參數是:

key: 課程,分數

values: 課程中的某個分數的多個學生的名字的迭代器

 

看代碼實現:

package com.ghgj.mazh.mapreduce.exercise.coursescore2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * MapReduce job that, for every course, reports each score obtained by more
 * than one student, together with the number of students and their names.
 * <p>
 * Input lines have the form {@code course,name,score}, for example
 * {@code computer,huangxiaoming,85}.
 * <p>
 * Output lines have the form {@code course \t score \t count \t name1,name2,...}.
 * <p>
 * Usage: {@code CourseScoreMR_Basic_02 [inputPath [outputPath]]}. When an
 * argument is omitted the corresponding default path below is used.
 */
public class CourseScoreMR_Basic_02 {

    /** Input directory used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "D:\\bigdata\\coursescore1\\input";

    /** Output directory used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "D:\\bigdata\\coursescore1\\output2";

    /** Number of comma-separated fields a valid record must contain. */
    private static final int MIN_FIELD_COUNT = 3;

    /**
     * Configures and submits the job, then exits with status 0 on success and
     * 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}; missing entries fall
     *             back to the default paths
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Paths may be overridden from the command line; defaults keep the
        // original behaviour when the program is started without arguments.
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        // Create the job.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Tell Hadoop which jar contains the job classes.
        job.setJarByClass(CourseScoreMR_Basic_02.class);

        // Mapper / reducer classes and their key/value types.
        job.setMapperClass(Mapper_CS.class);
        job.setReducerClass(Reducer_CS.class);
        // Map output types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Reduce (final) output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Input and output locations. The output directory is removed first
        // because the job refuses to start if it already exists.
        Path input = new Path(inputPath);
        Path output = new Path(outputPath);
        // Resolve the file system from the path itself so that an output path
        // on a non-default file system is handled correctly.
        FileSystem fs = output.getFileSystem(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        // Submit and wait for completion.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

    /**
     * Mapper component.
     * <p>
     * Input key: the key supplied by the input format (not used).
     * Input value: one line such as {@code computer,huangxiaoming,85}.
     * <p>
     * Output key: {@code course + "\t" + score}.
     * Output value: {@code name}.
     */
    private static class Mapper_CS extends Mapper<LongWritable, Text, Text, Text> {

        // Reused across calls to avoid allocating a new Text per record.
        Text keyOut = new Text();
        Text valueOut = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            String[] splits = value.toString().split(",");
            if (splits.length < MIN_FIELD_COUNT) {
                // Blank or truncated line: skip it instead of failing the whole
                // task with ArrayIndexOutOfBoundsException, but keep it visible
                // through a job counter.
                context.getCounter("CourseScore", "MALFORMED_LINES").increment(1);
                return;
            }

            String course = splits[0];
            String name = splits[1];
            String score = splits[2];

            keyOut.set(course + "\t" + score);
            valueOut.set(name);

            context.write(keyOut, valueOut);
        }
    }

    /**
     * Reducer component.
     * <p>
     * Input key: {@code course + "\t" + score}.
     * Input values: the names of all students with that score in that course.
     * <p>
     * Output key: {@code course + "\t" + score}.
     * Output value: {@code number + "\t" + names}, where {@code names} is a
     * comma-separated list. Only scores shared by more than one student are
     * written.
     */
    private static class Reducer_CS extends Reducer<Text, Text, Text, Text> {

        // Reused across calls to avoid allocating a new Text per group.
        Text valueOut = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

            StringBuilder names = new StringBuilder();
            int number = 0;
            for (Text t : values) {
                // Separator goes before every name except the first, so no
                // trailing comma has to be trimmed afterwards.
                if (number > 0) {
                    names.append(",");
                }
                names.append(t.toString());
                number++;
            }

            // A score that occurs only once is not a "shared" score.
            if (number > 1) {
                valueOut.set(number + "\t" + names);
                context.write(key, valueOut);
            }
        }
    }
}

 

代碼運行得到的結果如下:

algorithm	85	2	liutao,huangjiaju
computer	85	4	liutao,huangzitao,liujialing,huangxiaoming
english	85	4	huangbo,huanglei,zhouqi,liuyifei
math	85	4	wangbaoqiang,huangjiaju,huangxiaoming,liujialing

 

至此,得出需要的結果

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章