題目描述
關於學生成績的練習題,之前是一個入門級別的需求,現在對這些需求進行增強。首先看數據的改變:
computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75
一、數據解釋
數據字段個數不固定:
第一個是課程名稱,總共四個課程,computer,math,english,algorithm,
第二個是學生姓名,後面是每次考試的分數
二、統計需求:
1、統計每門課程的參考人數和課程平均分
2、統計每門課程參考學生的平均分,並且按課程存入不同的結果文件,要求一門課程一個結果文件,並且按平均分從高到低排序,分數保留一位小數
3、求出每門課程參考學生成績最高的學生的信息:課程,姓名和平均分
三、解題思路(以下針對統計需求 1:每門課程的參考人數和課程平均分)
mapper階段的輸出:
key: 課程
value: 該學生在這門課程的平均分(保留一位小數)
reducer階段的輸出:
key: 課程
value: 平均分數和人數
四、具體代碼實現
package com.ghgj.mazh.mapreduce.exercise.coursescore3;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * MapReduce job that reports, for every course, the number of score records
 * and the course average.
 *
 * <p>Each input line has the form {@code course,student,score1,score2,...}
 * with a variable number of scores. The mapper turns one line into
 * {@code (course, student average)}; the reducer averages those per-student
 * averages and counts them, writing {@code course <TAB> average <TAB> count}.
 */
public class CourseScoreMR_Pro_01 {

    /** Input directory used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "D:\\bigdata\\coursescore2\\input";

    /** Output directory used when fewer than two command-line arguments are supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "D:\\bigdata\\coursescore2\\output";

    /**
     * Configures and submits the job, then exits with 0 on success and 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}; any missing entry
     *             falls back to the built-in default path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Paths may be overridden from the command line; defaults keep the old behavior.
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        // Create the job.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Tell Hadoop which jar contains the job classes.
        job.setJarByClass(CourseScoreMR_Pro_01.class);

        // Mapper and reducer implementations.
        job.setMapperClass(Mapper_CS.class);
        job.setReducerClass(Reducer_CS.class);

        // Map output types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);

        // Final (reduce) output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        Path input = new Path(inputPath);
        Path output = new Path(outputPath);

        // Resolve the file system from the output path itself so that a
        // fully-qualified path (e.g. hdfs://...) is handled by the right
        // implementation. The instance is not closed here because the job
        // still needs a usable file system after this point.
        FileSystem fs = output.getFileSystem(conf);
        // The job refuses to start if the output directory already exists.
        if (fs.exists(output)) {
            fs.delete(output, true);
        }

        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        // Submit and wait for completion.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

    /**
     * Rounds a value to one decimal place using {@link Math#round(double)}.
     *
     * @param value the value to round
     * @return {@code value} rounded to one decimal place
     */
    private static double roundToOneDecimal(double value) {
        return Math.round(value * 10) / 10D;
    }

    /**
     * Mapper.
     *
     * <p>Input value: one line such as {@code computer,liutao,85,75,85,99,66,88,75,91}.
     *
     * <p>Output key: the course name.<br>
     * Output value: that student's average score, rounded to one decimal place.
     */
    private static class Mapper_CS extends Mapper<LongWritable, Text, Text, DoubleWritable> {

        private final Text keyOut = new Text();
        private final DoubleWritable valueOut = new DoubleWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] splits = value.toString().split(",");

            // A usable record needs a course, a student and at least one score.
            // Skipping shorter lines (e.g. blank lines) avoids a 0/0 average
            // that would otherwise be emitted as a bogus 0.0 record.
            if (splits.length < 3) {
                return;
            }

            String course = splits[0].trim();

            // Scores start at index 2 (index 1 is the student name).
            int sum = 0;
            for (int i = 2; i < splits.length; i++) {
                sum += Integer.parseInt(splits[i].trim());
            }
            int num = splits.length - 2;

            // Per-student average, rounded to one decimal place.
            double avgScore = roundToOneDecimal((double) sum / num);

            keyOut.set(course);
            valueOut.set(avgScore);
            context.write(keyOut, valueOut);
        }
    }

    /**
     * Reducer.
     *
     * <p>Input key: the course name.<br>
     * Input values: the per-student averages emitted by the mapper for that course.
     *
     * <p>Output key: the course name.<br>
     * Output value: the course average (one decimal place) and the number of
     * values received, separated by a tab.
     */
    private static class Reducer_CS extends Reducer<Text, DoubleWritable, Text, Text> {

        private final Text valueOut = new Text();

        @Override
        protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
            // Accumulate in a double: with an int accumulator, "sum += double"
            // narrows back to int on every addition and silently drops the
            // fractional part of each student's average.
            double sum = 0D;
            int num = 0;
            for (DoubleWritable v : values) {
                sum += v.get();
                num++;
            }

            // Course average, rounded to one decimal place.
            double avgScore = roundToOneDecimal(sum / num);

            valueOut.set(avgScore + "\t" + num);
            context.write(key, valueOut);
        }
    }
}
五、執行結果
algorithm 71.3 6
computer 69.6 10
english 66.0 9
math 72.6 7
至此,大功告成