題目:
對給定的文件進行詞頻統計,然後按照詞頻從大到小排序
詞頻相同時,按照單詞的字典序
思路:
通過兩個MR進行解決
第一個MR:統計詞頻
第二個MR:利用Shuffle階段的排序,實現排序效果
代碼:
//單詞類
package Test02;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class WordWritable implements WritableComparable<WordWritable> {
private String word;
private int num;
public int compareTo(WordWritable o) {
int tmp = o.num-this.num;
if(tmp != 0){
return tmp;
}
return this.word.compareTo(o.word);
}
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(this.word);
dataOutput.writeInt(this.num);
}
public void readFields(DataInput dataInput) throws IOException {
this.word = dataInput.readUTF();
this.num = dataInput.readInt();
}
@Override
public String toString() {
return "word=" + word + " num=" + num;
}
public String getWord() {
return word;
}
public void setWord(String word) {
this.word = word;
}
public int getNum() {
return num;
}
public void setNum(int num) {
this.num = num;
}
}
//MapReduce類
package Test02;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver that chains two MapReduce jobs via JobControl:
 *  - job 1: classic word count (word -> total occurrences)
 *  - job 2: re-keys job 1's output on {@link WordWritable} so the shuffle
 *    sort produces "frequency descending, then dictionary order".
 *
 * Paths may be supplied as {@code args[0]} (input), {@code args[1]}
 * (intermediate dir) and {@code args[2]} (final output); when absent the
 * original hard-coded defaults are used, so existing invocations still work.
 */
public class DependDemo01 {
    public static void main(String[] args) {
        try {
            // Allow command-line overrides; fall back to the original paths.
            String input = args.length > 0 ? args[0]
                    : "C:/Users/Administrator/Desktop/Month02/week7/day3/data/input/t2.txt";
            String midOut = args.length > 1 ? args[1]
                    : "C:/Users/Administrator/Desktop/Month02/week7/day3/data/output/t2/01";
            String finalOut = args.length > 2 ? args[2]
                    : "C:/Users/Administrator/Desktop/Month02/week7/day3/data/output/t2/02";

            Configuration conf = new Configuration();

            // Job 1: word count. Output lines look like "word\tcount".
            Job job1 = Job.getInstance(conf, "Test02-DependDemo01-1");
            job1.setJarByClass(DependDemo01.class);
            job1.setMapperClass(MyMapperOne.class);
            job1.setMapOutputKeyClass(Text.class);
            job1.setMapOutputValueClass(Text.class);
            job1.setReducerClass(MyReducerOne.class);
            job1.setOutputKeyClass(Text.class);
            job1.setOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job1, new Path(input));
            FileOutputFormat.setOutputPath(job1, new Path(midOut));

            // Job 2: re-key on WordWritable; the shuffle sort does the ordering.
            Job job2 = Job.getInstance(conf, "Test02-DependDemo01-2");
            job2.setJarByClass(DependDemo01.class);
            job2.setMapperClass(MyMapperTwo.class);
            job2.setMapOutputKeyClass(WordWritable.class);
            job2.setMapOutputValueClass(Text.class);
            job2.setReducerClass(MyReducerTwo.class);
            job2.setOutputKeyClass(WordWritable.class);
            job2.setOutputValueClass(NullWritable.class);
            FileInputFormat.addInputPath(job2, new Path(midOut));
            FileOutputFormat.setOutputPath(job2, new Path(finalOut));

            // Wire up the dependency: job2 must not start until job1 succeeds.
            ControlledJob aJob = new ControlledJob(job1.getConfiguration());
            ControlledJob bJob = new ControlledJob(job2.getConfiguration());
            aJob.setJob(job1);
            bJob.setJob(job2);
            bJob.addDependingJob(aJob);

            JobControl jc = new JobControl("jc01");
            jc.addJob(aJob);
            jc.addJob(bJob);

            // JobControl.run() loops forever, so it runs on its own thread
            // and we poll until both jobs have finished.
            Thread thread = new Thread(jc);
            thread.start();
            while (!jc.allFinished()) {
                try {
                    // Thread.sleep is static — call it via the class, not
                    // through the 'thread' instance (which misleadingly
                    // suggested it sleeps the worker thread).
                    Thread.sleep(1000);
                } catch (InterruptedException ie) {
                    // Restore the interrupt flag and stop waiting.
                    Thread.currentThread().interrupt();
                    break;
                }
            }
            jc.stop();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Job 1 mapper: emits (word, "1") for every token on the line. */
    public static class MyMapperOne extends Mapper<Object, Text, Text, Text> {
        Text k = new Text();
        Text v = new Text("1");

        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Split on runs of whitespace so consecutive spaces (or tabs)
            // don't produce empty-string "words" that get counted.
            String[] words = value.toString().split("\\s+");
            for (String st : words) {
                if (st.isEmpty()) {
                    continue; // leading-whitespace artifact from split
                }
                k.set(st);
                context.write(k, v);
            }
        }
    }

    /** Job 1 reducer: sums the "1"s for each word, emits (word, total). */
    public static class MyReducerOne extends Reducer<Text, Text, Text, Text> {
        Text v = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int cnt = 0;
            for (Text text : values) {
                cnt += Integer.parseInt(text.toString());
            }
            v.set(String.valueOf(cnt));
            context.write(key, v);
        }
    }

    /**
     * Job 2 mapper: parses job 1's "word\tcount" lines into a WordWritable
     * key so the shuffle sorts by (count desc, word asc).
     */
    public static class MyMapperTwo extends Mapper<LongWritable, Text, WordWritable, Text> {
        WordWritable k = new WordWritable();
        Text v = new Text("1");

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] words = value.toString().split("\t");
            if (words.length < 2) {
                return; // skip blank/malformed lines instead of crashing
            }
            k.setWord(words[0]);
            k.setNum(Integer.parseInt(words[1]));
            context.write(k, v);
        }
    }

    /** Job 2 reducer: keys arrive already sorted; just write them out. */
    public static class MyReducerTwo extends Reducer<WordWritable, Text, WordWritable, NullWritable> {
        @Override
        protected void reduce(WordWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }
}