The WordCount Operation in MapReduce

1. Start Hadoop

cd /apps/hadoop/sbin  
./start-all.sh 
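Once the scripts finish, you can check that the Hadoop daemons actually started with jps; in a pseudo-distributed setup you would expect processes such as NameNode, DataNode, and the YARN daemons (the exact list depends on your configuration):

jps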

2. On Linux, create a directory /data/mapreduce1

mkdir -p /data/mapreduce1

3. Switch to the /data/mapreduce1 directory and download the text file
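The original download source is tied to the course environment and is not reproduced here; a minimal sketch, assuming the sample file buyer_favorite1 is served over HTTP (the URL is a placeholder):

cd /data/mapreduce1
wget http://<your-file-server>/buyer_favorite1    # placeholder URL; substitute the real source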

4. In the same directory, download the dependency package
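As with the data file, the package URL is environment-specific; a sketch with placeholder names, assuming the dependencies ship as a single tarball (the name hadoop2lib.tar.gz is an assumption):

wget http://<your-file-server>/hadoop2lib.tar.gz    # placeholder URL and file name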

5. Unpack the dependency package
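Assuming the tarball name from the previous step:

tar -zxvf hadoop2lib.tar.gz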

6. Upload the text file to the /mymapreduce1/in directory on HDFS

hadoop fs -mkdir -p /mymapreduce1/in
hadoop fs -put /data/mapreduce1/buyer_favorite1 /mymapreduce1/in
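To verify that the file landed on HDFS:

hadoop fs -ls /mymapreduce1/in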

7. Write the following code in Eclipse:


package mapreduce;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        job.setJobName("WordCount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(doMapper.class);
        job.setReducerClass(doReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        Path in = new Path("hdfs://localhost:9000/mymapreduce1/in/buyer_favorite1");
        Path out = new Path("hdfs://localhost:9000/mymapreduce1/out");
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    // Mapper: emits the first tab-separated field of each line with a count of 1.
    public static class doMapper extends Mapper<Object, Text, Text, IntWritable> {
        public static final IntWritable one = new IntWritable(1);
        public static Text word = new Text();

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString(), "\t");
            // Guard against empty lines, which would otherwise throw NoSuchElementException.
            if (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer: sums all the counts emitted for each key.
    public static class doReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }
}
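You can run the class directly from Eclipse as a Java application. Alternatively, a sketch of submitting it from the command line, assuming the project has been exported as wordcount.jar (a hypothetical name); note that the job will fail if the output directory /mymapreduce1/out already exists, so remove it first when rerunning:

hadoop jar wordcount.jar mapreduce.WordCount    # wordcount.jar is a hypothetical export name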

8. Inspect the results on HDFS

hadoop fs -ls /mymapreduce1/out
hadoop fs -cat /mymapreduce1/out/*