Hadoop之MapReduce-Partition編程

一、問題描述

       在Hadoop序列化案例(http://blog.csdn.net/gaijianwei/article/details/46004025)的基礎上,將輸出的數據按照手機號所屬的運營商進行分區。

二、問題實現

       DataCount代碼(只是對Hadoop序列化案例的DataCount代碼稍作修改)

package edu.jianwei.hadoop.mr;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.collections.map.HashedMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that aggregates per-phone upload/download traffic (building on
 * the serialization example's {@code DataBean}) and partitions the reducer
 * output by the carrier prefix of the phone number.
 *
 * <p>Usage: {@code hadoop jar dc.jar edu.jianwei.hadoop.mr.DataCount
 * <inputPath> <outputPath> <numReduceTasks>}
 */
public class DataCount {

    /** Mapper: parses one tab-separated log line into (phoneNumber, DataBean). */
    static class DCMapper extends Mapper<LongWritable, Text, Text, DataBean> {
        // Reused across map() calls to avoid per-record object allocation,
        // the standard Hadoop writable-reuse pattern.
        private final Text outKey = new Text();
        private final DataBean outValue = new DataBean();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            // Field layout assumed from the input format: [1]=phone number,
            // [8]=upload bytes, [9]=download bytes.
            String telNum = fields[1];
            double upLoad = Double.parseDouble(fields[8]);
            double downLoad = Double.parseDouble(fields[9]);
            outKey.set(telNum);
            outValue.Set(telNum, upLoad, downLoad);
            context.write(outKey, outValue);
        }
    }

    /** Reducer: sums upload and download totals per phone number. */
    static class DCReduce extends Reducer<Text, DataBean, Text, DataBean> {
        private final DataBean outValue = new DataBean();

        @Override
        protected void reduce(Text key, Iterable<DataBean> beans, Context context)
                throws IOException, InterruptedException {
            double upTotal = 0;
            double downTotal = 0; // fixed typo: was "downToal"
            for (DataBean d : beans) {
                upTotal += d.getUpLoad();
                downTotal += d.getDownload();
            }
            // Phone number is already the key, so the bean's tel field is left empty.
            outValue.Set("", upTotal, downTotal);
            context.write(key, outValue);
        }
    }

    /**
     * Routes records to reducers by the first three digits (carrier prefix)
     * of the phone number. Unknown prefixes fall through to partition 0.
     *
     * <p>NOTE(review): partitions 0..3 are produced, so the job must run with
     * at least 4 reduce tasks. With fewer (except exactly 1, where Hadoop
     * bypasses the partitioner) the framework raises an "Illegal partition"
     * error — this is the failure mode the surrounding article demonstrates.
     */
    public static class DCPartitioner extends Partitioner<Text, DataBean> {
        // prefix -> partition number; carriers sharing a partition share a value.
        private static final Map<String, Integer> PROVIDER = new HashMap<String, Integer>();
        static {
            PROVIDER.put("139", 1);
            PROVIDER.put("138", 1);
            PROVIDER.put("152", 2);
            PROVIDER.put("153", 2);
            PROVIDER.put("182", 3);
            PROVIDER.put("183", 3);
        }

        @Override
        public int getPartition(Text k, DataBean value, int numPartitions) {
            String prefix = k.toString().substring(0, 3);
            Integer partition = PROVIDER.get(prefix);
            return partition == null ? 0 : partition;
        }
    }

    /**
     * Configures and submits the job.
     *
     * @param args [0] input path, [1] output path, [2] number of reduce tasks
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // BUG FIX: the original called Job.getInstance() with no arguments,
        // silently discarding the Configuration created on the line above.
        Job job = Job.getInstance(conf);

        job.setJarByClass(DataCount.class);

        job.setMapperClass(DCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DataBean.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setReducerClass(DCReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DataBean.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setPartitionerClass(DCPartitioner.class);
        job.setNumReduceTasks(Integer.parseInt(args[2]));

        // BUG FIX: propagate success/failure via the process exit code instead
        // of ignoring waitForCompletion's result.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
     DataBean同Hadoop序列化案例中的DataBean

三、代碼測試

       1.代碼運行(啓動4個Reduce任務)

          hadoop jar /root/dc.jar edu.jianwei.hadoop.mr.DataCount  /dc   /dc/res   4

       2.運行結果

       

       這裏輸出結果不再一一列舉, 例part-r-00001的數據:

        13826544101     264.0   0.0     264.0
        13922314466     3008.0  3720.0  6728.0
        13925057413     11058.0 48243.0 59301.0
        13926251106     240.0   0.0     240.0
        13926435656     132.0   1512.0  1644.0

     注意:

      1’. 代碼運行(啓動3個Reduce任務)

          hadoop jar /root/dc.jar edu.jianwei.hadoop.mr.DataCount  /dc/HTTP_20130313143750.dat  /dc/res_3  3

      2’.運行結果

        

       1’’.代碼運行(啓動5個Reduce任務)

           hadoop jar /root/dc.jar edu.jianwei.hadoop.mr.DataCount  /dc/HTTP_20130313143750.dat  /dc/res_5  5

       2''.運行結果

         

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章