Scheduling a MapReduce Job with Oozie

  1. Create a map-reduce directory under /usr/local/module/oozie-4.0.0-cdh5.3.6/oozie-apps
     
  2. Under /usr/local/module/oozie-4.0.0-cdh5.3.6/oozie-apps/map-reduce, create the job.properties and workflow.xml files,
     as well as a lib folder
  3. Copy /usr/local/module/hadoop-cdh5.3.6/hadoop-2.5.0-cdh5.3.6/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0-cdh5.3.6.jar into the lib directory, as sketched below
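     
     A minimal shell sketch of steps 1-3 (the paths are the ones used in this article; adjust them to your installation):
     
    cd /usr/local/module/oozie-4.0.0-cdh5.3.6/oozie-apps
    # Create the application directory and its lib folder
    mkdir -p map-reduce/lib
    # Create the two configuration files (their contents are filled in below)
    touch map-reduce/job.properties map-reduce/workflow.xml
    # Copy the bundled WordCount example jar into lib/
    cp /usr/local/module/hadoop-cdh5.3.6/hadoop-2.5.0-cdh5.3.6/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0-cdh5.3.6.jar map-reduce/lib/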
     
  4. Configure the workflow.xml file
     
    <!--
      Licensed to the Apache Software Foundation (ASF) under one
      or more contributor license agreements.  See the NOTICE file
      distributed with this work for additional information
      regarding copyright ownership.  The ASF licenses this file
      to you under the Apache License, Version 2.0 (the
      "License"); you may not use this file except in compliance
      with the License.  You may obtain a copy of the License at
      
           http://www.apache.org/licenses/LICENSE-2.0
      
      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License.
    -->
    <workflow-app xmlns="uri:oozie:workflow:0.2" name="map-reduce-wf">
        <start to="mr-node"/>
        <action name="mr-node">
            <map-reduce>
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <prepare>
    				<!--對應的是hdfs上/output1目錄,存在的話要先刪除-->
                    <delete path="${nameNode}/output1"/>
                </prepare>
                <configuration>
                    <property>
                        <name>mapred.job.queue.name</name>
                        <value>${queueName}</value>
                    </property>
    				<!-- 配置調度MR任務時,使用新的API -->
                    <property>
                        <name>mapred.mapper.new-api</name>
                        <value>true</value>
                    </property>
    
                    <property>
                        <name>mapred.reducer.new-api</name>
                        <value>true</value>
                    </property>
    
    				<!-- 指定Map類 -->
                    <property>
                        <name>mapreduce.job.map.class</name>
                        <value>org.apache.hadoop.examples.WordCount$TokenizerMapper</value>
                    </property>
    				 <!-- 指定Reduce類 -->
                    <property>
                        <name>mapreduce.job.reduce.class</name>
                        <value>org.apache.hadoop.examples.WordCount$IntSumReducer</value>
                    </property>
    				 <!-- 指定Job Key輸出類型 -->
                    <property>
                        <name>mapreduce.job.output.key.class</name>
                        <value>org.apache.hadoop.io.Text</value>
                    </property>
    
                    <!-- 指定Job Value輸出類型 -->
                    <property>
                        <name>mapreduce.job.output.value.class</name>
                        <value>org.apache.hadoop.io.IntWritable</value>
                    </property>
    
                    <property>
                        <name>mapred.map.tasks</name>
                        <value>1</value>
                    </property>
    				<!-- 指定輸入輸出路徑 -->
                    <property>
                        <name>mapreduce.input.fileinputformat.inputdir</name>
                        <value>/input1</value>
                    </property>
                    <property>
                        <name>mapreduce.output.fileoutputformat.outputdir</name>
                        <value>/output1</value>
                    </property>
                </configuration>
            </map-reduce>
            <ok to="end"/>
            <error to="fail"/>
        </action>
        <kill name="fail">
            <message>Map/Reduce failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
        </kill>
        <end name="end"/>
    </workflow-app>
    
  5. Edit the job.properties file (nameNode is the HDFS NameNode URI; with YARN, jobTracker actually points at the ResourceManager address)
     
    nameNode=hdfs://hadoop111:8020
    jobTracker=hadoop112:8032
    queueName=default
    examplesRoot=oozie-apps
    oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/map-reduce/workflow.xml
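     
     Note: oozie.wf.application.path must point to the workflow definition on HDFS; it may reference either the workflow.xml file itself, as here, or the application directory that contains it.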
    
  6. Upload the application directory (and the WordCount input data) to HDFS, for example as shown below
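     
     A minimal sketch, run from the Hadoop installation directory (the $USER home directory and the wordcount-input.txt file are placeholders):
     
    # Upload the whole map-reduce application directory, including lib/
    bin/hdfs dfs -mkdir -p /user/$USER/oozie-apps
    bin/hdfs dfs -put /usr/local/module/oozie-4.0.0-cdh5.3.6/oozie-apps/map-reduce /user/$USER/oozie-apps/
    # workflow.xml reads its input from /input1 on HDFS, so create it with some text to count
    bin/hdfs dfs -mkdir -p /input1
    bin/hdfs dfs -put wordcount-input.txt /input1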
     
  7. Run the workflow with the Oozie CLI, as shown below
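     
     A sketch of submitting the workflow, run from the Oozie installation directory (the server URL assumes Oozie runs on hadoop111 at its default port 11000):
     
    bin/oozie job -oozie http://hadoop111:11000/oozie \
        -config oozie-apps/map-reduce/job.properties -run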
     
  8. Check the result: the workflow should finish with status SUCCEEDED in the Oozie Web Console
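     
     The status can also be polled from the CLI (the job ID below is a placeholder for the one printed by -run):
     
    bin/oozie job -oozie http://hadoop111:11000/oozie -info <job-id>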
     
     
  9. View the input and the generated output files on HDFS, for example:
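     
     A sketch run from the Hadoop installation directory (the paths come from workflow.xml above):
     
    bin/hdfs dfs -ls /input1
    bin/hdfs dfs -ls /output1
    # WordCount writes its counts to part-r-* files
    bin/hdfs dfs -cat /output1/part-r-00000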
     