自定義OutputFormat
相關知識點如圖所示
要自定義OutputFormat,我們要完成以下步驟
1 先寫MR兩個類
2 FilterOutputFormat extends FileOutputFormat
重寫getRecordWriter方法
public class FilterOutputFormat extends FileOutputFormat<Text,NullWritable> {
public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
//此處要傳入job的上下文以便創建IO流
return new FilterRecordWriter(taskAttemptContext);
}
}
3 FilterRecordWriter extends RecordWriter
在構造方法中就要初始化輸出流
重寫write方法和close方法
主要的邏輯在write方法中執行
public class FilterRecordWriter extends RecordWriter<Text,NullWritable> {
FSDataOutputStream out1=null; //創建輸出流1
FSDataOutputStream out2=null; //創建輸出流2
public FilterRecordWriter(TaskAttemptContext job){
FileSystem fs=null;//通過job獲得文件系統
try {
fs=FileSystem.get(job.getConfiguration());
//指定路徑
Path out1Path=new Path("F:/Test/baidu.txt");
Path out2Path=new Path("F:/Test/other.txt");
//創建輸出流
out1=fs.create(out1Path);
out2=fs.create(out2Path);
} catch (IOException e) {
e.printStackTrace();
}
}
public void write(Text text, NullWritable nullWritable) throws IOException, InterruptedException {
if(text.toString().contains("baidu")){
out1.write(text.getBytes());
}else {
out2.write(text.getBytes());
}
}
public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
IOUtils.closeStream(out1);
IOUtils.closeStream(out2);
}
}
4 設置驅動類
// Register the custom output format component on the job
job.setOutputFormatClass(FilterOutputFormat.class);
這便完成了自定義輸出類
自定義InputFormat
需要完成以下步驟
1 自定義類來繼承FileInputFormat
要重寫裏面的isSplitable()方法,這個方法決定文件是否可以被切片,返回false就不切片了(整個文件作爲一個切片)
重寫createRecordReader()方法
/**
 * Input format that never splits a file and reads it through a
 * {@link WholeRecordReader}.
 */
public class WholeFileInputformat extends FileInputFormat<Text, BytesWritable> {

    /**
     * Creates and initialises the reader for one split.
     *
     * @param split the split to read
     * @param context the task attempt context
     * @return an initialised {@link WholeRecordReader}
     */
    @Override
    public RecordReader<Text, BytesWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        RecordReader<Text, BytesWritable> reader = new WholeRecordReader();
        // Initialise with the split information and the task context.
        // NOTE(review): the MapReduce framework normally calls initialize() on the
        // returned reader itself, so this call looks redundant - confirm before removing.
        reader.initialize(split, context);
        return reader;
    }

    /**
     * Disables splitting.
     *
     * @return always {@code false}
     */
    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        return false;
    }
}
2 寫RecordReader類
/**
 * Record reader that emits exactly one record per split: the key is the file
 * path as a string and the value is the content of the split read in one go.
 */
public class WholeRecordReader extends RecordReader<Text, BytesWritable>{
    private Configuration configuration;
    private FileSplit split;
    /** {@code true} until the single record has been produced. */
    private boolean isProgress = true;
    private BytesWritable value = new BytesWritable();
    private Text k = new Text();

    /**
     * Remembers the split and the job configuration for the later read.
     *
     * @param split the split to read; must be a {@code FileSplit}
     * @param context the task attempt context supplying the configuration
     */
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        this.split = (FileSplit) split;
        configuration = context.getConfiguration();
    }

    /**
     * Reads the whole split into the current value on the first call.
     *
     * @return {@code true} on the first call, {@code false} afterwards
     * @throws IOException if the split does not fit in a byte array or reading fails;
     *         failures are propagated instead of emitting an empty or partial record
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (!isProgress) {
            return false;
        }

        // 1. Allocate the buffer; guard the narrowing cast against silent overflow.
        long length = split.getLength();
        if (length > Integer.MAX_VALUE) {
            throw new IOException("Split too large to read as a single record: "
                    + split.getPath() + " (" + length + " bytes)");
        }
        byte[] contents = new byte[(int) length];

        // 2. Resolve the file system that owns the path.
        Path path = split.getPath();
        FileSystem fs = path.getFileSystem(configuration);

        FSDataInputStream fis = null;
        try {
            // 3. Open the file and 4. read its content completely.
            fis = fs.open(path);
            IOUtils.readFully(fis, contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(fis);
        }

        // 5. Publish the content as the value.
        value.set(contents, 0, contents.length);
        // 6./7. Use the full path string as the key.
        k.set(path.toString());

        isProgress = false;
        return true;
    }

    /** @return the key set by the last successful {@link #nextKeyValue()} */
    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return k;
    }

    /** @return the value set by the last successful {@link #nextKeyValue()} */
    @Override
    public BytesWritable getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    /** @return 0 before the single record has been read, 1 afterwards */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        return isProgress ? 0f : 1f;
    }

    /** Nothing to release: the input stream is closed inside {@link #nextKeyValue()}. */
    @Override
    public void close() throws IOException {
    }
}
3 MR類
4 設置輸入的inputFormat
// Register the custom input format on the job
job.setInputFormatClass(WholeFileInputformat.class);
用這樣的方法就完成了自定義InputFormat和OutputFormat