// HDFS文件上傳(測試參數優先級) — HDFS file upload (tests that code-level configuration takes precedence)
package com.redhat.hafsclient;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.lang.InterruptedException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class HDFSclient {
private FileSystem fs;
@Before
public void before()throws IOException,InterruptedException {
fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"),new Configuration(),"redhat");
}
@Test
public void put() throws IOException, InterruptedException {//上傳文件
//獲取一個HDFS的抽象封裝對象
Configuration configuration = new Configuration();
configuration.set("dfs.replication", "2");
FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000" ), configuration,"redhat");
//用這個對象操作文件系統
fileSystem.copyFromLocalFile(new Path("c:\\test.txt"),new Path("/"));
//關閉文件系統
fileSystem.close();
}
@Test
public void get() throws IOException, InterruptedException {//下載文件
FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000" ),new Configuration(),"redhat");
fileSystem.copyToLocalFile(new Path("/test2"),new Path("c:\\"));
fileSystem.close();
}
@Test
public void rename() throws IOException,InterruptedException {//重命名文件
FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000"),new Configuration(),"redhat");
fileSystem.rename(new Path("/test"),new Path("/test2"));
fileSystem.close();
}
@Test
public void delete() throws IOException {//刪除文件
boolean delete = fs.delete(new Path("/test"),true);
if(delete){
System.out.println("刪除成功");
} else {
System.out.println("刪除失敗");
}
}
@Test
public void appendfile() throws IOException {追加文件內容
FSDataOutputStream append = fs.append(new Path("/test/1.txt"), 1024);
FileInputStream open = new FileInputStream("c:\\3.txt");//本地流
IOUtils.copyBytes(open,append,1024,true);//流交換
}
@Test
public void ls() throws IOException {//查詢文件信息
FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
for (FileStatus fileStatus : fileStatuses) {
if(fileStatus.isFile()){
System.out.println("以下信息是文件信息");
System.out.println(fileStatus.getPath());//獲取文件路徑
System.out.println(fileStatus.getLen());//獲取文件長度
System.out.println(fileStatus.getPermission());//獲取權限信息
System.out.println(fileStatus.getModificationTime());//獲取修改時間
System.out.println(fileStatus.getAccessTime());//獲取訪問時間
System.out.println(fileStatus.getReplication());//獲取副本數
} else {
System.out.println("以下是文件夾信息");
System.out.println(fileStatus.getPath());
}
}
}
@Test
public void listFiles() throws IOException { //遞歸列出全部文件
RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/"), true);
while(files.hasNext()){
LocatedFileStatus file = files.next();
System.out.println(file.getPath());
System.out.println("塊信息:");
BlockLocation[] blockLocations = file.getBlockLocations();
for (BlockLocation blockLocation : blockLocations) {
String[] hosts = blockLocation.getHosts();
System.out.println("塊在:");
for (String host : hosts) {
System.out.print(host+" ");
}
}
}
}
@After
public void after() throws IOException{
fs.close();
}
}
// HDFS的I/O流操作 (HDFS I/O stream operations)
// HDFS文件上傳 (file upload via raw streams)
// 需求:將本地C盤上的redhat.txt文件上傳到HDFS根目錄
@Test
public void putFileToHDFS() throws IOException, InterruptedException {
    // 1 Get the file system, connecting as user "redhat".
    FileSystem fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration(), "redhat");
    // 2 Open the local input stream.
    FileInputStream fis = new FileInputStream(new File("c:\\redhat.txt"));
    // 3 Create the HDFS output stream.
    FSDataOutputStream fos = fs.create(new Path("/redhat.txt"));
    // 4 Copy the stream; close=true already closes fis and fos on completion
    //   (the original also called IOUtils.closeStream afterwards, which was redundant).
    IOUtils.copyBytes(fis, fos, 1024, true);
    // 5 Release the FileSystem handle.
    fs.close();
}
// HDFS文件下載 (file download via raw streams)
@Test
public void getFileFromHDFS() throws IOException, InterruptedException {
    // 1 Get the file system, connecting as user "redhat".
    FileSystem fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration(), "redhat");
    // 2 Open the HDFS input stream.
    FSDataInputStream fis = fs.open(new Path("/test.txt"));
    // 3 Open the local output stream.
    FileOutputStream fos = new FileOutputStream(new File("c:\\test.txt"));
    // 4 Copy the stream; close=true already closes fis and fos on completion
    //   (the original also called IOUtils.closeStream afterwards, which was redundant).
    IOUtils.copyBytes(fis, fos, 1024, true);
    // 5 Release the FileSystem handle.
    fs.close();
}
// 定位文件讀取 (positioned/seek-based reads)
// 需求:分塊讀取HDFS上的大文件,比如根目錄下的/hadoop-2.7.2.tar.gz
// 在HDFS的根目錄中顯示的大小爲188.5M
// 顯示文件被分爲了兩個塊,並且每個塊有3個副本
// 下載第一塊
@Test
public void readFileseek1() throws IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // 1 Get the file system, connecting as user "redhat".
    FileSystem fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"), configuration, "redhat");
    // 2 Open the HDFS input stream.
    FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));
    // 3 Open the local output stream for the first 128 MB block.
    FileOutputStream fos = new FileOutputStream(new File("c:\\phpStudy/hadoop-2.7.2.tar.gz.part1"));
    // 4 Copy exactly the first HDFS block (128 MB).
    //   BUG FIX: the original loop ignored the return value of read(), so a
    //   short read would write stale buffer bytes (corrupting the output) and
    //   EOF was never detected. Track the actual bytes read instead.
    byte[] buf = new byte[1024];
    long remaining = 1024L * 1024 * 128;
    while (remaining > 0) {
        int len = fis.read(buf, 0, (int) Math.min(buf.length, remaining));
        if (len == -1) {
            break; // reached EOF before the block boundary
        }
        fos.write(buf, 0, len);
        remaining -= len;
    }
    // 5 Close resources.
    IOUtils.closeStream(fos);
    IOUtils.closeStream(fis);
    fs.close();
}
// 下載第二塊 (download the second block)
@Test
public void redFileseek2() throws IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // 1 Get the file system, connecting as user "redhat".
    FileSystem fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"), configuration, "redhat");
    // 2 Open the HDFS input stream and skip past the first 128 MB block, so
    //   only the remainder of the file is copied.
    FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));
    fis.seek(1024L * 1024 * 128);
    // 3 Open the local output stream for the second part.
    FileOutputStream fos = new FileOutputStream(new File("c:\\phpStudy/hadoop-2.7.2.tar.gz.part2"));
    // 4 Copy from the seek position to EOF (buffer size taken from configuration).
    IOUtils.copyBytes(fis, fos, configuration);
    // 5 Close resources.
    IOUtils.closeStream(fos);
    IOUtils.closeStream(fis);
    // BUG FIX: the original never closed the FileSystem handle (resource leak,
    // inconsistent with the sibling snippets).
    fs.close();
}
// 合併文件之前 (before merging the parts)
// 合併文件 (merge the parts)
// 在windows的命令行窗口中,進入到C:\phpStudy\,然後執行以下命令,對數據進行合併:
//   type hadoop-2.7.2.tar.gz.part2 >> hadoop-2.7.2.tar.gz.part1
// 合併完成後將hadoop-2.7.2.tar.gz.part1重新命名爲hadoop-2.7.2.tar.gz。解壓發現該tar包非常完整。