一、問題描述
存在若干個文件夾,文件夾名稱以年月爲名(一個月份一個文件夾)
例:201901,201902,202011,202012。每個文件夾下有上萬個txt文件,文件名均爲9位數字
例:204125631.txt,315125620.txt,478125650.txt
每個txt文本有上千行數據,並且每個文件夾(年月爲名)下的9位數文件名大多相同(只有少部分不一樣)
二、問題需求
現在需要將每個月的文件夾下具有相同文件名的txt文件按照時間排序進行合併(不要求源文件不變)
三、代碼實現
RenameMMSI
package com.xtd.file.Thread;
import java.io.File;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Renames every entry found one level below a root directory
 * ({@code <root>/<month>/<file>}) by dropping the first seven characters of its name.
 * The renames are executed on a fixed-size thread pool and {@link #rename()} returns
 * only after all of them have finished.
 */
public class RenameMMSI {
    // Root directory whose month sub-directories hold the files to rename
    private static final String basedir = "H:\\歷史全量\\running";
    // File handle for the root directory
    private static final File baseFile = new File(basedir);
    // Number of leading characters stripped from every file name
    private static final int PREFIX_LENGTH = 7;
    // Size of the fixed worker pool that performs the renames
    private static final int POOL_SIZE = 60;

    /**
     * Runs {@link #rename()} on the configured root directory and prints the elapsed
     * wall-clock time in milliseconds.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        long time1 = System.currentTimeMillis();
        rename();
        long time2 = System.currentTimeMillis();
        System.out.println("time:" + (time2 - time1));
    }

    /** Renames the files below the configured root directory. */
    public static void rename() {
        rename(baseFile);
    }

    /**
     * Strips the first seven characters from the name of every entry inside each
     * sub-directory of {@code root}. Entries whose name is not longer than seven
     * characters are skipped, as are children of {@code root} that cannot be listed.
     * Failures are reported on {@code System.err}; the method blocks until every
     * submitted rename has completed.
     *
     * @param root directory containing one sub-directory per month
     */
    public static void rename(File root) {
        String[] months = root.list();
        if (months == null) {
            System.err.println("Cannot list root directory: " + root);
            return;
        }
        ExecutorService pool = Executors.newFixedThreadPool(POOL_SIZE);
        try {
            for (String month : months) {
                File monthDir = new File(root, month);
                String[] names = monthDir.list();
                if (names == null) {
                    // Not a directory, or not readable: nothing to rename here.
                    continue;
                }
                for (String name : names) {
                    File oldFile = new File(monthDir, name);
                    if (name.length() <= PREFIX_LENGTH) {
                        // Stripping the prefix would leave an empty name (or throw).
                        System.err.println("Skipped, name too short: " + oldFile);
                        continue;
                    }
                    File newFile = new File(monthDir, name.substring(PREFIX_LENGTH));
                    pool.execute(() -> {
                        if (!oldFile.renameTo(newFile)) {
                            System.err.println("Rename failed: " + oldFile + " -> " + newFile);
                        }
                    });
                }
            }
        } finally {
            pool.shutdown();
        }
        awaitCompletion(pool);
    }

    /** Blocks until the already shut-down pool has drained, honouring interruption. */
    private static void awaitCompletion(ExecutorService pool) {
        try {
            while (!pool.awaitTermination(1, java.util.concurrent.TimeUnit.MINUTES)) {
                // Still working; keep waiting so callers observe a finished rename.
            }
        } catch (InterruptedException e) {
            pool.shutdownNow();
            Thread.currentThread().interrupt();
        }
    }
}
MoveMMSI
package com.xtd.file.Thread;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Regroups the per-month files of a root directory by file name.
 * <ol>
 *   <li>Collects the distinct file names found inside the month directories.</li>
 *   <li>Creates one directory per name (last four characters stripped) under the move
 *       directory.</li>
 *   <li>Creates an empty file with that name in the merge directory.</li>
 *   <li>Moves each month's copy of the file into the per-name directory as
 *       {@code <yyyyMM>_<name>}.</li>
 * </ol>
 * All work is done sequentially on the calling thread.
 */
public class MoveMMSI {
    // Roughly 30-40 thousand distinct MMSI names are expected; presize accordingly
    private static final Set<String> set = new HashSet<>(46327);
    // Root directory containing one sub-directory per month
    private static final String basedir = "H:\\歷史全量\\running";
    // File handle for the root directory
    private static final File baseFile = new File(basedir);
    // Directory that receives one sub-directory per file name
    private static final String moveDir = baseFile.getParent() + "\\move";
    // Directory that receives one (initially empty) merged file per file name
    private static final String mergeDir = baseFile.getParent() + "\\merge";
    // Number of leading characters of a month directory name used as file prefix
    private static final int MONTH_LENGTH = 6;
    // Number of trailing characters dropped from a file name to form its directory name
    private static final int EXTENSION_LENGTH = 4;

    /**
     * Creates the move and merge directories, collects the file names, moves the files,
     * then prints the month directory names followed by the elapsed milliseconds of the
     * collect-and-move phase.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        long time1 = System.currentTimeMillis();
        ensureDir(new File(mergeDir));
        ensureDir(new File(moveDir));
        dirSet(basedir);
        foreachSet();
        long time2 = System.currentTimeMillis();
        formothList(baseFile.list());
        System.out.println(time2 - time1);
    }

    /**
     * Adds the name of every entry found inside each sub-directory of {@code dir} to the
     * shared name set and prints how many entries were visited (duplicates included).
     *
     * @param dir root directory containing one sub-directory per month
     */
    public static void dirSet(String dir) {
        File root = new File(dir);
        String[] months = root.list();
        if (months == null) {
            System.err.println("Cannot list root directory: " + root);
            return;
        }
        int total = 0;
        for (String month : months) {
            String[] names = new File(root, month).list();
            if (names == null) {
                // Not a directory, or not readable.
                continue;
            }
            for (String name : names) {
                set.add(name);
                ++total;
            }
        }
        System.out.println(total);
    }

    /** Moves the collected files using the configured root, move and merge directories. */
    public static void foreachSet() {
        foreachSet(basedir, moveDir, mergeDir);
    }

    /**
     * For every collected file name: creates its directory under {@code moveRoot}, creates
     * an empty file of the same name under {@code mergeRoot}, and moves the file from each
     * month directory of {@code sourceRoot} (where present) into the per-name directory,
     * prefixed with the first six characters of the month directory name and an
     * underscore. Names of at most four characters and month directories shorter than six
     * characters are skipped. Prints a separator line first and the number of collected
     * names last.
     *
     * @param sourceRoot root directory containing one sub-directory per month
     * @param moveRoot   directory receiving one sub-directory per file name
     * @param mergeRoot  directory receiving one empty file per file name
     */
    public static void foreachSet(String sourceRoot, String moveRoot, String mergeRoot) {
        System.out.println("=============================================");
        File source = new File(sourceRoot);
        String[] months = source.list();
        if (months == null) {
            System.err.println("Cannot list root directory: " + source);
            return;
        }
        for (String name : set) {
            if (name.length() <= EXTENSION_LENGTH) {
                // Stripping the extension would leave an empty directory name (or throw).
                System.err.println("Skipped, name too short: " + name);
                continue;
            }
            File targetDir =
                    new File(moveRoot, name.substring(0, name.length() - EXTENSION_LENGTH));
            ensureDir(targetDir);
            File mergeFile = new File(mergeRoot, name);
            try {
                mergeFile.createNewFile();
            } catch (IOException e) {
                System.err.println("Cannot create " + mergeFile + ": " + e);
            }
            for (String month : months) {
                if (month.length() < MONTH_LENGTH) {
                    continue;
                }
                File from = new File(new File(source, month), name);
                if (!from.exists()) {
                    // Not every name occurs in every month.
                    continue;
                }
                File to = new File(targetDir, month.substring(0, MONTH_LENGTH) + "_" + name);
                if (!from.renameTo(to)) {
                    System.err.println("Move failed: " + from + " -> " + to);
                }
            }
        }
        System.out.println(set.size());
    }

    /**
     * Prints each element of {@code monthList} on its own line; does nothing for
     * {@code null}.
     *
     * @param monthList names to print, may be {@code null}
     */
    public static void formothList(String[] monthList) {
        if (monthList == null) {
            return;
        }
        for (String s : monthList) {
            System.out.println(s);
        }
    }

    /** Creates {@code dir} (and missing parents) unless it already exists; reports failure. */
    private static void ensureDir(File dir) {
        if (!dir.isDirectory() && !dir.mkdirs()) {
            System.err.println("Cannot create directory: " + dir);
        }
    }
}
MergeMMSI
package com.xtd.file.Thread;
import java.io.*;
import java.util.Arrays;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Concatenates, for every sub-directory of the move directory, the files it contains
 * (in ascending file-name order) into one file in the merge directory. The target file
 * name is the source file name without its first seven characters.
 */
public class MergeMMSI {
    // Directory holding one sub-directory per MMSI, as produced by the move step
    private static final String moveDir = "H:\\歷史全量\\move";
    // Directory receiving the merged files
    private static final String mergeDir = "H:\\歷史全量\\merge";
    // Number of leading characters stripped from a source name to form the target name
    private static final int PREFIX_LENGTH = 7;
    // Size of the fixed worker pool
    private static final int POOL_SIZE = 60;
    // Chunk size used when reading files
    private static final int BUFFER_SIZE = 8192;

    /**
     * Merges the configured move directory into the configured merge directory and prints
     * the elapsed wall-clock time in milliseconds.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        long time1 = System.currentTimeMillis();
        forMoveDir();
        long time2 = System.currentTimeMillis();
        System.out.println(time2 - time1);
    }

    /** Merges the configured move directory into the configured merge directory. */
    public static void forMoveDir() {
        forMoveDir(moveDir, mergeDir);
    }

    /**
     * Submits one task per entry of {@code moveRoot}; each task appends the files of that
     * sub-directory, sorted by name, to their target file under {@code mergeRoot}. Blocks
     * until all tasks have finished, then prints the number of submitted tasks.
     *
     * @param moveRoot  directory containing one sub-directory per MMSI
     * @param mergeRoot directory receiving the merged files; created if missing
     */
    public static void forMoveDir(String moveRoot, String mergeRoot) {
        File moveBase = new File(moveRoot);
        String[] mmsiDirs = moveBase.list();
        if (mmsiDirs == null) {
            System.err.println("Cannot list move directory: " + moveBase);
            return;
        }
        File mergeBase = new File(mergeRoot);
        if (!mergeBase.isDirectory() && !mergeBase.mkdirs()) {
            System.err.println("Cannot create merge directory: " + mergeBase);
            return;
        }
        int total = 0;
        ExecutorService pool = Executors.newFixedThreadPool(POOL_SIZE);
        try {
            for (String mmsi : mmsiDirs) {
                File mmsiDir = new File(moveBase, mmsi);
                // One task per directory keeps the appends of that directory in order.
                pool.execute(() -> mergeDirectory(mmsiDir, mergeBase));
                ++total;
            }
        } finally {
            pool.shutdown();
        }
        awaitCompletion(pool);
        System.out.println(total);
    }

    /**
     * Appends every file of {@code mmsiDir}, in ascending name order, to the file in
     * {@code mergeBase} named after the source file minus its first seven characters.
     * NOTE(review): this presumably relies on all files of one directory sharing the same
     * suffix so that no two tasks append to the same target; confirm against the producer.
     */
    private static void mergeDirectory(File mmsiDir, File mergeBase) {
        String[] names = mmsiDir.list();
        if (names == null) {
            // Not a directory, or not readable.
            return;
        }
        Arrays.sort(names);
        for (String name : names) {
            File source = new File(mmsiDir, name);
            if (name.length() <= PREFIX_LENGTH) {
                System.err.println("Skipped, name too short: " + source);
                continue;
            }
            File target = new File(mergeBase, name.substring(PREFIX_LENGTH));
            appendWrite(source.getPath(), target.getPath());
        }
    }

    /** Blocks until the already shut-down pool has drained, honouring interruption. */
    private static void awaitCompletion(ExecutorService pool) {
        try {
            while (!pool.awaitTermination(1, java.util.concurrent.TimeUnit.MINUTES)) {
                // Still merging; keep waiting so callers observe complete output.
            }
        } catch (InterruptedException e) {
            pool.shutdownNow();
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Appends the complete content of {@code inName} to {@code outName}. The source is read
     * fully before the target is opened, so a failed read appends nothing. I/O errors are
     * reported on {@code System.err} and not propagated.
     *
     * @param inName  path of the file to read
     * @param outName path of the file to append to; created if missing
     */
    public static void appendWrite(String inName, String outName) {
        try {
            byte[] bytes = readFully(inName);
            try (FileOutputStream out = new FileOutputStream(outName, true)) {
                out.write(bytes);
            }
        } catch (IOException e) {
            System.err.println("Append failed: " + inName + " -> " + outName + ": " + e);
        }
    }

    /**
     * Appends {@code content}, encoded with the platform default charset, to the file
     * {@code inName} and then prints a separator line to standard output. I/O errors are
     * reported on {@code System.err} and not propagated.
     *
     * @param inName  path of the file to append to; created if missing
     * @param content text to append
     */
    public static void outputSteam(String inName, String content) {
        try (FileOutputStream out = new FileOutputStream(inName, true)) {
            out.write(content.getBytes());
            System.out.println("--------------------------------------------");
        } catch (IOException e) {
            System.err.println("Write failed: " + inName + ": " + e);
        }
    }

    /**
     * Reads the whole file {@code inName} and decodes it with the platform default charset.
     *
     * @param inName path of the file to read
     * @return the file content, or {@code null} if the file could not be read
     */
    public static String inputStram(String inName) {
        try {
            return new String(readFully(inName));
        } catch (IOException e) {
            System.err.println("Read failed: " + inName + ": " + e);
            return null;
        }
    }

    /** Reads all bytes of {@code inName}, looping until end of stream. */
    private static byte[] readFully(String inName) throws IOException {
        try (FileInputStream in = new FileInputStream(inName);
             ByteArrayOutputStream bytes = new ByteArrayOutputStream()) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            // A single read() may return fewer bytes than requested, hence the loop.
            while ((read = in.read(buffer)) != -1) {
                bytes.write(buffer, 0, read);
            }
            return bytes.toByteArray();
        }
    }
}