在我們的項目中,如果數據量很大,分庫不失爲一個選擇。但分庫之後,如何對全部數據進行操作呢?比如對所有數據去重並排序。下面以文件代替數據庫進行模擬:
target_file 是合併後生成的文件,比原文件小了很多,是因爲數據已經去重。
這是臨時文件(類似於我們的多個庫)
下面是代碼實現:
import lombok.*;
import java.io.BufferedReader;
import java.io.IOException;
/**
 * Cursor over one chunk file: the file's name, the value most recently read from it,
 * and the reader used to fetch the next value.
 *
 * <p>Accessors, {@code equals}/{@code hashCode}/{@code toString}, the builder and both
 * constructors are generated by the Lombok annotations below.
 *
 * <p>Project: testdemo. Created 2019/3/15.
 *
 * @author SJT
 */
@Data
@ToString
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class FileInfo {
    /**
     * Value stored in {@code currentNum} once the reader has no more lines.
     * A data line that itself contains {@code -1} cannot be told apart from this marker.
     */
    public static final int END_OF_DATA = -1;

    // Name of the file this cursor reads from.
    private String fileName;
    // Value currently under the cursor, i.e. the one to compare against other files.
    private int currentNum;
    // Reader positioned just after the line that produced currentNum.
    private BufferedReader reader;

    /**
     * Advances the cursor by one line and stores the parsed value in {@code currentNum}.
     * When the reader is exhausted, {@code currentNum} is set to {@link #END_OF_DATA}.
     *
     * @throws IOException if reading the next line fails
     * @throws NumberFormatException if the line is not a valid integer
     */
    public void readNext() throws IOException {
        String data = reader.readLine();
        if (data == null) {
            this.currentNum = END_OF_DATA;
            return;
        }
        // parseInt yields the primitive directly (no boxing); trim tolerates stray whitespace.
        this.currentNum = Integer.parseInt(data.trim());
    }
}
import java.io.*;
import java.util.*;
/**
 * External merge sort with de-duplication over a text file holding one integer per line.
 *
 * <p>{@link #splitFile(String)} cuts the input into sorted, duplicate-free chunk files in the
 * temp directory; {@link #merge()} then k-way merges every chunk file into the target file,
 * dropping values that repeat across chunks.
 *
 * <p>Limitation: {@code FileInfo} reports end-of-file by setting its current number to
 * {@code -1}, so an input value of {@code -1} is treated as the end of its chunk.
 *
 * <p>Project: testdemo. Created 2019/3/14.
 *
 * @author SJT
 */
public class MergeSort {
    private static final String FILE_DIR = "C:\\Users\\admin\\Desktop\\DOC";
    private static final String TEMP_FILE_DIR = "C:\\Users\\admin\\Desktop\\DOC\\TEST";
    // Maximum number of input lines collected into one chunk file.
    private static final int SPLIT_SIZE = 500000;
    // Chunk file name prefix; the chunk number is appended to it.
    private static final String FILE_PREFIX = "tjs";
    // Merged output file.
    private static final String TARGET_FILE = "C:\\Users\\admin\\Desktop\\DOC\\target_file";
    // Value FileInfo.readNext() stores in currentNum when its reader is exhausted.
    private static final int END_OF_FILE = -1;

    // Orders cursors by current value, then by file name so ties are deterministic.
    // Integer.compare is used because subtracting two ints can overflow and flip the sign.
    private static final Comparator<FileInfo> BY_CURRENT_NUM = (left, right) -> {
        int byValue = Integer.compare(left.getCurrentNum(), right.getCurrentNum());
        return byValue != 0 ? byValue : left.getFileName().compareTo(right.getFileName());
    };

    // Number appended to the next chunk file name.
    private int fileNo = 1;

    public static void main(String[] args) throws IOException {
        MergeSort ms = new MergeSort();
        ms.splitFile("test.txt");
        ms.merge();
    }

    /**
     * Splits the input file into sorted, duplicate-free chunk files.
     *
     * <p>A chunk is flushed after {@code SPLIT_SIZE} non-blank lines have been read; the
     * remaining values are flushed at end of input. Blank lines are skipped.
     *
     * @param fileName name of the input file inside {@code FILE_DIR}
     * @throws IOException if the input cannot be read or a chunk cannot be written
     * @throws NumberFormatException if a non-blank line is not a valid integer
     */
    private void splitFile(String fileName) throws IOException {
        SortedSet<Integer> chunk = new TreeSet<>();
        int linesInChunk = 0;
        String inputPath = FILE_DIR + File.separator + fileName;
        try (BufferedReader br = new BufferedReader(new FileReader(inputPath))) {
            String line;
            while ((line = br.readLine()) != null) {
                String text = line.trim();
                if (text.isEmpty()) {
                    continue;
                }
                chunk.add(Integer.valueOf(text));
                linesInChunk++;
                if (linesInChunk >= SPLIT_SIZE) {
                    writeFile(TEMP_FILE_DIR, chunk);
                    chunk.clear();
                    linesInChunk = 0;
                }
            }
        }
        // Last, partially filled chunk.
        if (!chunk.isEmpty()) {
            writeFile(TEMP_FILE_DIR, chunk);
        }
    }

    /**
     * Writes one chunk to the next numbered chunk file, one value per line in ascending order.
     *
     * @param s directory that receives the chunk file
     * @param set values of the chunk; iterated in the set's sorted order
     * @throws IOException if the chunk file cannot be created or written
     */
    private void writeFile(String s, SortedSet<Integer> set) throws IOException {
        String chunkPath = s + File.separator + FILE_PREFIX + fileNo;
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(chunkPath))) {
            for (Integer value : set) {
                bw.write(value + "\r\n");
            }
        }
        // Advance only after the chunk was written successfully.
        fileNo++;
    }

    /**
     * Merges every regular file in the temp directory into the target file in ascending
     * order, writing each distinct value once.
     *
     * <p>All regular files found in the directory take part, including any left over from
     * an earlier run. Files that are empty contribute nothing.
     *
     * @throws IOException if the temp directory cannot be listed, or reading a chunk or
     *     writing the target file fails
     */
    private void merge() throws IOException {
        File[] files = new File(TEMP_FILE_DIR).listFiles(File::isFile);
        if (files == null) {
            // listFiles returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list temp directory: " + TEMP_FILE_DIR);
        }

        List<BufferedReader> readers = new ArrayList<>(files.length);
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(TARGET_FILE))) {
            PriorityQueue<FileInfo> heap =
                    new PriorityQueue<>(Math.max(1, files.length), BY_CURRENT_NUM);
            for (File file : files) {
                BufferedReader reader = new BufferedReader(new FileReader(file));
                readers.add(reader);
                FileInfo fileInfo = FileInfo.builder()
                        .fileName(file.getName())
                        .reader(reader)
                        .build();
                fileInfo.readNext();
                // An empty file is exhausted immediately; its end marker must not be written.
                if (fileInfo.getCurrentNum() != END_OF_FILE) {
                    heap.add(fileInfo);
                }
            }

            boolean wroteAny = false;
            int lastWritten = 0;
            while (!heap.isEmpty()) {
                // The head holds the smallest current value across all chunk files.
                FileInfo smallest = heap.poll();
                int value = smallest.getCurrentNum();
                // Chunks are duplicate-free internally, but the same value may appear in
                // several chunks; values arrive in ascending order, so comparing with the
                // previous one is enough to de-duplicate globally.
                if (!wroteAny || value != lastWritten) {
                    bw.write(value + "\r\n");
                    lastWritten = value;
                    wroteAny = true;
                }
                // Advance this file's cursor and put it back unless the file is exhausted.
                smallest.readNext();
                if (smallest.getCurrentNum() != END_OF_FILE) {
                    heap.add(smallest);
                }
            }
        } finally {
            for (BufferedReader reader : readers) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Read-only stream: a failed close cannot lose data, and it must not
                    // mask an exception thrown by the merge itself.
                }
            }
        }
    }
}
ok,這樣即可~.~