數據結構之歸併算法(類似於分庫操作模擬)

在我們的項目中,如果數據量很大,分庫不失爲一個選擇。那麼分庫之後,如何對全部數據進行操作(例如對所有數據去重並排序)呢?下面用文件代替數據庫進行模擬:

target_file 是合併後生成的文件;它小了很多,是因爲數據經過了去重。

這些是拆分後生成的臨時文件(類似於分庫後的多個庫)。

下面是代碼實現:

import lombok.*;

import java.io.BufferedReader;
import java.io.IOException;

/**
 * Cursor over a text file that holds one integer per line.
 *
 * <p>An instance pairs a file name with the reader used to consume it and the
 * value most recently read from it. Getters, setters, the builder and the
 * constructors are generated by Lombok.
 *
 * <p>Limitation: end of file is reported by storing {@code -1} in
 * {@code currentNum}, so a genuine {@code -1} in the data cannot be told
 * apart from end of file through this class alone.
 *
 * @author SJT
 */
@Data
@ToString
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class FileInfo {

    /** Name of the file being read. */
    private String fileName;

    /** Value parsed from the most recently read line, or {@code -1} after end of file. */
    private int currentNum;

    /** Reader acting as the read position inside the file. */
    private BufferedReader reader;

    /**
     * Reads the next line from {@code reader} and stores it in {@code currentNum}.
     *
     * <p>When the reader has no more lines, {@code currentNum} is set to
     * {@code -1}. The reader is not closed by this method.
     *
     * @throws IOException           if reading from the underlying reader fails
     * @throws NumberFormatException if the line is not a valid {@code int}
     */
    public void readNext() throws IOException {
        String data = reader.readLine();
        if (data != null) {
            // parseInt avoids the needless Integer boxing of valueOf; trim()
            // tolerates stray whitespace around the number.
            this.currentNum = Integer.parseInt(data.trim());
        } else {
            this.currentNum = -1;
        }
    }

}
import java.io.*;
import java.util.*;

/**
 * External merge sort demo.
 *
 * <p>The input file (one integer per line) is split into chunk files of at
 * most {@link #SPLIT_SIZE} lines each; every chunk is sorted and de-duplicated
 * in memory before being written. The chunk files are then combined with a
 * k-way merge into a single sorted, duplicate-free target file.
 *
 * @author SJT
 */
public class MergeSort {

    /** Directory that contains the input file. */
    private static final String FILE_DIR = "C:\\Users\\admin\\Desktop\\DOC";

    /** Directory the chunk files are written to and later merged from. */
    private static final String TEMP_FILE_DIR = "C:\\Users\\admin\\Desktop\\DOC\\TEST";

    /** Maximum number of input lines that go into one chunk file. */
    private static final int SPLIT_SIZE = 500000;

    /** File-name prefix of the chunk files; the chunk number is appended. */
    private static final String FILE_PREFIX = "tjs";

    /** Path of the merged output file. */
    private static final String TARGET_FILE = "C:\\Users\\admin\\Desktop\\DOC\\target_file";

    /** Number used for the next chunk file. */
    private int fileNo = 1;

    /**
     * Orders cursors by their current value, breaking ties by file name so the
     * ordering is deterministic.
     */
    private final Comparator<FileInfo> comparator = new Comparator<FileInfo>() {
        @Override
        public int compare(FileInfo o1, FileInfo o2) {
            // Integer.compare instead of subtraction: a - b overflows when the
            // operands have opposite signs and large magnitude.
            int byValue = Integer.compare(o1.getCurrentNum(), o2.getCurrentNum());
            if (byValue != 0) {
                return byValue;
            }
            return o1.getFileName().compareTo(o2.getFileName());
        }
    };

    public static void main(String[] args) throws IOException {
        MergeSort ms = new MergeSort();
        ms.splitFile("test.txt");
        ms.merge();
    }

    /**
     * Splits the input file into sorted, de-duplicated chunk files.
     *
     * <p>Blank lines are skipped. A chunk is flushed to disk after
     * {@link #SPLIT_SIZE} data lines have been read, and once more at end of
     * input if any values remain.
     *
     * @param fileName name of the input file inside {@link #FILE_DIR}
     * @throws IOException           if the input cannot be read, the temp
     *                               directory cannot be created, or a chunk
     *                               cannot be written
     * @throws NumberFormatException if a non-blank line is not a valid {@code int}
     */
    private void splitFile(String fileName) throws IOException {
        File tempDir = new File(TEMP_FILE_DIR);
        if (!tempDir.isDirectory() && !tempDir.mkdirs()) {
            throw new IOException("Cannot create temp directory: " + tempDir);
        }

        // TreeSet keeps the chunk sorted and drops duplicates within the chunk.
        SortedSet<Integer> chunk = new TreeSet<>();
        int linesInChunk = 0;
        try (BufferedReader br = new BufferedReader(new FileReader(FILE_DIR + File.separator + fileName))) {
            String line;
            while ((line = br.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                chunk.add(Integer.valueOf(trimmed));
                linesInChunk++;
                if (linesInChunk >= SPLIT_SIZE) {
                    writeFile(TEMP_FILE_DIR, chunk);
                    chunk.clear();
                    linesInChunk = 0;
                }
            }
        }
        // Last, possibly partial, chunk.
        if (!chunk.isEmpty()) {
            writeFile(TEMP_FILE_DIR, chunk);
        }
    }

    /**
     * Writes one chunk to the next numbered chunk file, one value per line in
     * ascending order.
     *
     * @param s   directory the chunk file is created in
     * @param set values of the chunk
     * @throws IOException if the chunk file cannot be written
     */
    private void writeFile(String s, SortedSet<Integer> set) throws IOException {
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(s + File.separator + FILE_PREFIX + fileNo))) {
            for (Integer value : set) {
                bw.write(value + "\r\n");
            }
        }
        // Only advance the number once the chunk has been written completely.
        fileNo++;
    }

    /**
     * Merges all regular files in {@link #TEMP_FILE_DIR} into
     * {@link #TARGET_FILE}.
     *
     * <p>Each file must already be sorted ascending, as the chunk files written
     * by {@link #writeFile} are. A priority queue always yields the cursor with
     * the smallest current value. A value equal to the one written last is
     * skipped, so values that occur in several chunks appear only once in the
     * output.
     *
     * @throws IOException if the temp directory cannot be listed, or reading
     *                     or writing a file fails
     */
    private void merge() throws IOException {
        File[] files = new File(TEMP_FILE_DIR).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list temp directory: " + TEMP_FILE_DIR);
        }

        List<BufferedReader> readers = new ArrayList<>();
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(TARGET_FILE))) {
            PriorityQueue<FileInfo> queue = new PriorityQueue<>(comparator);
            for (File file : files) {
                if (!file.isFile()) {
                    continue;
                }
                BufferedReader reader = new BufferedReader(new FileReader(file));
                readers.add(reader);
                FileInfo fileInfo = FileInfo.builder().fileName(file.getName())
                        .reader(reader)
                        .build();
                // Files without any value never enter the queue.
                if (advance(fileInfo)) {
                    queue.add(fileInfo);
                }
            }

            boolean wroteAny = false;
            int lastWritten = 0;
            while (!queue.isEmpty()) {
                // Remove the cursor before changing its value, then re-insert it,
                // so the queue never holds an element whose key changed in place.
                FileInfo smallest = queue.poll();
                int value = smallest.getCurrentNum();
                if (!wroteAny || value != lastWritten) {
                    bw.write(value + "\r\n");
                    lastWritten = value;
                    wroteAny = true;
                }
                if (advance(smallest)) {
                    queue.add(smallest);
                }
            }
        } finally {
            for (BufferedReader reader : readers) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done when closing a read-only stream
                    // fails, and it must not mask an earlier exception.
                }
            }
        }
    }

    /**
     * Moves a cursor to the next non-blank line of its file.
     *
     * <p>End of file is detected from {@code readLine()} returning
     * {@code null} rather than from a sentinel number, so every {@code int},
     * including {@code -1}, is treated as data.
     *
     * @param fileInfo cursor to advance; its {@code currentNum} is updated
     * @return {@code true} if a value was read, {@code false} at end of file
     * @throws IOException if reading fails
     */
    private static boolean advance(FileInfo fileInfo) throws IOException {
        String line;
        while ((line = fileInfo.getReader().readLine()) != null) {
            String trimmed = line.trim();
            if (!trimmed.isEmpty()) {
                fileInfo.setCurrentNum(Integer.parseInt(trimmed));
                return true;
            }
        }
        return false;
    }

}

ok,這樣即可~.~

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章