筆試題分析（1）

最近看到一個筆試題目：

10W個文本文件存放在/opt/test/目錄及其子目錄下，每個文件的大小爲1M。
統計文件中的字母A出現的個數。
4核CPU，8G內存。

看到這樣的題目，首先想到的方法是遍歷所有文件，然後把每個文件中的A的次數給統計出來。

那我們一步步分開來。

1、遍歷10W個文本文件

方法一：通過遞歸（下文稱爲「迭代法」）遍歷目錄，得到所有文本文件的數量。但這樣是單線程執行，文件數量越多，耗時就越長。

    /**
     * Approach 1: counts every non-directory entry beneath {@code path}, descending into
     * sub-directories recursively on the calling thread.
     *
     * @param path root of the tree to scan
     * @return number of non-directory entries found; 0 when {@code path} does not exist or
     *         {@code listFiles()} yields {@code null} for it
     */
    public long traverseFolder(String path) {
        File root = new File(path);
        if (!root.exists()) {
            return 0;
        }

        File[] entries = root.listFiles();
        if (entries == null) {
            return 0;
        }

        long total = 0;
        for (int idx = 0; idx < entries.length; idx++) {
            File entry = entries[idx];
            // A directory contributes whatever its own subtree holds; anything else counts once.
            total += entry.isDirectory() ? traverseFolder(entry.getAbsolutePath()) : 1;
        }
        return total;
    }

方法二：通過 ForkJoinPool 來完成遍歷所有文件。其實題目中「4核CPU，8G內存」這個提示，就是希望我們用併發來完成，這樣可以提高效率。關於 ForkJoinPool 這個併發工具，如果不瞭解，這裏就不解釋了，請自行查閱。

/**
 * Fork/join task that counts the non-directory entries beneath one directory.
 *
 * <p>Every sub-directory found becomes a child task of the same type; the result of a task
 * is its own entry count plus the results of all its children.
 */
public class Demo extends RecursiveTask<Integer> {

    /** Directory this task is responsible for. */
    private String path;

    private Demo(String path) {
        this.path = path;
    }

    /**
     * Lists {@link #path}, counts the entries that are not directories and delegates each
     * sub-directory to a child task.
     *
     * @return the number of non-directory entries in this subtree; whatever was counted
     *         before a failure if an exception is raised along the way
     */
    @Override
    protected Integer compute() {
        int total = 0;
        List<Demo> children = new ArrayList<>();

        try {
            File[] entries = new File(path).listFiles();
            if (entries != null) {
                for (File entry : entries) {
                    if (!entry.isDirectory()) {
                        total++;
                        continue;
                    }
                    children.add(new Demo(entry.getPath()));
                }
            }

            if (children.size() > 0) {
                // Schedule all children on the current pool, then collect their results.
                invokeAll(children);
                for (Demo child : children) {
                    total += child.join();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return total;
    }
}

2、統計每個 txt 文件中 A 出現的次數

這個屬於流操作，基本大家都沒什麼問題。

    /**
     * Counts how many times {@code c1} occurs in the text file at {@code path}.
     *
     * <p>The file is read line by line with the platform default charset and each line is
     * scanned as soon as it is read, so the work is linear in the file size and the whole
     * content is never accumulated in one string. Line terminators are stripped by
     * {@code readLine()} and are therefore never counted.
     *
     * @param path path of the file to read
     * @param c1   character to look for
     * @return number of occurrences, or 0 when the file cannot be opened or read
     */
    private static int getcount(String path, char c1) {
        int count = 0;
        // try-with-resources closes the reader on every exit path.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (int i = 0; i < line.length(); i++) {
                    if (line.charAt(i) == c1) {
                        count++;
                    }
                }
            }
        } catch (IOException | SecurityException e) {
            // Best effort: an unreadable file contributes nothing rather than a partial count.
            System.err.println("文件讀取錯誤: " + path + " (" + e + ")");
            return 0;
        }
        return count;
    }

3、完整代碼

到這裏，應該是把整個題目給完成了，下面我貼一下完整的代碼。我對代碼進行了修改：因爲我測試的是自己桌面中的所有文件，並不都是 txt，所以只統計以 .txt 結尾的文件；另外要注意，代碼中統計的是小寫字母 a。

package com.blog.common.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveTask;

/**
 * Counts occurrences of the letter {@code 'a'} in every {@code .txt} file beneath a
 * directory, once with plain single-threaded recursion ({@link #traverseFolder}) and once as
 * a fork/join task ({@link #compute}), so that the two approaches can be timed against each
 * other.
 *
 * <p>The fork/join result type is {@code Integer}; totals are accumulated with ordinary
 * {@code int} arithmetic.
 *
 * <p>Created by dyw on 2017/7/25.
 */
public class Demo extends RecursiveTask<Integer> {

    /** Directory scanned by {@link #main} when no command-line argument is supplied. */
    private static final String DEFAULT_ROOT = "C:\\Users\\dyw\\Desktop";

    /** Character whose occurrences are counted. */
    private static final char TARGET = 'a';

    /** Only files whose absolute path ends with this suffix are read. */
    private static final String TEXT_SUFFIX = ".txt";

    /** Directory scanned by {@link #compute}; {@code null} for the instance that only serves {@link #traverseFolder}. */
    private final String path;

    private Demo() {
        this(null);
    }

    private Demo(String path) {
        this.path = path;
    }

    /**
     * Times both traversal strategies on the same directory and prints
     * {@code <elapsed millis>==============<count>} for each of them.
     *
     * @param args optional; {@code args[0]} is the directory to scan, otherwise
     *             {@link #DEFAULT_ROOT} is used
     */
    public static void main(String[] args) {
        String root = (args != null && args.length > 0) ? args[0] : DEFAULT_ROOT;

        // Plain single-threaded recursion.
        long recursiveStart = System.currentTimeMillis();
        long recursiveCount = new Demo().traverseFolder(root);
        System.out.println(System.currentTimeMillis() - recursiveStart + "==============" + recursiveCount);

        // Fork/join; the measured time deliberately includes creating the pool.
        long forkJoinStart = System.currentTimeMillis();
        ForkJoinPool pool = new ForkJoinPool();
        try {
            Integer forkJoinCount = pool.invoke(new Demo(root));
            System.out.println(System.currentTimeMillis() - forkJoinStart + "==============" + forkJoinCount);
        } finally {
            // Release the worker threads instead of leaving them to idle out.
            pool.shutdown();
        }
    }

    /**
     * Recursively sums the occurrences of {@code 'a'} in all {@code .txt} files beneath
     * {@code path}, on the calling thread.
     *
     * @param path root of the tree to scan
     * @return total number of occurrences; 0 when {@code path} does not exist or is not a
     *         listable directory
     */
    public long traverseFolder(String path) {
        // listFiles() returns null for a missing path or a non-directory, which covers the
        // "does not exist" case as well.
        File[] entries = new File(path).listFiles();
        if (entries == null) {
            return 0;
        }

        long count = 0;
        for (File entry : entries) {
            if (entry.isDirectory()) {
                count += traverseFolder(entry.getAbsolutePath());
            } else {
                count += countInTextFile(entry);
            }
        }
        return count;
    }

    /**
     * Counts the {@code .txt} files directly inside {@link #path} and forks one child task
     * per sub-directory, adding their results to its own.
     *
     * @return total number of occurrences in this subtree; whatever was counted before a
     *         failure if a runtime exception is raised along the way
     */
    @Override
    protected Integer compute() {
        int count = 0;
        List<Demo> subTasks = new ArrayList<>();

        try {
            File[] entries = new File(path).listFiles();
            if (entries != null) {
                for (File entry : entries) {
                    if (entry.isDirectory()) {
                        subTasks.add(new Demo(entry.getPath()));
                    } else {
                        count += countInTextFile(entry);
                    }
                }
            }
            if (!subTasks.isEmpty()) {
                // Schedule every child on the current ForkJoinPool and collect the results.
                for (Demo subTask : invokeAll(subTasks)) {
                    count += subTask.join();
                }
            }
        } catch (RuntimeException e) {
            // Best effort: report the failing directory and return what has been counted so far.
            System.err.println("Failed to scan " + path);
            e.printStackTrace();
        }
        return count;
    }

    /**
     * Counts {@link #TARGET} in {@code file} if it is a {@code .txt} file and prints the
     * per-file result; any other file is skipped silently.
     *
     * @param file a non-directory entry
     * @return occurrences found in the file, or 0 when the file is skipped
     */
    private static int countInTextFile(File file) {
        String absolutePath = file.getAbsolutePath();
        if (!absolutePath.endsWith(TEXT_SUFFIX)) {
            return 0;
        }
        int occurrences = getcount(absolutePath, TARGET);
        System.out.println(absolutePath + "==========================" + occurrences);
        return occurrences;
    }

    /**
     * Counts how many times {@code c1} occurs in the text file at {@code path}.
     *
     * <p>The file is read line by line with the platform default charset and each line is
     * scanned as soon as it is read, so the work is linear in the file size and the whole
     * content is never accumulated in one string. Line terminators are stripped by
     * {@code readLine()} and are therefore never counted.
     *
     * @param path path of the file to read
     * @param c1   character to look for
     * @return number of occurrences, or 0 when the file cannot be opened or read
     */
    private static int getcount(String path, char c1) {
        int count = 0;
        // try-with-resources closes the reader on every exit path.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (int i = 0; i < line.length(); i++) {
                    if (line.charAt(i) == c1) {
                        count++;
                    }
                }
            }
        } catch (IOException | SecurityException e) {
            // Best effort: an unreadable file contributes nothing rather than a partial count.
            System.err.println("文件讀取錯誤: " + path + " (" + e + ")");
            return 0;
        }
        return count;
    }
}

4、運行結果

因爲我的電腦是4核的，用 ForkJoinPool 的耗時基本上是迭代法的四分之一左右。

//迭代法
4223==============32390
//ForkJoinPool方法
1220==============32390

上面是我對這題目的見解，如果我的理解有什麼問題，或者代碼可以優化的地方，請給我留言，大家一起互相進步。

丁垠午

發佈了62 篇原創文章 · 獲贊 80 · 訪問量 12萬+

私信關注

筆試題分析（1）

基於PostgreSQL鏈接JDBC源碼分析

爬蟲記錄（3）——模擬登錄獲取cookie，訪問私信頁面

從零開始搭建自己的網站二十：雲服務器安全組規則配置

Java微信分享接口開發

爬蟲記錄（5）——爬到的文件信息保存到數據庫

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結