[NCNN Source Walkthrough]: the benchmark module

Open the ncnn project homepage on GitHub: https://github.com/Tencent/ncnn

You will see everything the project contains; the first entry is the benchmark folder.

Open it and you will find a readme.md; part of it translates as follows:


benchmark can be used to test neural network inference performance. Only the network definition files (ncnn .param files) are needed; the large model weight files (ncnn .bin format) are not loaded, and random weights are generated instead for timing (in the source below, this is handled by the ModelBinFromEmpty class). More models will be added later.


Build

# assume you have already built the ncnn library successfully
# uncomment the following line in <ncnn-root-dir>/CMakeLists.txt with your favorite editor

# add_subdirectory(benchmark)

$ cd <ncnn-root-dir>/<your-build-dir>
$ make -j4

# you can find benchncnn binary in <ncnn-root-dir>/<your-build-dir>/benchmark

In other words, uncomment the line # add_subdirectory(benchmark) in the CMakeLists.txt at the root of the ncnn project, then run make again.

This generates an executable binary named benchncnn under the ncnn/build/benchmark directory.


Usage

# copy all param files to the current directory
$ ./benchncnn [loop count] [num threads] [powersave] [gpu device]

So how do we use it? Open a terminal and change the working directory to the generated benchmark directory:

[chensq@localhost ~]$ cd /home/chensq/ncnn/build/benchmark/

Before that, you need to copy every file ending in ".param" from the ncnn/benchmark directory into the benchmark directory your build produced.

The copy command I used is below; substitute your own paths:

[chensq@localhost ~]$ sudo cp -r /home/chensq/ncnn/benchmark/*.param /home/chensq/ncnn/build/benchmark/

Then follow the usage format shown above for the PC. My invocation is below: loop count 4, 2 threads, powersave 0, GPU device 0:

[chensq@localhost benchmark]$ ./benchncnn 4 2 0 0

The meaning of each argument is:

loop count : number of timed runs per model (default 4)
num threads : number of CPU threads to use (default: number of CPU cores)
powersave : 0 = all cores, 1 = little cores only, 2 = big cores only (default 0)
gpu device : Vulkan GPU device index; -1 means CPU only (default -1)

Running the command above on my PC prints, for each model, the min, max, and avg inference time in milliseconds.

If you want to run the benchmark on an Android device, you can do the following:

# for running on android device, upload to /data/local/tmp/ folder
$ adb push benchncnn /data/local/tmp/
$ adb push <ncnn-root-dir>/benchmark/*.param /data/local/tmp/
$ adb shell

# executed in android adb shell
$ cd /data/local/tmp/
$ ./benchncnn [loop count] [num threads] [powersave] [gpu device]

That is, connect the phone to the PC and use adb to push the compiled benchncnn binary to the /data/local/tmp/ directory on the device; push all the ".param" network definition files from ncnn/benchmark/ to the same directory; then open an adb shell, cd to /data/local/tmp/, and run the command with the same arguments as on the PC. (I have not tried this myself; interested readers can give it a try.)

The source of benchncnn.cpp is as follows:

// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include <float.h>
#include <stdio.h>

#ifdef _WIN32
#define NOMINMAX
#include <algorithm>
#include <windows.h> // Sleep()
#else
#include <unistd.h> // sleep()
#endif

#include "benchmark.h"
#include "cpu.h"
#include "net.h"

#if NCNN_VULKAN
#include "gpu.h"

class GlobalGpuInstance
{
public:
    GlobalGpuInstance() { ncnn::create_gpu_instance(); }
    ~GlobalGpuInstance() { ncnn::destroy_gpu_instance(); }
};
// initialize vulkan runtime before main()
GlobalGpuInstance g_global_gpu_instance;
#endif // NCNN_VULKAN

namespace ncnn {

// always return empty weights
class ModelBinFromEmpty : public ModelBin
{
public:
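    // Mat(w) allocates w floats without initializing them, so the weights
    // stay effectively random, matching the "randomly generated" weights the readme mentions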
    virtual Mat load(int w, int /*type*/) const { return Mat(w); }
};

class BenchNet : public Net
{
public:
    int load_model()
    {
        // load file
        int ret = 0;

        ModelBinFromEmpty mb;
        for (size_t i=0; i<layers.size(); i++)
        {
            Layer* layer = layers[i];

            int lret = layer->load_model(mb);
            if (lret != 0)
            {
                fprintf(stderr, "layer load_model %d failed\n", (int)i);
                ret = -1;
                break;
            }
        }

#if NCNN_VULKAN
        if (use_vulkan_compute)
        {
            upload_model();

            create_pipeline();
        }
#endif // NCNN_VULKAN

        return ret;
    }
};

} // namespace ncnn

static int g_loop_count = 4;

static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
static ncnn::PoolAllocator g_workspace_pool_allocator;

#if NCNN_VULKAN
static bool g_use_vulkan_compute = false;

static ncnn::VulkanDevice* g_vkdev = 0;
static ncnn::VkAllocator* g_blob_vkallocator = 0;
static ncnn::VkAllocator* g_staging_vkallocator = 0;
#endif // NCNN_VULKAN

void benchmark(const char* comment, void (*init)(ncnn::Net&), void (*run)(const ncnn::Net&))
{
    ncnn::BenchNet net;

#if NCNN_VULKAN
    if (g_use_vulkan_compute)
    {
        net.use_vulkan_compute = g_use_vulkan_compute;

        net.set_vulkan_device(g_vkdev);
    }
#endif // NCNN_VULKAN

    init(net);

    net.load_model();

    g_blob_pool_allocator.clear();
    g_workspace_pool_allocator.clear();

#if NCNN_VULKAN
    if (g_use_vulkan_compute)
    {
        g_blob_vkallocator->clear();
        g_staging_vkallocator->clear();
    }
#endif // NCNN_VULKAN

    // sleep 10 seconds for cooling down SOC  :(
#ifdef _WIN32
    Sleep(10 * 1000);
#else
    sleep(10);
#endif

    // warm up
    run(net);
    run(net);
    run(net);

    run(net);
    run(net);
    run(net);
    run(net);
    run(net);

    double time_min = DBL_MAX;
    double time_max = -DBL_MAX;
    double time_avg = 0;

    for (int i=0; i<g_loop_count; i++)
    {
        double start = ncnn::get_current_time();

        run(net);

        double end = ncnn::get_current_time();

        double time = end - start;

        time_min = std::min(time_min, time);
        time_max = std::max(time_max, time);
        time_avg += time;
    }

    time_avg /= g_loop_count;

    fprintf(stderr, "%-20s  min = %7.2f  max = %7.2f  avg = %7.2f\n", comment, time_min, time_max, time_avg);
}

void squeezenet_init(ncnn::Net& net)
{
    net.load_param("squeezenet.param");
}

void squeezenet_int8_init(ncnn::Net& net)
{
    net.load_param("squeezenet_int8.param");
}

void squeezenet_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(227, 227, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("prob", out);
}

void mobilenet_init(ncnn::Net& net)
{
    net.load_param("mobilenet.param");
}

void mobilenet_int8_init(ncnn::Net& net)
{
    net.load_param("mobilenet_int8.param");
}

void mobilenet_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(224, 224, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("prob", out);
}

void mobilenet_v2_init(ncnn::Net& net)
{
    net.load_param("mobilenet_v2.param");
}

void mobilenet_v2_int8_init(ncnn::Net& net)
{
    net.load_param("mobilenet_v2_int8.param");
}

void mobilenet_v2_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(224, 224, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("prob", out);
}

void shufflenet_init(ncnn::Net& net)
{
    net.load_param("shufflenet.param");
}

void shufflenet_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(224, 224, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("fc1000", out);
}

void mnasnet_init(ncnn::Net& net)
{
    net.load_param("mnasnet.param");
}

void mnasnet_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(224, 224, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("prob", out);
}

void proxylessnasnet_init(ncnn::Net& net)
{
    net.load_param("proxylessnasnet.param");
}

void proxylessnasnet_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(224, 224, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("prob", out);
}

void googlenet_init(ncnn::Net& net)
{
    net.load_param("googlenet.param");
}

void googlenet_int8_init(ncnn::Net& net)
{
    net.load_param("googlenet_int8.param");
}

void googlenet_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(224, 224, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("prob", out);
}

void resnet18_init(ncnn::Net& net)
{
    net.load_param("resnet18.param");
}

void resnet18_int8_init(ncnn::Net& net)
{
    net.load_param("resnet18_int8.param");
}

void resnet18_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(224, 224, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("prob", out);
}

void alexnet_init(ncnn::Net& net)
{
    net.load_param("alexnet.param");
}

void alexnet_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(227, 227, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("prob", out);
}

void vgg16_init(ncnn::Net& net)
{
    net.load_param("vgg16.param");
}

void vgg16_int8_init(ncnn::Net& net)
{
    net.load_param("vgg16_int8.param");
}

void vgg16_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(224, 224, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("prob", out);
}

void resnet50_init(ncnn::Net& net)
{
    net.load_param("resnet50.param");
}

void resnet50_int8_init(ncnn::Net& net)
{
    net.load_param("resnet50_int8.param");
}

void resnet50_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(224, 224, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("prob", out);
}

void squeezenet_ssd_init(ncnn::Net& net)
{
    net.load_param("squeezenet_ssd.param");
}

void squeezenet_ssd_int8_init(ncnn::Net& net)
{
    net.load_param("squeezenet_ssd_int8.param");
}

void squeezenet_ssd_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(300, 300, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);
}

void mobilenet_ssd_init(ncnn::Net& net)
{
    net.load_param("mobilenet_ssd.param");
}

void mobilenet_ssd_int8_init(ncnn::Net& net)
{
    net.load_param("mobilenet_ssd_int8.param");
}

void mobilenet_ssd_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(300, 300, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);
}

void mobilenet_yolo_init(ncnn::Net& net)
{
    net.load_param("mobilenet_yolo.param");
}

void mobilenet_yolo_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(416, 416, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);
}

void mobilenet_yolov3_init(ncnn::Net& net)
{
    net.load_param("mobilenet_yolov3.param");
}

void mobilenet_yolov3_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in(416, 416, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);
}

int main(int argc, char** argv)
{
    int loop_count = 4;
    int num_threads = ncnn::get_cpu_count();
    int powersave = 0;
    int gpu_device = -1;

    if (argc >= 2)
    {
        loop_count = atoi(argv[1]);
    }
    if (argc >= 3)
    {
        num_threads = atoi(argv[2]);
    }
    if (argc >= 4)
    {
        powersave = atoi(argv[3]);
    }
    if (argc >= 5)
    {
        gpu_device = atoi(argv[4]);
    }

    g_loop_count = loop_count;

    g_blob_pool_allocator.set_size_compare_ratio(0.0f);
    g_workspace_pool_allocator.set_size_compare_ratio(0.5f);

#if NCNN_VULKAN
    g_use_vulkan_compute = gpu_device != -1;
    if (g_use_vulkan_compute)
    {
        g_vkdev = new ncnn::VulkanDevice(gpu_device);

        g_blob_vkallocator = new ncnn::VkUnlockedBlobBufferAllocator(g_vkdev);
        g_staging_vkallocator = new ncnn::VkUnlockedStagingBufferAllocator(g_vkdev);
    }
#endif // NCNN_VULKAN

    ncnn::Option opt;
    opt.lightmode = true;
    opt.num_threads = num_threads;
    opt.blob_allocator = &g_blob_pool_allocator;
    opt.workspace_allocator = &g_workspace_pool_allocator;

#if NCNN_VULKAN
    opt.vulkan_compute = g_use_vulkan_compute;
    opt.blob_vkallocator = g_blob_vkallocator;
    opt.workspace_vkallocator = g_blob_vkallocator;
    opt.staging_vkallocator = g_staging_vkallocator;
#endif // NCNN_VULKAN

    ncnn::set_default_option(opt);

    ncnn::set_cpu_powersave(powersave);

    ncnn::set_omp_dynamic(0);
    ncnn::set_omp_num_threads(num_threads);

    fprintf(stderr, "loop_count = %d\n", g_loop_count);
    fprintf(stderr, "num_threads = %d\n", num_threads);
    fprintf(stderr, "powersave = %d\n", ncnn::get_cpu_powersave());
    fprintf(stderr, "gpu_device = %d\n", gpu_device);

    // run
    benchmark("squeezenet", squeezenet_init, squeezenet_run);

    benchmark("squeezenet-int8", squeezenet_int8_init, squeezenet_run);

    benchmark("mobilenet", mobilenet_init, mobilenet_run);

    benchmark("mobilenet-int8", mobilenet_int8_init, mobilenet_run);

    benchmark("mobilenet_v2", mobilenet_v2_init, mobilenet_v2_run);

//     benchmark("mobilenet_v2-int8", mobilenet_v2_int8_init, mobilenet_v2_run);

    benchmark("shufflenet", shufflenet_init, shufflenet_run);

    benchmark("mnasnet", mnasnet_init, mnasnet_run);

    benchmark("proxylessnasnet", proxylessnasnet_init, proxylessnasnet_run);

    benchmark("googlenet", googlenet_init, googlenet_run);

    benchmark("googlenet-int8", googlenet_int8_init, googlenet_run);

    benchmark("resnet18", resnet18_init, resnet18_run);

    benchmark("resnet18-int8", resnet18_int8_init, resnet18_run);

    benchmark("alexnet", alexnet_init, alexnet_run);

    benchmark("vgg16", vgg16_init, vgg16_run);

    benchmark("resnet50", resnet50_init, resnet50_run);

    benchmark("resnet50-int8", resnet50_int8_init, resnet50_run);

    benchmark("squeezenet-ssd", squeezenet_ssd_init, squeezenet_ssd_run);

    benchmark("squeezenet-ssd-int8", squeezenet_ssd_int8_init, squeezenet_ssd_run);

    benchmark("mobilenet-ssd", mobilenet_ssd_init, mobilenet_ssd_run);

    benchmark("mobilenet-ssd-int8", mobilenet_ssd_int8_init, mobilenet_ssd_run);

    benchmark("mobilenet-yolo", mobilenet_yolo_init, mobilenet_yolo_run);

    benchmark("mobilenet-yolov3", mobilenet_yolov3_init, mobilenet_yolov3_run);

#if NCNN_VULKAN
    delete g_blob_vkallocator;
    delete g_staging_vkallocator;

    delete g_vkdev;
#endif // NCNN_VULKAN

    return 0;
}
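Because the list of models is hard-coded, benchmarking your own network means adding an init/run pair and one more benchmark() call, following the same pattern as above. Here is a minimal sketch to be added inside benchncnn.cpp; the file name mymodel.param, the blob names "data" and "prob", and the 224x224x3 input size are my own placeholders and must match your actual .param file:

void mymodel_init(ncnn::Net& net)
{
    // only the .param file is needed; the weights are filled in randomly by BenchNet
    net.load_param("mymodel.param");
}

void mymodel_run(const ncnn::Net& net)
{
    ncnn::Extractor ex = net.create_extractor();

    // the input size must match what the network expects; the data itself can stay uninitialized
    ncnn::Mat in(224, 224, 3);
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("prob", out);
}

// then add one line inside main(), next to the other calls:
//     benchmark("mymodel", mymodel_init, mymodel_run);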

Below are the project author's benchmark results on various mobile platforms (for details see the official page: https://github.com/Tencent/ncnn/tree/master/benchmark):

Qualcomm MSM8996 Snapdragon 820 (Kyro 2.15GHz x 2 + Kyro 1.6GHz x 2)

root@msm8996:/data/local/tmp/ncnn # ./benchncnn 8 4 0
loop_count = 8
num_threads = 4
powersave = 0
      squeezenet  min =   23.20  max =   24.06  avg =   23.63
       mobilenet  min =   35.89  max =   36.41  avg =   36.09
    mobilenet_v2  min =   27.04  max =   28.62  avg =   27.39
      shufflenet  min =   15.47  max =   16.45  avg =   16.00
       googlenet  min =   85.42  max =   86.15  avg =   85.81
        resnet18  min =   76.82  max =   79.63  avg =   78.50
         alexnet  min =  147.66  max =  156.92  avg =  152.95
           vgg16  min =  493.50  max =  515.03  avg =  507.34
  squeezenet-ssd  min =   56.31  max =   59.35  avg =   57.49
   mobilenet-ssd  min =   68.95  max =   74.24  avg =   71.39
  mobilenet-yolo  min =  142.52  max =  149.72  avg =  148.23

root@msm8996:/data/local/tmp/ncnn # ./benchncnn 8 1 2            
loop_count = 8
num_threads = 1
powersave = 2
      squeezenet  min =   53.26  max =   53.37  avg =   53.31
       mobilenet  min =   96.37  max =   97.09  avg =   96.63
    mobilenet_v2  min =   63.00  max =   63.25  avg =   63.09
      shufflenet  min =   28.22  max =   28.88  avg =   28.48
       googlenet  min =  226.21  max =  228.31  avg =  227.22
        resnet18  min =  197.35  max =  198.55  avg =  197.84
         alexnet  min =  445.32  max =  449.62  avg =  446.65
           vgg16  min = 1416.39  max = 1450.95  avg = 1440.63
  squeezenet-ssd  min =  119.37  max =  119.77  avg =  119.56
   mobilenet-ssd  min =  183.04  max =  185.12  avg =  183.59
  mobilenet-yolo  min =  366.91  max =  369.87  avg =  368.40

Qualcomm MSM8994 Snapdragon 810 (Cortex-A57 2.0GHz x 4 + Cortex-A53 1.55GHz x 4)

angler:/data/local/tmp $ ./benchncnn 8 8 0
loop_count = 8
num_threads = 8
powersave = 0
      squeezenet  min =   35.57  max =   36.56  avg =   36.13
       mobilenet  min =   44.80  max =   56.80  avg =   47.91
    mobilenet_v2  min =   46.80  max =   64.64  avg =   50.34
      shufflenet  min =   28.24  max =   30.27  avg =   29.36
       googlenet  min =  118.82  max =  132.80  avg =  123.74
        resnet18  min =  119.55  max =  141.99  avg =  126.78
         alexnet  min =  104.52  max =  125.98  avg =  110.17
           vgg16  min =  815.12  max =  930.98  avg =  878.57
  squeezenet-ssd  min =  111.05  max =  130.23  avg =  119.43
   mobilenet-ssd  min =   88.88  max =  108.96  avg =   98.38
  mobilenet-yolo  min =  220.57  max =  263.42  avg =  241.03

Qualcomm MSM8916 Snapdragon 410 (Cortex-A53 1.2GHz x 4)

HM2014812:/data/local/tmp # ./benchncnn 8 4 0
loop_count = 8
num_threads = 4
powersave = 0
      squeezenet  min =   79.70  max =   85.42  avg =   82.22
       mobilenet  min =  119.87  max =  125.63  avg =  123.46
    mobilenet_v2  min =  125.65  max =  131.16  avg =  128.20
      shufflenet  min =   60.95  max =   66.03  avg =   63.03
       googlenet  min =  237.47  max =  256.79  avg =  245.65
        resnet18  min =  239.73  max =  250.41  avg =  245.87
         alexnet  min =  248.66  max =  279.08  avg =  267.41
           vgg16  min = 1429.50  max = 1510.46  avg = 1465.25
  squeezenet-ssd  min =  203.33  max =  213.85  avg =  209.81
   mobilenet-ssd  min =  215.26  max =  224.23  avg =  219.73
  mobilenet-yolo  min =  506.41  max =  520.50  avg =  513.30

Raspberry Pi 3 Model B+ Broadcom BCM2837B0, Cortex-A53 (ARMv8) (1.4GHz x 4 )

pi@raspberrypi:~ $ ./benchncnn 8 4 0
loop_count = 8
num_threads = 4
powersave = 0
      squeezenet  min =  108.66  max =  109.24  avg =  108.96
       mobilenet  min =  151.78  max =  152.92  avg =  152.31
    mobilenet_v2  min =  193.14  max =  195.56  avg =  194.50
      shufflenet  min =   91.41  max =   92.19  avg =   91.75
       googlenet  min =  302.02  max =  304.08  avg =  303.24
        resnet18  min =  411.93  max =  423.14  avg =  416.54
         alexnet  min =  275.54  max =  276.50  avg =  276.13
           vgg16  min = 1845.36  max = 1925.95  avg = 1902.28
  squeezenet-ssd  min =  313.86  max =  317.35  avg =  315.28
   mobilenet-ssd  min =  262.91  max =  264.92  avg =  263.85
  mobilenet-yolo  min =  638.73  max =  641.27  avg =  639.87

Rockchip RK3399 (Cortex-A72 1.8GHz x 2 + Cortex-A53 1.5GHz x 4)

rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 6 0 
loop_count = 8
num_threads = 6
powersave = 0
      squeezenet  min =   47.28  max =   70.41  avg =   53.37
       mobilenet  min =   68.74  max =  176.25  avg =   82.80
    mobilenet_v2  min =   71.72  max =  180.24  avg =   86.19
      shufflenet  min =   34.90  max =   36.14  avg =   35.54
       googlenet  min =  158.35  max =  301.30  avg =  191.26
        resnet18  min =  190.96  max =  274.38  avg =  214.78
         alexnet  min =  199.21  max =  334.18  avg =  227.98
           vgg16  min =  988.46  max = 1019.90  avg = 1000.14
  squeezenet-ssd  min =  134.83  max =  223.23  avg =  148.35
   mobilenet-ssd  min =  121.47  max =  235.44  avg =  149.53
  mobilenet-yolo  min =  295.01  max =  413.26  avg =  327.84

rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 2 2          
loop_count = 8
num_threads = 2
powersave = 2
      squeezenet  min =   51.64  max =   55.08  avg =   52.36
       mobilenet  min =   88.23  max =   91.07  avg =   88.89
    mobilenet_v2  min =   84.98  max =   86.21  avg =   85.74
      shufflenet  min =   36.04  max =   38.40  avg =   36.82
       googlenet  min =  185.42  max =  188.76  avg =  186.77
        resnet18  min =  202.72  max =  212.27  avg =  206.91
         alexnet  min =  203.89  max =  222.28  avg =  215.28
           vgg16  min =  901.60  max = 1013.80  avg =  948.13
  squeezenet-ssd  min =  139.85  max =  147.36  avg =  142.18
   mobilenet-ssd  min =  156.35  max =  161.21  avg =  157.96
  mobilenet-yolo  min =  365.75  max =  380.79  avg =  371.31

rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 1 2                    
loop_count = 8
num_threads = 1
powersave = 2
      squeezenet  min =   83.73  max =   86.78  avg =   84.94
       mobilenet  min =  142.90  max =  147.71  avg =  144.64
    mobilenet_v2  min =  119.18  max =  132.26  avg =  123.92
      shufflenet  min =   52.81  max =   55.84  avg =   53.63
       googlenet  min =  316.69  max =  324.03  avg =  319.34
        resnet18  min =  318.96  max =  331.31  avg =  322.68
         alexnet  min =  340.86  max =  365.09  avg =  348.99
           vgg16  min = 1593.88  max = 1611.65  avg = 1602.36
  squeezenet-ssd  min =  199.00  max =  209.26  avg =  204.65
   mobilenet-ssd  min =  268.03  max =  275.70  avg =  270.74
  mobilenet-yolo  min =  589.43  max =  605.75  avg =  595.67
   
rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 1 1                    
loop_count = 8
num_threads = 1
powersave = 1
      squeezenet  min =  167.48  max =  173.60  avg =  169.23
       mobilenet  min =  272.88  max =  278.71  avg =  274.73
    mobilenet_v2  min =  235.35  max =  239.87  avg =  237.05
      shufflenet  min =  111.79  max =  127.11  avg =  114.13
       googlenet  min =  669.47  max =  673.68  avg =  671.23
        resnet18  min =  701.96  max =  714.85  avg =  708.56
         alexnet  min =  989.36  max =  990.63  avg =  989.96
           vgg16  min = 3746.20  max = 3835.75  avg = 3788.90
  squeezenet-ssd  min =  445.71  max =  455.03  avg =  449.07
   mobilenet-ssd  min =  511.59  max =  520.00  avg =  514.59
  mobilenet-yolo  min = 1088.56  max = 1093.53  avg = 1090.39 

Rockchip RK3288 (Cortex-A17 1.8GHz x 4)

root@rk3288:/data/local/tmp/ncnn # ./benchncnn 8 4 0 
loop_count = 8
num_threads = 4
powersave = 0
      squeezenet  min =   51.43  max =   74.02  avg =   55.91
       mobilenet  min =  102.06  max =  125.67  avg =  106.02
    mobilenet_v2  min =   80.09  max =   99.23  avg =   85.40
      shufflenet  min =   34.91  max =   35.75  avg =   35.25
       googlenet  min =  181.72  max =  252.12  avg =  210.67
        resnet18  min =  198.86  max =  240.69  avg =  214.87
         alexnet  min =  154.68  max =  208.60  avg =  168.75
           vgg16  min = 1019.49  max = 1231.92  avg = 1129.09
  squeezenet-ssd  min =  133.38  max =  241.11  avg =  167.77
   mobilenet-ssd  min =  156.71  max =  216.70  avg =  175.31
  mobilenet-yolo  min =  396.78  max =  482.60  avg =  433.34
  
root@rk3288:/data/local/tmp/ncnn # ./benchncnn 8 1 0
loop_count = 8
num_threads = 1
powersave = 0
      squeezenet  min =  137.93  max =  140.76  avg =  138.71
       mobilenet  min =  244.01  max =  248.27  avg =  246.24
    mobilenet_v2  min =  177.94  max =  181.57  avg =  179.24
      shufflenet  min =   77.61  max =   78.30  avg =   77.94
       googlenet  min =  548.75  max =  559.40  avg =  553.00
        resnet18  min =  493.66  max =  510.55  avg =  500.37
         alexnet  min =  564.20  max =  604.87  avg =  581.30
           vgg16  min = 2425.03  max = 2447.25  avg = 2433.38
  squeezenet-ssd  min =  298.26  max =  304.67  avg =  302.00
   mobilenet-ssd  min =  465.65  max =  473.33  avg =  469.86
  mobilenet-yolo  min =  997.95  max = 1012.45  avg = 1002.32

HiSilicon Hi3519V101 (Cortex-A17 1.2GHz x 1)

root@Hi3519:/ncnn-benchmark # taskset 2 ./benchncnn 8 1 0 
loop_count = 8
num_threads = 1
powersave = 0
      squeezenet  min =  272.97  max =  275.84  avg =  274.85
 squeezenet-int8  min =  200.87  max =  202.47  avg =  201.74
       mobilenet  min =  480.90  max =  482.16  avg =  481.64
    mobilenet_v2  min =  350.01  max =  352.39  avg =  350.81
      shufflenet  min =  152.40  max =  153.17  avg =  152.80
       googlenet  min = 1096.65  max = 1101.35  avg = 1099.21
        resnet18  min =  983.92  max =  987.00  avg =  985.25
         alexnet  min = 1140.30  max = 1141.55  avg = 1140.92
  squeezenet-ssd  min =  574.62  max =  580.12  avg =  577.23
   mobilenet-ssd  min =  960.26  max =  969.13  avg =  965.93
  mobilenet-yolo  min = 1867.78  max = 1880.08  avg = 1873.89

iPhone 5S (Apple A7 1.3GHz x 2)

iPhone:~ root# ./benchncnn 8 2 0
loop_count = 8
num_threads = 2
powersave = 0
      squeezenet  min =   70.94  max =   72.40  avg =   71.75
       mobilenet  min =   89.24  max =   92.21  avg =   90.60
    mobilenet_v2  min =   71.70  max =   74.43  avg =   73.68
      shufflenet  min =   35.48  max =   41.40  avg =   38.94
       googlenet  min =  282.76  max =  295.00  avg =  289.64
        resnet18  min =  251.99  max =  260.40  avg =  255.23
         alexnet  min =  329.07  max =  337.75  avg =  333.24
           vgg16  min = 4547.25  max = 4706.56  avg = 4647.60
  squeezenet-ssd  min =  171.23  max =  180.49  avg =  175.54
   mobilenet-ssd  min =  174.56  max =  192.69  avg =  179.60
  mobilenet-yolo  min =  357.90  max =  363.93  avg =  360.97

Freescale i.MX7 Dual (Cortex A7 1.0GHz x 2)

imx7d_pico:/data/local/tmp # ./benchncnn 8 2 0
loop_count = 8
num_threads = 2
powersave = 0
      squeezenet  min =  269.26  max =  278.84  avg =  273.10
       mobilenet  min =  442.79  max =  445.82  avg =  444.46
    mobilenet_v2  min =  362.19  max =  364.58  avg =  363.33
      shufflenet  min =  171.30  max =  190.63  avg =  177.52
       googlenet  min =  975.95  max =  986.11  avg =  980.51
        resnet18  min = 1016.60  max = 1035.50  avg = 1021.75
         alexnet  min = 1240.54  max = 1254.86  avg = 1247.18
           vgg16  min =    0.00  max =    0.00  avg =    0.00 (FAIL due to out of memory)
  squeezenet-ssd  min =  614.93  max =  623.15  avg =  619.56
   mobilenet-ssd  min =  842.83  max =  884.64  avg =  855.40
  mobilenet-yolo  min = 1772.24  max = 1924.37  avg = 1805.75

 
