在打開github ncnn項目首頁:https://github.com/Tencent/ncnn
你會看到整個項目包含的內容,其中第一個就是benchmark文件夾;
點進去看,readme.md部分內容翻譯如下:
benchmark 可用於測試NN inference 性能,僅僅需要網絡定義文件(ncnn param文件);大的模型文件(ncnn bin格式)不會加載,但會隨機生成用於測速。之後會加入更多的模型。
Build
# assume you have already build ncnn library successfully
# uncomment the following line in <ncnn-root-dir>/CMakeLists.txt with your favorite editor
# add_subdirectory(benchmark)
$ cd <ncnn-root-dir>/<your-build-dir>
$ make -j4
# you can find benchncnn binary in <ncnn-root-dir>/<your-build-dir>/benchmark
即需要將ncnn項目下CMakeLists.txt中的# add_subdirectory(benchmark)這一行去掉註釋即可,然後再make;
這樣會在ncnn/build/benchmark目錄下生成一個可執行二進制文件,叫benchncnn,見下圖:
Usage
# copy all param files to the current directory
$ ./benchncnn [loop count] [num threads] [powersave] [gpu device]
那麼如何使用它(benchmark)呢?我們打開終端,將工作路徑切到生成的benchmark目錄下,
[chensq@localhost ~]$ cd /home/chensq/ncnn/build/benchmark/
在這之前,你需要將ncnn/benchmark目錄下的所有以“.param”結尾的文件拷貝到你編譯生成的benchmark目錄下(如我上圖所示),
拷貝使用的指令可參考我的,不過需要更換你對應的路徑:
[chensq@localhost ~]$ sudo cp -r /home/chensq/ncnn/benchmark/*.param /home/chensq/ncnn/build/benchmark/
然後參考usage中給的PC端指令格式(我的如下,循環4次,2個線程,powersave爲0,gpu device爲0):
[chensq@localhost benchmark]$ ./benchncnn 4 2 0 0
其中各個參數的解析爲:
我在PC端運行了以上指令後得到的結果如下(時間單位爲ms):
如果你想在安卓設備端跑測試,可使用如下方法:
# for running on android device, upload to /data/local/tmp/ folder
$ adb push benchncnn /data/local/tmp/
$ adb push <ncnn-root-dir>/benchmark/*.param /data/local/tmp/
$ adb shell
# executed in android adb shell
$ cd /data/local/tmp/
$ ./benchncnn [loop count] [num threads] [powersave] [gpu device]
即你將手機連接PC端,用adb指令將benchncnn這個編譯好的二進制文件push到你手機的/data/local/tmp/目錄下;然後將ncnn/benchmark/下的所有以".param"結尾的網絡結構文件同樣push到/data/local/tmp/目錄下;然後使用adb shell,在/data/local/tmp/目錄下運行指令,指令含義同PC端;(我未嘗試,讀者有興趣可試試)
其中benchmark.cpp源碼部分如下:
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include <float.h>
#include <stdio.h>
#ifdef _WIN32
#define NOMINMAX
#include <algorithm>
#include <windows.h> // Sleep()
#else
#include <unistd.h> // sleep()
#endif
#include "benchmark.h"
#include "cpu.h"
#include "net.h"
#if NCNN_VULKAN
#include "gpu.h"
// RAII wrapper for the global Vulkan runtime: the constructor creates the gpu
// instance and the destructor tears it down.
class GlobalGpuInstance
{
public:
GlobalGpuInstance() { ncnn::create_gpu_instance(); }
~GlobalGpuInstance() { ncnn::destroy_gpu_instance(); }
};
// initialize vulkan runtime before main() (global object construction order);
// it is destroyed automatically after main() returns
GlobalGpuInstance g_global_gpu_instance;
#endif // NCNN_VULKAN
namespace ncnn {
// always return empty weights
// A ModelBin that never touches disk: every weight request is answered with a
// Mat of the requested element count (contents uninitialized). This is what
// lets benchncnn run with only the .param file and no .bin weights.
class ModelBinFromEmpty : public ModelBin
{
public:
virtual Mat load(int w, int /*type*/) const { return Mat(w); }
};
// Net whose load_model() fabricates empty weights via ModelBinFromEmpty
// instead of reading an ncnn .bin file, so only network structure is needed.
class BenchNet : public Net
{
public:
// Feeds every layer empty weights. Returns 0 on success, -1 if any layer's
// load_model fails (the first failure is reported to stderr).
int load_model()
{
// load file
int ret = 0;
ModelBinFromEmpty mb;
for (size_t i=0; i<layers.size(); i++)
{
Layer* layer = layers[i];
int lret = layer->load_model(mb);
if (lret != 0)
{
fprintf(stderr, "layer load_model %d failed\n", (int)i);
ret = -1;
break;
}
}
#if NCNN_VULKAN
// NOTE(review): this GPU setup runs even when the loop above broke out with
// ret == -1 — presumably intentional upstream, confirm before changing
if (use_vulkan_compute)
{
upload_model();
create_pipeline();
}
#endif // NCNN_VULKAN
return ret;
}
};
} // namespace ncnn
// number of timed iterations per model; overwritten from argv[1] in main()
static int g_loop_count = 4;
// pooled CPU allocators shared by every benchmarked net, cleared between models
static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
static ncnn::PoolAllocator g_workspace_pool_allocator;
#if NCNN_VULKAN
// GPU state, only initialized in main() when a gpu device index is given
static bool g_use_vulkan_compute = false;
static ncnn::VulkanDevice* g_vkdev = 0;
static ncnn::VkAllocator* g_blob_vkallocator = 0;
static ncnn::VkAllocator* g_staging_vkallocator = 0;
#endif // NCNN_VULKAN
// Benchmarks one network and prints "<comment> min/max/avg" latency (ms) to
// stderr. `init` loads the .param file into the net; `run` performs a single
// forward pass. Timing uses g_loop_count iterations after a fixed warm-up.
void benchmark(const char* comment, void (*init)(ncnn::Net&), void (*run)(const ncnn::Net&))
{
ncnn::BenchNet net;
#if NCNN_VULKAN
if (g_use_vulkan_compute)
{
net.use_vulkan_compute = g_use_vulkan_compute;
net.set_vulkan_device(g_vkdev);
}
#endif // NCNN_VULKAN
init(net);
// BenchNet::load_model() fills the net with empty weights — no .bin needed
net.load_model();
// reset pooled allocators so one model's buffers don't carry over to the next
g_blob_pool_allocator.clear();
g_workspace_pool_allocator.clear();
#if NCNN_VULKAN
if (g_use_vulkan_compute)
{
g_blob_vkallocator->clear();
g_staging_vkallocator->clear();
}
#endif // NCNN_VULKAN
// sleep 10 seconds for cooling down SOC :(
#ifdef _WIN32
Sleep(10 * 1000);
#else
sleep(10);
#endif
// warm up: 8 untimed passes (stabilize caches/clocks before measuring)
run(net);
run(net);
run(net);
run(net);
run(net);
run(net);
run(net);
run(net);
double time_min = DBL_MAX;
double time_max = -DBL_MAX;
double time_avg = 0;
for (int i=0; i<g_loop_count; i++)
{
double start = ncnn::get_current_time();
run(net);
double end = ncnn::get_current_time();
double time = end - start;
time_min = std::min(time_min, time);
time_max = std::max(time_max, time);
time_avg += time;
}
time_avg /= g_loop_count;
fprintf(stderr, "%-20s min = %7.2f max = %7.2f avg = %7.2f\n", comment, time_min, time_max, time_avg);
}
// Load the squeezenet network definition (weights are fabricated by BenchNet).
void squeezenet_init(ncnn::Net& net)
{
    net.load_param("squeezenet.param");
}

// Load the int8-quantized squeezenet network definition.
void squeezenet_int8_init(ncnn::Net& net)
{
    net.load_param("squeezenet_int8.param");
}

// One forward pass: 227x227 3-channel blob in at "data", result read at "prob".
void squeezenet_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(227, 227, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("prob", output_blob);
}
// Load the mobilenet network definition (weights are fabricated by BenchNet).
void mobilenet_init(ncnn::Net& net)
{
    net.load_param("mobilenet.param");
}

// Load the int8-quantized mobilenet network definition.
void mobilenet_int8_init(ncnn::Net& net)
{
    net.load_param("mobilenet_int8.param");
}

// One forward pass: 224x224 3-channel blob in at "data", result read at "prob".
void mobilenet_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(224, 224, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("prob", output_blob);
}
// Load the mobilenet_v2 network definition (weights are fabricated by BenchNet).
void mobilenet_v2_init(ncnn::Net& net)
{
    net.load_param("mobilenet_v2.param");
}

// Load the int8-quantized mobilenet_v2 network definition.
void mobilenet_v2_int8_init(ncnn::Net& net)
{
    net.load_param("mobilenet_v2_int8.param");
}

// One forward pass: 224x224 3-channel blob in at "data", result read at "prob".
void mobilenet_v2_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(224, 224, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("prob", output_blob);
}
// Load the shufflenet network definition (weights are fabricated by BenchNet).
void shufflenet_init(ncnn::Net& net)
{
    net.load_param("shufflenet.param");
}

// One forward pass: 224x224 3-channel blob in at "data"; note the output blob
// of this network is "fc1000", not "prob".
void shufflenet_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(224, 224, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("fc1000", output_blob);
}
// Load the mnasnet network definition (weights are fabricated by BenchNet).
void mnasnet_init(ncnn::Net& net)
{
    net.load_param("mnasnet.param");
}

// One forward pass: 224x224 3-channel blob in at "data", result read at "prob".
void mnasnet_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(224, 224, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("prob", output_blob);
}
// Load the proxylessnasnet network definition (weights fabricated by BenchNet).
void proxylessnasnet_init(ncnn::Net& net)
{
    net.load_param("proxylessnasnet.param");
}

// One forward pass: 224x224 3-channel blob in at "data", result read at "prob".
void proxylessnasnet_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(224, 224, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("prob", output_blob);
}
// Load the googlenet network definition (weights are fabricated by BenchNet).
void googlenet_init(ncnn::Net& net)
{
    net.load_param("googlenet.param");
}

// Load the int8-quantized googlenet network definition.
void googlenet_int8_init(ncnn::Net& net)
{
    net.load_param("googlenet_int8.param");
}

// One forward pass: 224x224 3-channel blob in at "data", result read at "prob".
void googlenet_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(224, 224, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("prob", output_blob);
}
// Load the resnet18 network definition (weights are fabricated by BenchNet).
void resnet18_init(ncnn::Net& net)
{
    net.load_param("resnet18.param");
}

// Load the int8-quantized resnet18 network definition.
void resnet18_int8_init(ncnn::Net& net)
{
    net.load_param("resnet18_int8.param");
}

// One forward pass: 224x224 3-channel blob in at "data", result read at "prob".
void resnet18_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(224, 224, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("prob", output_blob);
}
// Load the alexnet network definition (weights are fabricated by BenchNet).
void alexnet_init(ncnn::Net& net)
{
    net.load_param("alexnet.param");
}

// One forward pass: 227x227 3-channel blob in at "data", result read at "prob".
void alexnet_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(227, 227, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("prob", output_blob);
}
// Load the vgg16 network definition (weights are fabricated by BenchNet).
void vgg16_init(ncnn::Net& net)
{
    net.load_param("vgg16.param");
}

// Load the int8-quantized vgg16 network definition.
void vgg16_int8_init(ncnn::Net& net)
{
    net.load_param("vgg16_int8.param");
}

// One forward pass: 224x224 3-channel blob in at "data", result read at "prob".
void vgg16_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(224, 224, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("prob", output_blob);
}
// Load the resnet50 network definition (weights are fabricated by BenchNet).
void resnet50_init(ncnn::Net& net)
{
    net.load_param("resnet50.param");
}

// Load the int8-quantized resnet50 network definition.
void resnet50_int8_init(ncnn::Net& net)
{
    net.load_param("resnet50_int8.param");
}

// One forward pass: 224x224 3-channel blob in at "data", result read at "prob".
void resnet50_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(224, 224, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("prob", output_blob);
}
// Load the squeezenet-ssd network definition (weights fabricated by BenchNet).
void squeezenet_ssd_init(ncnn::Net& net)
{
    net.load_param("squeezenet_ssd.param");
}

// Load the int8-quantized squeezenet-ssd network definition.
void squeezenet_ssd_int8_init(ncnn::Net& net)
{
    net.load_param("squeezenet_ssd_int8.param");
}

// One forward pass: 300x300 3-channel blob in at "data", detections read at
// "detection_out" (SSD-style head).
void squeezenet_ssd_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(300, 300, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("detection_out", output_blob);
}
// Load the mobilenet-ssd network definition (weights fabricated by BenchNet).
void mobilenet_ssd_init(ncnn::Net& net)
{
    net.load_param("mobilenet_ssd.param");
}

// Load the int8-quantized mobilenet-ssd network definition.
void mobilenet_ssd_int8_init(ncnn::Net& net)
{
    net.load_param("mobilenet_ssd_int8.param");
}

// One forward pass: 300x300 3-channel blob in at "data", detections read at
// "detection_out" (SSD-style head).
void mobilenet_ssd_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(300, 300, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("detection_out", output_blob);
}
// Load the mobilenet-yolo network definition (weights fabricated by BenchNet).
void mobilenet_yolo_init(ncnn::Net& net)
{
    net.load_param("mobilenet_yolo.param");
}

// One forward pass: 416x416 3-channel blob in at "data", detections read at
// "detection_out".
void mobilenet_yolo_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(416, 416, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("detection_out", output_blob);
}
// Load the mobilenet-yolov3 network definition (weights fabricated by BenchNet).
void mobilenet_yolov3_init(ncnn::Net& net)
{
    net.load_param("mobilenet_yolov3.param");
}

// One forward pass: 416x416 3-channel blob in at "data", detections read at
// "detection_out".
void mobilenet_yolov3_run(const ncnn::Net& net)
{
    ncnn::Mat input_blob(416, 416, 3);
    ncnn::Mat output_blob;

    ncnn::Extractor extractor = net.create_extractor();
    extractor.input("data", input_blob);
    extractor.extract("detection_out", output_blob);
}
// Entry point: ./benchncnn [loop count] [num threads] [powersave] [gpu device]
// All arguments are optional and positional; they are parsed with atoi, so
// non-numeric input silently becomes 0. gpu_device == -1 means CPU only.
int main(int argc, char** argv)
{
int loop_count = 4;
int num_threads = ncnn::get_cpu_count();
int powersave = 0;
int gpu_device = -1;
if (argc >= 2)
{
loop_count = atoi(argv[1]);
}
if (argc >= 3)
{
num_threads = atoi(argv[2]);
}
if (argc >= 4)
{
powersave = atoi(argv[3]);
}
if (argc >= 5)
{
gpu_device = atoi(argv[4]);
}
g_loop_count = loop_count;
// tune how aggressively the pool allocators reuse existing buffers
g_blob_pool_allocator.set_size_compare_ratio(0.0f);
g_workspace_pool_allocator.set_size_compare_ratio(0.5f);
#if NCNN_VULKAN
// requesting any gpu device index enables the vulkan compute path
g_use_vulkan_compute = gpu_device != -1;
if (g_use_vulkan_compute)
{
g_vkdev = new ncnn::VulkanDevice(gpu_device);
g_blob_vkallocator = new ncnn::VkUnlockedBlobBufferAllocator(g_vkdev);
g_staging_vkallocator = new ncnn::VkUnlockedStagingBufferAllocator(g_vkdev);
}
#endif // NCNN_VULKAN
// wire the shared allocators/thread settings into the default Option used by
// every net created afterwards
ncnn::Option opt;
opt.lightmode = true;
opt.num_threads = num_threads;
opt.blob_allocator = &g_blob_pool_allocator;
opt.workspace_allocator = &g_workspace_pool_allocator;
#if NCNN_VULKAN
opt.vulkan_compute = g_use_vulkan_compute;
opt.blob_vkallocator = g_blob_vkallocator;
// NOTE(review): workspace shares the blob vkallocator (not the staging one) —
// looks intentional upstream, confirm before changing
opt.workspace_vkallocator = g_blob_vkallocator;
opt.staging_vkallocator = g_staging_vkallocator;
#endif // NCNN_VULKAN
ncnn::set_default_option(opt);
ncnn::set_cpu_powersave(powersave);
ncnn::set_omp_dynamic(0);
ncnn::set_omp_num_threads(num_threads);
fprintf(stderr, "loop_count = %d\n", g_loop_count);
fprintf(stderr, "num_threads = %d\n", num_threads);
fprintf(stderr, "powersave = %d\n", ncnn::get_cpu_powersave());
fprintf(stderr, "gpu_device = %d\n", gpu_device);
// run
benchmark("squeezenet", squeezenet_init, squeezenet_run);
benchmark("squeezenet-int8", squeezenet_int8_init, squeezenet_run);
benchmark("mobilenet", mobilenet_init, mobilenet_run);
benchmark("mobilenet-int8", mobilenet_int8_init, mobilenet_run);
benchmark("mobilenet_v2", mobilenet_v2_init, mobilenet_v2_run);
// benchmark("mobilenet_v2-int8", mobilenet_v2_int8_init, mobilenet_v2_run);
benchmark("shufflenet", shufflenet_init, shufflenet_run);
benchmark("mnasnet", mnasnet_init, mnasnet_run);
benchmark("proxylessnasnet", proxylessnasnet_init, proxylessnasnet_run);
benchmark("googlenet", googlenet_init, googlenet_run);
benchmark("googlenet-int8", googlenet_int8_init, googlenet_run);
benchmark("resnet18", resnet18_init, resnet18_run);
benchmark("resnet18-int8", resnet18_int8_init, resnet18_run);
benchmark("alexnet", alexnet_init, alexnet_run);
benchmark("vgg16", vgg16_init, vgg16_run);
benchmark("resnet50", resnet50_init, resnet50_run);
benchmark("resnet50-int8", resnet50_int8_init, resnet50_run);
benchmark("squeezenet-ssd", squeezenet_ssd_init, squeezenet_ssd_run);
benchmark("squeezenet-ssd-int8", squeezenet_ssd_int8_init, squeezenet_ssd_run);
benchmark("mobilenet-ssd", mobilenet_ssd_init, mobilenet_ssd_run);
benchmark("mobilenet-ssd-int8", mobilenet_ssd_int8_init, mobilenet_ssd_run);
benchmark("mobilenet-yolo", mobilenet_yolo_init, mobilenet_yolo_run);
benchmark("mobilenet-yolov3", mobilenet_yolov3_init, mobilenet_yolov3_run);
#if NCNN_VULKAN
// deleting null pointers is a no-op, so this is safe when vulkan was unused
delete g_blob_vkallocator;
delete g_staging_vkallocator;
delete g_vkdev;
#endif // NCNN_VULKAN
return 0;
}
下面是項目作者在不同手機平臺的benchmark結果(詳細見官網:https://github.com/Tencent/ncnn/tree/master/benchmark);
Qualcomm MSM8996 Snapdragon 820 (Kyro 2.15GHz x 2 + Kyro 1.6GHz x 2)
root@msm8996:/data/local/tmp/ncnn # ./benchncnn 8 4 0
loop_count = 8
num_threads = 4
powersave = 0
squeezenet min = 23.20 max = 24.06 avg = 23.63
mobilenet min = 35.89 max = 36.41 avg = 36.09
mobilenet_v2 min = 27.04 max = 28.62 avg = 27.39
shufflenet min = 15.47 max = 16.45 avg = 16.00
googlenet min = 85.42 max = 86.15 avg = 85.81
resnet18 min = 76.82 max = 79.63 avg = 78.50
alexnet min = 147.66 max = 156.92 avg = 152.95
vgg16 min = 493.50 max = 515.03 avg = 507.34
squeezenet-ssd min = 56.31 max = 59.35 avg = 57.49
mobilenet-ssd min = 68.95 max = 74.24 avg = 71.39
mobilenet-yolo min = 142.52 max = 149.72 avg = 148.23
root@msm8996:/data/local/tmp/ncnn # ./benchncnn 8 1 2
loop_count = 8
num_threads = 1
powersave = 2
squeezenet min = 53.26 max = 53.37 avg = 53.31
mobilenet min = 96.37 max = 97.09 avg = 96.63
mobilenet_v2 min = 63.00 max = 63.25 avg = 63.09
shufflenet min = 28.22 max = 28.88 avg = 28.48
googlenet min = 226.21 max = 228.31 avg = 227.22
resnet18 min = 197.35 max = 198.55 avg = 197.84
alexnet min = 445.32 max = 449.62 avg = 446.65
vgg16 min = 1416.39 max = 1450.95 avg = 1440.63
squeezenet-ssd min = 119.37 max = 119.77 avg = 119.56
mobilenet-ssd min = 183.04 max = 185.12 avg = 183.59
mobilenet-yolo min = 366.91 max = 369.87 avg = 368.40
Qualcomm MSM8994 Snapdragon 810 (Cortex-A57 2.0GHz x 4 + Cortex-A53 1.55GHz x 4)
angler:/data/local/tmp $ ./benchncnn 8 8 0
loop_count = 8
num_threads = 8
powersave = 0
squeezenet min = 35.57 max = 36.56 avg = 36.13
mobilenet min = 44.80 max = 56.80 avg = 47.91
mobilenet_v2 min = 46.80 max = 64.64 avg = 50.34
shufflenet min = 28.24 max = 30.27 avg = 29.36
googlenet min = 118.82 max = 132.80 avg = 123.74
resnet18 min = 119.55 max = 141.99 avg = 126.78
alexnet min = 104.52 max = 125.98 avg = 110.17
vgg16 min = 815.12 max = 930.98 avg = 878.57
squeezenet-ssd min = 111.05 max = 130.23 avg = 119.43
mobilenet-ssd min = 88.88 max = 108.96 avg = 98.38
mobilenet-yolo min = 220.57 max = 263.42 avg = 241.03
Qualcomm MSM8916 Snapdragon 410 (Cortex-A53 1.2GHz x 4)
HM2014812:/data/local/tmp # ./benchncnn 8 4 0
loop_count = 8
num_threads = 4
powersave = 0
squeezenet min = 79.70 max = 85.42 avg = 82.22
mobilenet min = 119.87 max = 125.63 avg = 123.46
mobilenet_v2 min = 125.65 max = 131.16 avg = 128.20
shufflenet min = 60.95 max = 66.03 avg = 63.03
googlenet min = 237.47 max = 256.79 avg = 245.65
resnet18 min = 239.73 max = 250.41 avg = 245.87
alexnet min = 248.66 max = 279.08 avg = 267.41
vgg16 min = 1429.50 max = 1510.46 avg = 1465.25
squeezenet-ssd min = 203.33 max = 213.85 avg = 209.81
mobilenet-ssd min = 215.26 max = 224.23 avg = 219.73
mobilenet-yolo min = 506.41 max = 520.50 avg = 513.30
Raspberry Pi 3 Model B+ Broadcom BCM2837B0, Cortex-A53 (ARMv8) (1.4GHz x 4 )
pi@raspberrypi:~ $ ./benchncnn 8 4 0
loop_count = 8
num_threads = 4
powersave = 0
squeezenet min = 108.66 max = 109.24 avg = 108.96
mobilenet min = 151.78 max = 152.92 avg = 152.31
mobilenet_v2 min = 193.14 max = 195.56 avg = 194.50
shufflenet min = 91.41 max = 92.19 avg = 91.75
googlenet min = 302.02 max = 304.08 avg = 303.24
resnet18 min = 411.93 max = 423.14 avg = 416.54
alexnet min = 275.54 max = 276.50 avg = 276.13
vgg16 min = 1845.36 max = 1925.95 avg = 1902.28
squeezenet-ssd min = 313.86 max = 317.35 avg = 315.28
mobilenet-ssd min = 262.91 max = 264.92 avg = 263.85
mobilenet-yolo min = 638.73 max = 641.27 avg = 639.87
Rockchip RK3399 (Cortex-A72 1.8GHz x 2 + Cortex-A53 1.5GHz x 4)
rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 6 0
loop_count = 8
num_threads = 6
powersave = 0
squeezenet min = 47.28 max = 70.41 avg = 53.37
mobilenet min = 68.74 max = 176.25 avg = 82.80
mobilenet_v2 min = 71.72 max = 180.24 avg = 86.19
shufflenet min = 34.90 max = 36.14 avg = 35.54
googlenet min = 158.35 max = 301.30 avg = 191.26
resnet18 min = 190.96 max = 274.38 avg = 214.78
alexnet min = 199.21 max = 334.18 avg = 227.98
vgg16 min = 988.46 max = 1019.90 avg = 1000.14
squeezenet-ssd min = 134.83 max = 223.23 avg = 148.35
mobilenet-ssd min = 121.47 max = 235.44 avg = 149.53
mobilenet-yolo min = 295.01 max = 413.26 avg = 327.84
rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 2 2
loop_count = 8
num_threads = 2
powersave = 2
squeezenet min = 51.64 max = 55.08 avg = 52.36
mobilenet min = 88.23 max = 91.07 avg = 88.89
mobilenet_v2 min = 84.98 max = 86.21 avg = 85.74
shufflenet min = 36.04 max = 38.40 avg = 36.82
googlenet min = 185.42 max = 188.76 avg = 186.77
resnet18 min = 202.72 max = 212.27 avg = 206.91
alexnet min = 203.89 max = 222.28 avg = 215.28
vgg16 min = 901.60 max = 1013.80 avg = 948.13
squeezenet-ssd min = 139.85 max = 147.36 avg = 142.18
mobilenet-ssd min = 156.35 max = 161.21 avg = 157.96
mobilenet-yolo min = 365.75 max = 380.79 avg = 371.31
rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 1 2
loop_count = 8
num_threads = 1
powersave = 2
squeezenet min = 83.73 max = 86.78 avg = 84.94
mobilenet min = 142.90 max = 147.71 avg = 144.64
mobilenet_v2 min = 119.18 max = 132.26 avg = 123.92
shufflenet min = 52.81 max = 55.84 avg = 53.63
googlenet min = 316.69 max = 324.03 avg = 319.34
resnet18 min = 318.96 max = 331.31 avg = 322.68
alexnet min = 340.86 max = 365.09 avg = 348.99
vgg16 min = 1593.88 max = 1611.65 avg = 1602.36
squeezenet-ssd min = 199.00 max = 209.26 avg = 204.65
mobilenet-ssd min = 268.03 max = 275.70 avg = 270.74
mobilenet-yolo min = 589.43 max = 605.75 avg = 595.67
rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 1 1
loop_count = 8
num_threads = 1
powersave = 1
squeezenet min = 167.48 max = 173.60 avg = 169.23
mobilenet min = 272.88 max = 278.71 avg = 274.73
mobilenet_v2 min = 235.35 max = 239.87 avg = 237.05
shufflenet min = 111.79 max = 127.11 avg = 114.13
googlenet min = 669.47 max = 673.68 avg = 671.23
resnet18 min = 701.96 max = 714.85 avg = 708.56
alexnet min = 989.36 max = 990.63 avg = 989.96
vgg16 min = 3746.20 max = 3835.75 avg = 3788.90
squeezenet-ssd min = 445.71 max = 455.03 avg = 449.07
mobilenet-ssd min = 511.59 max = 520.00 avg = 514.59
mobilenet-yolo min = 1088.56 max = 1093.53 avg = 1090.39
Rockchip RK3288 (Cortex-A17 1.8GHz x 4)
root@rk3288:/data/local/tmp/ncnn # ./benchncnn 8 4 0
loop_count = 8
num_threads = 4
powersave = 0
squeezenet min = 51.43 max = 74.02 avg = 55.91
mobilenet min = 102.06 max = 125.67 avg = 106.02
mobilenet_v2 min = 80.09 max = 99.23 avg = 85.40
shufflenet min = 34.91 max = 35.75 avg = 35.25
googlenet min = 181.72 max = 252.12 avg = 210.67
resnet18 min = 198.86 max = 240.69 avg = 214.87
alexnet min = 154.68 max = 208.60 avg = 168.75
vgg16 min = 1019.49 max = 1231.92 avg = 1129.09
squeezenet-ssd min = 133.38 max = 241.11 avg = 167.77
mobilenet-ssd min = 156.71 max = 216.70 avg = 175.31
mobilenet-yolo min = 396.78 max = 482.60 avg = 433.34
root@rk3288:/data/local/tmp/ncnn # ./benchncnn 8 1 0
loop_count = 8
num_threads = 1
powersave = 0
squeezenet min = 137.93 max = 140.76 avg = 138.71
mobilenet min = 244.01 max = 248.27 avg = 246.24
mobilenet_v2 min = 177.94 max = 181.57 avg = 179.24
shufflenet min = 77.61 max = 78.30 avg = 77.94
googlenet min = 548.75 max = 559.40 avg = 553.00
resnet18 min = 493.66 max = 510.55 avg = 500.37
alexnet min = 564.20 max = 604.87 avg = 581.30
vgg16 min = 2425.03 max = 2447.25 avg = 2433.38
squeezenet-ssd min = 298.26 max = 304.67 avg = 302.00
mobilenet-ssd min = 465.65 max = 473.33 avg = 469.86
mobilenet-yolo min = 997.95 max = 1012.45 avg = 1002.32
HiSilicon Hi3519V101 (Cortex-A17 1.2GHz x 1)
root@Hi3519:/ncnn-benchmark # taskset 2 ./benchncnn 8 1 0
loop_count = 8
num_threads = 1
powersave = 0
squeezenet min = 272.97 max = 275.84 avg = 274.85
squeezenet-int8 min = 200.87 max = 202.47 avg = 201.74
mobilenet min = 480.90 max = 482.16 avg = 481.64
mobilenet_v2 min = 350.01 max = 352.39 avg = 350.81
shufflenet min = 152.40 max = 153.17 avg = 152.80
googlenet min = 1096.65 max = 1101.35 avg = 1099.21
resnet18 min = 983.92 max = 987.00 avg = 985.25
alexnet min = 1140.30 max = 1141.55 avg = 1140.92
squeezenet-ssd min = 574.62 max = 580.12 avg = 577.23
mobilenet-ssd min = 960.26 max = 969.13 avg = 965.93
mobilenet-yolo min = 1867.78 max = 1880.08 avg = 1873.89
iPhone 5S (Apple A7 1.3GHz x 2)
iPhone:~ root# ./benchncnn 8 2 0
loop_count = 8
num_threads = 2
powersave = 0
squeezenet min = 70.94 max = 72.40 avg = 71.75
mobilenet min = 89.24 max = 92.21 avg = 90.60
mobilenet_v2 min = 71.70 max = 74.43 avg = 73.68
shufflenet min = 35.48 max = 41.40 avg = 38.94
googlenet min = 282.76 max = 295.00 avg = 289.64
resnet18 min = 251.99 max = 260.40 avg = 255.23
alexnet min = 329.07 max = 337.75 avg = 333.24
vgg16 min = 4547.25 max = 4706.56 avg = 4647.60
squeezenet-ssd min = 171.23 max = 180.49 avg = 175.54
mobilenet-ssd min = 174.56 max = 192.69 avg = 179.60
mobilenet-yolo min = 357.90 max = 363.93 avg = 360.97
Freescale i.MX7 Dual (Cortex A7 1.0GHz x 2)
imx7d_pico:/data/local/tmp # ./benchncnn 8 2 0
loop_count = 8
num_threads = 2
powersave = 0
squeezenet min = 269.26 max = 278.84 avg = 273.10
mobilenet min = 442.79 max = 445.82 avg = 444.46
mobilenet_v2 min = 362.19 max = 364.58 avg = 363.33
shufflenet min = 171.30 max = 190.63 avg = 177.52
googlenet min = 975.95 max = 986.11 avg = 980.51
resnet18 min = 1016.60 max = 1035.50 avg = 1021.75
alexnet min = 1240.54 max = 1254.86 avg = 1247.18
vgg16 min = 0.00 max = 0.00 avg = 0.00 (FAIL due to out of memory)
squeezenet-ssd min = 614.93 max = 623.15 avg = 619.56
mobilenet-ssd min = 842.83 max = 884.64 avg = 855.40
mobilenet-yolo min = 1772.24 max = 1924.37 avg = 1805.75