opencv dnn模塊示例(10) mask rcnn inception V2

一、opencv的示例模型文件

OpenCV 4.0.0 中暫未提供 Mask R-CNN 的 C++ 示例代碼，本文代碼由官方 Python 示例改編而來，參考 https://github.com/opencv/opencv/blob/master/samples/dnn/mask_rcnn.py 。我們使用的模型爲
mask_rcnn_inception_v2_coco_2018_01_28.pb，選擇 Inception V2 是因爲其速度更快；其他效果更好的模型（如 ResNeXt-101 相關模型）可在 TensorFlow model zoo 下載。

二、示例代碼

#include <fstream>
#include <sstream>

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

#include <iostream>

using namespace cv;
using namespace dnn;

// Minimum detection score for a box to be kept by postprocess(); assigned in main().
float confThreshold;
// Per-pixel cut-off applied to the resized mask scores in postprocess(); assigned in main().
float maskThreshold;
// Value passed as the nms_threshold argument of cv::dnn::NMSBoxes in postprocess(); assigned in main().
float nmsThreshold;

// Class names; main() stores "background" first, followed by the lines of the classes file.
std::vector<std::string> classes;
// Colors used by postprocess() to tint instance masks; filled in main().
std::vector<Vec3b> colors;

// Draws the detections contained in `outs` (boxes, masks) onto `frame`.
void postprocess(cv::Mat& frame, const std::vector<Mat>& outs);

int main(int argc, char** argv) try
{
	//	mask rcnn

	// 根據選擇的檢測模型文件進行配置 
	confThreshold = 0.5;
	maskThreshold = 0.3;
	nmsThreshold = 0.5;

	float scale = 1.;          // UNneed
	Scalar mean = { 0,0,0 };   // UNneed
	bool swapRB = true;
	int inpWidth = 800;
	int inpHeight = 800;

	String modelPath = "../../data/testdata/dnn/mask_rcnn_inception_v2_coco_2018_01_28.pb";
	String configPath = "../../data/testdata/dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt";

	String framework = "";

	int backendId = cv::dnn::DNN_BACKEND_OPENCV;
	int targetId = cv::dnn::DNN_TARGET_CPU;

	String classesFile = "../../data/dnn/object_detection_classes_coco.txt";
	String colorFile = "";

	// Open file with classes names.
	if (!classesFile.empty()) {
		const std::string file = classesFile;
		std::ifstream ifs(file.c_str());
		if (!ifs.is_open())
			CV_Error(Error::StsError, "File " + file + " not found");
		std::string line;

		classes.emplace_back("background"); //使用的是object_detection_classes類

		while (std::getline(ifs, line)) {
			classes.push_back(line);
		}
	}

	std::srand(324);
	if (!colorFile.empty()) {
		const std::string& file = colorFile;
		std::ifstream ifs(file.c_str());
		if (!ifs.is_open())
			CV_Error(Error::StsError, "File " + file + " not found");
		std::string line;
		while (std::getline(ifs, line)) {
			std::istringstream colorStr(line.c_str());
			Vec3b color;
			for (int i = 0; i < 3 && !colorStr.eof(); ++i)
				colorStr >> color[i];
			colors.push_back(color);
		}
	}
	else {
		colors.emplace_back(Vec3b());			
		for (int i = 1; i < classes.size(); ++i) {
			Vec3b color;
			for (int j = 0; j < 3; ++j)
				color[j] = (colors[i - 1][j] + rand() % 256) / 2;
			colors.emplace_back(color);
		}
	}

	CV_Assert(!modelPath.empty());
	//! [Read and initialize network]
	Net net = readNet(modelPath, configPath, framework);
	net.setPreferableBackend(backendId);
	net.setPreferableTarget(targetId);
	//! [Read and initialize network]

	// Create a window
	static const std::string kWinName = "MASK-RCNN in OpenCV";
	namedWindow(kWinName, WINDOW_AUTOSIZE);

	//! [Open a video file or an image file or a camera stream]
	VideoCapture cap;
	cap.open("../../data/image/dog.jpg");                       // pascal voc 	

	if (!cap.isOpened()) {
		std::cout << "VideoCapture open failed." << std::endl;
		return 0;
	}

	//! [Open a video file or an image file or a camera stream]

	// Process frames.
	Mat frame, blob;
	while (waitKey(1) < 0) {
		cap >> frame;
		if (frame.empty()) {
			waitKey();
			break;
		}

		//! [Create a 4D blob from a frame]
		blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false);

		//! [Create a 4D blob from a frame]

		//! [Set input blob]
		net.setInput(blob);
		//! [Set input blob]

		//! [Make forward pass]		
		std::vector<Mat> outputs;		// score, mask
		static std::vector<std::string> outputNames = { "detection_out_final" , "detection_masks" };
		net.forward(outputs, outputNames);
		//! [Make forward pass]
		
		// Draw result
		postprocess(frame, outputs);

		// Put efficiency information.
		std::vector<double> layersTimes;
		double freq = getTickFrequency() / 1000;
		double t = net.getPerfProfile(layersTimes) / freq;
		std::string label = format("Inference time: %.2f ms", t);
		putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));

		imshow(kWinName, frame);
	}
	return 0;
}
catch (std::exception & e) {
	std::cerr << e.what() << std::endl;
}

/**
 * Draws the Mask R-CNN detections onto `frame`.
 *
 * Detections scoring above the global `confThreshold` are clipped to the
 * frame, reduced with non-maximum suppression (`nmsThreshold`), and rendered
 * as a tinted instance mask, a bounding box and a "class: score" label.
 *
 * @param frame image to annotate in place; nothing is done if it is empty.
 * @param outs  network outputs: outs[0] holds the detections, read as a
 *              1x1xNx7 float tensor with rows
 *              [batchId, classId, confidence, left, top, right, bottom]
 *              (coordinates relative to the frame size); outs[1] holds the
 *              masks, read as a float tensor indexed
 *              [detection, class, maskRow, maskCol].
 */
void postprocess(cv::Mat& frame, const std::vector<Mat>& outs)
{
	if (frame.empty()) return;
	CV_Assert(outs.size() >= 2);

	const Mat& boxes = outs[0];
	// Header copy only (shares the data); needed because wrapping a mask
	// slice in a Mat below requires a non-const pointer.
	Mat masks = outs[1];
	CV_Assert(boxes.dims == 4 && boxes.size[3] >= 7);
	CV_Assert(masks.dims == 4);

	const int frameW = frame.cols;
	const int frameH = frame.rows;
	const Rect frameRect(Point(0, 0), frame.size());

	// Parallel arrays describing the detections that pass the score threshold.
	// detectionIds remembers the row of each kept detection in `boxes`,
	// which is also its index along the first axis of `masks`.
	std::vector<int> detectionIds;
	std::vector<int> classIds;
	std::vector<float> confidences;
	std::vector<Rect> predBoxes;

	const int numDetections = boxes.size[2];
	for (int i = 0; i < numDetections; ++i) {
		const float* det = boxes.ptr<float>(0, 0, i);
		const float score = det[2];
		if (score <= confThreshold)
			continue;

		const int left = static_cast<int>(frameW * det[3]);
		const int top = static_cast<int>(frameH * det[4]);
		const int right = static_cast<int>(frameW * det[5]);
		const int bottom = static_cast<int>(frameH * det[6]);
		const Rect rect = Rect(Point(left, top), Point(right, bottom)) & frameRect;

		detectionIds.push_back(i);
		classIds.push_back(static_cast<int>(det[1]));
		predBoxes.push_back(rect);
		confidences.push_back(score);
	}

	// Unlike the reference sample, run NMS so that a region does not end up
	// with several overlapping boxes.
	std::vector<int> indices;
	cv::dnn::NMSBoxes(predBoxes, confidences, confThreshold, nmsThreshold, indices);

	// Draw results
	for (const int idx : indices) {
		const Rect box = predBoxes[idx];
		// A box lying completely outside the frame is empty after clipping;
		// resize() and the ROI operations below cannot handle it.
		if (box.width <= 0 || box.height <= 0)
			continue;

		const int classId = classIds[idx];
		const float conf = confidences[idx];
		const int detId = detectionIds[idx];

		// Draw mask. It must be looked up with the detection's original row
		// in the network output, not with its position in the NMS result.
		const bool maskAvailable = detId < masks.size[0]
			&& classId >= 0 && classId < masks.size[1];
		if (maskAvailable) {
			// Color is chosen per class; pick a random index instead to get
			// a distinct color per instance.
			const Scalar color = colors.empty()
				? Scalar(0, 0, 255)
				: Scalar(colors[(classId + 1) % colors.size()]);

			const Mat rawMask(masks.size[2], masks.size[3], CV_32F, masks.ptr<float>(detId, classId));
			Mat resizedMask;
			resize(rawMask, resizedMask, box.size(), 0, 0, cv::INTER_LINEAR_EXACT);
			const Mat binaryMask = resizedMask > maskThreshold;

			Mat coloredRoi;
			addWeighted(frame(box), 0.3, color, 0.7, 0, coloredRoi);
			coloredRoi.copyTo(frame(box), binaryMask);
		}

		// Draw box
		rectangle(frame, box, Scalar(0, 255, 0));
		std::string label = format("%.2f", conf);
		if (!classes.empty()) {
			// classes[0] is "background", so the name lives at classId + 1;
			// the assertion has to cover exactly that index.
			CV_Assert(classId >= 0 && classId + 1 < (int)classes.size());
			label = classes[classId + 1] + ": " + label;
		}

		// Label on a white background, placed just above the box's top-left corner.
		int baseLine = 0;
		const cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
		const Rect labelRect(box.tl() - cv::Point2i(0, baseLine + labelSize.height),
			labelSize + cv::Size(0, baseLine));
		rectangle(frame, labelRect, cv::Scalar::all(255), cv::FILLED);
		putText(frame, label, box.tl() - cv::Point2i(0, baseLine), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar());
	}
}

三、演示

修改代碼，顯示實例分割的結果

//Scalar color = colors[(classId +1) % colors.size()];
int colorInd = rand() % colors.size();  //generate different instance colors
Scalar color = colors[colorInd];

附帶一張將 backendId 設爲 cv::dnn::DNN_BACKEND_INFERENCE_ENGINE（即使用 Intel OpenVINO 版 OpenCV 庫）時的測試結果，性能提高30%。

opencv dnn模塊示例(10) mask rcnn inception V2

一、opencv的示例模型文件

相關知識

二、示例代碼

三、演示

釘釘打卡速度慢

Nginx R31 doc 官方文檔-01-nginx 如何安裝

Python 潮流週刊#51：用 Python 繪製美觀的圖表

Qt/C++音視頻開發74-合併標籤圖形/生成yolo運算結果圖形/文字和圖形合併成一個/水印濾鏡

挑戰程序設計競賽 2.2章習題 POJ - 3617 Best Cow Line 貪心

字節面試：MySQL什麼時候鎖表？如何防止鎖表？

.NET8連接SQL SERVER 2008 R2 報：證書鏈是由不受信任的頒發機構頒發的

golang開發環境搭建(win10)

python計算機視覺學習筆記——PIL庫的用法

Golang初學：獲取程序內存使用情況，std runtime

opencv dnn模塊示例(15) opencv4.2版本dnn支持cuda加速（vs2015異常解決）

c++ 線程安全queue隊列Logger日誌

滿足gb28181的device 實現流程

c++ 操作符operator的重載

opencv dnn模塊示例(7) openpose關鍵點

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結

opencv dnn模塊 示例(10) mask rcnn inception V2

一、opencv的示例模型文件

相關知識

二、示例代碼

三、演示

opencv dnn模塊示例(10) mask rcnn inception V2