使用MLP解決OCR問題（OpenCV）（下）

分類模型：

分類模型涉及的一個比較關鍵的問題，就是輸出的10維向量如何與具體的類別掛鉤。實際上，10維向量的每一位都代表一個類別：在訓練集的標籤表達中，如果輸入數據是數字0，則10維向量的第一位賦值爲1，其餘各位表示「不屬於該類」，概念上即0對應[1,0,0,0,0,0,0,0,0,0]（注意：下文的完整代碼以Scalar(-1)初始化標籤矩陣，所以其餘各位實際取值爲-1而不是0）。MLP模型訓練完成後，就需要對用戶輸入的數據所屬類別進行判定。這時得到的輸出向量基本不可能恰好是所屬類的位置爲1、其他位置爲0，因此具體的分類方法是找出這10位中數值最大的一位，該位對應的類別即爲輸入所屬的類別。

// Excerpt from the test loop: classify one test sample and report the result.
test_sample = test_set.row(tsample);

		// Run the classifier; its output vector is written to classificationResult.

		nnetwork.predict(test_sample, classificationResult);
		// The position of the largest value in the output vector is the predicted class.

		// Linear scan over the CLASSES outputs to find the index of the maximum.
		int maxIndex = 0;
		float value=0.0f;
		float maxValue=classificationResult.at<float>(0,0);
		for(int index=1;index<CLASSES;index++)
		{   
			value = classificationResult.at<float>(0,index);
			if(value>maxValue)
			{   
				maxValue = value;
				maxIndex=index;

			}
		}

		printf("Testing Sample %i -> class result (digit %d)\n", tsample, maxIndex);

測試集：

測試集是用來測試訓練好的模型是否有良好的泛化性，即是否能識別訓練集以外的數據。所以這裏就要求訓練集與測試集最好不要有相同的圖片。如果測試結果不滿意，則需要增加訓練集重新訓練或者調整MLP的參數。

文章的最後將整個CPP文件分享給大家

#include <opencv2/opencv.hpp>
#include <string.h>
#include <fstream>
#include <stdio.h>
using namespace std;
using namespace cv;

#define ATTRIBUTES 135  // number of pixels (features) per sample: 9 x 15
#define CLASSES 10  // length of the network output vector, one entry per digit 0-9
#define TRAINING_SAMPLES 460  // rows in the training matrices (main builds the set with 46 images per digit)
#define TEST_SAMPLES 200  // rows in the test matrices (main builds the set with 20 images per digit)

// Return the decimal text form of an int (e.g. 42 -> "42").
string convertInt(int number)
{
	ostringstream formatter;
	formatter << number;
	return formatter.str();
}
// Flatten an image into a vector of 0/1 values.
// The top-left 15-row by 9-column region of img is visited row by row; a pixel
// that equals 255 becomes 1, any other value becomes 0. Pixels are read as
// uchar, so a single-channel 8-bit image is assumed (not checked here).
// pixelarray must have room for 15 * 9 = 135 ints.
void convertToPixelValueArray(Mat &img,int pixelarray[])
{
	const int rowCount = 15;
	const int colCount = 9;

	for(int row = 0; row < rowCount; ++row)
	{
		// Start of this image row inside the flat output vector.
		int *rowOut = pixelarray + row * colCount;
		for(int col = 0; col < colCount; ++col)
		{
			if(img.at<uchar>(row, col) == 255)
				rowOut[col] = 1;
			else
				rowOut[col] = 0;
		}
	}
}
// Build a text dataset from a directory tree of digit images.
//
// For sample = 1..samplesPerClass and digit = 0..9, the image at
//   datasetPath + "<digit>\\<sample>.bmp"
// is loaded as grayscale, flattened to 135 values of 0/1 by
// convertToPixelValueArray, and written to outputfile as one line:
// the 135 values separated by commas, followed by the digit label.
//
//   datasetPath     - directory prefix (must already end with a path separator).
//   samplesPerClass - number of images expected for each digit.
//   outputfile      - path of the text file to create/overwrite.
//
// Failure handling: if the output file cannot be opened the function reports
// it on stderr and returns. An image that cannot be loaded, or that is smaller
// than 15 rows x 9 columns, is reported on stderr and skipped instead of being
// passed on to the pixel reader; the output then contains fewer than
// samplesPerClass * 10 lines.
void readFile(string datasetPath,int samplesPerClass,string outputfile )
{
	fstream file(outputfile.c_str(),ios::out);
	if(!file.is_open())
	{
		fprintf(stderr, "readFile: cannot open output file '%s'\n", outputfile.c_str());
		return;
	}

	for(int sample = 1; sample<=samplesPerClass;sample++)
	{
		for(int digit=0;digit<10;digit++)
		{
			// Build the image path: <datasetPath><digit>\<sample>.bmp
			string imagePath = datasetPath+convertInt(digit)+"\\"+convertInt(sample)+".bmp";

			// Flag 0 requests a single-channel grayscale load; imread returns
			// an empty Mat when the file is missing or cannot be decoded.
			Mat img = imread(imagePath,0);
			if(img.empty() || img.rows<15 || img.cols<9)
			{
				fprintf(stderr, "readFile: skipping unreadable or undersized image '%s'\n", imagePath.c_str());
				continue;
			}

			int pixelValueArray[135];

			// Flatten the image matrix into a vector.
			convertToPixelValueArray(img,pixelValueArray);
			// Write the vector to the file, comma separated.
			for(int d=0;d<135;d++){
				file<<pixelValueArray[d]<<",";
			}
			// Append the class label at the end of the line.
			file<<digit<<"\n";

		}
	}
	file.close();
}
//從樣本集生成的文件中讀取數據
void read_dataset(char *filename, Mat &data, Mat &classes,  int total_samples)
{

	int label;
	float pixelvalue;
	FILE* inputfile = fopen( filename, "r" );

	
	for(int row = 0; row < total_samples; row++)
	{
		
		for(int col = 0; col <=ATTRIBUTES; col++)
		{
			
			if (col < ATTRIBUTES){

				fscanf(inputfile, "%f,", &pixelvalue);
				data.at<float>(row,col) = pixelvalue;

			}
			else if (col == ATTRIBUTES){
				//輸出向量的結構是應屬類別的位置賦值爲1，其餘賦值爲0
				fscanf(inputfile, "%i", &label);
				classes.at<float>(row,label) = 1.0;

			}
		}
	}

	fclose(inputfile);

}

// Program entry point: builds the training/test text datasets from image
// folders, trains a 135-16-10 MLP on the training set, saves the model to an
// XML file, then classifies every test sample and prints the accuracy and a
// per-class classification matrix. All paths are hard-coded.
int main( int argc, char** argv )
{

	// Convert the image folders into one-sample-per-line text files
	// (46 images per digit for training, 20 per digit for testing).
	readFile("E:\\workdir\\NN\\character_train\\",46,"E:\\workdir\\NN\\trainingset.txt");
	readFile("E:\\workdir\\NN\\character_test\\",20,"E:\\workdir\\NN\\testset.txt");

	// Matrix holding the training samples, one sample per row.
	Mat training_set(TRAINING_SAMPLES,ATTRIBUTES,CV_32F);
	// Matrix holding the training labels (output vectors); every entry starts at -1
	// and read_dataset sets the entry of the true class to 1.
	Mat training_set_classifications(TRAINING_SAMPLES, CLASSES, CV_32F,Scalar(-1));
	// Matrix holding the test samples, one sample per row.
	Mat test_set(TEST_SAMPLES,ATTRIBUTES,CV_32F);
	// Matrix holding the test labels (output vectors), initialised the same way.
	Mat test_set_classifications(TEST_SAMPLES,CLASSES,CV_32F,Scalar(-1));

	// Single-row buffer that receives the network output for one test sample.
	Mat classificationResult(1, CLASSES, CV_32F);
	
	read_dataset("E:\\workdir\\NN\\trainingset.txt", training_set, training_set_classifications, TRAINING_SAMPLES);
	read_dataset("E:\\workdir\\NN\\testset.txt", test_set, test_set_classifications, TEST_SAMPLES);

	// Define the MLP topology.
	// The network has three layers:
	// - 135 input nodes
	// - 16 hidden nodes
	// - 10 output nodes.

	cv::Mat layers(3,1,CV_32S);
	layers.at<int>(0,0) = ATTRIBUTES;//input layer
	layers.at<int>(1,0)=16;//hidden layer
	layers.at<int>(2,0) =CLASSES;//output layer

	// Create the neural network (symmetric sigmoid activation, parameters 2/3 and 1).
	//for more details check http://docs.opencv.org/modules/ml/doc/neural_networks.html
	CvANN_MLP nnetwork(layers, CvANN_MLP::SIGMOID_SYM,2.0/3.0,1);

	CvANN_MLP_TrainParams params(                                  

		// Stop training after 1000 iterations,
		// or when the network weights change only
		// very slightly after an iteration.
		cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 0.000001),
		// Train with the back-propagation algorithm.
		CvANN_MLP_TrainParams::BACKPROP,
		// Coefficients of the back-propagation algorithm.
		// recommended values taken from http://docs.opencv.org/modules/ml/doc/neural_networks.html#cvann-mlp-trainparams
		0.1,
		0.1);

	// Train the neural network.

	printf( "\nUsing training dataset\n");
	int iterations = nnetwork.train(training_set, training_set_classifications,cv::Mat(),cv::Mat(),params);
	printf( "Training iterations: %i\n\n", iterations);

	// Save the model to an XML file.
	CvFileStorage* storage = cvOpenFileStorage( "E:\\workdir\\NN\\param.xml", 0, CV_STORAGE_WRITE );
	nnetwork.write(storage,"DigitOCR");
	cvReleaseFileStorage(&storage);

	// Test the trained model.
	cv::Mat test_sample;
	
	int correct_class = 0;
	
	int wrong_class = 0;

	// Classification matrix: entry [actual][predicted] counts how many samples
	// of class `actual` were assigned to class `predicted`.
	int classification_matrix[CLASSES][CLASSES]={{}};

	
	for (int tsample = 0; tsample < TEST_SAMPLES; tsample++) 
	{
		test_sample = test_set.row(tsample);

		// Run the classifier; its output vector is written to classificationResult.

		nnetwork.predict(test_sample, classificationResult);
		// The position of the largest value in the output vector is the predicted class.

		// Linear scan over the CLASSES outputs to find the index of the maximum.
		int maxIndex = 0;
		float value=0.0f;
		float maxValue=classificationResult.at<float>(0,0);
		for(int index=1;index<CLASSES;index++)
		{   
			value = classificationResult.at<float>(0,index);
			if(value>maxValue)
			{   
				maxValue = value;
				maxIndex=index;

			}
		}

		printf("Testing Sample %i -> class result (digit %d)\n", tsample, maxIndex);

		// Compare the network's prediction with the ground truth. If the
		// classification is correct, test_set_classifications[tsample][maxIndex]
		// is 1. If it is wrong, record the error.
		if (test_set_classifications.at<float>(tsample, maxIndex)!=1.0f)
		{

			wrong_class++;

			// Find the true class of this sample and mark the classification matrix.
			// If no entry of the label row equals 1, the sample is counted as wrong
			// but not added to the matrix.
			for(int class_index=0;class_index<CLASSES;class_index++)
			{
				if(test_set_classifications.at<float>(tsample, class_index)==1.0f)
				{

					classification_matrix[class_index][maxIndex]++;// A class_index sample was wrongly classified as maxindex.
					break;
				}
			}

		} 
		else 
		{
			correct_class++;
			classification_matrix[maxIndex][maxIndex]++;
		}
	}
	
	// Print the test results: totals with percentages, then the classification
	// matrix with a header row of class indices.
	printf( "\nResults on the testing dataset\n"
		"\tCorrect classification: %d (%g%%)\n"
		"\tWrong classifications: %d (%g%%)\n", 
		correct_class, (double) correct_class*100/TEST_SAMPLES,
		wrong_class, (double) wrong_class*100/TEST_SAMPLES);
	cout<<"   ";
	for (int i = 0; i < CLASSES; i++)
	{
		cout<< i<<"\t";
	}
	cout<<"\n";
	for(int row=0;row<CLASSES;row++)
	{
		cout<<row<<"  ";
		for(int col=0;col<CLASSES;col++)
		{
			cout<<classification_matrix[row][col]<<"\t";
		}
		cout<<"\n";
	}

	return 0;

}

參考文獻： http://www.nithinrajs.in/ocr-using-artificial-neural-network-opencv-part-1/

使用MLP解決OCR問題（OpenCV）（下）

分類模型：

測試集：

985 碩士程序員，空窗 4 個月沒有 Offer！

賽博鬥地主——使用大語言模型扮演Agent智能體玩牌類遊戲。

VScode右鍵打開(添加到右鍵)

神經網絡學習筆記（七）：線性迴歸模型（上）

神經網絡學習筆記（四）

神經網絡學習筆記（十）:多層感知機（中）--BP算法

使用MLP解決OCR問題（OpenCV）（上）

神經網絡學習筆記（五）:感知機

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結