TLD.h
- #include <opencv2/opencv.hpp>
- #include <tld_utils.h>
- #include <LKTracker.h>
- #include <FerNNClassifier.h>
- #include <fstream>
- //Bounding Boxes
- struct BoundingBox : public cv::Rect {
- BoundingBox(){}
- BoundingBox(cv::Rect r): cv::Rect(r){} //繼承的話需要初始化基類
- public:
- float overlap; //Overlap with current Bounding Box
- int sidx; //scale index
- };
- //Detection structure
- struct DetStruct {
- std::vector<int> bb;
- std::vector<std::vector<int> > patt;
- std::vector<float> conf1;
- std::vector<float> conf2;
- std::vector<std::vector<int> > isin;
- std::vector<cv::Mat> patch;
- };
// Temporal structure: scratch data kept for every window of the scanning grid
// (indexed by grid position, filled in by TLD::detect()).
struct TempStruct {
  std::vector<std::vector<int> > patt;  // fern codes per grid window
  std::vector<float> conf;              // forest confidence per grid window
};
- struct OComparator{ //比較兩者重合度
- OComparator(const std::vector<BoundingBox>& _grid):grid(_grid){}
- std::vector<BoundingBox> grid;
- bool operator()(int idx1,int idx2){
- return grid[idx1].overlap > grid[idx2].overlap;
- }
- };
// Orders indexes by descending confidence: idx1 comes before idx2 when
// conf[idx1] is larger than conf[idx2].
struct CComparator {
  CComparator(const std::vector<float>& _conf) : conf(_conf) {}

  std::vector<float> conf;  // copy of the confidences being compared

  // const-qualified so the functor can be invoked through a const object or
  // reference; it never modifies its state.
  bool operator()(int idx1, int idx2) const {
    return conf[idx1] > conf[idx2];
  }
};
- class TLD{
- private:
- cv::PatchGenerator generator; //PatchGenerator類用來對圖像區域進行仿射變換
- FerNNClassifier classifier;
- LKTracker tracker;
- //下面這些參數通過程序開始運行時讀入parameters.yml文件進行初始化
- ///Parameters
- int bbox_step;
- int min_win;
- int patch_size;
- //initial parameters for positive examples
- //從第一幀得到的目標的bounding box中(文件讀取或者用戶框定),經過幾何變換得
- //到 num_closest_init * num_warps_init 個正樣本
- int num_closest_init; //最近鄰窗口數 10
- int num_warps_init; //幾何變換數目 20
- int noise_init;
- float angle_init;
- float shift_init;
- float scale_init;
- ////從跟蹤得到的目標的bounding box中,經過幾何變換更新正樣本(添加到在線模型?)
- //update parameters for positive examples
- int num_closest_update;
- int num_warps_update;
- int noise_update;
- float angle_update;
- float shift_update;
- float scale_update;
- //parameters for negative examples
- float bad_overlap;
- float bad_patches;
- ///Variables
- //Integral Images 積分圖像,用以計算2bitBP特徵(類似於haar特徵的計算)
- //Mat最大的優勢跟STL很相似,都是對內存進行動態的管理,不需要之前用戶手動的管理內存
- cv::Mat iisum;
- cv::Mat iisqsum;
- float var;
- //Training data
- //std::pair主要的作用是將兩個數據組合成一個數據,兩個數據可以是同一類型或者不同類型。
- //pair實質上是一個結構體,其主要的兩個成員變量是first和second,這兩個變量可以直接使用。
- //在這裏用來表示樣本,first成員爲 features 特徵點數組,second成員爲 labels 樣本類別標籤
- std::vector<std::pair<std::vector<int>,int> > pX; //positive ferns <features,labels=1> 正樣本
- std::vector<std::pair<std::vector<int>,int> > nX; // negative ferns <features,labels=0> 負樣本
- cv::Mat pEx; //positive NN example
- std::vector<cv::Mat> nEx; //negative NN examples
- //Test data
- std::vector<std::pair<std::vector<int>,int> > nXT; //negative data to Test
- std::vector<cv::Mat> nExT; //negative NN examples to Test
- //Last frame data
- BoundingBox lastbox;
- bool lastvalid;
- float lastconf;
- //Current frame data
- //Tracker data
- bool tracked;
- BoundingBox tbb;
- bool tvalid;
- float tconf;
- //Detector data
- TempStruct tmp;
- DetStruct dt;
- std::vector<BoundingBox> dbb;
- std::vector<bool> dvalid; //檢測有效性??
- std::vector<float> dconf; //檢測確信度??
- bool detected;
- //Bounding Boxes
- std::vector<BoundingBox> grid;
- std::vector<cv::Size> scales;
- std::vector<int> good_boxes; //indexes of bboxes with overlap > 0.6
- std::vector<int> bad_boxes; //indexes of bboxes with overlap < 0.2
- BoundingBox bbhull; // hull of good_boxes //good_boxes 的 殼,也就是窗口的邊框
- BoundingBox best_box; // maximum overlapping bbox
- public:
- //Constructors
- TLD();
- TLD(const cv::FileNode& file);
- void read(const cv::FileNode& file);
- //Methods
- void init(const cv::Mat& frame1,const cv::Rect &box, FILE* bb_file);
- void generatePositiveData(const cv::Mat& frame, int num_warps);
- void generateNegativeData(const cv::Mat& frame);
- void processFrame(const cv::Mat& img1,const cv::Mat& img2,std::vector<cv::Point2f>& points1,std::vector<cv::Point2f>& points2,
- BoundingBox& bbnext,bool& lastboxfound, bool tl,FILE* bb_file);
- void track(const cv::Mat& img1, const cv::Mat& img2,std::vector<cv::Point2f>& points1,std::vector<cv::Point2f>& points2);
- void detect(const cv::Mat& frame);
- void clusterConf(const std::vector<BoundingBox>& dbb,const std::vector<float>& dconf,std::vector<BoundingBox>& cbb,std::vector<float>& cconf);
- void evaluate();
- void learn(const cv::Mat& img);
- //Tools
- void buildGrid(const cv::Mat& img, const cv::Rect& box);
- float bbOverlap(const BoundingBox& box1,const BoundingBox& box2);
- void getOverlappingBoxes(const cv::Rect& box1,int num_closest);
- void getBBHull();
- void getPattern(const cv::Mat& img, cv::Mat& pattern,cv::Scalar& mean,cv::Scalar& stdev);
- void bbPoints(std::vector<cv::Point2f>& points, const BoundingBox& bb);
- void bbPredict(const std::vector<cv::Point2f>& points1,const std::vector<cv::Point2f>& points2,
- const BoundingBox& bb1,BoundingBox& bb2);
- double getVar(const BoundingBox& box,const cv::Mat& sum,const cv::Mat& sqsum);
- bool bbComp(const BoundingBox& bb1,const BoundingBox& bb2);
- int clusterBB(const std::vector<BoundingBox>& dbb,std::vector<int>& indexes);
- };
TLD.cpp
- /*
- * TLD.cpp
- *
- * Created on: Jun 9, 2011
- * Author: alantrrs
- */
- #include <TLD.h>
- #include <stdio.h>
- using namespace cv;
- using namespace std;
- TLD::TLD()
- {
- }
- TLD::TLD(const FileNode& file){
- read(file);
- }
- void TLD::read(const FileNode& file){
- ///Bounding Box Parameters
- min_win = (int)file["min_win"];
- ///Genarator Parameters
- //initial parameters for positive examples
- patch_size = (int)file["patch_size"];
- num_closest_init = (int)file["num_closest_init"];
- num_warps_init = (int)file["num_warps_init"];
- noise_init = (int)file["noise_init"];
- angle_init = (float)file["angle_init"];
- shift_init = (float)file["shift_init"];
- scale_init = (float)file["scale_init"];
- //update parameters for positive examples
- num_closest_update = (int)file["num_closest_update"];
- num_warps_update = (int)file["num_warps_update"];
- noise_update = (int)file["noise_update"];
- angle_update = (float)file["angle_update"];
- shift_update = (float)file["shift_update"];
- scale_update = (float)file["scale_update"];
- //parameters for negative examples
- bad_overlap = (float)file["overlap"];
- bad_patches = (int)file["num_patches"];
- classifier.read(file);
- }
- //此函數完成準備工作
- void TLD::init(const Mat& frame1, const Rect& box, FILE* bb_file){
- //bb_file = fopen("bounding_boxes.txt","w");
- //Get Bounding Boxes
- //此函數根據傳入的box(目標邊界框)在傳入的圖像frame1中構建全部的掃描窗口,並計算重疊度
- buildGrid(frame1, box);
- printf("Created %d bounding boxes\n",(int)grid.size()); //vector的成員size()用於獲取向量元素的個數
- ///Preparation
- //allocation
- //積分圖像,用以計算2bitBP特徵(類似於haar特徵的計算)
- //Mat的創建,方式有兩種:1.調用create(行,列,類型)2.Mat(行,列,類型(值))。
- iisum.create(frame1.rows+1, frame1.cols+1, CV_32F);
- iisqsum.create(frame1.rows+1, frame1.cols+1, CV_64F);
- //Detector data中定義:std::vector<float> dconf; 檢測確信度??
- //vector 的reserve增加了vector的capacity,但是它的size沒有改變!而resize改變了vector
- //的capacity同時也增加了它的size!reserve是容器預留空間,但在空間內不真正創建元素對象,
- //所以在沒有添加新的對象之前,不能引用容器內的元素。
- //不管是調用resize還是reserve,二者對容器原有的元素都沒有影響。
- //myVec.reserve( 100 ); // 新元素還沒有構造, 此時不能用[]訪問元素
- //myVec.resize( 100 ); // 用元素的默認構造函數構造了100個新的元素,可以直接操作新元素
- dconf.reserve(100);
- dbb.reserve(100);
- bbox_step =7;
- //以下在Detector data中定義的容器都給其分配grid.size()大小(這個是一幅圖像中全部的掃描窗口個數)的容量
- //Detector data中定義TempStruct tmp;
- //tmp.conf.reserve(grid.size());
- tmp.conf = vector<float>(grid.size());
- tmp.patt = vector<vector<int> >(grid.size(), vector<int>(10,0));
- //tmp.patt.reserve(grid.size());
- dt.bb.reserve(grid.size());
- good_boxes.reserve(grid.size());
- bad_boxes.reserve(grid.size());
- //TLD中定義:cv::Mat pEx; //positive NN example 大小爲15*15圖像片
- pEx.create(patch_size, patch_size, CV_64F);
- //Init Generator
- //TLD中定義:cv::PatchGenerator generator; //PatchGenerator類用來對圖像區域進行仿射變換
- /*
- cv::PatchGenerator::PatchGenerator (
- double _backgroundMin,
- double _backgroundMax,
- double _noiseRange,
- bool _randomBlur = true,
- double _lambdaMin = 0.6,
- double _lambdaMax = 1.5,
- double _thetaMin = -CV_PI,
- double _thetaMax = CV_PI,
- double _phiMin = -CV_PI,
- double _phiMax = CV_PI
- )
- 一般的用法是先初始化一個PatchGenerator的實例,然後RNG一個隨機因子,再調用()運算符產生一個變換後的正樣本。
- */
- generator = PatchGenerator (0,0,noise_init,true,1-scale_init,1+scale_init,-angle_init*CV_PI/180,
- angle_init*CV_PI/180,-angle_init*CV_PI/180,angle_init*CV_PI/180);
- //此函數根據傳入的box(目標邊界框),在整幀圖像中的全部窗口中尋找與該box距離最小(即最相似,
- //重疊度最大)的num_closest_init個窗口,然後把這些窗口 歸入good_boxes容器
- //同時,把重疊度小於0.2的,歸入 bad_boxes 容器
- //首先根據overlap的比例信息選出重複區域比例大於60%並且前num_closet_init= 10個的最接近box的RectBox,
- //相當於對RectBox進行篩選。並通過BBhull函數得到這些RectBox的最大邊界。
- getOverlappingBoxes(box, num_closest_init);
- printf("Found %d good boxes, %d bad boxes\n",(int)good_boxes.size(),(int)bad_boxes.size());
- printf("Best Box: %d %d %d %d\n",best_box.x, best_box.y, best_box.width, best_box.height);
- printf("Bounding box hull: %d %d %d %d\n", bbhull.x, bbhull.y, bbhull.width, bbhull.height);
- //Correct Bounding Box
- lastbox=best_box;
- lastconf=1;
- lastvalid=true;
- fprintf(bb_file,"%d,%d,%d,%d,%f\n",lastbox.x,lastbox.y,lastbox.br().x,lastbox.br().y,lastconf);
- //Prepare Classifier 準備分類器
- //scales容器裏是所有掃描窗口的尺度,由buildGrid()函數初始化
- classifier.prepare(scales);
- ///Generate Data
- // Generate positive data
- generatePositiveData(frame1, num_warps_init);
- // Set variance threshold
- Scalar stdev, mean;
- //統計best_box的均值和標準差
- ////例如需要提取圖像A的某個ROI(感興趣區域,由矩形框)的話,用Mat類的B=img(ROI)即可提取
- //frame1(best_box)就表示在frame1中提取best_box區域(目標區域)的圖像片
- meanStdDev(frame1(best_box), mean, stdev);
- //利用積分圖像去計算每個待檢測窗口的方差
- //cvIntegral( const CvArr* image, CvArr* sum, CvArr* sqsum=NULL, CvArr* tilted_sum=NULL );
- //計算積分圖像,輸入圖像,sum積分圖像, W+1×H+1,sqsum對象素值平方的積分圖像,tilted_sum旋轉45度的積分圖像
- //利用積分圖像,可以計算在某象素的上-右方的或者旋轉的矩形區域中進行求和、求均值以及標準方差的計算,
- //並且保證運算的複雜度爲O(1)。
- integral(frame1, iisum, iisqsum);
- //級聯分類器模塊一:方差檢測模塊,利用積分圖計算每個待檢測窗口的方差,方差大於var閾值(目標patch方差的50%)的,
- //則認爲其含有前景目標方差;var 爲標準差的平方
- var = pow(stdev.val[0],2) * 0.5; //getVar(best_box,iisum,iisqsum);
- cout << "variance: " << var << endl;
- //check variance
- //getVar函數通過積分圖像計算輸入的best_box的方差
- double vr = getVar(best_box, iisum, iisqsum)*0.5;
- cout << "check variance: " << vr << endl;
- // Generate negative data
- generateNegativeData(frame1);
- //Split Negative Ferns into Training and Testing sets (they are already shuffled)
- //將負樣本放進 訓練和測試集
- int half = (int)nX.size()*0.5f;
- //vector::assign函數將區間[start, end)中的值賦值給當前的vector.
- //將一半的負樣本集 作爲 測試集
- nXT.assign(nX.begin()+half, nX.end()); //nXT; //negative data to Test
- //然後將剩下的一半作爲訓練集
- nX.resize(half);
- ///Split Negative NN Examples into Training and Testing sets
- half = (int)nEx.size()*0.5f;
- nExT.assign(nEx.begin()+half,nEx.end());
- nEx.resize(half);
- //Merge Negative Data with Positive Data and shuffle it
- //將負樣本和正樣本合併,然後打亂
- vector<pair<vector<int>,int> > ferns_data(nX.size()+pX.size());
- vector<int> idx = index_shuffle(0, ferns_data.size());
- int a=0;
- for (int i=0;i<pX.size();i++){
- ferns_data[idx[a]] = pX[i];
- a++;
- }
- for (int i=0;i<nX.size();i++){
- ferns_data[idx[a]] = nX[i];
- a++;
- }
- //Data already have been shuffled, just putting it in the same vector
- vector<cv::Mat> nn_data(nEx.size()+1);
- nn_data[0] = pEx;
- for (int i=0;i<nEx.size();i++){
- nn_data[i+1]= nEx[i];
- }
- ///Training
- //訓練 集合分類器(森林) 和 最近鄰分類器
- classifier.trainF(ferns_data, 2); //bootstrap = 2
- classifier.trainNN(nn_data);
- ///Threshold Evaluation on testing sets
- //用樣本在上面得到的 集合分類器(森林) 和 最近鄰分類器 中分類,評價得到最好的閾值
- classifier.evaluateTh(nXT, nExT);
- }
- /* Generate Positive data
- * Inputs:
- * - good_boxes (bbP)
- * - best_box (bbP0)
- * - frame (im0)
- * Outputs:
- * - Positive fern features (pX)
- * - Positive NN examples (pEx)
- */
- void TLD::generatePositiveData(const Mat& frame, int num_warps){
- /*
- CvScalar定義可存放1—4個數值的數值,常用來存儲像素,其結構體如下:
- typedef struct CvScalar
- {
- double val[4];
- }CvScalar;
- 如果使用的圖像是1通道的,則s.val[0]中存儲數據
- 如果使用的圖像是3通道的,則s.val[0],s.val[1],s.val[2]中存儲數據
- */
- Scalar mean; //均值
- Scalar stdev; //標準差
- //此函數將frame圖像best_box區域的圖像片歸一化爲均值爲0的15*15大小的patch,存在pEx正樣本中
- getPattern(frame(best_box), pEx, mean, stdev);
- //Get Fern features on warped patches
- Mat img;
- Mat warped;
- //void GaussianBlur(InputArray src, OutputArray dst, Size ksize, double sigmaX, double sigmaY=0,
- // int borderType=BORDER_DEFAULT ) ;
- //功能:對輸入的圖像src進行高斯濾波後用dst輸出。
- //src和dst當然分別是輸入圖像和輸出圖像。Ksize爲高斯濾波器模板大小,sigmaX和sigmaY分別爲高斯濾
- //波在橫向和豎向的濾波係數。borderType爲邊緣擴展點插值類型。
- //用9*9高斯核模糊輸入幀,存入img 去噪??
- GaussianBlur(frame, img, Size(9,9), 1.5);
- //在img圖像中截取bbhull信息(bbhull是包含了位置和大小的矩形框)的圖像賦給warped
- //例如需要提取圖像A的某個ROI(感興趣區域,由矩形框)的話,用Mat類的B=img(ROI)即可提取
- warped = img(bbhull);
- RNG& rng = theRNG(); //生成一個隨機數
- Point2f pt(bbhull.x + (bbhull.width-1)*0.5f, bbhull.y+(bbhull.height-1)*0.5f); //取矩形框中心的座標 int i(2)
- //nstructs樹木(由一個特徵組構建,每組特徵代表圖像塊的不同視圖表示)的個數
- //fern[nstructs] nstructs棵樹的森林的數組??
- vector<int> fern(classifier.getNumStructs());
- pX.clear();
- Mat patch;
- //pX爲處理後的RectBox最大邊界處理後的像素信息,pEx最近鄰的RectBox的Pattern,bbP0爲最近鄰的RectBox。
- if (pX.capacity() < num_warps * good_boxes.size())
- pX.reserve(num_warps * good_boxes.size()); //pX正樣本個數爲 仿射變換個數 * good_box的個數,故需分配至少這麼大的空間
- int idx;
- for (int i=0; i< num_warps; i++){
- if (i>0)
- //PatchGenerator類用來對圖像區域進行仿射變換,先RNG一個隨機因子,再調用()運算符產生一個變換後的正樣本。
- generator(frame, pt, warped, bbhull.size(), rng);
- for (int b=0; b < good_boxes.size(); b++){
- idx = good_boxes[b]; //good_boxes容器保存的是 grid 的索引
- patch = img(grid[idx]); //把img的 grid[idx] 區域(也就是bounding box重疊度高的)這一塊圖像片提取出來
- //getFeatures函數得到輸入的patch的用於樹的節點,也就是特徵組的特徵fern(13位的二進制代碼)
- classifier.getFeatures(patch, grid[idx].sidx, fern); //grid[idx].sidx 對應的尺度索引
- pX.push_back(make_pair(fern, 1)); //positive ferns <features, labels=1> 正樣本
- }
- }
- printf("Positive examples generated: ferns:%d NN:1\n",(int)pX.size());
- }
- //先對最接近box的RectBox區域得到其patch ,然後將像素信息轉換爲Pattern,
- //具體的說就是歸一化RectBox對應的patch的size(放縮至patch_size = 15*15),將2維的矩陣變成一維的向量信息,
- //然後將向量信息均值設爲0,調整爲zero mean and unit variance(ZMUV)
- //Output: resized Zero-Mean patch
- void TLD::getPattern(const Mat& img, Mat& pattern, Scalar& mean, Scalar& stdev){
- //將img放縮至patch_size = 15*15,存到pattern中
- resize(img, pattern, Size(patch_size, patch_size));
- //計算pattern這個矩陣的均值和標準差
- //Computes a mean value and a standard deviation of matrix elements.
- meanStdDev(pattern, mean, stdev);
- pattern.convertTo(pattern, CV_32F);
- //opencv中Mat的運算符有重載, Mat可以 + Mat; + Scalar; + int / float / double 都可以
- //將矩陣所有元素減去其均值,也就是把patch的均值設爲零
- pattern = pattern - mean.val[0];
- }
- /* Inputs:
- * - Image
- * - bad_boxes (Boxes far from the bounding box)
- * - variance (pEx variance)
- * Outputs
- * - Negative fern features (nX)
- * - Negative NN examples (nEx)
- */
- void TLD::generateNegativeData(const Mat& frame){
- //由於之前重疊度小於0.2的,都歸入 bad_boxes了,所以數量挺多,下面的函數用於打亂順序,也就是爲了
- //後面隨機選擇bad_boxes
- random_shuffle(bad_boxes.begin(), bad_boxes.end());//Random shuffle bad_boxes indexes
- int idx;
- //Get Fern Features of the boxes with big variance (calculated using integral images)
- int a=0;
- //int num = std::min((int)bad_boxes.size(),(int)bad_patches*100); //limits the size of bad_boxes to try
- printf("negative data generation started.\n");
- vector<int> fern(classifier.getNumStructs());
- nX.reserve(bad_boxes.size());
- Mat patch;
- for (int j=0;j<bad_boxes.size();j++){ //把方差較大的bad_boxes加入負樣本
- idx = bad_boxes[j];
- if (getVar(grid[idx],iisum,iisqsum)<var*0.5f)
- continue;
- patch = frame(grid[idx]);
- classifier.getFeatures(patch, grid[idx].sidx, fern);
- nX.push_back(make_pair(fern, 0)); //得到負樣本
- a++;
- }
- printf("Negative examples generated: ferns: %d ", a);
- //random_shuffle(bad_boxes.begin(),bad_boxes.begin()+bad_patches);//Randomly selects 'bad_patches' and get the patterns for NN;
- Scalar dum1, dum2;
- //bad_patches = (int)file["num_patches"]; 在參數文件中 num_patches = 100
- nEx=vector<Mat>(bad_patches);
- for (int i=0;i<bad_patches;i++){
- idx=bad_boxes[i];
- patch = frame(grid[idx]);
- //具體的說就是歸一化RectBox對應的patch的size(放縮至patch_size = 15*15)
- //由於負樣本不需要均值和方差,所以就定義dum,將其捨棄
- getPattern(patch,nEx[i],dum1,dum2);
- }
- printf("NN: %d\n",(int)nEx.size());
- }
- //該函數通過積分圖像計算輸入的box的方差
- double TLD::getVar(const BoundingBox& box, const Mat& sum, const Mat& sqsum){
- double brs = sum.at<int>(box.y+box.height, box.x+box.width);
- double bls = sum.at<int>(box.y+box.height, box.x);
- double trs = sum.at<int>(box.y,box.x + box.width);
- double tls = sum.at<int>(box.y,box.x);
- double brsq = sqsum.at<double>(box.y+box.height,box.x+box.width);
- double blsq = sqsum.at<double>(box.y+box.height,box.x);
- double trsq = sqsum.at<double>(box.y,box.x+box.width);
- double tlsq = sqsum.at<double>(box.y,box.x);
- double mean = (brs+tls-trs-bls)/((double)box.area());
- double sqmean = (brsq+tlsq-trsq-blsq)/((double)box.area());
- //方差=E(X^2)-(EX)^2 EX表示均值
- return sqmean-mean*mean;
- }
- void TLD::processFrame(const cv::Mat& img1,const cv::Mat& img2,vector<Point2f>& points1,vector<Point2f>& points2,BoundingBox& bbnext, bool& lastboxfound, bool tl, FILE* bb_file){
- vector<BoundingBox> cbb;
- vector<float> cconf;
- int confident_detections=0;
- int didx; //detection index
- ///Track 跟蹤模塊
- if(lastboxfound && tl){ //tl: train and learn
- //跟蹤
- track(img1, img2, points1, points2);
- }
- else{
- tracked = false;
- }
- ///Detect 檢測模塊
- detect(img2);
- ///Integration 綜合模塊
- //TLD只跟蹤單目標,所以綜合模塊綜合跟蹤器跟蹤到的單個目標和檢測器檢測到的多個目標,然後只輸出保守相似度最大的一個目標
- if (tracked){
- bbnext=tbb;
- lastconf=tconf; //表示相關相似度的閾值
- lastvalid=tvalid; //表示保守相似度的閾值
- printf("Tracked\n");
- if(detected){ // if Detected
- //通過 重疊度 對檢測器檢測到的目標bounding box進行聚類,每個類其重疊度小於0.5
- clusterConf(dbb, dconf, cbb, cconf); // cluster detections
- printf("Found %d clusters\n",(int)cbb.size());
- for (int i=0;i<cbb.size();i++){
- //找到與跟蹤器跟蹤到的box距離比較遠的類(檢測器檢測到的box),而且它的相關相似度比跟蹤器的要大
- if (bbOverlap(tbb, cbb[i])<0.5 && cconf[i]>tconf){ // Get index of a clusters that is far from tracker and are more confident than the tracker
- confident_detections++; //記錄滿足上述條件,也就是可信度比較高的目標box的個數
- didx=i; //detection index
- }
- }
- //如果只有一個滿足上述條件的box,那麼就用這個目標box來重新初始化跟蹤器(也就是用檢測器的結果去糾正跟蹤器)
- if (confident_detections==1){ //if there is ONE such a cluster, re-initialize the tracker
- printf("Found a better match..reinitializing tracking\n");
- bbnext=cbb[didx];
- lastconf=cconf[didx];
- lastvalid=false;
- }
- else {
- printf("%d confident cluster was found\n", confident_detections);
- int cx=0,cy=0,cw=0,ch=0;
- int close_detections=0;
- for (int i=0;i<dbb.size();i++){
- //找到檢測器檢測到的box與跟蹤器預測到的box距離很近(重疊度大於0.7)的box,對其座標和大小進行累加
- if(bbOverlap(tbb,dbb[i])>0.7){ // Get mean of close detections
- cx += dbb[i].x;
- cy +=dbb[i].y;
- cw += dbb[i].width;
- ch += dbb[i].height;
- close_detections++; //記錄最近鄰box的個數
- printf("weighted detection: %d %d %d %d\n",dbb[i].x,dbb[i].y,dbb[i].width,dbb[i].height);
- }
- }
- if (close_detections>0){
- //對與跟蹤器預測到的box距離很近的box 和 跟蹤器本身預測到的box 進行座標與大小的平均作爲最終的
- //目標bounding box,但是跟蹤器的權值較大
- bbnext.x = cvRound((float)(10*tbb.x+cx)/(float)(10+close_detections)); // weighted average trackers trajectory with the close detections
- bbnext.y = cvRound((float)(10*tbb.y+cy)/(float)(10+close_detections));
- bbnext.width = cvRound((float)(10*tbb.width+cw)/(float)(10+close_detections));
- bbnext.height = cvRound((float)(10*tbb.height+ch)/(float)(10+close_detections));
- printf("Tracker bb: %d %d %d %d\n",tbb.x,tbb.y,tbb.width,tbb.height);
- printf("Average bb: %d %d %d %d\n",bbnext.x,bbnext.y,bbnext.width,bbnext.height);
- printf("Weighting %d close detection(s) with tracker..\n",close_detections);
- }
- else{
- printf("%d close detections were found\n",close_detections);
- }
- }
- }
- }
- else{ // If NOT tracking
- printf("Not tracking..\n");
- lastboxfound = false;
- lastvalid = false;
- //如果跟蹤器沒有跟蹤到目標,但是檢測器檢測到了一些可能的目標box,那麼同樣對其進行聚類,但只是簡單的
- //將聚類的cbb[0]作爲新的跟蹤目標box(不比較相似度了??還是裏面已經排好序了??),重新初始化跟蹤器
- if(detected){ // and detector is defined
- clusterConf(dbb,dconf,cbb,cconf); // cluster detections
- printf("Found %d clusters\n",(int)cbb.size());
- if (cconf.size()==1){
- bbnext=cbb[0];
- lastconf=cconf[0];
- printf("Confident detection..reinitializing tracker\n");
- lastboxfound = true;
- }
- }
- }
- lastbox=bbnext;
- if (lastboxfound)
- fprintf(bb_file,"%d,%d,%d,%d,%f\n",lastbox.x,lastbox.y,lastbox.br().x,lastbox.br().y,lastconf);
- else
- fprintf(bb_file,"NaN,NaN,NaN,NaN,NaN\n");
- ///learn 學習模塊
- if (lastvalid && tl)
- learn(img2);
- }
- /*Inputs:
- * -current frame(img2), last frame(img1), last Bbox(bbox_f[0]).
- *Outputs:
- *- Confidence(tconf), Predicted bounding box(tbb), Validity(tvalid), points2 (for display purposes only)
- */
- void TLD::track(const Mat& img1, const Mat& img2, vector<Point2f>& points1, vector<Point2f>& points2){
- //Generate points
- //網格均勻撒點(均勻採樣),在lastbox中共產生最多10*10=100個特徵點,存於points1
- bbPoints(points1, lastbox);
- if (points1.size()<1){
- printf("BB= %d %d %d %d, Points not generated\n",lastbox.x,lastbox.y,lastbox.width,lastbox.height);
- tvalid=false;
- tracked=false;
- return;
- }
- vector<Point2f> points = points1;
- //Frame-to-frame tracking with forward-backward error cheking
- //trackf2f函數完成:跟蹤、計算FB error和匹配相似度sim,然後篩選出 FB_error[i] <= median(FB_error) 和
- //sim_error[i] > median(sim_error) 的特徵點(跟蹤結果不好的特徵點),剩下的是不到50%的特徵點
- tracked = tracker.trackf2f(img1, img2, points, points2);
- if (tracked){
- //Bounding box prediction
- //利用剩下的這不到一半的跟蹤點輸入來預測bounding box在當前幀的位置和大小 tbb
- bbPredict(points, points2, lastbox, tbb);
- //跟蹤失敗檢測:如果FB error的中值大於10個像素(經驗值),或者預測到的當前box的位置移出圖像,則
- //認爲跟蹤錯誤,此時不返回bounding box;Rect::br()返回的是右下角的座標
- //getFB()返回的是FB error的中值
- if (tracker.getFB()>10 || tbb.x>img2.cols || tbb.y>img2.rows || tbb.br().x < 1 || tbb.br().y <1){
- tvalid =false; //too unstable prediction or bounding box out of image
- tracked = false;
- printf("Too unstable predictions FB error=%f\n", tracker.getFB());
- return;
- }
- //Estimate Confidence and Validity
- //評估跟蹤確信度和有效性
- Mat pattern;
- Scalar mean, stdev;
- BoundingBox bb;
- bb.x = max(tbb.x,0);
- bb.y = max(tbb.y,0);
- bb.width = min(min(img2.cols-tbb.x,tbb.width), min(tbb.width, tbb.br().x));
- bb.height = min(min(img2.rows-tbb.y,tbb.height),min(tbb.height,tbb.br().y));
- //歸一化img2(bb)對應的patch的size(放縮至patch_size = 15*15),存入pattern
- getPattern(img2(bb),pattern,mean,stdev);
- vector<int> isin;
- float dummy;
- //計算圖像片pattern到在線模型M的保守相似度
- classifier.NNConf(pattern,isin,dummy,tconf); //Conservative Similarity
- tvalid = lastvalid;
- //保守相似度大於閾值,則評估跟蹤有效
- if (tconf>classifier.thr_nn_valid){
- tvalid =true;
- }
- }
- else
- printf("No points tracked\n");
- }
- //網格均勻撒點,box共10*10=100個特徵點
- void TLD::bbPoints(vector<cv::Point2f>& points, const BoundingBox& bb){
- int max_pts=10;
- int margin_h=0; //採樣邊界
- int margin_v=0;
- //網格均勻撒點
- int stepx = ceil((bb.width-2*margin_h)/max_pts); //ceil返回大於或者等於指定表達式的最小整數
- int stepy = ceil((bb.height-2*margin_v)/max_pts);
- //網格均勻撒點,box共10*10=100個特徵點
- for (int y=bb.y+margin_v; y<bb.y+bb.height-margin_v; y+=stepy){
- for (int x=bb.x+margin_h;x<bb.x+bb.width-margin_h;x+=stepx){
- points.push_back(Point2f(x,y));
- }
- }
- }
- //利用剩下的這不到一半的跟蹤點輸入來預測bounding box在當前幀的位置和大小
- void TLD::bbPredict(const vector<cv::Point2f>& points1,const vector<cv::Point2f>& points2,
- const BoundingBox& bb1,BoundingBox& bb2) {
- int npoints = (int)points1.size();
- vector<float> xoff(npoints); //位移
- vector<float> yoff(npoints);
- printf("tracked points : %d\n", npoints);
- for (int i=0;i<npoints;i++){ //計算每個特徵點在兩幀之間的位移
- xoff[i]=points2[i].x - points1[i].x;
- yoff[i]=points2[i].y - points1[i].y;
- }
- float dx = median(xoff); //計算位移的中值
- float dy = median(yoff);
- float s;
- //計算bounding box尺度scale的變化:通過計算 當前特徵點相互間的距離 與 先前(上一幀)特徵點相互間的距離 的
- //比值,以比值的中值作爲尺度的變化因子
- if (npoints>1){
- vector<float> d;
- d.reserve(npoints*(npoints-1)/2); //等差數列求和:1+2+...+(npoints-1)
- for (int i=0;i<npoints;i++){
- for (int j=i+1;j<npoints;j++){
- //計算 當前特徵點相互間的距離 與 先前(上一幀)特徵點相互間的距離 的比值(位移用絕對值)
- d.push_back(norm(points2[i]-points2[j])/norm(points1[i]-points1[j]));
- }
- }
- s = median(d);
- }
- else {
- s = 1.0;
- }
- float s1 = 0.5*(s-1)*bb1.width;
- float s2 = 0.5*(s-1)*bb1.height;
- printf("s= %f s1= %f s2= %f \n", s, s1, s2);
- //得到當前bounding box的位置與大小信息
- //當前box的x座標 = 前一幀box的x座標 + 全部特徵點位移的中值(可理解爲box移動近似的位移) - 當前box寬的一半
- bb2.x = round( bb1.x + dx - s1);
- bb2.y = round( bb1.y + dy -s2);
- bb2.width = round(bb1.width*s);
- bb2.height = round(bb1.height*s);
- printf("predicted bb: %d %d %d %d\n",bb2.x,bb2.y,bb2.br().x,bb2.br().y);
- }
- void TLD::detect(const cv::Mat& frame){
- //cleaning
- dbb.clear();
- dconf.clear();
- dt.bb.clear();
- //GetTickCount返回從操作系統啓動到現在所經過的時間
- double t = (double)getTickCount();
- Mat img(frame.rows, frame.cols, CV_8U);
- integral(frame,iisum,iisqsum); //計算frame的積分圖
- GaussianBlur(frame,img,Size(9,9),1.5); //高斯模糊,去噪?
- int numtrees = classifier.getNumStructs();
- float fern_th = classifier.getFernTh(); //getFernTh()返回thr_fern; 集合分類器的分類閾值
- vector <int> ferns(10);
- float conf;
- int a=0;
- Mat patch;
- //級聯分類器模塊一:方差檢測模塊,利用積分圖計算每個待檢測窗口的方差,方差大於var閾值(目標patch方差的50%)的,
- //則認爲其含有前景目標
- for (int i=0; i<grid.size(); i++){ //FIXME: BottleNeck 瓶頸
- if (getVar(grid[i],iisum,iisqsum) >= var){ //計算每一個掃描窗口的方差
- a++;
- //級聯分類器模塊二:集合分類器檢測模塊
- patch = img(grid[i]);
- classifier.getFeatures(patch,grid[i].sidx,ferns); //得到該patch特徵(13位的二進制代碼)
- conf = classifier.measure_forest(ferns); //計算該特徵值對應的後驗概率累加值
- tmp.conf[i]=conf; //Detector data中定義TempStruct tmp;
- tmp.patt[i]=ferns;
- //如果集合分類器的後驗概率的平均值大於閾值fern_th(由訓練得到),就認爲含有前景目標
- if (conf > numtrees*fern_th){
- dt.bb.push_back(i); //將通過以上兩個檢測模塊的掃描窗口記錄在detect structure中
- }
- }
- else
- tmp.conf[i]=0.0;
- }
- int detections = dt.bb.size();
- printf("%d Bounding boxes passed the variance filter\n",a);
- printf("%d Initial detection from Fern Classifier\n", detections);
- //如果通過以上兩個檢測模塊的掃描窗口數大於100個,則只取後驗概率大的前100個
- if (detections>100){ //CComparator(tmp.conf)指定比較方式???
- nth_element(dt.bb.begin(), dt.bb.begin()+100, dt.bb.end(), CComparator(tmp.conf));
- dt.bb.resize(100);
- detections=100;
- }
- // for (int i=0;i<detections;i++){
- // drawBox(img,grid[dt.bb[i]]);
- // }
- // imshow("detections",img);
- if (detections==0){
- detected=false;
- return;
- }
- printf("Fern detector made %d detections ",detections);
- //兩次使用getTickCount(),然後再除以getTickFrequency(),計算出來的是以秒s爲單位的時間(opencv 2.0 以前是ms)
- t=(double)getTickCount()-t;
- printf("in %gms\n", t*1000/getTickFrequency()); //打印以上代碼運行使用的毫秒數
- // Initialize detection structure
- dt.patt = vector<vector<int> >(detections,vector<int>(10,0)); // Corresponding codes of the Ensemble Classifier
- dt.conf1 = vector<float>(detections); // Relative Similarity (for final nearest neighbour classifier)
- dt.conf2 =vector<float>(detections); // Conservative Similarity (for integration with tracker)
- dt.isin = vector<vector<int> >(detections,vector<int>(3,-1)); // Detected (isin=1) or rejected (isin=0) by nearest neighbour classifier
- dt.patch = vector<Mat>(detections,Mat(patch_size,patch_size,CV_32F));// Corresponding patches
- int idx;
- Scalar mean, stdev;
- float nn_th = classifier.getNNTh();
- //級聯分類器模塊三:最近鄰分類器檢測模塊
- for (int i=0;i<detections;i++){ // for every remaining detection
- idx=dt.bb[i]; // Get the detected bounding box index
- patch = frame(grid[idx]);
- getPattern(patch,dt.patch[i],mean,stdev); // Get pattern within bounding box
- //計算圖像片pattern到在線模型M的相關相似度和保守相似度
- classifier.NNConf(dt.patch[i],dt.isin[i],dt.conf1[i],dt.conf2[i]); // Evaluate nearest neighbour classifier
- dt.patt[i]=tmp.patt[idx];
- //printf("Testing feature %d, conf:%f isin:(%d|%d|%d)\n",i,dt.conf1[i],dt.isin[i][0],dt.isin[i][1],dt.isin[i][2]);
- //相關相似度大於閾值,則認爲含有前景目標
- if (dt.conf1[i]>nn_th){ // idx = dt.conf1 > tld.model.thr_nn; % get all indexes that made it through the nearest neighbour
- dbb.push_back(grid[idx]); // BB = dt.bb(:,idx); % bounding boxes
- dconf.push_back(dt.conf2[i]); // Conf = dt.conf2(:,idx); % conservative confidences
- }
- }
- //打印檢測到的可能存在目標的掃描窗口數(可以通過三個級聯檢測器的)
- if (dbb.size()>0){
- printf("Found %d NN matches\n",(int)dbb.size());
- detected=true;
- }
- else{
- printf("No NN matches found.\n");
- detected=false;
- }
- }
- //作者已經用python腳本../datasets/evaluate_vis.py來完成算法評估功能,具體見README
- void TLD::evaluate(){
- }
- void TLD::learn(const Mat& img){
- printf("[Learning] ");
- ///Check consistency
- //檢測一致性
- BoundingBox bb;
- bb.x = max(lastbox.x,0);
- bb.y = max(lastbox.y,0);
- bb.width = min(min(img.cols-lastbox.x,lastbox.width),min(lastbox.width,lastbox.br().x));
- bb.height = min(min(img.rows-lastbox.y,lastbox.height),min(lastbox.height,lastbox.br().y));
- Scalar mean, stdev;
- Mat pattern;
- //歸一化img(bb)對應的patch的size(放縮至patch_size = 15*15),存入pattern
- getPattern(img(bb), pattern, mean, stdev);
- vector<int> isin;
- float dummy, conf;
- //計算輸入圖像片(跟蹤器的目標box)與在線模型之間的相關相似度conf
- classifier.NNConf(pattern,isin,conf,dummy);
- if (conf<0.5) { //如果相似度太小了,就不訓練
- printf("Fast change..not training\n");
- lastvalid =false;
- return;
- }
- if (pow(stdev.val[0], 2)< var){ //如果方差太小了,也不訓練
- printf("Low variance..not training\n");
- lastvalid=false;
- return;
- }
- if(isin[2]==1){ //如果被被識別爲負樣本,也不訓練
- printf("Patch in negative data..not traing");
- lastvalid=false;
- return;
- }
- /// Data generation 樣本產生
- for (int i=0;i<grid.size();i++){ //計算所有的掃描窗口與目標box的重疊度
- grid[i].overlap = bbOverlap(lastbox, grid[i]);
- }
- //集合分類器
- vector<pair<vector<int>,int> > fern_examples;
- good_boxes.clear();
- bad_boxes.clear();
- //此函數根據傳入的lastbox,在整幀圖像中的全部窗口中尋找與該lastbox距離最小(即最相似,
- //重疊度最大)的num_closest_update個窗口,然後把這些窗口 歸入good_boxes容器(只是把網格數組的索引存入)
- //同時,把重疊度小於0.2的,歸入 bad_boxes 容器
- getOverlappingBoxes(lastbox, num_closest_update);
- if (good_boxes.size()>0)
- generatePositiveData(img, num_warps_update); //用仿射模型產生正樣本(類似於第一幀的方法,但只產生10*10=100個)
- else{
- lastvalid = false;
- printf("No good boxes..Not training");
- return;
- }
- fern_examples.reserve(pX.size() + bad_boxes.size());
- fern_examples.assign(pX.begin(), pX.end());
- int idx;
- for (int i=0;i<bad_boxes.size();i++){
- idx=bad_boxes[i];
- if (tmp.conf[idx]>=1){ //加入負樣本,相似度大於1??相似度不是出於0和1之間嗎?
- fern_examples.push_back(make_pair(tmp.patt[idx],0));
- }
- }
- //最近鄰分類器
- vector<Mat> nn_examples;
- nn_examples.reserve(dt.bb.size()+1);
- nn_examples.push_back(pEx);
- for (int i=0;i<dt.bb.size();i++){
- idx = dt.bb[i];
- if (bbOverlap(lastbox,grid[idx]) < bad_overlap)
- nn_examples.push_back(dt.patch[i]);
- }
- /// Classifiers update 分類器訓練
- classifier.trainF(fern_examples,2);
- classifier.trainNN(nn_examples);
- classifier.show(); //把正樣本庫(在線模型)包含的所有正樣本顯示在窗口上
- }
- // Builds the detector's sliding-window grid.
- // For every entry of SCALES the target box is resized, and a window of that
- // size is slid over img. Each window is appended to `grid` together with its
- // overlap against `box` and the index of its scale; each accepted window size
- // is appended to `scales` so it does not have to be recomputed while scanning.
- //   img: image whose dimensions bound the windows.
- //   box: target bounding box the overlaps are measured against.
- void TLD::buildGrid(const cv::Mat& img, const cv::Rect& box){
-   const float SHIFT = 0.1; // window step is 10% of the window's shorter side
-   // Scale factors form a geometric series with ratio 1.2 (0.16151*1.2=0.19381).
-   const float SCALES[] = {0.16151,0.19381,0.23257,0.27908,0.33490,0.40188,0.48225,
-                           0.57870,0.69444,0.83333,1,1.20000,1.44000,1.72800,
-                           2.07360,2.48832,2.98598,3.58318,4.29982,5.15978,6.19174};
-   const int num_scales = sizeof(SCALES)/sizeof(SCALES[0]);
-   const BoundingBox target(box); // built once instead of once per window
-   BoundingBox bbox;
-   Size scale;
-   int sc=0; // index in `scales` of the scale currently being generated
-   for (int s=0; s < num_scales; s++){
-     const int width = round(box.width*SCALES[s]);
-     const int height = round(box.height*SCALES[s]);
-     const int min_bb_side = min(height,width); // shorter side of the window
-     // Skip scales whose window is smaller than min_win or does not fit in the image.
-     if (min_bb_side < min_win || width > img.cols || height > img.rows)
-       continue;
-     scale.width = width;
-     scale.height = height;
-     scales.push_back(scale);
-     // Never step by less than one pixel: for windows whose shorter side is
-     // below 5 the rounded step is 0 and the scan loops would not terminate.
-     int step = round(SHIFT*min_bb_side);
-     if (step < 1)
-       step = 1;
-     for (int y=1; y<img.rows-height; y+=step){
-       for (int x=1; x<img.cols-width; x+=step){
-         bbox.x = x;
-         bbox.y = y;
-         bbox.width = width;
-         bbox.height = height;
-         // Overlap with the target box is stored with the window.
-         bbox.overlap = bbOverlap(bbox, target);
-         bbox.sidx = sc;
-         grid.push_back(bbox);
-       }
-     }
-     sc++;
-   }
- }
- // Returns the overlap of two bounding boxes, defined as the area of their
- // intersection divided by the area of their union.
- // Returns 0 when the boxes are disjoint, and also when the union has no
- // positive area (e.g. two zero-size boxes), where the ratio would otherwise
- // be 0/0.
- float TLD::bbOverlap(const BoundingBox& box1, const BoundingBox& box2){
-   // Boxes separated along either axis cannot intersect.
-   if (box1.x > box2.x + box2.width) { return 0.0; }
-   if (box1.y > box2.y + box2.height) { return 0.0; }
-   if (box1.x + box1.width < box2.x) { return 0.0; }
-   if (box1.y + box1.height < box2.y) { return 0.0; }
-   // Width and height of the intersection rectangle.
-   const float colInt = min(box1.x + box1.width, box2.x + box2.width) - max(box1.x, box2.x);
-   const float rowInt = min(box1.y + box1.height, box2.y + box2.height) - max(box1.y, box2.y);
-   const float intersection = colInt * rowInt;
-   const float area1 = box1.width * box1.height;
-   const float area2 = box2.width * box2.height;
-   const float unionArea = area1 + area2 - intersection;
-   // Degenerate boxes: avoid dividing by zero and returning NaN.
-   if (unionArea <= 0) { return 0.0; }
-   return intersection / unionArea;
- }
- // Sorts the grid windows into good and bad candidates using the overlap
- // stored in each grid entry (box1 itself is not referenced in the body):
- //   - the window with the largest overlap is copied to best_box;
- //   - indices of windows with overlap above 0.6 are appended to good_boxes;
- //   - indices of windows with overlap below bad_overlap are appended to bad_boxes.
- // If more than num_closest good windows were collected, only the num_closest
- // with the highest overlap are kept. Finally the hull of good_boxes is
- // recomputed through getBBHull().
- void TLD::getOverlappingBoxes(const cv::Rect& box1,int num_closest){
-   float best_so_far = 0;
-   const int total = grid.size();
-   for (int k=0; k<total; ++k){
-     const float ov = grid[k].overlap;
-     if (ov > best_so_far){
-       best_so_far = ov;
-       best_box = grid[k];
-     }
-     if (ov > 0.6)
-       good_boxes.push_back(k);
-     else if (ov < bad_overlap)
-       bad_boxes.push_back(k);
-   }
-   // nth_element places the num_closest entries with the largest overlap in
-   // front of position num_closest; the rest is then cut off.
-   if (good_boxes.size()>num_closest){
-     std::nth_element(good_boxes.begin(), good_boxes.begin() + num_closest, good_boxes.end(), OComparator(grid));
-     good_boxes.resize(num_closest);
-   }
-   getBBHull();
- }
- // Computes bbhull, the smallest axis-aligned rectangle that encloses every
- // grid window referenced by good_boxes.
- // When good_boxes is empty, bbhull is set to an empty rectangle at the origin
- // instead of the meaningless INT_MAX / negative-size values the min/max
- // sentinels would otherwise produce.
- void TLD::getBBHull(){
-   if (good_boxes.empty()){
-     bbhull.x = 0;
-     bbhull.y = 0;
-     bbhull.width = 0;
-     bbhull.height = 0;
-     return;
-   }
-   int x1=INT_MAX, x2=0; // left / right extent found so far
-   int y1=INT_MAX, y2=0; // top / bottom extent found so far
-   int idx;
-   for (int i=0;i<good_boxes.size();i++){
-     idx = good_boxes[i];
-     x1 = min(grid[idx].x,x1);
-     y1 = min(grid[idx].y,y1);
-     x2 = max(grid[idx].x + grid[idx].width,x2);
-     y2 = max(grid[idx].y + grid[idx].height,y2);
-   }
-   bbhull.x = x1;
-   bbhull.y = y1;
-   bbhull.width = x2-x1;
-   bbhull.height = y2-y1;
- }
- // Equivalence predicate used when clustering boxes: returns false when the
- // overlap of b1 and b2 is below 0.5, true otherwise.
- bool bbcomp(const BoundingBox& b1,const BoundingBox& b2){
-   // bbOverlap() is a member function but works only on its two arguments, so
-   // one shared TLD instance serves every call; building and destroying a
-   // whole TLD object for each comparison is unnecessary work.
-   static TLD overlap_helper;
-   return !(overlap_helper.bbOverlap(b1,b2)<0.5);
- }
- // Groups bounding boxes by agglomerative clustering.
- // The distance between two boxes is 1 - bbOverlap(). Starting with one
- // cluster per box, the two closest boxes that belong to different clusters
- // have their clusters merged, until the smallest remaining distance is
- // above 0.5 or everything has been merged.
- //   dbb:     boxes to cluster.
- //   indexes: output; indexes[i] receives the 0-based cluster label of dbb[i].
- //            It is grown to dbb.size() entries if it is shorter.
- // Returns the number of clusters (0 when dbb is empty).
- int TLD::clusterBB(const vector<BoundingBox>& dbb,vector<int>& indexes){
-   const int c = dbb.size();
-   if (c == 0)
-     return 0;
-   if (static_cast<int>(indexes.size()) < c)
-     indexes.resize(c, 0);
-   //1. Build proximity matrix (only off-diagonal entries are written and read)
-   Mat D(c,c,CV_32F);
-   for (int i=0;i<c;i++){
-     for (int j=i+1;j<c;j++){
-       const float d = 1-bbOverlap(dbb[i],dbb[j]);
-       D.at<float>(i,j) = d;
-       D.at<float>(j,i) = d;
-     }
-   }
-   //2. Initialize disjoint clustering: every box starts in its own cluster
-   vector<int> belongs(c);
-   for (int i=0;i<c;i++){
-     belongs[i]=i;
-   }
-   int m=c; // next unused cluster label; merged clusters are labelled c, c+1, ...
-   for (int it=0;it<c-1;it++){
-     //3. Find the closest pair of boxes lying in different clusters
-     float min_d = 1;
-     int node_a = -1, node_b = -1;
-     for (int i=0;i<D.rows;i++){
-       for (int j=i+1;j<D.cols;j++){
-         if (D.at<float>(i,j)<min_d && belongs[i]!=belongs[j]){
-           min_d = D.at<float>(i,j);
-           node_a = i;
-           node_b = j;
-         }
-       }
-     }
-     // Nothing left that is close enough to merge.
-     if (min_d>0.5)
-       break;
-     //4. Merge the two clusters under a fresh label. The old labels are copied
-     //   first because belongs[node_a] and belongs[node_b] are themselves
-     //   overwritten inside the loop.
-     const int label_a = belongs[node_a];
-     const int label_b = belongs[node_b];
-     for (int k=0;k<c;k++){
-       if (belongs[k]==label_a || belongs[k]==label_b)
-         belongs[k]=m;
-     }
-     m++;
-   }
-   //5. Map the labels still in use (all lie in [0, m)) onto 0..count-1
-   int max_idx = 0;
-   for (int label=0;label<m;label++){
-     bool visited = false;
-     for (int j=0;j<c;j++){
-       if (belongs[j]==label){
-         indexes[j]=max_idx;
-         visited = true;
-       }
-     }
-     if (visited)
-       max_idx++;
-   }
-   return max_idx;
- }
- // Clusters the detector's bounding boxes and reduces every cluster to one
- // representative box and one confidence value.
- //   dbb:   detected bounding boxes.
- //   dconf: confidence of each detected box (indexed like dbb).
- //   cbb:   output; one box per cluster whose position and size are the
- //          rounded averages over the cluster's members.
- //   cconf: output; mean confidence of each cluster's members.
- // The cluster label of every box is also printed to stdout, except when the
- // function returns early (zero or one detection).
- void TLD::clusterConf(const vector<BoundingBox>& dbb,const vector<float>& dconf,vector<BoundingBox>& cbb,vector<float>& cconf){
-   int numbb =dbb.size();
-   // No detections: report no clusters rather than clustering an empty set.
-   if (numbb == 0){
-     cbb.clear();
-     cconf.clear();
-     return;
-   }
-   vector<int> T; // T[j] is the cluster label of dbb[j]
-   float space_thr = 0.5;
-   int c=1; // number of clusters
-   switch (numbb){
-   case 1:
-     // A single detection is its own cluster.
-     cbb=vector<BoundingBox>(1,dbb[0]);
-     cconf=vector<float>(1,dconf[0]);
-     return;
-   case 2:
-     T =vector<int>(2,0);
-     // Two detections form separate clusters when 1 - overlap exceeds space_thr.
-     if (1 - bbOverlap(dbb[0],dbb[1]) > space_thr){
-       T[1]=1;
-       c=2;
-     }
-     break;
-   default:
-     T = vector<int>(numbb, 0);
-     // partition() groups the boxes using bbcomp (true when the overlap of two
-     // boxes is not below 0.5) as the "same cluster" predicate; as used here it
-     // writes one label per box into T and returns the number of clusters.
-     c = partition(dbb, T, (*bbcomp));
-     //c = clusterBB(dbb,T);
-     break;
-   }
-   cconf=vector<float>(c);
-   cbb=vector<BoundingBox>(c);
-   printf("Cluster indexes: ");
-   BoundingBox bx;
-   for (int i=0;i<c;i++){ // one pass per cluster
-     float cnf=0;
-     int N=0,mx=0,my=0,mw=0,mh=0;
-     for (int j=0;j<T.size();j++){
-       if (T[j]==i){ // accumulate the members of cluster i
-         printf("%d ",i);
-         cnf=cnf+dconf[j];
-         mx=mx+dbb[j].x;
-         my=my+dbb[j].y;
-         mw=mw+dbb[j].width;
-         mh=mh+dbb[j].height;
-         N++;
-       }
-     }
-     if (N>0){
-       cconf[i]=cnf/N;
-       // Divide in floating point so cvRound really rounds the average; an
-       // integer division would truncate it before cvRound is applied.
-       bx.x=cvRound(static_cast<double>(mx)/N);
-       bx.y=cvRound(static_cast<double>(my)/N);
-       bx.width=cvRound(static_cast<double>(mw)/N);
-       bx.height=cvRound(static_cast<double>(mh)/N);
-       cbb[i]=bx; // representative box of cluster i
-     }
-   }
-   printf("\n");
- }