關於理解middlebury提供的立體匹配代碼後的精減

Middlebury立體匹配源碼總結

| 優化方法 | 圖像可否預處理 | 代價計算可否採用BT方式 | 可選代價計算方法 | 可否代價聚合 | 可否MinFilter優化原始代價 |
| --- | --- | --- | --- | --- | --- |
| WTA-Box | 可以 | 可以 | AD/SD | 可以,聚合尺寸可變,迭代次數1次 | 可以 |
| WTA-Binomial | 可以 | 可以 | AD/SD | 可以,聚合尺寸固定,迭代次數可變 | 不可以 |
| WTA-Diffusion | 可以 | 可以 | AD/SD | 可以,聚合尺寸固定,迭代次數可變 | 不可以 |
| WTA-membrane | 可以 | 可以 | AD/SD | 可以,聚合尺寸固定,迭代次數可變 | 不可以 |
| WTA-Bayesian | 可以 | 可以 | AD/SD | 可以,聚合尺寸固定,迭代次數可變 | 不可以 |
| WTA-LASW | 可以 | 可以 | AD/SD | 可以,聚合尺寸可變,迭代次數1次 | 不可以 |
| SO | 可以 | 可以 | AD/SD | 不可以 | 不可以 |
| DP | 可以 | 可以 | AD/SD | 不可以 | 不可以 |
| GC | 可以 | 可以 | AD/SD | 不可以 | 不可以 |
| SA | 可以 | 可以 | AD/SD | 不可以 | 不可以 |
| BPAccel | 可以 | 可以 | AD/SD | 不可以 | 不可以 |
| BPSync | 可以 | 可以 | AD/SD | 不可以 | 不可以 |



1. 主線函數

1.0 ComputeCorrespondence

void ComputeCorrespondence()
    {
        CShape sh = m_frame[frame_ref].input_image.Shape();
        //1.計算m_frame_xxx, m_disp_xxx, disp_step, disp_n, m_match_outside
        //只考慮disp_step==1的情況,所以可進行以下簡化
        //且後文件將除m_disp_n外的所有m_frame_xxx和m_disp_xxx都去掉
        m_frame_diff = 1;// frame_match - frame_ref;
        m_frame_diff_sign = 1;// (m_frame_diff > 0) ? 1 : -1;
        m_disp_num = 1;// disp_step < 1.0f ? 1 : ROUND(disp_step);
        m_disp_den = 1;// disp_step < 1.0f ? ROUND(1.0 / disp_step) : 1;
        m_disp_step_inv = 1;// m_disp_den / (float)m_disp_num;
        m_disp_step = disp_step;// m_disp_num / (float)m_disp_den;
        m_disp_n = disp_n = disp_max-disp_min + 1;// int(m_disp_step_inv * (disp_max - disp_min)) + 1;
        //disp_step = m_disp_step;
        //disp_n = m_disp_n;
        // Special value for border matches
        int worst_match = sh.nBands * ((match_fn == eSD) ? 255 * 255 : 255);
        int cutoff = (match_fn == eSD) ? match_max * match_max : abs(match_max);
        m_match_outside = __min(worst_match, cutoff);    // trim to cutoff

        //2.設置左右圖像
        m_reference.ReAllocate(sh);
        CopyPixels(m_frame[frame_ref].input_image, m_reference);
        m_matching.ReAllocate(sh);
        CopyPixels(m_frame[frame_match].input_image, m_matching);

        //3.設置標準視差圖像
        sh.nBands = 1;
        m_true_disparity.ReAllocate(sh);   // ground truth
        ScaleAndOffset(m_frame[frame_ref].truth_image, m_true_disparity, 1.0f / disp_scale, disp_min);

        //4.生成浮點視差圖像
        sh.nBands = 1;
        m_float_disparity.ReAllocate(sh);
        m_float_disparity.ClearPixels();

        //5.生成整型視差圖像
        sh.nBands = 1;
        m_disparity.ReAllocate(sh);        // winning disparities

        //6.生成代價函數圖像
        sh.nBands = m_disp_n;// number of disparity levels
        m_cost.ReAllocate(sh);             // raw matching costs (# bands = # disparities)

        //if (evaluate_only){暫且略去}
        //7.執行算法
        clock_t time0 = clock();
        PreProcess();   // see StcPreProcess.cpp
        RawCosts();     // see StcRawCosts.cpp
        Aggregate();    // see StcAggregate.cpp
        Optimize();     // see StcOptimize.cpp
        Refine();       // see StcRefine.cpp
        clock_t time1 = clock();    // record end time
        total_time = (float)(time1 - time0) / (float)CLOCKS_PER_SEC;

        //8.生成並設置深度圖像
        sh.nBands = 1;
        m_frame[frame_ref].depth_image.ReAllocate(sh);
        m_frame[frame_ref].depth_image.ClearPixels();      // set to 0 if we just reallocated
        ScaleAndOffset(m_float_disparity, m_frame[frame_ref].depth_image, disp_scale, -disp_min * disp_scale + 0.5);

        //9.
        CopyPixels(m_frame[frame_ref].input_image, m_reference);
    }
1.1 PreProcess

 void PreProcess()
 2     {
 3         for (int iter = 0; iter < preproc_blur_iter; iter++)
 4         {
 5             ConvolveSeparable(m_reference, m_reference, ConvolveKernel_121, ConvolveKernel_14641, 1.0f, 0.0f, 1, 1);
 6             ConvolveSeparable(m_matching, m_matching, ConvolveKernel_121, ConvolveKernel_14641, 1.0f, 0.0f, 1, 1);
 7         }
 8         //Currently, we only support iterated binomial blur, to clean up the images a little.
 9         //This should help sub-pixel fitting work better, by making image shifts closer to a Taylor series expansion,
10         //but will result in worse performance near discontinuity regions and in finely textured regions.
11         //Other potential pre-processing operations (currently not implemented),might include:
12         //(1)bias and gain normalization
13         //(2)histogram equalization (global or local)
14         //(3)rank statistics pre-processing
15     }

1.2 RawCosts

void RawCosts()
    {
        // Fills m_cost with the truncated per-pixel matching cost (absolute or squared
        // difference, summed over all image bands) for every disparity level. Band d of
        // m_cost holds the cost for disparity disp_min + d.
        CShape sh = m_reference.Shape();
        int cols = sh.width;
        int rows = sh.height;
        int cn = sh.nBands;
        fprintf(stderr, match_fn == eAD ? "\nmatch_fn=AD, match_max=%d\n" : (match_fn == eSD ? "\nmatch_fn=SD, match_max=%d\n" : "\nmatch_fn=unknown, match_max=%d\n"), match_max);

        int cutoff = (match_fn == eSD) ? match_max * match_max : abs(match_max);
        for (int d = 0; d < disp_n; d++)
        {
            int disp = -(disp_min + d);   // column shift into the matching image for this level
            for (int i = 0; i < rows; i++)
            {
                uchar *ref = &m_reference.Pixel(0, i, 0);
                uchar *match = &m_matching.Pixel(0, i, 0);
                float *cost = &m_cost.Pixel(0, i, d);
                for (int j = 0, jj = 0; j < cols; j++, jj += disp_n)   // m_cost has disp_n bands per pixel
                {
                    // 1. The matching column lies outside the image: use the border cost.
                    //    Both sides are checked; a negative disp_min yields a positive shift,
                    //    which would otherwise read past the end of the matching row.
                    const int jm = j + disp;
                    if (jm < 0 || jm >= cols)
                    {
                        cost[jj] = m_match_outside;
                        continue;
                    }

                    // 2. Otherwise accumulate the AD or SD cost over all bands.
                    int diff_sum = 0;
                    uchar *pixel0 = &ref[j*cn];
                    uchar *pixel1 = &match[jm*cn];
                    for (int k = 0; k < cn; k++)
                    {
                        int diff1 = (int)pixel1[k] - (int)pixel0[k];
                        int diff2 = (match_fn == eSD) ? diff1 * diff1 : abs(diff1);
                        diff_sum = diff_sum + diff2;
                    }
                    cost[jj] = (diff_sum < cutoff) ? diff_sum : cutoff;   // truncate to the cutoff
                }
            }
        }
    }

1.2.1 PadCosts

void PadCosts()
    {
        // Re-stamps m_match_outside into every cost-volume entry whose matching column falls
        // outside the image, so filtering does not leave those entries with lowered costs.
        CShape sh = m_cost.Shape();
        int cols = sh.width;
        int rows = sh.height;

        for (int d = 0; d < m_disp_n; d++)
        {
            int disp = -(disp_min + d);   // column shift into the matching image for this level
            for (int i = 0; i < rows; i++)
            {
                float* cost = &m_cost.Pixel(0, i, d);
                for (int j = 0, jj = 0; j < cols; j++, jj += disp_n)   // m_cost has disp_n bands per pixel
                {
                    // Check both image borders: a positive shift (negative disp_min) can run
                    // off the right-hand side as well.
                    const int jm = j + disp;
                    if (jm < 0 || jm >= cols)
                        cost[jj] = m_match_outside;
                }
            }
        }
    }

1.3 Aggregate

void Aggregate()
    {
        // Keep an unfiltered copy of the matching costs in m_cost0; m_cost is filtered in place.
        CopyPixels(m_cost, m_cost0);

        // 1. Apply the selected aggregation. An unknown aggr_fn throws only if at least one
        //    pass is requested.
        for (int pass = 0; pass < aggr_iter; ++pass)
        {
            const bool announce = (verbose == eVerboseSummary && pass < 1);   // print once

            if (aggr_fn == eBox)
            {
                if (announce) fprintf(stderr, ", box=%d", aggr_window_size);
                // Equivalent to cv::boxFilter() according to the author's notes.
                BoxFilter(m_cost, m_cost, aggr_window_size, aggr_window_size, true);
            }
            else if (aggr_fn == eASWeight)
            {
                if (announce) fprintf(stderr, ", AdaptiveWeight (box=%d gamma_p=%g gamma_s=%g color_space=%d )", aggr_window_size, aggr_gamma_proximity, aggr_gamma_similarity, aggr_color_space);
                LASW(m_cost,                // initial matching cost
                     m_cost,                // aggregated matching cost
                     m_reference,           // reference image
                     m_matching,            // target image
                     aggr_window_size,      // window size - x
                     aggr_window_size,      // window size - y
                     aggr_gamma_proximity,  // gamma_p
                     aggr_gamma_similarity, // gamma_c
                     aggr_color_space,      // color space
                     aggr_iter);            // iteration number (aggregation)
                // LASW receives aggr_iter and iterates internally, so one call is enough.
                break;
            }
            else
            {
                throw CError("CStereoMatcher::Aggregate(): unknown aggregation function");
            }
        }

        // 2. Simulate the effect of shiftable windows.
        if (aggr_minfilter > 1)
        {
            MinFilter(m_cost, m_cost, aggr_minfilter, aggr_minfilter);
        }

        // 3. Pad the outside costs back up to bad values.
        PadCosts();
    }

1.3.1 MinFilter

{
    //略
}

1.4 Optimize

void Optimize()
    {
        // Select the best matches using local or global optimization
 
        // set up the smoothness cost function for the methods that need it
        if (opt_fn == eDynamicProg || opt_fn == eScanlineOpt || opt_fn == eGraphCut || opt_fn == eSimulAnnl || opt_fn == eBPAccel || opt_fn == eBPSync)
        {
            if (verbose == eVerboseSummary) fprintf(stderr, ", smooth=%g, grad_thres=%g, penalty=%g", opt_smoothness, opt_grad_thresh, opt_grad_penalty);
            SmoothCostAll();
        }
 
        switch (opt_fn)
        {
        case eNoOpt:      // no optimization (pass through input depth maps)   
            if (verbose == eVerboseSummary)  fprintf(stderr, ", NO OPT");
            break;
 
        case eWTA:        // winner-take-all (local minimum)       
            if (verbose == eVerboseSummary) fprintf(stderr, ", WTA");
            OptWTA();
            break;
 
        case eGraphCut:     // graph-cut global minimization
            if (verbose == eVerboseSummary)   fprintf(stderr, ", GC");
            OptWTA();       // get an initial labelling (or just set to 0???)
            OptGraphCut();  // run the optimization
            break;
 
        case eDynamicProg:  // scanline dynamic programming    
            if (verbose == eVerboseSummary)    fprintf(stderr, ", DP (occl_cost=%d)", opt_occlusion_cost);
            OptDP();        // see StcOptDP.cpp
            break;
 
        case eScanlineOpt:  // scanline optimization    
            if (verbose == eVerboseSummary)  fprintf(stderr, ", SO");
            OptSO();       // see StcOptSO.cpp
            break;
 
        case eSimulAnnl:  // simulated annealing
            if (verbose == eVerboseSummary)  fprintf(stderr, ", SA");
            OptWTA();           // initialize to reasonable starting point (for low-T gradient descent)
            OptSimulAnnl();    // see StcSimulAnn.cpp
            break;
 
        case eBPAccel:
            OptBP();  // run the optimization
            break;
 
        case eBPSync:
            OptBPSync();  // run the optimization
            break;
 
        default:
            throw CError("CStereoMatcher::Optimize(): unknown optimization function");
        }
 
        if (final_energy < 0.0f)
        {
            if (!m_cost.Shape().SameIgnoringNBands(m_smooth.Shape()))
                SmoothCostAll();
            float finalEd, finalEn;
            CStereoMatcher::ComputeEnergy(finalEd, finalEn);
            final_energy = finalEd + finalEn;
        }
    }

1.4.1 SmoothCostOne

float SmoothCostOne(uchar *pixel1, uchar *pixel2, int cn)
    {
        // Smoothness cost between two pixels: the RMS intensity difference is compared with
        // opt_grad_thresh; below the threshold the cost is opt_smoothness*opt_grad_penalty,
        // otherwise plain opt_smoothness.
        float sq_sum = 0.0;
        for (int band = 0; band < cn; ++band)
        {
            float delta = int(pixel1[band]) - int(pixel2[band]);
            sq_sum += delta*delta;
        }

        // Normalise to a single channel. For multi-band pixels one band is not counted
        // (the author's note: ppm images are loaded with 4 bands).
        const int divisor = (cn > 1) ? (cn - 1) : cn;
        float gradient = sq_sum / divisor;
        gradient = sqrt(gradient);

        return !(gradient < opt_grad_thresh) ? opt_smoothness : (opt_smoothness*opt_grad_penalty);
    }

1.4.2 SmoothCostAll

void SmoothCostAll()
    {    //calculate smoothness costs for DP and GC
        CShape sh = m_cost.m_shape;
        sh.nBands = 2;//分爲垂直和水平平滑代價
        m_smooth.ReAllocate(sh, false);
        int rows = sh.height;
        int cols = sh.width;
        int cn = m_reference.m_shape.nBands;

        char *im_data0_cr = m_reference.m_memStart;
        char *im_data0_dw = im_data0_cr + m_reference.m_rowSize;
        char *smooth_data0 = m_smooth.m_memStart;
        for (int i = 0; i < rows; i++, im_data0_cr += m_reference.m_rowSize, im_data0_dw += m_reference.m_rowSize, smooth_data0 += m_smooth.m_rowSize)
        {
            uchar *im_data1_cr = (uchar*)im_data0_cr;
            uchar *im_data1_dw = (uchar*)((i < rows - 1) ? im_data0_dw : im_data0_cr);
            float *smooth_data1 = (float*)smooth_data0;
            for (int j = 0; j < cols; j++, im_data1_cr += cn, im_data1_dw += cn, smooth_data1 += 2)
            {
                smooth_data1[0] = (i < rows - 1) ? SmoothCostOne(im_data1_cr, im_data1_dw, cn) : 0;
                smooth_data1[1] = (j < cols - 1) ? SmoothCostOne(im_data1_cr, im_data1_cr + cn, cn) : 0;
            }
        }
    }

1.4.3 ComputeEnergy

static void ComputeEnergy(CFloatImage& m_cost, CFloatImage& m_smooth, CIntImage& m_disparity, float& dataEnergy, float& smoothEnergy)
    {
        // Evaluates a labelling: dataEnergy is the sum of the cost-volume entries selected by
        // m_disparity; smoothEnergy is the sum of the smoothness costs across every pair of
        // neighbouring pixels (below / right) whose labels differ.
        const int cols = m_cost.m_shape.width;
        const int rows = m_cost.m_shape.height;
        const int costBands = m_cost.m_shape.nBands;
        const int smoothBands = m_smooth.m_shape.nBands;

        float dataSum = 0.0f;
        float smoothSum = 0.0f;

        char *dispRow = m_disparity.m_memStart;
        char *costRow = m_cost.m_memStart;
        char *smoothRow = m_smooth.m_memStart;
        for (int r = 0; r < rows; r++)
        {
            const bool hasBelow = (r < rows - 1);
            int *labels = (int*)dispRow;
            // No row below the last one; alias the current row (it is never compared).
            int *labelsBelow = hasBelow ? (int*)(dispRow + m_disparity.m_rowSize) : labels;
            float *pixCost = (float*)costRow;
            float *pixSmooth = (float*)smoothRow;

            for (int c = 0; c < cols; c++)
            {
                const int label = labels[c];
                dataSum = dataSum + pixCost[label];

                // Band 0: cost towards the pixel below (vertical neighbour).
                if (hasBelow && label != labelsBelow[c])
                    smoothSum = smoothSum + pixSmooth[0];

                // Band 1: cost towards the pixel to the right (horizontal neighbour).
                if (c < cols - 1 && label != labels[c + 1])
                    smoothSum = smoothSum + pixSmooth[1];

                pixCost += costBands;
                pixSmooth += smoothBands;
            }

            dispRow += m_disparity.m_rowSize;
            costRow += m_cost.m_rowSize;
            smoothRow += m_smooth.m_rowSize;
        }

        dataEnergy = dataSum;
        smoothEnergy = smoothSum;
    }

1.5 Refine

void Refine()
    {
        // Converts the integer disparities to floating point and, when enabled, refines each
        // one to sub-pixel accuracy by fitting a parabola through three neighbouring costs.
        if (opt_fn != eNoOpt)   // skipped when no optimization was run
            ScaleAndOffset(m_disparity, m_float_disparity, disp_step, disp_min);

        if (refine_subpix == 0) return;   // refinement disabled
        if (disp_n < 3) return;           // a parabola needs three samples

        const int rows = m_cost.m_shape.height;
        const int cols = m_cost.m_shape.width;
        for (int r = 0; r < rows; r++)
        {
            float *rowCost = &m_cost.Pixel(0, r, 0);
            int   *rowDisp = &m_disparity.Pixel(0, r, 0);
            float *rowOut = &m_float_disparity.Pixel(0, r, 0);

            for (int c = 0; c < cols; c++)
            {
                const float *samples = rowCost + c * disp_n;

                // Centre of the fit: the winning level, moved one step inwards at either end.
                int center = rowDisp[c];
                if (center == 0)
                    center = center + 1;
                else if (center == disp_n - 1)
                    center = center - 1;

                // Parabola a*x^2 + b*x + c1 through (-1, c0), (0, c1), (+1, c2).
                float c0 = samples[center - 1];
                float c1 = samples[center];
                float c2 = samples[center + 1];
                float a = 0.5 * (c0 - 2.0 * c1 + c2);
                float b = 0.5 * (c2 - c0);

                // Skip fits that are not convex or whose minimum lies more than one step away.
                if (a <= 0.0 || a < 0.5 * fabs(b))    continue;

                // Vertex of the parabola, relative to the centre sample.
                float x0 = -0.5 * b / a;
                rowOut[c] = m_disp_step * (center + x0) + disp_min;
            }
        }
    }
2.代價聚合
2.1 BoxFilter

{
    //與cv::boxFilter一致
}

2.2 LASW

// Locally adaptive support-weight aggregation.
//
// srcCost / dstCost : input and output cost volumes (may be the same image); they are
//                     indexed with im0's width/height, so all are assumed to match in size.
// im0 / im1         : reference and target images used to derive the colour weights.
// xWidth            : window width; the window is square with radius xWidth/2.
// yWidth            : accepted for interface compatibility, not used.
// proximity         : gamma_p, spatial fall-off of the weights.
// similarity        : gamma_c, colour fall-off of the weights.
// color_space       : accepted for interface compatibility, not used.
// diff_iter         : number of aggregation passes; <= 0 copies srcCost to dstCost unchanged.
void LASW(CFloatImage &srcCost, CFloatImage &dstCost, CByteImage &im0, CByteImage &im1, int xWidth, int yWidth, float proximity, float similarity, int color_space, int diff_iter)
{
    (void)yWidth;
    (void)color_space;

    const int img_cols = im0.m_shape.width;
    const int img_rows = im0.m_shape.height;
    const int frm_total = img_cols * img_rows;
    const int cost_bands = srcCost.m_shape.nBands;
    const int win_radius = (int)(xWidth / 2.0);
    // calcASW/aggrASW index (2*win_radius+1)^2 weights per pixel, so the weight rows must be
    // sized from the radius (xWidth*yWidth is too small for even or non-square windows).
    const int win_side = 2 * win_radius + 1;
    const int win_total = win_side * win_side;
    if (frm_total <= 0 || cost_bands <= 0)
        return;   // nothing to aggregate

    //0. Allocate the per-pixel working tables.
    double **Lab0 = new double *[frm_total];
    double **Lab1 = new double *[frm_total];
    float **rawCostf = new float *[frm_total];
    float **dstCostf = new float *[frm_total];
    float **sw0f = new float *[frm_total];
    float **sw1f = new float *[frm_total];
    for (int i = 0; i < frm_total; i++)
    {
        Lab0[i] = new double[3];
        Lab1[i] = new double[3];
        rawCostf[i] = new float[cost_bands];
        dstCostf[i] = new float[cost_bands];
        sw0f[i] = new float[win_total];
        sw1f[i] = new float[win_total];
    }

    //1. Convert both images to Lab. Images with 4 bands use bands 0..2 as R,G,B; any other
    //   band count feeds band 0 into all three channels.
    const int g0 = ((im0.m_shape.nBands - 1) == 3) ? 1 : 0;
    const int b0 = ((im0.m_shape.nBands - 1) == 3) ? 2 : 0;
    const int g1 = ((im1.m_shape.nBands - 1) == 3) ? 1 : 0;
    const int b1 = ((im1.m_shape.nBands - 1) == 3) ? 2 : 0;
    for (int i = 0, index = 0; i < img_rows; i++)
        for (int j = 0; j < img_cols; j++, index++)
        {
            double R, G, B;
            R = im0.Pixel(j, i, 0);
            G = im0.Pixel(j, i, g0);
            B = im0.Pixel(j, i, b0);
            RGB2Lab(R, G, B, Lab0[index][0], Lab0[index][1], Lab0[index][2]);
            R = im1.Pixel(j, i, 0);
            G = im1.Pixel(j, i, g1);
            B = im1.Pixel(j, i, b1);
            RGB2Lab(R, G, B, Lab1[index][0], Lab1[index][1], Lab1[index][2]);
        }

    //2. Fetch the raw costs.
    for (int i = 0, index = 0; i < srcCost.m_shape.height; i++)
        for (int j = 0; j < srcCost.m_shape.width; j++, index++)
            for (int k = 0; k < cost_bands; k++)
                rawCostf[index][k] = (float)srcCost.Pixel(j, i, k);

    //3. Compute the adaptive support weights of both images.
    calcASW(Lab0, sw0f, proximity, similarity, win_radius, img_cols, img_rows);
    calcASW(Lab1, sw1f, proximity, similarity, win_radius, img_cols, img_rows);

    //4. Aggregate; each pass feeds its output back in as the next pass's input.
    for (int u = 0; u < diff_iter; u++)
    {
        aggrASW(sw0f, sw1f, rawCostf, dstCostf, cost_bands, win_radius, img_cols, img_rows);
        for (int k = 0; k < frm_total; k++)
            memcpy(rawCostf[k], dstCostf[k], sizeof(float)*cost_bands);
    }

    //5. Write the result back. rawCostf always holds the latest costs (it equals dstCostf
    //   after every pass, and the untouched input when no pass ran), so it is never
    //   uninitialised.
    for (int i = 0, index = 0; i < dstCost.m_shape.height; i++)
        for (int j = 0; j < dstCost.m_shape.width; j++, index++)
            for (int k = 0; k < dstCost.m_shape.nBands; k++)
                ((float*)dstCost.PixelAddress(j, i, 0))[k] = rawCostf[index][k];

    //6. Release everything: array form for the rows, then the pointer tables themselves.
    for (int i = 0; i < frm_total; i++)
    {
        delete[] Lab0[i];
        delete[] Lab1[i];
        delete[] rawCostf[i];
        delete[] dstCostf[i];
        delete[] sw0f[i];
        delete[] sw1f[i];
    }
    delete[] Lab0;
    delete[] Lab1;
    delete[] rawCostf;
    delete[] dstCostf;
    delete[] sw0f;
    delete[] sw1f;
}

2.2.1 RGB2Lab

// Nonlinear transfer used by RGB2Lab: cube root above the 0.008856 threshold, linear
// segment (7.787*t + 16/116) below it.
static double LabTransfer_(double ratio)
{
    if (ratio > 0.008856)
        return pow(ratio, 0.333333333);
    return 7.787*ratio + 0.137931034;
}

// Converts an RGB triple (0..255 per channel) to L, a, b.
// R, G, B are inputs only; L, a and b receive the result.
void RGB2Lab(double &R, double &G, double &B, double &L, double &a, double &b)
{
    // Linear RGB -> XYZ.
    const double X = 0.412453*R + 0.357580*G + 0.189423*B;
    const double Y = 0.212671*R + 0.715160*G + 0.072169*B;
    const double Z = 0.019334*R + 0.119193*G + 0.950227*B;

    // Reference white: the XYZ of R = G = B = 255 under the matrix above.
    const double whiteX = 244.66128;
    const double whiteY = 255.0;
    const double whiteZ = 277.63227;

    const double fx = LabTransfer_(X / whiteX);
    const double fy = LabTransfer_(Y / whiteY);
    const double fz = LabTransfer_(Z / whiteZ);

    L = 116 * fy - 16;
    a = 500 * (fx - fy);
    b = 200 * (fy - fz);
}

2.2.2 calcASW

// Computes the adaptive support weights of every pixel.
//
// Lab[p]  : the 3 Lab components of pixel p (row-major, p = row*cols + col).
// SW[p]   : receives (2*win_radius+1)^2 weights for pixel p, stored row by row from the
//           top-left neighbour to the bottom-right one; neighbours outside the image get 0.
// Each weight is exp(-spatial_distance/proximity) * exp(-colour_distance/similarity).
void calcASW(double **Lab, float **SW, double proximity, double similarity, int win_radius, int cols, int rows)
{
    const int side = 2 * win_radius + 1;
    const int taps = side * side;
    const int pixels = cols * rows;

    //0. Start from all-zero weights; a positive entry later means "already computed".
    for (int p = 0; p < pixels; ++p)
        memset(SW[p], 0, sizeof(float) * taps);

    //1. Fill in the weights pixel by pixel.
    for (int p = 0; p < pixels; ++p)
    {
        const int row = p / cols;
        const int col = p % cols;
        float *weights = SW[p];
        const double *center = Lab[p];

        for (int k = 0; k < taps; ++k)   // k-th neighbour of the window
        {
            const int dy = k / side - win_radius;
            const int dx = k % side - win_radius;
            const int nrow = row + dy;
            const int ncol = col + dx;

            if (nrow < 0 || nrow >= rows)   // the whole window row is outside the image
            {
                weights[k] = 0;
                continue;
            }
            if (weights[k] > 0)             // already set through the symmetric write below
                continue;
            if (ncol < 0 || ncol >= cols)   // this neighbour is outside the image
            {
                weights[k] = 0;
                continue;
            }

            const int q = nrow*cols + ncol;   // linear index of the neighbour
            const double *other = Lab[q];
            const double dL = center[0] - other[0];
            const double da = center[1] - other[1];
            const double db = center[2] - other[2];

            double weight_prox = exp(-sqrt((double)(dy*dy + dx*dx)) / proximity);
            double weight_simi = exp(-sqrt(dL*dL + da*da + db*db) / similarity);
            weights[k] = (float)(weight_prox*weight_simi);

            // The weight is symmetric: store it in the neighbour's mirrored slot as well.
            SW[q][taps - 1 - k] = weights[k];
        }
    }
}

2.2.3 aggrASW

// One pass of adaptive support-weight aggregation.
//
// For every pixel and every cost channel d, the output is the weighted average of the raw
// costs in the window, each neighbour weighted by the product of its reference-image weight
// (SW0, taken at the pixel) and its target-image weight (SW1, taken at the pixel shifted
// left by d columns). Coordinates that leave the image wrap around.
void aggrASW(float **SW0, float **SW1, float **rawCost, float **dstCost, int cn, int win_radius, int cols, int rows)
{
    int pix = 0;   // linear index of the current pixel
    for (int row = 0; row < rows; row++)
    {
        for (int col = 0; col < cols; col++, pix++)
        {
            for (int d = 0; d < cn; d++)   // channel d of the cost volume
            {
                // Column of the corresponding pixel in the target image, wrapped once.
                int matchCol = col - d;
                if (matchCol < 0)
                    matchCol = matchCol + cols;
                else if (matchCol >= cols)
                    matchCol = matchCol - cols;
                const int matchPix = row*cols + matchCol;

                double weight_sum = 0;
                double cost_sum = 0;
                int k = 0;   // index of the neighbour inside the window
                for (int dy = -win_radius; dy <= win_radius; dy++)
                {
                    int nrow = row + dy;
                    if (nrow < 0) nrow = nrow + rows;
                    if (nrow >= rows) nrow = nrow - rows;

                    for (int dx = -win_radius; dx <= win_radius; dx++, k++)
                    {
                        int ncol = col + dx;
                        if (ncol < 0)
                            ncol = cols + ncol;
                        else if (ncol >= cols)
                            ncol = ncol - cols;

                        // Product of the two support weights (multiplied in float, as stored).
                        double weight = SW0[pix][k] * SW1[matchPix][k];
                        weight_sum = weight_sum + weight;
                        const int neighbour = nrow*cols + ncol;
                        cost_sum = cost_sum + rawCost[neighbour][d] * weight;
                    }
                }
                dstCost[pix][d] = (float)(cost_sum / weight_sum);
            }
        }
    }
}

3.視差優化
3.1 OptWTA

void CStereoMatcher::OptWTA()
{
    CShape sh = m_cost.Shape();
    int cols = sh.width;
    int rows = sh.height;

    for (int i = 0; i < rows; i++)
    {
        float* cost = &m_cost.Pixel(0, i, 0);
        int*   disp = &m_disparity.Pixel(0, i, 0);
        for (int j = 0; j < cols; j++, cost += disp_n)//m_cost的通道數爲disp_n
        {
            int best_disp = 0;
            float best_cost = cost[0];
            for (int d = 1; d < disp_n; d++)
            if (cost[d] < best_cost)
            {
                best_cost = cost[d];
                best_disp = d;
            }
        disp[j] = best_disp;
        }
    }
}

3.2 OptSO

void OptSO()
    {
        // Scanline optimization: each image row is solved independently by dynamic
        // programming over (column, disparity), charging the horizontal smoothness cost
        // whenever the disparity changes between neighbouring columns.
        const int cols = m_cost.m_shape.width;
        const int rows = m_cost.m_shape.height;
        const int tableLen = cols*disp_n;

        // Per-row DP tables, reused for every row:
        //   pathCost[c*disp_n + d] = best accumulated cost ending at column c with disparity d
        //   backPtr [c*disp_n + d] = disparity chosen at column c-1 on that best path
        float *pathCost = (float*)malloc(tableLen*sizeof(float));
        int *backPtr = (int*)malloc(tableLen*sizeof(int));

        char *costRow = m_cost.m_memStart;
        char *smoothRow = m_smooth.m_memStart;
        char *dispRow = m_disparity.m_memStart;
        for (int r = 0; r < rows; r++)
        {
            const float *data = (const float*)costRow;
            const float *smooth = (const float*)smoothRow;
            int *out = (int*)dispRow;

            //1. First column: no predecessor, the path cost is the data cost.
            for (int d = 0; d < disp_n; d++)
            {
                backPtr[d] = -1;
                pathCost[d] = data[d];
            }

            //2. Remaining columns.
            for (int c = 1; c < cols; c++)
            {
                const float *dataHere = data + c*disp_n;
                const float *prevCost = pathCost + (c - 1)*disp_n;
                float *hereCost = pathCost + c*disp_n;
                int *hereBack = backPtr + c*disp_n;
                // Band 1 of the smoothness image is the horizontal cost; the cost between
                // columns c-1 and c is stored at column c-1.
                const float *smoothPrev = smooth + (c - 1)*2;

                for (int d1 = 0; d1 < disp_n; d1++)
                {
                    float best = COST_MAX;
                    int bestPrev = -1;
                    for (int d0 = 0; d0 < disp_n; d0++)
                    {
                        float candidate = dataHere[d1];
                        candidate = candidate + prevCost[d0];
                        if (d0 != d1)
                            candidate = candidate + smoothPrev[1];
                        if (candidate < best)
                        {
                            best = candidate;
                            bestPrev = d0;
                        }
                    }
                    hereCost[d1] = best;
                    hereBack[d1] = bestPrev;
                }
            }

            //3. Pick the cheapest disparity in the last column.
            const int lastCol = (cols > 1) ? cols - 1 : 0;
            const float *finalCost = pathCost + lastCol*disp_n;
            float best_cost = COST_MAX;
            int best_disp = 0;
            for (int d = 0; d < disp_n; d++)
            {
                if (finalCost[d] < best_cost)
                {
                    best_cost = finalCost[d];
                    best_disp = d;
                }
            }

            //4. Backtrack from the last column to the first.
            for (int c = cols - 1; c >= 0; c--)
            {
                out[c] = best_disp;
                best_disp = backPtr[c*disp_n + best_disp];
            }

            costRow += m_cost.m_rowSize;
            smoothRow += m_smooth.m_rowSize;
            dispRow += m_disparity.m_rowSize;
        }

        free(pathCost);
        free(backPtr);
    }

3.3 OptDP

void OptDP()
    {    // Dynamic programming stereo (Intille and Bobick, no GCPs).
         // Each row is solved independently over (column, disparity, state). A cell in state 0
         // pays its data cost; states 1 and 2 pay the occlusion costs ocl and ocr instead.
         // Occluded pixels are marked during backtracking and filled in afterwards.
        float ocl = opt_occlusion_cost;
        float ocr = opt_occlusion_cost;
        int occ = -9999; // marker for occluded pixels (use 0 if you want to leave occluded pixels black)
        int cols = m_cost.m_shape.width;
        int rows = m_cost.m_shape.height;

        // The 7 allowed transitions, indexed by t.
        int state0[7] = { 0, 0, 1, 1, 0, 2, 2 };   // state of the predecessor
        int state1[7] = { 0, 1, 1, 0, 2, 2, 0 };   // state of the current cell
        int colElem = disp_n * 3;                   // table entries per column = disparities * states
        int left = -colElem, diag = -colElem - 3, up = 3;
        int steps[7] = { left, left, diag, diag, up, up, left };   // table offset to the predecessor
        int dleft = -disp_n, ddiag = -disp_n - 1, dup = 1;
        // Change applied to the running disparity when stepping back along transition t.
        // (Named disp_jump so it does not shadow the disp_step used by other functions.)
        int disp_jump[7] = { dleft, dleft, ddiag, ddiag, dup, dup, dleft };
        int border0[7] = { 0, 0, 1, 1, 0, 0, 0 };   // at disparity 0 there is no diagonal predecessor
        int border1[7] = { 0, 0, 0, 0, 1, 1, 0 };   // at the maximum disparity there is no "up" predecessor

        int rowElem = cols * colElem;
        char *datacost_data0 = m_cost.m_memStart;
        char *smoothcost_data0 = m_smooth.m_memStart;
        char *disparity_data0 = m_disparity.m_memStart + (cols - 1) * m_disparity.m_pixSize;   // disparities are written starting from the last column
        int *position_data0 = (int*)malloc(rowElem*sizeof(int));        // transition index t that produced each table entry
        float *sumcost_data0 = (float*)malloc(rowElem*sizeof(float));   // best accumulated cost of each table entry
        int *position_data1_endlcol = position_data0 + (cols - 1)*colElem;
        float *sumcost_data1_endcol = sumcost_data0 + (cols - 1)*colElem;
        for (int i = 0; i < rows; i++, datacost_data0 += m_cost.m_rowSize, smoothcost_data0 += m_smooth.m_rowSize, disparity_data0 += m_disparity.m_rowSize)
        {
            float *datacost_data1 = (float*)datacost_data0;
            float *smoothcost_data1 = (float*)smoothcost_data0;
            int *position_data1 = (int*)position_data0;
            float *sumcost_data1 = (float*)sumcost_data0;

            //1. Initialise the first column (disp_n disparities, 3 states each).
            {
                float *datacost_data2 = datacost_data1;
                int *position_data2 = position_data1;
                float *sumcost_data2 = sumcost_data1;
                for (int d = 0; d < disp_n; d++, datacost_data2++, position_data2 += 3, sumcost_data2 += 3)
                {    // force the first pixel to be non-occluded
                    position_data2[0] = 0;
                    position_data2[1] = -1;
                    position_data2[2] = -1;
                    sumcost_data2[0] = datacost_data2[0];
                    sumcost_data2[1] = COST_MAX;
                    sumcost_data2[2] = COST_MAX;
                }
                datacost_data1 += disp_n; position_data1 += colElem; sumcost_data1 += colElem;   // move to the second column
            }

            //2. Dynamic programming over the remaining columns.
            for (int j = 1; j < cols; j++, datacost_data1 += disp_n, smoothcost_data1 += 2, position_data1 += colElem, sumcost_data1 += colElem)
            {
                // Disparities are processed from the last to the first, because the "up"
                // transitions read the entry of the next disparity in the same column.
                float *datacost_data2 = datacost_data1 + disp_n - 1;
                float *smoothcost_data2 = smoothcost_data1;   // smoothness depends on the column only
                int *position_data2 = position_data1 + colElem - 3;
                float *sumcost_data2 = sumcost_data1 + colElem - 3;
                for (int d1 = disp_n - 1; d1 >= 0; d1--, datacost_data2--, position_data2 -= 3, sumcost_data2 -= 3)
                {
                    sumcost_data2[0] = COST_MAX;   // best cost of this cell in state 0
                    sumcost_data2[1] = COST_MAX;   // best cost of this cell in state 1
                    sumcost_data2[2] = COST_MAX;   // best cost of this cell in state 2
                    position_data2[0] = -1;        // transition that produced state 0
                    position_data2[1] = -1;        // transition that produced state 1
                    position_data2[2] = -1;        // transition that produced state 2

                    for (int t = 0; t < 7; t++)
                    {
                        if ((d1 == 0 && border0[t]) || (d1 == disp_n - 1 && border1[t]))  continue;   // predecessor does not exist
                        int pre_state = state0[t];
                        int cur_state = state1[t];
                        int pre_pos = steps[t] + pre_state;

                        float tm = (cur_state == 1 ? ocl : (cur_state == 2 ? ocr : datacost_data2[0]));   // cost of the current cell
                        tm = tm + sumcost_data2[pre_pos];                                                  // plus the predecessor's accumulated cost
                        tm = (t == 3 || t == 6) ? (tm + smoothcost_data2[1]) : tm;                         // occluded -> matched pays the horizontal smoothness cost (band 1)
                        if (tm < sumcost_data2[cur_state])
                        {
                            sumcost_data2[cur_state] = tm;
                            position_data2[cur_state] = t;
                        }
                    }
                }
            }

            //3. Best entry of the last column; only state 0 is considered.
            float best_cost = COST_MAX;
            int best_disp = 0;
            int best_state = 0;
            {
                float *sumcost_data2 = sumcost_data1_endcol;
                for (int d = 0; d < disp_n; d++, sumcost_data2 += 3)
                    if (sumcost_data2[best_state] < best_cost)
                    {
                        best_cost = sumcost_data2[best_state];
                        best_disp = d;
                    }
            }

            //4. Backtrack from the last column to the first.
            position_data1 = position_data1_endlcol + best_disp * 3 + best_state;
            int *disparity_data1 = (int*)disparity_data0;
            while (position_data1 >= position_data0)
            {
                int pos = *position_data1;
                int current_state = state1[pos];
                int prev_state = state0[pos];
                *disparity_data1 = (current_state == 0) ? best_disp : occ;

                int stride = steps[pos] - current_state + prev_state;
                position_data1 += stride;

                // A negative running disparity signals a move to the previous column.
                best_disp += disp_jump[pos];
                if (best_disp < 0)
                {
                    best_disp += disp_n;
                    disparity_data1--;
                }
            }
        }
        free(sumcost_data0);
        free(position_data0);


        // Fill in the occluded pixels (could be split into its own function).
        if (occ != 0)
        {
            char *disp_data0 = m_disparity.m_memStart;
            for (int i = 0; i < rows; i++, disp_data0 += m_disparity.m_rowSize)
            {
                int *disp_data1 = (int*)disp_data0;

                // Find the first non-occluded pixel of the row. Defaults to 0 so that a row
                // without any non-occluded pixel is filled with a defined value.
                int nonocc = 0;
                for (int j = 0; j < cols; j++)
                    if (disp_data1[j] != occ)
                    {
                        nonocc = disp_data1[j];
                        break;
                    }


                // Leading occluded pixels take the first non-occluded value to their right;
                // every other occluded pixel takes the nearest non-occluded value to its left.
                for (int j = 0; j < cols; j++)
                {
                    int d = disp_data1[j];
                    if (d == occ)
                        disp_data1[j] = nonocc;
                    else
                        nonocc = d;
                }
            }
        }
    }

8.雜項函數
8.1 BirchfieldTomasiMinMax

// Computes, for every sample of one scanline, the minimum and maximum over
// three values: the sample itself and the two rounded half-way interpolations
// towards its left and right neighbours, (a + b + 1) / 2.
//
//   buffer : input samples, at least `cols` entries
//   min    : output, per-sample minimum, at least `cols` entries
//   max    : output, per-sample maximum, at least `cols` entries
//   cols   : number of samples; nothing is read or written when cols <= 0
//   cn     : not used by this function (kept so existing callers still compile)
//
// At both ends of the line the missing neighbour is replaced by the sample
// itself. This also makes cols == 1 well defined; the previous version read
// buffer[1] and buffer[-1] in that case.
void BirchfieldTomasiMinMax(int* buffer, int* min, int* max, int cols, int cn)
{
    (void)cn; // intentionally unused

    if (cols <= 0)
        return;

    const int last = cols - 1;
    for (int i = 0; i <= last; i++)
    {
        const int cur = buffer[i];
        // Clamp neighbour indices so border samples are paired with themselves.
        const int left = buffer[i > 0 ? i - 1 : i];
        const int right = buffer[i < last ? i + 1 : i];

        // Half-way values with the same rounding as the original formula.
        const int pre = (cur + left + 1) / 2;
        const int nex = (cur + right + 1) / 2;

        // Plain comparisons instead of the MSVC-only __min/__max macros.
        int lo = cur;
        int hi = cur;
        if (pre < lo) lo = pre;
        if (nex < lo) lo = nex;
        if (pre > hi) hi = pre;
        if (nex > hi) hi = nex;

        min[i] = lo;
        max[i] = hi;
    }
}

9. Image.h添加

(1)將所有private及protected成員變成public

(2)添加如下代碼:


#include <opencv2/opencv.hpp>
using namespace cv;//將所有權限改爲public

template <class T> Mat ImgToMat(CImageOf<T> *src)
{
    Mat dst;
    const char *depth = src->m_pTI->name();

    if (strcmp(depth, "unsigned char") == 0)
    {
        dst = Mat(src->m_shape.height, src->m_shape.width, CV_8UC(src->m_shape.nBands));
        for (int k = 0; k < src->m_shape.nBands; k++)
            for (int i = 0; i < src->m_shape.height; i++)
                for (int j = 0; j < src->m_shape.width; j++)
                    *((unsigned char*)(dst.data + i*dst.step + j*dst.elemSize() + k*dst.elemSize1())) = *((unsigned char*)(src->m_memStart + i*src->m_rowSize + j*src->m_pixSize + k*src->m_bandSize));
    }
    if (strcmp(depth, "char") == 0)
    {
        dst = Mat(src->m_shape.height, src->m_shape.width, CV_8SC(src->m_shape.nBands));
        for (int k = 0; k < src->m_shape.nBands; k++)
            for (int i = 0; i < src->m_shape.height; i++)
                for (int j = 0; j < src->m_shape.width; j++)
                    *((char*)(dst.data + i*dst.step + j*dst.elemSize() + k*dst.elemSize1())) = *((char*)(src->m_memStart + i*src->m_rowSize + j*src->m_pixSize + k*src->m_bandSize));
    }
    if (strcmp(depth, "unsigned short") == 0)
    {
        dst = Mat(src->m_shape.height, src->m_shape.width, CV_16UC(src->m_shape.nBands));
        for (int k = 0; k < src->m_shape.nBands; k++)
            for (int i = 0; i < src->m_shape.height; i++)
                for (int j = 0; j < src->m_shape.width; j++)
                    *((unsigned short*)(dst.data + i*dst.step + j*dst.elemSize() + k*dst.elemSize1())) = *((unsigned short*)(src->m_memStart + i*src->m_rowSize + j*src->m_pixSize + k*src->m_bandSize));
    }
    if (strcmp(depth, "short") == 0)
    {
        dst = Mat(src->m_shape.height, src->m_shape.width, CV_16SC(src->m_shape.nBands));
        for (int k = 0; k < src->m_shape.nBands; k++)
            for (int i = 0; i < src->m_shape.height; i++)
                for (int j = 0; j < src->m_shape.width; j++)
                    *((short*)(dst.data + i*dst.step + j*dst.elemSize() + k*dst.elemSize1())) = *((short*)(src->m_memStart + i*src->m_rowSize + j*src->m_pixSize + k*src->m_bandSize));
    }
    if (strcmp(depth, "float") == 0)
    {
        dst = Mat(src->m_shape.height, src->m_shape.width, CV_32FC(src->m_shape.nBands));
        for (int k = 0; k < src->m_shape.nBands; k++)
            for (int i = 0; i < src->m_shape.height; i++)
                for (int j = 0; j < src->m_shape.width; j++)
                    *((float*)(dst.data + i*dst.step + j*dst.elemSize() + k*dst.elemSize1())) = *((float*)(src->m_memStart + i*src->m_rowSize + j*src->m_pixSize + k*src->m_bandSize));
    }
    if (strcmp(depth, "int") == 0)
    {
        dst = Mat(src->m_shape.height, src->m_shape.width, CV_32SC(src->m_shape.nBands));
        for (int k = 0; k < src->m_shape.nBands; k++)
            for (int i = 0; i < src->m_shape.height; i++)
                for (int j = 0; j < src->m_shape.width; j++)
                    *((int*)(dst.data + i*dst.step + j*dst.elemSize() + k*dst.elemSize1())) = *((int*)(src->m_memStart + i*src->m_rowSize + j*src->m_pixSize + k*src->m_bandSize));
    }
    if (strcmp(depth, "double") == 0)
    {
        dst = Mat(src->m_shape.height, src->m_shape.width, CV_64FC(src->m_shape.nBands));
        for (int k = 0; k < src->m_shape.nBands; k++)
            for (int i = 0; i < src->m_shape.height; i++)
                for (int j = 0; j < src->m_shape.width; j++)
                    *((double*)(dst.data + i*dst.step + j*dst.elemSize() + k*dst.elemSize1())) = *((double*)(src->m_memStart + i*src->m_rowSize + j*src->m_pixSize + k*src->m_bandSize));
    }
    return dst;
}


template <class T> CImageOf<T> MatToImg(Mat* src)
{
    CImageOf<T> dst;
    CShape shape(src->cols, src->rows, src->channels());
    dst.ReAllocate(shape);
    const char *depth = dst.m_pTI->name();

    if (strcmp(depth, "unsigned char") == 0)
    {
        for (int k = 0; k < dst.m_shape.nBands; k++)
            for (int i = 0; i < dst.m_shape.height; i++)
                for (int j = 0; j < dst.m_shape.width; j++)
                    *((unsigned char*)(dst.m_memStart + i*dst.m_rowSize + j*dst.m_pixSize + k*dst.m_bandSize)) = *((unsigned char*)(src->data + i*src->step + j*src->elemSize() + k*src->elemSize1()));
    }
    if (strcmp(depth, "char") == 0)
    {
        for (int k = 0; k < dst.m_shape.nBands; k++)
            for (int i = 0; i < dst.m_shape.height; i++)
                for (int j = 0; j < dst.m_shape.width; j++)
                    *((char*)(dst.m_memStart + i*dst.m_rowSize + j*dst.m_pixSize + k*dst.m_bandSize)) = *((char*)(src->data + i*src->step + j*src->elemSize() + k*src->elemSize1()));
    }
    if (strcmp(depth, "unsigned short") == 0)
    {
        for (int k = 0; k < dst.m_shape.nBands; k++)
            for (int i = 0; i < dst.m_shape.height; i++)
                for (int j = 0; j < dst.m_shape.width; j++)
                    *((unsigned short*)(dst.m_memStart + i*dst.m_rowSize + j*dst.m_pixSize + k*dst.m_bandSize)) = *((unsigned short*)(src->data + i*src->step + j*src->elemSize() + k*src->elemSize1()));
    }
    if (strcmp(depth, "short") == 0)
    {
        for (int k = 0; k < dst.m_shape.nBands; k++)
            for (int i = 0; i < dst.m_shape.height; i++)
                for (int j = 0; j < dst.m_shape.width; j++)
                    *((short*)(dst.m_memStart + i*dst.m_rowSize + j*dst.m_pixSize + k*dst.m_bandSize)) = *((short*)(src->data + i*src->step + j*src->elemSize() + k*src->elemSize1()));
    }
    if (strcmp(depth, "float") == 0)
    {
        for (int k = 0; k < dst.m_shape.nBands; k++)
            for (int i = 0; i < dst.m_shape.height; i++)
                for (int j = 0; j < dst.m_shape.width; j++)
                    *((float*)(dst.m_memStart + i*dst.m_rowSize + j*dst.m_pixSize + k*dst.m_bandSize)) = *((float*)(src->data + i*src->step + j*src->elemSize() + k*src->elemSize1()));
    }
    if (strcmp(depth, "int") == 0)
    {
        for (int k = 0; k < dst.m_shape.nBands; k++)
            for (int i = 0; i < dst.m_shape.height; i++)
                for (int j = 0; j < dst.m_shape.width; j++)
                    *((int*)(dst.m_memStart + i*dst.m_rowSize + j*dst.m_pixSize + k*dst.m_bandSize)) = *((int*)(src->data + i*src->step + j*src->elemSize() + k*src->elemSize1()));
    }
    if (strcmp(depth, "double") == 0)
    {
        for (int k = 0; k < dst.m_shape.nBands; k++)
            for (int i = 0; i < dst.m_shape.height; i++)
                for (int j = 0; j < dst.m_shape.width; j++)
                    *((double*)(dst.m_memStart + i*dst.m_rowSize + j*dst.m_pixSize + k*dst.m_bandSize)) = *((double*)(src->data + i*src->step + j*src->elemSize() + k*src->elemSize1()));
    }
    return dst;
}


// Debug helper: converts one image to a Mat with ImgToMat and writes it to
// the file "./../TestData/" + name under the key "mat".
template <class T> void saveXML(string name, CImageOf<T>* src)
{
    // Convert before touching the file system.
    Mat converted = ImgToMat<T>(src);

    FileStorage storage;
    storage.open("./../TestData/" + name, FileStorage::WRITE);
    storage << "mat";
    storage << converted;
    storage.release();
}

// Debug helper: converts the first `count` images of the array `src` to Mats
// with ImgToMat and writes them as one sequence to the file
// "./../TestData/" + name under the key "vectorMat".
template <class T> void saveXML(string name, CImageOf<T>* src, int count)
{
    vector<Mat> mats;
    int idx = 0;
    while (idx < count)
    {
        mats.push_back(ImgToMat<T>(src + idx));
        ++idx;
    }

    FileStorage storage;
    storage.open("./../TestData/" + name, FileStorage::WRITE);
    storage << "vectorMat";
    storage << mats;
    storage.release();
}


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章