dlut_libvo: matcher.cpp Source File

Go to the documentation of this file.
00001 /*
00002    Copyright 2012. All rights reserved.
00003    Institute of Measurement and Control Systems
00004    Karlsruhe Institute of Technology, Germany
00005 
00006    This file is part of libviso2.
00007 Authors: Andreas Geiger
00008 
00009 libviso2 is free software; you can redistribute it and/or modify it under the
00010 terms of the GNU General Public License as published by the Free Software
00011 Foundation; either version 2 of the License, or any later version.
00012 
00013 libviso2 is distributed in the hope that it will be useful, but WITHOUT ANY
00014 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
00015 PARTICULAR PURPOSE. See the GNU General Public License for more details.
00016 
00017 You should have received a copy of the GNU General Public License along with
00018 libviso2; if not, write to the Free Software Foundation, Inc., 51 Franklin
00019 Street, Fifth Floor, Boston, MA 02110-1301, USA
00020 */
00021 
00022 #include "matcher.h"
00023 #include "triangle.h"
00024 #include "filter.h"
00025 
00026 using namespace std;
00027 
00029 // PUBLIC FUNCTIONS //
00031 
00032 // constructor (with default parameters)
00033 Matcher::Matcher(parameters param) : param(param) {
00034 
00035     // init match ring buffer to zero
00036     m1p1 = 0; n1p1 = 0;
00037     m1p2 = 0; n1p2 = 0;
00038     m2p1 = 0; n2p1 = 0;
00039     m2p2 = 0; n2p2 = 0;
00040     m1c1 = 0; n1c1 = 0;
00041     m1c2 = 0; n1c2 = 0;
00042     m2c1 = 0; n2c1 = 0;
00043     m2c2 = 0; n2c2 = 0;
00044     I1p    = 0; I2p    = 0;
00045     I1c    = 0; I2c    = 0;
00046     I1p_du = 0; I1p_dv = 0;
00047     I2p_du = 0; I2p_dv = 0;
00048     I1c_du = 0; I1c_dv = 0;
00049     I2c_du = 0; I2c_dv = 0;
00050     I1p_du_full = 0; I1p_dv_full = 0;
00051     I2p_du_full = 0; I2p_dv_full = 0;
00052     I1c_du_full = 0; I1c_dv_full = 0;
00053     I2c_du_full = 0; I2c_dv_full = 0;
00054 
00055     // margin needed to compute descriptor + sobel responses
00056     margin = 5+1;
00057 
00058     // adjust match radius on half resolution
00059     if (param.half_resolution)
00060         this->param.match_radius /= 2;
00061 }
00062 
00063 // deconstructor
00064 Matcher::~Matcher() {
00065     if (I1p)          _mm_free(I1p);
00066     if (I2p)          _mm_free(I2p);
00067     if (I1c)          _mm_free(I1c);
00068     if (I2c)          _mm_free(I2c);
00069     if (m1p1)         _mm_free(m1p1);
00070     if (m1p2)         _mm_free(m1p2);
00071     if (I1p_du)       _mm_free(I1p_du);
00072     if (I1p_dv)       _mm_free(I1p_dv);
00073     if (I1p_du_full)  _mm_free(I1p_du_full);
00074     if (I1p_dv_full)  _mm_free(I1p_dv_full);
00075     if (m2p1)         _mm_free(m2p1);
00076     if (m2p2)         _mm_free(m2p2);
00077     if (I2p_du)       _mm_free(I2p_du);
00078     if (I2p_dv)       _mm_free(I2p_dv);
00079     if (I2p_du_full)  _mm_free(I2p_du_full);
00080     if (I2p_dv_full)  _mm_free(I2p_dv_full);
00081     if (m1c1)         _mm_free(m1c1);
00082     if (m1c2)         _mm_free(m1c2);
00083     if (I1c_du)       _mm_free(I1c_du);
00084     if (I1c_dv)       _mm_free(I1c_dv);
00085     if (I1c_du_full)  _mm_free(I1c_du_full);
00086     if (I1c_dv_full)  _mm_free(I1c_dv_full);
00087     if (m2c1)         _mm_free(m2c1);
00088     if (m2c2)         _mm_free(m2c2);
00089     if (I2c_du)       _mm_free(I2c_du);
00090     if (I2c_dv)       _mm_free(I2c_dv);
00091     if (I2c_du_full)  _mm_free(I2c_du_full);
00092     if (I2c_dv_full)  _mm_free(I2c_dv_full);
00093 }
00094 
00095 void Matcher::pushBack (uint8_t *I1,uint8_t* I2,int32_t* dims,const bool replace) {
00096 
00097     // image dimensions
00098     int32_t width  = dims[0];
00099     int32_t height = dims[1];
00100     //bytes per line
00101     int32_t bpl    = dims[2];
00102 
00103     // sanity check
00104     // Why not assert?!
00105     if (width<=0 || height<=0 || bpl<width || I1==0) {
00106         cerr << "ERROR: Image dimension mismatch!" << endl;
00107         return;
00108     }
00109 
00110     if (replace) {
00111         if (I1c)         _mm_free(I1c);
00112         if (I2c)         _mm_free(I2c);
00113         if (m1c1)        _mm_free(m1c1);
00114         if (m1c2)        _mm_free(m1c2);
00115         if (I1c_du)      _mm_free(I1c_du);
00116         if (I1c_dv)      _mm_free(I1c_dv);
00117         if (I1c_du_full) _mm_free(I1c_du_full);
00118         if (I1c_dv_full) _mm_free(I1c_dv_full);
00119         if (m2c1)        _mm_free(m2c1);
00120         if (m2c2)        _mm_free(m2c2);
00121         if (I2c_du)      _mm_free(I2c_du);
00122         if (I2c_dv)      _mm_free(I2c_dv);
00123         if (I2c_du_full) _mm_free(I2c_du_full);
00124         if (I2c_dv_full) _mm_free(I2c_dv_full);
00125     } else {
00126         if (I1p)         _mm_free(I1p);
00127         if (I2p)         _mm_free(I2p);
00128         if (m1p1)        _mm_free(m1p1);
00129         if (m1p2)        _mm_free(m1p2);
00130         if (I1p_du)      _mm_free(I1p_du);
00131         if (I1p_dv)      _mm_free(I1p_dv);
00132         if (I1p_du_full) _mm_free(I1p_du_full);
00133         if (I1p_dv_full) _mm_free(I1p_dv_full);
00134         if (m2p1)        _mm_free(m2p1);
00135         if (m2p2)        _mm_free(m2p2);
00136         if (I2p_du)      _mm_free(I2p_du);
00137         if (I2p_dv)      _mm_free(I2p_dv);
00138         if (I2p_du_full) _mm_free(I2p_du_full);
00139         if (I2p_dv_full) _mm_free(I2p_dv_full);
00140         m1p1 = m1c1; n1p1 = n1c1;
00141         m1p2 = m1c2; n1p2 = n1c2;
00142         m2p1 = m2c1; n2p1 = n2c1;
00143         m2p2 = m2c2; n2p2 = n2c2;
00144         I1p         = I1c;
00145         I2p         = I2c;
00146         I1p_du      = I1c_du;
00147         I1p_dv      = I1c_dv;
00148         I1p_du_full = I1c_du_full;
00149         I1p_dv_full = I1c_dv_full;
00150         I2p_du      = I2c_du;
00151         I2p_dv      = I2c_dv;
00152         I2p_du_full = I2c_du_full;
00153         I2p_dv_full = I2c_dv_full;
00154         dims_p[0]   = dims_c[0];
00155         dims_p[1]   = dims_c[1];
00156         dims_p[2]   = dims_c[2];
00157     }
00158 
00159     // set new dims (bytes per line must be multiple of 16)
00160     dims_c[0] = width;
00161     dims_c[1] = height;
00162     dims_c[2] = width + 15-(width-1)%16;
00163 
00164     // copy images to byte aligned memory
00165     // uint8_t: typedefine, unsigned int , 1 bytes
00166     // _mm_malloc: aligned memory, 16 is the alignment constraint
00167     I1c = (uint8_t*)_mm_malloc(dims_c[2]*dims_c[1]*sizeof(uint8_t),16);
00168     I2c = (uint8_t*)_mm_malloc(dims_c[2]*dims_c[1]*sizeof(uint8_t),16);
00169     if (dims_c[2]==bpl) {
00170         memcpy(I1c,I1,dims_c[2]*dims_c[1]*sizeof(uint8_t));
00171         if (I2!=0)
00172             memcpy(I2c,I2,dims_c[2]*dims_c[1]*sizeof(uint8_t));
00173     } else {
00174         for (int32_t v=0; v<height; v++) {
00175             memcpy(I1c+v*dims_c[2],I1+v*bpl,dims_c[0]*sizeof(uint8_t));
00176             if (I2!=0)
00177                 memcpy(I2c+v*dims_c[2],I2+v*bpl,dims_c[0]*sizeof(uint8_t));
00178         }
00179     }
00180 
00181     // compute new features for current frame
00182     computeFeatures(I1c,dims_c,m1c1,n1c1,m1c2,n1c2,I1c_du,I1c_dv,I1c_du_full,I1c_dv_full);
00183     if (I2!=0)
00184         computeFeatures(I2c,dims_c,m2c1,n2c1,m2c2,n2c2,I2c_du,I2c_dv,I2c_du_full,I2c_dv_full);
00185 }
00186 
00187 void Matcher::matchFeatures(int32_t method, Matrix *Tr_delta) {
00188 
00190     // sanity check //
00192 
00193     // flow
00194     if (method==0) {
00195         if (m1p2==0 || n1p2==0 || m1c2==0 || n1c2==0)
00196             return;
00197         if (param.multi_stage)
00198             if (m1p1==0 || n1p1==0 || m1c1==0 || n1c1==0)
00199                 return;
00200 
00201         // stereo
00202     } else if (method==1) {
00203         if (m1c2==0 || n1c2==0 || m2c2==0 || n2c2==0)
00204             return;
00205         if (param.multi_stage)
00206             if (m1c1==0 || n1c1==0 || m2c1==0 || n2c1==0)
00207                 return;
00208 
00209         // quad matching
00210     } else {
00211         if (m1p2==0 || n1p2==0 || m2p2==0 || n2p2==0 || m1c2==0 || n1c2==0 || m2c2==0 || n2c2==0)
00212             return;
00213         if (param.multi_stage)
00214             if (m1p1==0 || n1p1==0 || m2p1==0 || n2p1==0 || m1c1==0 || n1c1==0 || m2c1==0 || n2c1==0)
00215                 return;
00216     }
00217 
00218     // clear old matches
00219     p_matched_1.clear();
00220     p_matched_2.clear();
00221 
00222     // double pass matching
00223     if (param.multi_stage) {
00224 
00225         //printf("multi_stage\n");
00226         // 1st pass (sparse matches)
00227         // methon=2
00228         // use_prior false
00229         matching(m1p1,m2p1,m1c1,m2c1,n1p1,n2p1,n1c1,n2c1,p_matched_1,method,false,Tr_delta);
00230         removeOutliers(p_matched_1,method);
00231 
00232         // compute search range prior statistics (used for speeding up 2nd pass)
00233         computePriorStatistics(p_matched_1,method);
00234 
00235         // 2nd pass (dense matches)
00236         matching(m1p2,m2p2,m1c2,m2c2,n1p2,n2p2,n1c2,n2c2,p_matched_2,method,true,Tr_delta);
00237         /*printf("param.refinement=%d",param.refinement);*/
00238         //param.refinement=1 pixel; 2 subpixel
00239         if (param.refinement>0)
00240             refinement(p_matched_2,method);
00241         removeOutliers(p_matched_2,method);
00242 
00243         // single pass matching
00244     } else {
00245         matching(m1p2,m2p2,m1c2,m2c2,n1p2,n2p2,n1c2,n2c2,p_matched_2,method,false,Tr_delta);
00246         if (param.refinement>0)
00247             refinement(p_matched_2,method);
00248         removeOutliers(p_matched_2,method);
00249     }
00250 }
00251 
00252 void Matcher::bucketFeatures(int32_t max_features,float bucket_width,float bucket_height) {
00253 
00254     // find max values
00255     float u_max = 0;
00256     float v_max = 0;
00257     for (vector<p_match>::iterator it = p_matched_2.begin(); it!=p_matched_2.end(); it++) {
00258         if (it->u1c>u_max) u_max=it->u1c;
00259         if (it->v1c>v_max) v_max=it->v1c;
00260     }
00261 
00262     // allocate number of buckets needed
00263     int32_t bucket_cols = (int32_t)floor(u_max/bucket_width)+1;
00264     int32_t bucket_rows = (int32_t)floor(v_max/bucket_height)+1;
00265     vector<p_match> *buckets = new vector<p_match>[bucket_cols*bucket_rows];
00266 
00267     // assign matches to their buckets
00268     for (vector<p_match>::iterator it=p_matched_2.begin(); it!=p_matched_2.end(); it++) {
00269         int32_t u = (int32_t)floor(it->u1c/bucket_width);
00270         int32_t v = (int32_t)floor(it->v1c/bucket_height);
00271         buckets[v*bucket_cols+u].push_back(*it);
00272     }
00273 
00274     // refill p_matched from buckets
00275     p_matched_2.clear();
00276     for (int32_t i=0; i<bucket_cols*bucket_rows; i++) {
00277 
00278         // shuffle bucket indices randomly
00279         std::random_shuffle(buckets[i].begin(),buckets[i].end());
00280 
00281         // add up to max_features features from this bucket to p_matched
00282         int32_t k=0;
00283         for (vector<p_match>::iterator it=buckets[i].begin(); it!=buckets[i].end(); it++) {
00284             p_matched_2.push_back(*it);
00285             k++;
00286             if (k>=max_features)
00287                 break;
00288         }
00289     }
00290 
00291     // free buckets
00292     delete []buckets;
00293 }
00294 
00295 float Matcher::getGain (vector<int32_t> inliers) {
00296 
00297     // check if two images are provided and matched
00298     if (I1p==0 || I1c==0 || p_matched_2.size()==0 || inliers.size()==0)
00299         return 1;
00300 
00301     int32_t window_size = 3;
00302     float   gain        = 0;
00303     int32_t num         = 0;
00304     int32_t u_min,u_max,v_min,v_max;
00305     float   mean_prev,mean_curr;
00306 
00307     for (vector<int32_t>::iterator it=inliers.begin(); it!=inliers.end(); it++) {
00308         if (*it<(int32_t)p_matched_2.size()) {
00309 
00310             // mean in previous image
00311             u_min = min(max((int32_t)p_matched_2[*it].u1p-window_size,0),dims_p[0]);
00312             u_max = min(max((int32_t)p_matched_2[*it].u1p+window_size,0),dims_p[0]);
00313             v_min = min(max((int32_t)p_matched_2[*it].v1p-window_size,0),dims_p[1]);
00314             v_max = min(max((int32_t)p_matched_2[*it].v1p+window_size,0),dims_p[1]);
00315             mean_prev = mean(I1p,dims_p[2],u_min,u_max,v_min,v_max);
00316 
00317             // mean in current image
00318             u_min = min(max((int32_t)p_matched_2[*it].u1c-window_size,0),dims_p[0]);
00319             u_max = min(max((int32_t)p_matched_2[*it].u1c+window_size,0),dims_p[0]);
00320             v_min = min(max((int32_t)p_matched_2[*it].v1c-window_size,0),dims_p[1]);
00321             v_max = min(max((int32_t)p_matched_2[*it].v1c+window_size,0),dims_p[1]);
00322             mean_curr = mean(I1c,dims_c[2],u_min,u_max,v_min,v_max);
00323 
00324             if (mean_prev>10) {
00325                 gain += mean_curr/mean_prev;
00326                 num++;
00327             }
00328         }
00329     }
00330 
00331     if (num>0) return gain/=(float)num;
00332     else       return 1;
00333 }
00334 
00336 // PRIVATE FUNCTIONS //
00338 
00339 void Matcher::nonMaximumSuppression (int16_t* I_f1,int16_t* I_f2,const int32_t* dims,vector<Matcher::maximum> &maxima,int32_t nms_n) {
00340 
00341     // extract parameters
00342     int32_t width  = dims[0];
00343     int32_t height = dims[1];
00344     int32_t bpl    = dims[2];
00345     int32_t n      = nms_n;
00346     int32_t tau    = param.nms_tau;
00347 
00348     // loop variables
00349     int32_t f1mini,f1minj,f1maxi,f1maxj,f2mini,f2minj,f2maxi,f2maxj;
00350     int32_t f1minval,f1maxval,f2minval,f2maxval,currval;
00351     int32_t addr;
00352 
00353     for (int32_t i=n+margin; i<width-n-margin;i+=n+1) {
00354         for (int32_t j=n+margin; j<height-n-margin;j+=n+1) {
00355 
00356             f1mini = i; f1minj = j; f1maxi = i; f1maxj = j;
00357             f2mini = i; f2minj = j; f2maxi = i; f2maxj = j;
00358 
00359             addr     = getAddressOffsetImage(i,j,bpl);
00360             f1minval = *(I_f1+addr);
00361             f1maxval = f1minval;
00362             f2minval = *(I_f2+addr);
00363             f2maxval = f2minval;
00364 
00365             for (int32_t i2=i; i2<=i+n; i2++) {
00366                 for (int32_t j2=j; j2<=j+n; j2++) {
00367                     addr    = getAddressOffsetImage(i2,j2,bpl);
00368                     currval = *(I_f1+addr);
00369                     if (currval<f1minval) {
00370                         f1mini   = i2;
00371                         f1minj   = j2;
00372                         f1minval = currval;
00373                     } else if (currval>f1maxval) {
00374                         f1maxi   = i2;
00375                         f1maxj   = j2;
00376                         f1maxval = currval;
00377                     }
00378                     currval = *(I_f2+addr);
00379                     if (currval<f2minval) {
00380                         f2mini   = i2;
00381                         f2minj   = j2;
00382                         f2minval = currval;
00383                     } else if (currval>f2maxval) {
00384                         f2maxi   = i2;
00385                         f2maxj   = j2;
00386                         f2maxval = currval;
00387                     }
00388                 }
00389             }
00390 
00391             // f1 minimum
00392             for (int32_t i2=f1mini-n; i2<=min(f1mini+n,width-1-margin); i2++) {
00393                 for (int32_t j2=f1minj-n; j2<=min(f1minj+n,height-1-margin); j2++) {
00394                     currval = *(I_f1+getAddressOffsetImage(i2,j2,bpl));
00395                     if (currval<f1minval && (i2<i || i2>i+n || j2<j || j2>j+n))
00396                         goto failed_f1min;
00397                 }
00398             }
00399             if (f1minval<=-tau)
00400                 maxima.push_back(Matcher::maximum(f1mini,f1minj,f1minval,0));
00401 failed_f1min:;
00402 
00403              // f1 maximum
00404              for (int32_t i2=f1maxi-n; i2<=min(f1maxi+n,width-1-margin); i2++) {
00405                  for (int32_t j2=f1maxj-n; j2<=min(f1maxj+n,height-1-margin); j2++) {
00406                      currval = *(I_f1+getAddressOffsetImage(i2,j2,bpl));
00407                      if (currval>f1maxval && (i2<i || i2>i+n || j2<j || j2>j+n))
00408                          goto failed_f1max;
00409                  }
00410              }
00411              if (f1maxval>=tau)
00412                  maxima.push_back(Matcher::maximum(f1maxi,f1maxj,f1maxval,1));
00413 failed_f1max:;
00414 
00415              // f2 minimum
00416              for (int32_t i2=f2mini-n; i2<=min(f2mini+n,width-1-margin); i2++) {
00417                  for (int32_t j2=f2minj-n; j2<=min(f2minj+n,height-1-margin); j2++) {
00418                      currval = *(I_f2+getAddressOffsetImage(i2,j2,bpl));
00419                      if (currval<f2minval && (i2<i || i2>i+n || j2<j || j2>j+n))
00420                          goto failed_f2min;
00421                  }
00422              }
00423              if (f2minval<=-tau)
00424                  maxima.push_back(Matcher::maximum(f2mini,f2minj,f2minval,2));
00425 failed_f2min:;
00426 
00427              // f2 maximum
00428              for (int32_t i2=f2maxi-n; i2<=min(f2maxi+n,width-1-margin); i2++) {
00429                  for (int32_t j2=f2maxj-n; j2<=min(f2maxj+n,height-1-margin); j2++) {
00430                      currval = *(I_f2+getAddressOffsetImage(i2,j2,bpl));
00431                      if (currval>f2maxval && (i2<i || i2>i+n || j2<j || j2>j+n))
00432                          goto failed_f2max;
00433                  }
00434              }
00435              if (f2maxval>=tau)
00436                  maxima.push_back(Matcher::maximum(f2maxi,f2maxj,f2maxval,3));
00437 failed_f2max:;
00438         }
00439     }
00440 }
00441 
00442 inline void Matcher::computeDescriptor (const uint8_t* I_du,const uint8_t* I_dv,const int32_t &bpl,const int32_t &u,const int32_t &v,uint8_t *desc_addr) {
00443 
00444     // get address indices
00445     int32_t addr_m1 = getAddressOffsetImage(u,v-1,bpl);
00446     int32_t addr_m3 = addr_m1-2*bpl;
00447     int32_t addr_m5 = addr_m3-2*bpl;
00448     int32_t addr_p1 = addr_m1+2*bpl;
00449     int32_t addr_p3 = addr_p1+2*bpl;
00450     int32_t addr_p5 = addr_p3+2*bpl;
00451 
00452     // compute descriptor
00453     uint32_t k = 0;
00454     desc_addr[k++] = I_du[addr_m1-3];
00455     desc_addr[k++] = I_dv[addr_m1-3];
00456     /*printf("k0=%d,k1=%d",I_du[addr_m1-3],I_dv[addr_m1-3]);*/
00457     desc_addr[k++] = I_du[addr_p1-3];
00458     desc_addr[k++] = I_dv[addr_p1-3];
00459     desc_addr[k++] = I_du[addr_m1-1];
00460     desc_addr[k++] = I_dv[addr_m1-1];
00461     desc_addr[k++] = I_du[addr_p1-1];
00462     desc_addr[k++] = I_dv[addr_p1-1];
00463     desc_addr[k++] = I_du[addr_m1+3];
00464     desc_addr[k++] = I_dv[addr_m1+3];
00465     desc_addr[k++] = I_du[addr_p1+3];
00466     desc_addr[k++] = I_dv[addr_p1+3];
00467     desc_addr[k++] = I_du[addr_m1+1];
00468     desc_addr[k++] = I_dv[addr_m1+1];
00469     desc_addr[k++] = I_du[addr_p1+1];
00470     desc_addr[k++] = I_dv[addr_p1+1];
00471     desc_addr[k++] = I_du[addr_m5-1];
00472     desc_addr[k++] = I_dv[addr_m5-1];
00473     desc_addr[k++] = I_du[addr_p5-1];
00474     desc_addr[k++] = I_dv[addr_p5-1];
00475     desc_addr[k++] = I_du[addr_m5+1];
00476     desc_addr[k++] = I_dv[addr_m5+1];
00477     desc_addr[k++] = I_du[addr_p5+1];
00478     desc_addr[k++] = I_dv[addr_p5+1];
00479     desc_addr[k++] = I_du[addr_m3-5];
00480     desc_addr[k++] = I_dv[addr_m3-5];
00481     desc_addr[k++] = I_du[addr_p3-5];
00482     desc_addr[k++] = I_dv[addr_p3-5];
00483     desc_addr[k++] = I_du[addr_m3+5];
00484     desc_addr[k++] = I_dv[addr_m3+5];
00485     desc_addr[k++] = I_du[addr_p3+5];
00486     desc_addr[k++] = I_dv[addr_p3+5];
00487     //k=32
00488 }
00489 
00490 inline void Matcher::computeSmallDescriptor (const uint8_t* I_du,const uint8_t* I_dv,const int32_t &bpl,const int32_t &u,const int32_t &v,uint8_t *desc_addr) {
00491 
00492     // get address indices
00493     int32_t addr2 = getAddressOffsetImage(u,v,bpl);
00494     int32_t addr1 = addr2-bpl;
00495     int32_t addr0 = addr1-bpl;
00496     int32_t addr3 = addr2+bpl;
00497     int32_t addr4 = addr3+bpl;
00498 
00499     // compute ELAS-descriptor
00500     uint32_t k = 0;
00501     desc_addr[k++] = I_du[addr0];
00502     desc_addr[k++] = I_du[addr1-2];
00503     desc_addr[k++] = I_du[addr1];
00504     desc_addr[k++] = I_du[addr1+2];
00505     desc_addr[k++] = I_du[addr2-1];
00506     desc_addr[k++] = I_du[addr2];
00507     desc_addr[k++] = I_du[addr2];
00508     desc_addr[k++] = I_du[addr2+1];
00509     desc_addr[k++] = I_du[addr3-2];
00510     desc_addr[k++] = I_du[addr3];
00511     desc_addr[k++] = I_du[addr3+2];
00512     desc_addr[k++] = I_du[addr4];
00513     desc_addr[k++] = I_dv[addr1];
00514     desc_addr[k++] = I_dv[addr2-1];
00515     desc_addr[k++] = I_dv[addr2+1];
00516     desc_addr[k++] = I_dv[addr3];
00517 }
00518 
00519 void Matcher::computeDescriptors (uint8_t* I_du,uint8_t* I_dv,const int32_t bpl,std::vector<Matcher::maximum> &maxima) {
00520 
00521     // loop variables
00522     int32_t u,v;
00523     uint8_t *desc_addr;
00524 
00525     // for all maxima do
00526     for (vector<Matcher::maximum>::iterator it=maxima.begin(); it!=maxima.end(); it++) {
00527         u = (*it).u;
00528         v = (*it).v;
00529         desc_addr = (uint8_t*)(&((*it).d1));
00530         computeDescriptor(I_du,I_dv,bpl,u,v,desc_addr);
00531     }
00532 }
00533 
00534 inline uint8_t Matcher::saturate (int16_t in) {
00535     if (in<0)   return 0;
00536     if (in>255) return 255;
00537     return in;
00538 }
00539 
00540 void Matcher::filterImageAll (uint8_t* I,uint8_t* I_du,uint8_t* I_dv,int16_t* I_f1,int16_t* I_f2,const int* dims) {
00541 
00542     // get bpl and height
00543     const int32_t height = dims[1];
00544     const int32_t bpl    = dims[2];
00545 
00546     // init sobel pointers
00547     const uint8_t* p00 = I + 0;
00548     const uint8_t* p01 = I + 1;
00549     const uint8_t* p02 = I + 2;
00550     const uint8_t* p03 = I + 3;
00551     const uint8_t* p04 = I + 4;
00552     const uint8_t* p10 = I + 0 + bpl;
00553     const uint8_t* p11 = I + 1 + bpl;
00554     const uint8_t* p12 = I + 2 + bpl;
00555     const uint8_t* p13 = I + 3 + bpl;
00556     const uint8_t* p14 = I + 4 + bpl;
00557     const uint8_t* p20 = I + 0 + 2*bpl;
00558     const uint8_t* p21 = I + 1 + 2*bpl;
00559     const uint8_t* p22 = I + 2 + 2*bpl;
00560     const uint8_t* p23 = I + 3 + 2*bpl;
00561     const uint8_t* p24 = I + 4 + 2*bpl;
00562     const uint8_t* p30 = I + 0 + 3*bpl;
00563     const uint8_t* p31 = I + 1 + 3*bpl;
00564     const uint8_t* p32 = I + 2 + 3*bpl;
00565     const uint8_t* p33 = I + 3 + 3*bpl;
00566     const uint8_t* p34 = I + 4 + 3*bpl;
00567     const uint8_t* p40 = I + 0 + 4*bpl;
00568     const uint8_t* p41 = I + 1 + 4*bpl;
00569     const uint8_t* p42 = I + 2 + 4*bpl;
00570     const uint8_t* p43 = I + 3 + 4*bpl;
00571     const uint8_t* p44 = I + 4 + 4*bpl;
00572 
00573     // init output pointers
00574     uint8_t* result_du = I_du +   bpl + 1;
00575     uint8_t* result_dv = I_dv +   bpl + 1;
00576     int16_t* result_f1 = I_f1 + 2*bpl + 2;
00577     int16_t* result_f2 = I_f2 + 2*bpl + 2;
00578 
00579     // stop here
00580     const uint8_t* end_input = I + bpl*height;
00581 
00582     // compute filter responses (border pixels are invalid)
00583     for( ; p44 != end_input; p00++, p01++, p02++, p03++, p04++,
00584             p10++, p11++, p12++, p13++, p14++,
00585             p20++, p21++, p22++, p23++, p24++,
00586             p30++, p31++, p32++, p33++, p34++,
00587             p40++, p41++, p42++, p43++, p44++,
00588             result_du++, result_dv++, result_f1++, result_f2++ ) {
00589         int16_t temp_du = - *p00 - 2 * *p10 - *p20 + *p02 + 2 * *p12 + *p22;
00590         int16_t temp_dv = - *p00 - 2 * *p01 - *p02 + *p20 + 2 * *p21 + *p22;
00591         *result_du = saturate( temp_du/4 + 128 );
00592         *result_dv = saturate( temp_dv/4 + 128 );
00593         *result_f1 = - *p00 - *p01 -     *p02 - *p03 - *p04
00594             - *p10 + *p11 +     *p12 + *p13 - *p14
00595             - *p20 + *p21 + 8 * *p22 + *p23 - *p24
00596             - *p30 + *p31 +     *p32 + *p33 - *p34
00597             - *p40 - *p41 -     *p42 - *p43 - *p44;
00598         *result_f2 = - *p00 - *p01 + *p03 + *p04
00599             - *p10 - *p11 + *p13 + *p14
00600             + *p30 + *p31 - *p33 - *p34
00601             + *p40 + *p41 - *p43 - *p44;
00602     }
00603 }
00604 
00605 void Matcher::filterImageSobel (uint8_t* I,uint8_t* I_du,uint8_t* I_dv,const int* dims) {
00606 
00607     // get image width and height
00608     const int32_t height = dims[1];
00609     const int32_t bpl    = dims[2];
00610 
00611     // init sobel pointers
00612     const uint8_t* p00 = I + 0;
00613     const uint8_t* p01 = I + 1;
00614     const uint8_t* p02 = I + 2;
00615     const uint8_t* p10 = I + 0 + bpl;
00616     const uint8_t* p11 = I + 1 + bpl;
00617     const uint8_t* p12 = I + 2 + bpl;
00618     const uint8_t* p20 = I + 0 + 2*bpl;
00619     const uint8_t* p21 = I + 1 + 2*bpl;
00620     const uint8_t* p22 = I + 2 + 2*bpl;
00621 
00622     // init output pointers
00623     uint8_t* result_du = I_du + bpl + 1;
00624     uint8_t* result_dv = I_dv + bpl + 1;
00625 
00626     // stop here
00627     const uint8_t* end_input = I + bpl*height;
00628 
00629     // compute filter responses (border pixels are invalid)
00630     for( ; p22 != end_input; p00++, p01++, p02++,
00631             p10++, p11++, p12++,
00632             p20++, p21++, p22++,
00633             result_du++, result_dv++) {
00634         int16_t temp_du = - *p00 - 2 * *p10 - *p20 + *p02 + 2 * *p12 + *p22;
00635         int16_t temp_dv = - *p00 - 2 * *p01 - *p02 + *p20 + 2 * *p21 + *p22;
00636         *result_du = saturate( temp_du/4 + 128 );
00637         *result_dv = saturate( temp_dv/4 + 128 );
00638     }
00639 }
00640 
00641 void Matcher::getHalfResolutionDimensions(const int32_t *dims,int32_t *dims_half) {
00642     dims_half[0] = dims[0]/2;
00643     dims_half[1] = dims[1]/2;
00644     dims_half[2] = dims_half[0]+15-(dims_half[0]-1)%16;
00645 }
00646 
00647 uint8_t* Matcher::createHalfResolutionImage(uint8_t *I,const int32_t* dims) {
00648     int32_t dims_half[3];
00649     getHalfResolutionDimensions(dims,dims_half);
00650     uint8_t* I_half = (uint8_t*)_mm_malloc(dims_half[2]*dims_half[1]*sizeof(uint8_t),16);
00651     for (int32_t v=0; v<dims_half[1]; v++)
00652         for (int32_t u=0; u<dims_half[0]; u++)
00653             I_half[v*dims_half[2]+u] =  (uint8_t)(((int32_t)I[(v*2+0)*dims[2]+u*2+0]+
00654                         (int32_t)I[(v*2+0)*dims[2]+u*2+1]+
00655                         (int32_t)I[(v*2+1)*dims[2]+u*2+0]+
00656                         (int32_t)I[(v*2+1)*dims[2]+u*2+1])/4);
00657     return I_half;
00658 }
00659 
00660 void Matcher::computeFeatures (uint8_t *I,const int32_t* dims,int32_t* &max1,int32_t &num1,int32_t* &max2,int32_t &num2,uint8_t* &I_du,uint8_t* &I_dv,uint8_t* &I_du_full,uint8_t* &I_dv_full) {
00661 
00662     int16_t *I_f1;
00663     int16_t *I_f2;
00664 
00665     int32_t dims_matching[3];
00666     memcpy(dims_matching,dims,3*sizeof(int32_t));
00667 
00668     // allocate memory for sobel images and filter images
00669     // Default half_resolution is true
00670     if (!param.half_resolution) {
00671         I_du = (uint8_t*)_mm_malloc(dims[2]*dims[1]*sizeof(uint8_t*),16);
00672         I_dv = (uint8_t*)_mm_malloc(dims[2]*dims[1]*sizeof(uint8_t*),16);
00673         I_f1 = (int16_t*)_mm_malloc(dims[2]*dims[1]*sizeof(int16_t),16);
00674         I_f2 = (int16_t*)_mm_malloc(dims[2]*dims[1]*sizeof(int16_t),16);
00675         filter::sobel5x5(I,I_du,I_dv,dims[2],dims[1]);
00676         filter::blob5x5(I,I_f1,dims[2],dims[1]);
00677         filter::checkerboard5x5(I,I_f2,dims[2],dims[1]);
00678     } else {
00679         //分辨率减半,像素的颜色取周边四邻均值
00680         uint8_t* I_matching = createHalfResolutionImage(I,dims);
00681         //dims_matching has been half of dims
00682         getHalfResolutionDimensions(dims,dims_matching);
00683         I_du      = (uint8_t*)_mm_malloc(dims_matching[2]*dims_matching[1]*sizeof(uint8_t*),16);
00684         I_dv      = (uint8_t*)_mm_malloc(dims_matching[2]*dims_matching[1]*sizeof(uint8_t*),16);
00685         I_f1      = (int16_t*)_mm_malloc(dims_matching[2]*dims_matching[1]*sizeof(int16_t),16);
00686         I_f2      = (int16_t*)_mm_malloc(dims_matching[2]*dims_matching[1]*sizeof(int16_t),16);
00687         I_du_full = (uint8_t*)_mm_malloc(dims[2]*dims[1]*sizeof(uint8_t*),16);
00688         I_dv_full = (uint8_t*)_mm_malloc(dims[2]*dims[1]*sizeof(uint8_t*),16);
00689         filter::sobel5x5(I_matching,I_du,I_dv,dims_matching[2],dims_matching[1]);
00690         filter::sobel5x5(I,I_du_full,I_dv_full,dims[2],dims[1]);
00691         filter::blob5x5(I_matching,I_f1,dims_matching[2],dims_matching[1]);
00692         filter::checkerboard5x5(I_matching,I_f2,dims_matching[2],dims_matching[1]);
00693         _mm_free(I_matching);
00694     }
00695 
00696     // extract sparse maxima (1st pass) via non-maximum suppression
00697     vector<Matcher::maximum> maxima1;
00698     if (param.multi_stage) {
00699         int32_t nms_n_sparse = param.nms_n*3;
00700         if (nms_n_sparse>10)
00701             nms_n_sparse = max(param.nms_n,10);
00702         nonMaximumSuppression(I_f1,I_f2,dims_matching,maxima1,nms_n_sparse);
00703         computeDescriptors(I_du,I_dv,dims_matching[2],maxima1);
00704     }
00705 
00706     // extract dense maxima (2nd pass) via non-maximum suppression
00707     vector<Matcher::maximum> maxima2;
00708     nonMaximumSuppression(I_f1,I_f2,dims_matching,maxima2,param.nms_n);
00709     computeDescriptors(I_du,I_dv,dims_matching[2],maxima2);
00710 
00711     // release filter images
00712     _mm_free(I_f1);
00713     _mm_free(I_f2);
00714 
00715     // get number of interest points and init maxima pointer to NULL
00716     num1 = maxima1.size();
00717     num2 = maxima2.size();
00718     //printf("n1=%d,n2=%d\n",num1,num2);
00719     max1 = 0;
00720     max2 = 0;
00721 
00722     int32_t s = 1;
00723     if (param.half_resolution)
00724         s = 2;
00725 
00726     // return sparse maxima as 16-bytes aligned memory
00727     if (num1!=0) {
00728         max1 = (int32_t*)_mm_malloc(sizeof(Matcher::maximum)*num1,16);
00729         int32_t k=0;
00730         for (vector<Matcher::maximum>::iterator it=maxima1.begin(); it!=maxima1.end(); it++) {
00731             *(max1+k++) = it->u*s;  *(max1+k++) = it->v*s;  *(max1+k++) = 0;        *(max1+k++) = it->c;
00732             *(max1+k++) = it->d1;   *(max1+k++) = it->d2;   *(max1+k++) = it->d3;   *(max1+k++) = it->d4;
00733             *(max1+k++) = it->d5;   *(max1+k++) = it->d6;   *(max1+k++) = it->d7;   *(max1+k++) = it->d8;
00734         }
00735     }
00736 
00737     // return dense maxima as 16-bytes aligned memory
00738     if (num2!=0) {
00739         max2 = (int32_t*)_mm_malloc(sizeof(Matcher::maximum)*num2,16);
00740         int32_t k=0;
00741         for (vector<Matcher::maximum>::iterator it=maxima2.begin(); it!=maxima2.end(); it++) {
00742             *(max2+k++) = it->u*s;  *(max2+k++) = it->v*s;  *(max2+k++) = 0;        *(max2+k++) = it->c;
00743             *(max2+k++) = it->d1;   *(max2+k++) = it->d2;   *(max2+k++) = it->d3;   *(max2+k++) = it->d4;
00744             *(max2+k++) = it->d5;   *(max2+k++) = it->d6;   *(max2+k++) = it->d7;   *(max2+k++) = it->d8;
00745         }
00746     }
00747 }
00748 
00749 void Matcher::computePriorStatistics (vector<Matcher::p_match> &p_matched,int32_t method) {
00750 
00751     // compute number of bins
00752     int32_t u_bin_num = (int32_t)ceil((float)dims_c[0]/(float)param.match_binsize);
00753     int32_t v_bin_num = (int32_t)ceil((float)dims_c[1]/(float)param.match_binsize);
00754     int32_t bin_num   = v_bin_num*u_bin_num;
00755 
00756     // number of matching stages
00757     int32_t num_stages = 2;
00758     if (method==2)
00759         num_stages = 4;
00760 
00761     // allocate bin accumulator memory
00762     vector<Matcher::delta> *delta_accu = new vector<Matcher::delta>[bin_num];
00763 
00764     // fill bin accumulator
00765     Matcher::delta delta_curr;
00766     for (vector<Matcher::p_match>::iterator it=p_matched.begin(); it!=p_matched.end(); it++) {
00767 
00768         // method flow: compute position delta
00769         if (method==0) {
00770             delta_curr.val[0] = it->u1p - it->u1c;
00771             delta_curr.val[1] = it->v1p - it->v1c;
00772             delta_curr.val[2] = it->u1c - it->u1p;
00773             delta_curr.val[3] = it->v1c - it->v1p;
00774 
00775             // method stereo: compute position delta
00776         } else if (method==1) {
00777             delta_curr.val[0] = it->u2c - it->u1c;
00778             delta_curr.val[1] = 0;
00779             delta_curr.val[2] = it->u1c - it->u2c;
00780             delta_curr.val[3] = 0;
00781 
00782             // method quad matching: compute position delta
00783         } else {
00784             /*
00785                delta_curr.val[0] = it->u1p - it->u1c;
00786                delta_curr.val[1] = it->v1p - it->v1c;
00787                delta_curr.val[2] = it->u2p - it->u1p;
00788                delta_curr.val[3] = 0;
00789                delta_curr.val[4] = it->u2c - it->u2p;
00790                delta_curr.val[5] = it->v2c - it->v2p;
00791                delta_curr.val[6] = it->u1c - it->u2c;
00792                delta_curr.val[7] = 0;*/
00793             delta_curr.val[0] = it->u2p - it->u1p;
00794             delta_curr.val[1] = 0;
00795             delta_curr.val[2] = it->u2c - it->u2p;
00796             delta_curr.val[3] = it->v2c - it->v2p;
00797             delta_curr.val[4] = it->u1c - it->u2c;
00798             delta_curr.val[5] = 0;
00799             delta_curr.val[6] = it->u1p - it->u1c;
00800             delta_curr.val[7] = it->v1p - it->v1c;
00801         }
00802 
00803         // compute row and column of bin to which this observation belongs
00804         int32_t u_bin_min,u_bin_max,v_bin_min,v_bin_max;
00805 
00806         // flow + stereo: use current left image as reference
00807         if (method<2) {
00808             u_bin_min = min(max((int32_t)floor(it->u1c/(float)param.match_binsize)-1,0),u_bin_num-1);
00809             u_bin_max = min(max((int32_t)floor(it->u1c/(float)param.match_binsize)+1,0),u_bin_num-1);
00810             v_bin_min = min(max((int32_t)floor(it->v1c/(float)param.match_binsize)-1,0),v_bin_num-1);
00811             v_bin_max = min(max((int32_t)floor(it->v1c/(float)param.match_binsize)+1,0),v_bin_num-1);
00812 
00813             // quad matching: use current previous image as reference
00814         } else {
00815             u_bin_min = min(max((int32_t)floor(it->u1p/(float)param.match_binsize)-1,0),u_bin_num-1);
00816             u_bin_max = min(max((int32_t)floor(it->u1p/(float)param.match_binsize)+1,0),u_bin_num-1);
00817             v_bin_min = min(max((int32_t)floor(it->v1p/(float)param.match_binsize)-1,0),v_bin_num-1);
00818             v_bin_max = min(max((int32_t)floor(it->v1p/(float)param.match_binsize)+1,0),v_bin_num-1);
00819         }
00820 
00821         // add to accumulator
00822         for (int32_t v_bin=v_bin_min; v_bin<=v_bin_max; v_bin++)
00823             for (int32_t u_bin=u_bin_min; u_bin<=u_bin_max; u_bin++)
00824                 delta_accu[v_bin*u_bin_num+u_bin].push_back(delta_curr);
00825     }
00826 
00827     // clear ranges
00828     ranges.clear();
00829 
00830     // for all bins compute statistics
00831     for (int32_t v_bin=0; v_bin<v_bin_num; v_bin++) {
00832         for (int32_t u_bin=0; u_bin<u_bin_num; u_bin++) {
00833 
00834             // use full range in case there are no observations
00835             delta delta_min(-param.match_radius);
00836             delta delta_max(+param.match_radius);
00837 
00838             // otherwise determine delta min and delta max
00839             if (delta_accu[v_bin*u_bin_num+u_bin].size()>0) {
00840 
00841                 // init displacements 'delta' to 'infinite'
00842                 delta_min = delta(+1000000);
00843                 delta_max = delta(-1000000);
00844 
00845                 // find minimum and maximum displacements
00846                 for (vector<Matcher::delta>::iterator it=delta_accu[v_bin*u_bin_num+u_bin].begin();
00847                         it!=delta_accu[v_bin*u_bin_num+u_bin].end(); it++) {
00848                     for (int32_t i=0; i<num_stages*2; i++) {
00849                         if (it->val[i]<delta_min.val[i]) delta_min.val[i] = it->val[i];
00850                         if (it->val[i]>delta_max.val[i]) delta_max.val[i] = it->val[i];
00851                     }
00852                 }
00853             }
00854 
00855             // set search range for this bin
00856             // stage 0-3
00857             range r;
00858             for (int32_t i=0; i<num_stages; i++) {
00859 
00860                 // bound minimum search range to 20x20
00861                 float delta_u = delta_max.val[i*2+0]-delta_min.val[i*2+0];
00862                 if (delta_u<20) {
00863                     delta_min.val[i*2+0] -= ceil((20-delta_u)/2);
00864                     delta_max.val[i*2+0] += ceil((20-delta_u)/2);
00865                 }
00866                 float delta_v = delta_max.val[i*2+1]-delta_min.val[i*2+1];
00867                 if (delta_v<20) {
00868                     delta_min.val[i*2+1] -= ceil((20-delta_v)/2);
00869                     delta_max.val[i*2+1] += ceil((20-delta_v)/2);
00870                 }
00871 
00872                 // set range for this bin
00873                 r.u_min[i] = delta_min.val[i*2+0];
00874                 r.u_max[i] = delta_max.val[i*2+0];
00875                 r.v_min[i] = delta_min.val[i*2+1];
00876                 r.v_max[i] = delta_max.val[i*2+1];
00877             }
00878             ranges.push_back(r);
00879         }
00880     }
00881 
00882     // free bin accumulator memory
00883     delete []delta_accu;
00884 }
00885 
00886 void Matcher::createIndexVector (int32_t* m,int32_t n,vector<int32_t> *k,const int32_t &u_bin_num,const int32_t &v_bin_num) {
00887 
00888     // descriptor step size
00889     int32_t step_size = sizeof(Matcher::maximum)/sizeof(int32_t);
00890 
00891     // for all points do
00892     for (int32_t i=0; i<n; i++) {
00893 
00894         // extract coordinates and class
00895         int32_t u = *(m+step_size*i+0); // u-coordinate
00896         int32_t v = *(m+step_size*i+1); // v-coordinate
00897         int32_t c = *(m+step_size*i+3); // class,0123
00898         // compute row and column of bin to which this observation belongs
00899         int32_t u_bin = min((int32_t)floor((float)u/(float)param.match_binsize),u_bin_num-1);
00900         int32_t v_bin = min((int32_t)floor((float)v/(float)param.match_binsize),v_bin_num-1);
00901 
00902         // save index
00903         k[(c*v_bin_num+v_bin)*u_bin_num+u_bin].push_back(i);
00904     }
00905 }
00906 
00907 inline void Matcher::findMatch (int32_t* m1,const int32_t &i1,int32_t* m2,const int32_t &step_size,vector<int32_t> *k2,
00908         const int32_t &u_bin_num,const int32_t &v_bin_num,const int32_t &stat_bin,
00909         int32_t& min_ind,int32_t stage,bool flow,bool use_prior,double u_,double v_) {
00910 
00911     // init and load image coordinates + feature
00912     //min_ind输出
00913     min_ind          = 0;
00914     double  min_cost = 10000000;
00915     int32_t u1       = *(m1+step_size*i1+0);
00916     int32_t v1       = *(m1+step_size*i1+1);
00917     int32_t c        = *(m1+step_size*i1+3);
00918     //这俩是特征描述符
00919     __m128i xmm1     = _mm_load_si128((__m128i*)(m1+step_size*i1+4));
00920     __m128i xmm2     = _mm_load_si128((__m128i*)(m1+step_size*i1+8));
00921 
00922     float u_min,u_max,v_min,v_max;
00923 
00924     // restrict search range with prior
00925     if (use_prior) {
00926         u_min = u1+ranges[stat_bin].u_min[stage];
00927         u_max = u1+ranges[stat_bin].u_max[stage];
00928         v_min = v1+ranges[stat_bin].v_min[stage];
00929         v_max = v1+ranges[stat_bin].v_max[stage];
00930 
00931         // otherwise: use full search space
00932     } else {
00933         u_min = u1-param.match_radius;
00934         u_max = u1+param.match_radius;
00935         v_min = v1-param.match_radius;
00936         v_max = v1+param.match_radius;
00937     }
00938 
00939     // if stereo search => constrain to 1d
00940     // param.match_disp_tolerance=2
00941     if (!flow) {
00942         v_min = v1-param.match_disp_tolerance;
00943         v_max = v1+param.match_disp_tolerance;
00944     }
00945     /*printf("use_prior=%d,flow=%d\n",use_prior,flow);*/
00946     // bins of interest,bin_size=50, match_radius=200
00947     int32_t u_bin_min = min(max((int32_t)floor(u_min/(float)param.match_binsize),0),u_bin_num-1);
00948     int32_t u_bin_max = min(max((int32_t)floor(u_max/(float)param.match_binsize),0),u_bin_num-1);
00949     int32_t v_bin_min = min(max((int32_t)floor(v_min/(float)param.match_binsize),0),v_bin_num-1);
00950     int32_t v_bin_max = min(max((int32_t)floor(v_max/(float)param.match_binsize),0),v_bin_num-1);
00951 
00952     // for all bins of interest do
00953     for (int32_t u_bin=u_bin_min; u_bin<=u_bin_max; u_bin++) {
00954         for (int32_t v_bin=v_bin_min; v_bin<=v_bin_max; v_bin++) {
00955             int32_t k2_ind = (c*v_bin_num+v_bin)*u_bin_num+u_bin;
00956             for (vector<int32_t>::const_iterator i2_it=k2[k2_ind].begin(); i2_it!=k2[k2_ind].end(); i2_it++) {
00957                 //1-xx
00958                 /*printf("size=%d\n",k2[k2_ind].size());*/
00959                 int32_t u2   = *(m2+step_size*(*i2_it)+0);
00960                 int32_t v2   = *(m2+step_size*(*i2_it)+1);
00961                 if (u2>=u_min && u2<=u_max && v2>=v_min && v2<=v_max) {
00962                     __m128i xmm3 = _mm_load_si128((__m128i*)(m2+step_size*(*i2_it)+4));
00963                     __m128i xmm4 = _mm_load_si128((__m128i*)(m2+step_size*(*i2_it)+8));
00964                     //Computes the absolute difference of the 16 unsigned 8-bit integers from a and the 16 unsigned 8-bit integers from b.
00965                     xmm3 = _mm_sad_epu8 (xmm1,xmm3);
00966                     xmm4 = _mm_sad_epu8 (xmm2,xmm4);
00967                     xmm4 = _mm_add_epi16(xmm3,xmm4);
00968                     double cost = (double)(_mm_extract_epi16(xmm4,0)+_mm_extract_epi16(xmm4,4));
00969                     /*printf("%f,%f,%f,%f",_mm_extract_epi16(xmm4,0),_mm_extract_epi16(xmm4,2),_mm_extract_epi16(xmm4,3),_mm_extract_epi16(xmm4,4));*/
00970                     /*printf("cost=%f",cost);*/
00971                     /*printf("%f,%f\n",_mm_extract_epi16(xmm4,1),_mm_extract_epi16(xmm4,3));*/
00972                     if (u_>=0 && v_>=0) {
00973                         double du = (double)u2-u_;
00974                         double dv = (double)v2-v_;
00975                         double dist = sqrt(du*du+dv*dv);
00976                         cost += 4*dist;
00977                     }
00978 
00979                     if (cost<min_cost) {
00980                         min_ind  = *i2_it;
00981                         min_cost = cost;
00982                     }
00983                 }
00984             }
00985         }
00986     }
00987 }
00988 
00989 void Matcher::matching (int32_t *m1p,int32_t *m2p,int32_t *m1c,int32_t *m2c,
00990         int32_t n1p,int32_t n2p,int32_t n1c,int32_t n2c,
00991         vector<Matcher::p_match> &p_matched,int32_t method,bool use_prior,Matrix *Tr_delta) {
00992 
00993     // descriptor step size (number of int32_t elements in struct)
00994     int32_t step_size = sizeof(Matcher::maximum)/sizeof(int32_t);
00995 
00996     // compute number of bins
00997     int32_t u_bin_num = (int32_t)ceil((float)dims_c[0]/(float)param.match_binsize);
00998     int32_t v_bin_num = (int32_t)ceil((float)dims_c[1]/(float)param.match_binsize);
00999     int32_t bin_num   = 4*v_bin_num*u_bin_num; // 4 classes
01000 
01001     // allocate memory for index vectors (needed for efficient search)
01002     vector<int32_t> *k1p = new vector<int32_t>[bin_num];
01003     vector<int32_t> *k2p = new vector<int32_t>[bin_num];
01004     vector<int32_t> *k1c = new vector<int32_t>[bin_num];
01005     vector<int32_t> *k2c = new vector<int32_t>[bin_num];
01006 
01007     // loop variables
01008     int32_t* M = (int32_t*)calloc(dims_c[0]*dims_c[1],sizeof(int32_t));
01009     int32_t i1p,i2p,i1c,i2c,i1c2,i1p2;
01010     int32_t u1p,v1p,u2p,v2p,u1c,v1c,u2c,v2c;
01011 
01012     double t00,t01,t02,t03,t10,t11,t12,t13,t20,t21,t22,t23;
01013     //前两次运行tr_delta = null
01014     if (Tr_delta) {
01015         t00 = Tr_delta->val[0][0];
01016         t01 = Tr_delta->val[0][1];
01017         t02 = Tr_delta->val[0][2];
01018         t03 = Tr_delta->val[0][3];
01019         t10 = Tr_delta->val[1][0];
01020         t11 = Tr_delta->val[1][1];
01021         t12 = Tr_delta->val[1][2];
01022         t13 = Tr_delta->val[1][3];
01023         t20 = Tr_delta->val[2][0];
01024         t21 = Tr_delta->val[2][1];
01025         t22 = Tr_delta->val[2][2];
01026         t23 = Tr_delta->val[2][3];
01027     }
01028 
01030     // method: flow
01031     if (method==0) {
01032 
01033         //printf("flow\n");
01034         // create position/class bin index vectors
01035         createIndexVector(m1p,n1p,k1p,u_bin_num,v_bin_num);
01036         createIndexVector(m1c,n1c,k1c,u_bin_num,v_bin_num);
01037 
01038         // for all points do
01039         for (i1c=0; i1c<n1c; i1c++) {
01040 
01041             // coordinates in previous left image
01042             u1c = *(m1c+step_size*i1c+0);
01043             v1c = *(m1c+step_size*i1c+1);
01044 
01045             // compute row and column of statistics bin to which this observation belongs
01046             int32_t u_bin = min((int32_t)floor((float)u1c/(float)param.match_binsize),u_bin_num-1);
01047             int32_t v_bin = min((int32_t)floor((float)v1c/(float)param.match_binsize),v_bin_num-1);
01048             int32_t stat_bin = v_bin*u_bin_num+u_bin;
01049 
01050             // match forward/backward
01051             findMatch(m1c,i1c,m1p,step_size,k1p,u_bin_num,v_bin_num,stat_bin,i1p, 0,true,use_prior);
01052             findMatch(m1p,i1p,m1c,step_size,k1c,u_bin_num,v_bin_num,stat_bin,i1c2,1,true,use_prior);
01053 
01054             // circle closure success?
01055             if (i1c2==i1c) {
01056 
01057                 // extract coordinates
01058                 u1p = *(m1p+step_size*i1p+0);
01059                 v1p = *(m1p+step_size*i1p+1);
01060 
01061                 // add match if this pixel isn't matched yet
01062                 if (*(M+getAddressOffsetImage(u1c,v1c,dims_c[0]))==0) {
01063                     p_matched.push_back(Matcher::p_match(u1p,v1p,i1p,-1,-1,-1,u1c,v1c,i1c,-1,-1,-1));
01064                     *(M+getAddressOffsetImage(u1c,v1c,dims_c[0])) = 1;
01065                 }
01066             }
01067         }
01068 
01070         // method: stereo
01071     } else if (method==1) {
01072 
01073         //printf("stereo\n");
01074         // create position/class bin index vectors
01075         createIndexVector(m1c,n1c,k1c,u_bin_num,v_bin_num);
01076         createIndexVector(m2c,n2c,k2c,u_bin_num,v_bin_num);
01077 
01078         // for all points do
01079         for (i1c=0; i1c<n1c; i1c++) {
01080 
01081             // coordinates in previous left image
01082             u1c = *(m1c+step_size*i1c+0);
01083             v1c = *(m1c+step_size*i1c+1);
01084 
01085             // compute row and column of statistics bin to which this observation belongs
01086             int32_t u_bin = min((int32_t)floor((float)u1c/(float)param.match_binsize),u_bin_num-1);
01087             int32_t v_bin = min((int32_t)floor((float)v1c/(float)param.match_binsize),v_bin_num-1);
01088             int32_t stat_bin = v_bin*u_bin_num+u_bin;
01089 
01090             // match left/right
01091             findMatch(m1c,i1c,m2c,step_size,k2c,u_bin_num,v_bin_num,stat_bin,i2c, 0,false,use_prior);
01092             findMatch(m2c,i2c,m1c,step_size,k1c,u_bin_num,v_bin_num,stat_bin,i1c2,1,false,use_prior);
01093 
01094             // circle closure success?
01095             if (i1c2==i1c) {
01096 
01097                 // extract coordinates
01098                 u2c = *(m2c+step_size*i2c+0);
01099                 v2c = *(m2c+step_size*i2c+1);
01100 
01101                 // if disparity is positive
01102                 if (u1c>=u2c) {
01103 
01104                     // add match if this pixel isn't matched yet
01105                     if (*(M+getAddressOffsetImage(u1c,v1c,dims_c[0]))==0) {
01106                         p_matched.push_back(Matcher::p_match(-1,-1,-1,-1,-1,-1,u1c,v1c,i1c,u2c,v2c,i2c));
01107                         *(M+getAddressOffsetImage(u1c,v1c,dims_c[0])) = 1;
01108                     }
01109                 }
01110             }
01111         }
01112 
01114         // method: quad matching
01115     } else {
01116 
01117         //printf("quad\n");
01118         // create position/class bin index vectors
01119         createIndexVector(m1p,n1p,k1p,u_bin_num,v_bin_num);
01120         createIndexVector(m2p,n2p,k2p,u_bin_num,v_bin_num);
01121         createIndexVector(m1c,n1c,k1c,u_bin_num,v_bin_num);
01122         createIndexVector(m2c,n2c,k2c,u_bin_num,v_bin_num);
01123 
01124         // for all points do
01125         for (i1p=0; i1p<n1p; i1p++) {
01126 
01127             // coordinates
01128             // 特征描述符的个数,一个描述符32byte
01129             u1p = *(m1p+step_size*i1p+0);
01130             v1p = *(m1p+step_size*i1p+1);
01131 
01132             // compute row and column of statistics bin to which this observation belongs
01133             int32_t u_bin = min((int32_t)floor((float)u1p/(float)param.match_binsize),u_bin_num-1);
01134             int32_t v_bin = min((int32_t)floor((float)v1p/(float)param.match_binsize),v_bin_num-1);
01135             int32_t stat_bin = v_bin*u_bin_num+u_bin;
01136 
01137             // match in circle
01138             // 第一次使用1/2分辨率时use_prio=false,第二次全分辨率=true
01139             // 这个false指的是flaw,0指的是stage
01140             findMatch(m1p,i1p,m2p,step_size,k2p,u_bin_num,v_bin_num,stat_bin,i2p, 0,false,use_prior);
01141 
01142             u2p = *(m2p+step_size*i2p+0);
01143             v2p = *(m2p+step_size*i2p+1);
01144 
01145             if (Tr_delta) {
01146 
01147                 double d = max((double)u1p-(double)u2p,1.0);
01148                 double x1p = ((double)u1p-param.cu)*param.base/d;
01149                 double y1p = ((double)v1p-param.cv)*param.base/d;
01150                 double z1p = param.f*param.base/d;
01151                 /*printf("%f,%f,%f\n",x1p,y1p,z1p);*/
01152 
01153                 //在右摄像头坐标中的位置
01154                 double x2c = t00*x1p + t01*y1p + t02*z1p + t03 - param.base;
01155                 double y2c = t10*x1p + t11*y1p + t12*z1p + t13;
01156                 double z2c = t20*x1p + t21*y1p + t22*z1p + t23;
01157 
01158                 double u2c_ = param.f*x2c/z2c+param.cu;
01159                 double v2c_ = param.f*y2c/z2c+param.cv;
01160 
01161                 findMatch(m2p,i2p,m2c,step_size,k2c,u_bin_num,v_bin_num,stat_bin,i2c, 1,true ,use_prior,u2c_,v2c_);
01162             }
01163             else
01164             {
01165                 findMatch(m2p,i2p,m2c,step_size,k2c,u_bin_num,v_bin_num,stat_bin,i2c, 1,true ,use_prior);
01166             }
01167 
01168 
01169             findMatch(m2c,i2c,m1c,step_size,k1c,u_bin_num,v_bin_num,stat_bin,i1c, 2,false,use_prior);
01170             if (Tr_delta)
01171                 findMatch(m1c,i1c,m1p,step_size,k1p,u_bin_num,v_bin_num,stat_bin,i1p2,3,true ,use_prior,u1p,v1p);
01172             else
01173                 findMatch(m1c,i1c,m1p,step_size,k1p,u_bin_num,v_bin_num,stat_bin,i1p2,3,true ,use_prior);
01174 
01175             // circle closure success?
01176             if (i1p2==i1p) {
01177 
01178                 // extract coordinates
01179                 u2c = *(m2c+step_size*i2c+0); v2c = *(m2c+step_size*i2c+1);
01180                 u1c = *(m1c+step_size*i1c+0); v1c = *(m1c+step_size*i1c+1);
01181 
01182                 // if disparities are positive
01183                 if (u1p>=u2p && u1c>=u2c) //&& param.f*param.base/(u1p-u2p)<3.0 && param.f*param.base/(u1c-u2c)<3.0 )
01184                 {
01185                     // add match
01186                     p_matched.push_back(Matcher::p_match(u1p,v1p,i1p,u2p,v2p,i2p,
01187                                 u1c,v1c,i1c,u2c,v2c,i2c));
01188                 }
01189             }
01190         }
01191 
01192         // old version:
01193         /*
01194         // for all points do
01195         for (i1c=0; i1c<n1c; i1c++) {
01196 
01197         // coordinates
01198         u1c = *(m1c+step_size*i1c+0);
01199         v1c = *(m1c+step_size*i1c+1);
01200 
01201         // compute row and column of statistics bin to which this observation belongs
01202         int32_t u_bin = min((int32_t)floor((float)u1c/(float)param.match_binsize),u_bin_num-1);
01203         int32_t v_bin = min((int32_t)floor((float)v1c/(float)param.match_binsize),v_bin_num-1);
01204         int32_t stat_bin = v_bin*u_bin_num+u_bin;
01205 
01206         // match in circle
01207         findMatch(m1c,i1c,m1p,step_size,k1p,u_bin_num,v_bin_num,stat_bin,i1p, 0,true ,use_prior,Tr_delta);
01208         findMatch(m1p,i1p,m2p,step_size,k2p,u_bin_num,v_bin_num,stat_bin,i2p, 1,false,use_prior,Tr_delta);
01209         findMatch(m2p,i2p,m2c,step_size,k2c,u_bin_num,v_bin_num,stat_bin,i2c, 2,true ,use_prior,Tr_delta);
01210         findMatch(m2c,i2c,m1c,step_size,k1c,u_bin_num,v_bin_num,stat_bin,i1c2,3,false,use_prior,Tr_delta);
01211 
01212         // circle closure success?
01213         if (i1c2==i1c) {
01214 
01215         // extract coordinates
01216         u1p = *(m1p+step_size*i1p+0); v1p = *(m1p+step_size*i1p+1);
01217         u2p = *(m2p+step_size*i2p+0); v2p = *(m2p+step_size*i2p+1);
01218         u2c = *(m2c+step_size*i2c+0); v2c = *(m2c+step_size*i2c+1);
01219 
01220         // if disparities are positive
01221         if (u1p>=u2p && u1c>=u2c) {
01222 
01223         // add match if this pixel isn't matched yet
01224         if (*(M+getAddressOffsetImage(u1c,v1c,dims_c[0]))==0) {
01225         p_matched.push_back(Matcher::p_match(u1p,v1p,i1p,u2p,v2p,i2p,
01226         u1c,v1c,i1c,u2c,v2c,i2c));
01227          *(M+getAddressOffsetImage(u1c,v1c,dims_c[0])) = 1;
01228          }
01229 
01230          }
01231          }
01232          }
01233          */
01234     }
01235 
01236     // free memory
01237     free(M);
01238     delete []k1p;
01239     delete []k2p;
01240     delete []k1c;
01241     delete []k2c;
01242 }
01243 
01244 void Matcher::removeOutliers (vector<Matcher::p_match> &p_matched,int32_t method) {
01245 
01246     // do we have enough points for outlier removal?
01247     if (p_matched.size()<=3)
01248         return;
01249 
01250     // input/output structure for triangulation
01251     struct triangulateio in, out;
01252 
01253     // inputs
01254     in.numberofpoints = p_matched.size();
01255     in.pointlist = (float*)malloc(in.numberofpoints*2*sizeof(float));
01256     int32_t k=0;
01257 
01258     // create copy of p_matched, init vector with number of support points
01259     // and fill triangle point vector for delaunay triangulation
01260     vector<Matcher::p_match> p_matched_copy;
01261     vector<int32_t> num_support;
01262     for (vector<Matcher::p_match>::iterator it=p_matched.begin(); it!=p_matched.end(); it++) {
01263         p_matched_copy.push_back(*it);
01264         num_support.push_back(0);
01265         in.pointlist[k++] = it->u1c;
01266         in.pointlist[k++] = it->v1c;
01267     }
01268 
01269     // input parameters
01270     in.numberofpointattributes = 0;
01271     in.pointattributelist      = NULL;
01272     in.pointmarkerlist         = NULL;
01273     in.numberofsegments        = 0;
01274     in.numberofholes           = 0;
01275     in.numberofregions         = 0;
01276     in.regionlist              = NULL;
01277 
01278     // outputs
01279     out.pointlist              = NULL;
01280     out.pointattributelist     = NULL;
01281     out.pointmarkerlist        = NULL;
01282     out.trianglelist           = NULL;
01283     out.triangleattributelist  = NULL;
01284     out.neighborlist           = NULL;
01285     out.segmentlist            = NULL;
01286     out.segmentmarkerlist      = NULL;
01287     out.edgelist               = NULL;
01288     out.edgemarkerlist         = NULL;
01289 
01290     // do triangulation (z=zero-based, n=neighbors, Q=quiet, B=no boundary markers)
01291     // attention: not using the B switch or using the n switch creates a memory leak (=> use valgrind!)
01292     char parameters[] = "zQB";
01293     triangulate(parameters, &in, &out, NULL);
01294 
01295     // for all triangles do
01296     for (int32_t i=0; i<out.numberoftriangles; i++) {
01297 
01298         // extract triangle corner points
01299         int32_t p1 = out.trianglelist[i*3+0];
01300         int32_t p2 = out.trianglelist[i*3+1];
01301         int32_t p3 = out.trianglelist[i*3+2];
01302 
01303         // method: flow
01304         if (method==0) {
01305 
01306             // 1. corner disparity and flow
01307             float p1_flow_u = p_matched_copy[p1].u1c-p_matched_copy[p1].u1p;
01308             float p1_flow_v = p_matched_copy[p1].v1c-p_matched_copy[p1].v1p;
01309 
01310             // 2. corner disparity and flow
01311             float p2_flow_u = p_matched_copy[p2].u1c-p_matched_copy[p2].u1p;
01312             float p2_flow_v = p_matched_copy[p2].v1c-p_matched_copy[p2].v1p;
01313 
01314             // 3. corner disparity and flow
01315             float p3_flow_u = p_matched_copy[p3].u1c-p_matched_copy[p3].u1p;
01316             float p3_flow_v = p_matched_copy[p3].v1c-p_matched_copy[p3].v1p;
01317 
01318             // consistency of 1. edge
01319             if (fabs(p1_flow_u-p2_flow_u)+fabs(p1_flow_v-p2_flow_v)<param.outlier_flow_tolerance) {
01320                 num_support[p1]++;
01321                 num_support[p2]++;
01322             }
01323 
01324             // consistency of 2. edge
01325             if (fabs(p2_flow_u-p3_flow_u)+fabs(p2_flow_v-p3_flow_v)<param.outlier_flow_tolerance) {
01326                 num_support[p2]++;
01327                 num_support[p3]++;
01328             }
01329 
01330             // consistency of 3. edge
01331             if (fabs(p1_flow_u-p3_flow_u)+fabs(p1_flow_v-p3_flow_v)<param.outlier_flow_tolerance) {
01332                 num_support[p1]++;
01333                 num_support[p3]++;
01334             }
01335 
01336             // method: stereo
01337         } else if (method==1) {
01338 
01339             // 1. corner disparity and flow
01340             float p1_disp   = p_matched_copy[p1].u1c-p_matched_copy[p1].u2c;
01341 
01342             // 2. corner disparity and flow
01343             float p2_disp   = p_matched_copy[p2].u1c-p_matched_copy[p2].u2c;
01344 
01345             // 3. corner disparity and flow
01346             float p3_disp   = p_matched_copy[p3].u1c-p_matched_copy[p3].u2c;
01347 
01348             // consistency of 1. edge
01349             if (fabs(p1_disp-p2_disp)<param.outlier_disp_tolerance) {
01350                 num_support[p1]++;
01351                 num_support[p2]++;
01352             }
01353 
01354             // consistency of 2. edge
01355             if (fabs(p2_disp-p3_disp)<param.outlier_disp_tolerance) {
01356                 num_support[p2]++;
01357                 num_support[p3]++;
01358             }
01359 
01360             // consistency of 3. edge
01361             if (fabs(p1_disp-p3_disp)<param.outlier_disp_tolerance) {
01362                 num_support[p1]++;
01363                 num_support[p3]++;
01364             }
01365 
01366             // method: quad matching
01367         } else {
01368 
01369             // 1. corner disparity and flow
01370             float p1_flow_u = p_matched_copy[p1].u1c-p_matched_copy[p1].u1p;
01371             float p1_flow_v = p_matched_copy[p1].v1c-p_matched_copy[p1].v1p;
01372             float p1_disp   = p_matched_copy[p1].u1p-p_matched_copy[p1].u2p;
01373 
01374             // 2. corner disparity and flow
01375             float p2_flow_u = p_matched_copy[p2].u1c-p_matched_copy[p2].u1p;
01376             float p2_flow_v = p_matched_copy[p2].v1c-p_matched_copy[p2].v1p;
01377             float p2_disp   = p_matched_copy[p2].u1p-p_matched_copy[p2].u2p;
01378 
01379             // 3. corner disparity and flow
01380             float p3_flow_u = p_matched_copy[p3].u1c-p_matched_copy[p3].u1p;
01381             float p3_flow_v = p_matched_copy[p3].v1c-p_matched_copy[p3].v1p;
01382             float p3_disp   = p_matched_copy[p3].u1p-p_matched_copy[p3].u2p;
01383 
01384             // consistency of 1. edge
01385             if (fabs(p1_disp-p2_disp)<param.outlier_disp_tolerance && fabs(p1_flow_u-p2_flow_u)+fabs(p1_flow_v-p2_flow_v)<param.outlier_flow_tolerance) {
01386                 num_support[p1]++;
01387                 num_support[p2]++;
01388             }
01389 
01390             // consistency of 2. edge
01391             if (fabs(p2_disp-p3_disp)<param.outlier_disp_tolerance && fabs(p2_flow_u-p3_flow_u)+fabs(p2_flow_v-p3_flow_v)<param.outlier_flow_tolerance) {
01392                 num_support[p2]++;
01393                 num_support[p3]++;
01394             }
01395 
01396             // consistency of 3. edge
01397             if (fabs(p1_disp-p3_disp)<param.outlier_disp_tolerance && fabs(p1_flow_u-p3_flow_u)+fabs(p1_flow_v-p3_flow_v)<param.outlier_flow_tolerance) {
01398                 num_support[p1]++;
01399                 num_support[p3]++;
01400             }
01401         }
01402     }
01403 
01404     // refill p_matched
01405     p_matched.clear();
01406     for (int i=0; i<in.numberofpoints; i++)
01407         if (num_support[i]>=4)
01408             p_matched.push_back(p_matched_copy[i]);
01409 
01410     // free memory used for triangulation
01411     free(in.pointlist);
01412     free(out.pointlist);
01413     free(out.trianglelist);
01414 }
01415 
01416 bool Matcher::parabolicFitting(const uint8_t* I1_du,const uint8_t* I1_dv,const int32_t* dims1,
01417         const uint8_t* I2_du,const uint8_t* I2_dv,const int32_t* dims2,
01418         const float &u1,const float &v1,
01419         float       &u2,float       &v2,
01420         Matrix At,Matrix AtA,
01421         uint8_t* desc_buffer) {
01422 
01423     // check if parabolic fitting is feasible (descriptors are within margin)
01424     if (u2-3<margin || u2+3>dims2[0]-1-margin || v2-3<margin || v2+3>dims2[1]-1-margin)
01425         return false;
01426 
01427     // compute reference descriptor
01428     __m128i xmm1,xmm2;
01429     computeSmallDescriptor(I1_du,I1_dv,dims1[2],(int32_t)u1,(int32_t)v1,desc_buffer);
01430     xmm1 = _mm_load_si128((__m128i*)(desc_buffer));
01431 
01432     // compute cost matrix
01433     int32_t cost[49];
01434     for (int32_t dv=0; dv<7; dv++) {
01435         for (int32_t du=0; du<7; du++) {
01436             computeSmallDescriptor(I2_du,I2_dv,dims2[2],(int32_t)u2+du-3,(int32_t)v2+dv-3,desc_buffer);
01437             xmm2 = _mm_load_si128((__m128i*)(desc_buffer));
01438             xmm2 = _mm_sad_epu8(xmm1,xmm2);
01439             cost[dv*7+du] = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4);
01440         }
01441     }
01442 
01443     // compute minimum
01444     int32_t min_ind  = 0;
01445     int32_t min_cost = cost[0];
01446     for (int32_t i=1; i<49; i++) {
01447         if (cost[i]<min_cost) {
01448             min_ind   = i;
01449             min_cost  = cost[i];
01450         }
01451     }
01452 
01453     // get indices
01454     int32_t du = min_ind%7;
01455     int32_t dv = min_ind/7;
01456 
01457     // if minimum is at borders => remove this match
01458     if (du==0 || du==6 || dv==0 || dv==6)
01459         return false;
01460 
01461     // solve least squares system
01462     Matrix c(9,1);
01463     for (int32_t i=-1; i<=+1; i++) {
01464         for (int32_t j=-1; j<=+1; j++) {
01465             int32_t cost_curr = cost[(dv+i)*7+(du+j)];
01466             // if (i!=0 && j!=0 && cost_curr<=min_cost+150)
01467             // return false;
01468             c.val[(i+1)*3+(j+1)][0] = cost_curr;
01469         }
01470     }
01471     Matrix b = At*c;
01472     if (!b.solve(AtA))
01473         return false;
01474 
01475     // extract relative coordinates
01476     float divisor = (b.val[2][0]*b.val[2][0]-4.0*b.val[0][0]*b.val[1][0]);
01477     if (fabs(divisor)<1e-8 || fabs(b.val[2][0])<1e-8)
01478         return false;
01479     float ddv = (2.0*b.val[0][0]*b.val[4][0]-b.val[2][0]*b.val[3][0])/divisor;
01480     float ddu = -(b.val[4][0]+2.0*b.val[1][0]*ddv)/b.val[2][0];
01481     if (fabs(ddu)>=1.0 || fabs(ddv)>=1.0)
01482         return false;
01483 
01484     // update target
01485     u2 += (float)du-3.0+ddu;
01486     v2 += (float)dv-3.0+ddv;
01487 
01488     // return true on success
01489     return true;
01490 }
01491 
01492 void Matcher::relocateMinimum(const uint8_t* I1_du,const uint8_t* I1_dv,const int32_t* dims1,
01493         const uint8_t* I2_du,const uint8_t* I2_dv,const int32_t* dims2,
01494         const float &u1,const float &v1,
01495         float       &u2,float       &v2,
01496         uint8_t* desc_buffer) {
01497 
01498     // check if parabolic fitting is feasible (descriptors are within margin)
01499     if (u2-2<margin || u2+2>dims2[0]-1-margin || v2-2<margin || v2+2>dims2[1]-1-margin)
01500         return;
01501 
01502     // compute reference descriptor
01503     __m128i xmm1,xmm2;
01504     computeSmallDescriptor(I1_du,I1_dv,dims1[2],(int32_t)u1,(int32_t)v1,desc_buffer);
01505     xmm1 = _mm_load_si128((__m128i*)(desc_buffer));
01506 
01507     // compute cost matrix
01508     int32_t cost[25];
01509     for (int32_t dv=0; dv<5; dv++) {
01510         for (int32_t du=0; du<5; du++) {
01511             computeSmallDescriptor(I2_du,I2_dv,dims2[2],(int32_t)u2+du-2,(int32_t)v2+dv-2,desc_buffer);
01512             xmm2 = _mm_load_si128((__m128i*)(desc_buffer));
01513             xmm2 = _mm_sad_epu8(xmm1,xmm2);
01514             cost[dv*5+du] = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4);
01515         }
01516     }
01517 
01518     // compute minimum
01519     int32_t min_ind  = 0;
01520     int32_t min_cost = cost[0];
01521     for (int32_t i=1; i<25; i++) {
01522         if (cost[i]<min_cost) {
01523             min_ind   = i;
01524             min_cost  = cost[i];
01525         }
01526     }
01527 
01528     // update target
01529     u2 += (float)(min_ind%5)-2.0;
01530     v2 += (float)(min_ind/5)-2.0;
01531 }
01532 
01533 void Matcher::refinement (vector<Matcher::p_match> &p_matched,int32_t method) {
01534 
01535     // allocate aligned memory (32 bytes for 1 descriptors)
01536     uint8_t* desc_buffer = (uint8_t*)_mm_malloc(32*sizeof(uint8_t),16);
01537 
01538     // copy vector (for refill)
01539     vector<Matcher::p_match> p_matched_copy = p_matched;
01540     p_matched.clear();
01541 
01542     // create matrices for least square fitting
01543     FLOAT A_data[9*6] =
01544     {
01545         1, 1, 1,-1,-1, 1,
01546         0, 1, 0, 0,-1, 1,
01547         1, 1,-1, 1,-1, 1,
01548         1, 0, 0,-1, 0, 1,
01549         0, 0, 0, 0, 0, 1,
01550         1, 0, 0, 1, 0, 1,
01551         1, 1,-1,-1, 1, 1,
01552         0, 1, 0, 0, 1, 1,
01553         1, 1, 1, 1, 1, 1
01554     };
01555     Matrix A(9,6,A_data);
01556     Matrix At  = ~A;
01557     Matrix AtA = At*A;
01558 
01559     uint8_t* I1p_du_fit = I1p_du;
01560     uint8_t* I1p_dv_fit = I1p_dv;
01561     uint8_t* I2p_du_fit = I2p_du;
01562     uint8_t* I2p_dv_fit = I2p_dv;
01563     uint8_t* I1c_du_fit = I1c_du;
01564     uint8_t* I1c_dv_fit = I1c_dv;
01565     uint8_t* I2c_du_fit = I2c_du;
01566     uint8_t* I2c_dv_fit = I2c_dv;
01567     if (param.half_resolution) {
01568         I1p_du_fit = I1p_du_full;
01569         I1p_dv_fit = I1p_dv_full;
01570         I2p_du_fit = I2p_du_full;
01571         I2p_dv_fit = I2p_dv_full;
01572         I1c_du_fit = I1c_du_full;
01573         I1c_dv_fit = I1c_dv_full;
01574         I2c_du_fit = I2c_du_full;
01575         I2c_dv_fit = I2c_dv_full;
01576     }
01577 
01578     // for all matches do
01579     for (vector<Matcher::p_match>::iterator it=p_matched_copy.begin(); it!=p_matched_copy.end(); it++) {
01580 
01581         // method: flow or quad matching
01582         if (method==0 || method==2) {
01583             if (param.refinement==2) {
01584                 if (!parabolicFitting(I1c_du_fit,I1c_dv_fit,dims_c,I1p_du_fit,I1p_dv_fit,dims_p,
01585                             it->u1c,it->v1c,it->u1p,it->v1p,At,AtA,desc_buffer))
01586                     continue;
01587             } else {
01588                 relocateMinimum(I1c_du_fit,I1c_dv_fit,dims_c,I1p_du_fit,I1p_dv_fit,dims_p,
01589                         it->u1c,it->v1c,it->u1p,it->v1p,desc_buffer);
01590             }
01591         }
01592 
01593         // method: stereo or quad matching
01594         if (method==1 || method==2) {
01595             if (param.refinement==2) {
01596                 if (!parabolicFitting(I1c_du_fit,I1c_dv_fit,dims_c,I2c_du_fit,I2c_dv_fit,dims_c,
01597                             it->u1c,it->v1c,it->u2c,it->v2c,At,AtA,desc_buffer))
01598                     continue;
01599             } else {
01600                 relocateMinimum(I1c_du_fit,I1c_dv_fit,dims_c,I2c_du_fit,I2c_dv_fit,dims_c,
01601                         it->u1c,it->v1c,it->u2c,it->v2c,desc_buffer);
01602             }
01603         }
01604 
01605         // method: quad matching
01606         if (method==2) {
01607             if (param.refinement==2) {
01608                 if (!parabolicFitting(I1c_du_fit,I1c_dv_fit,dims_c,I2p_du_fit,I2p_dv_fit,dims_p,
01609                             it->u1c,it->v1c,it->u2p,it->v2p,At,AtA,desc_buffer))
01610                     continue;
01611             } else {
01612                 relocateMinimum(I1c_du_fit,I1c_dv_fit,dims_c,I2p_du_fit,I2p_dv_fit,dims_p,
01613                         it->u1c,it->v1c,it->u2p,it->v2p,desc_buffer);
01614             }
01615         }
01616 
01617         // add this match
01618         p_matched.push_back(*it);
01619     }
01620 
01621     // free memory
01622     _mm_free(desc_buffer);
01623 }
01624 
01625 float Matcher::mean(const uint8_t* I,const int32_t &bpl,const int32_t &u_min,const int32_t &u_max,const int32_t &v_min,const int32_t &v_max) {
01626     float mean = 0;
01627     for (int32_t v=v_min; v<=v_max; v++)
01628         for (int32_t u=u_min; u<=u_max; u++)
01629             mean += (float)*(I+getAddressOffsetImage(u,v,bpl));
01630     return
01631         mean /= (float)((u_max-u_min+1)*(v_max-v_min+1));
01632 }
01633