libviso2: matcher.cpp Source File

Go to the documentation of this file.
00001 /*
00002 Copyright 2012. All rights reserved.
00003 Institute of Measurement and Control Systems
00004 Karlsruhe Institute of Technology, Germany
00005 
00006 This file is part of libviso2.
00007 Authors: Andreas Geiger
00008 
00009 libviso2 is free software; you can redistribute it and/or modify it under the
00010 terms of the GNU General Public License as published by the Free Software
00011 Foundation; either version 2 of the License, or any later version.
00012 
00013 libviso2 is distributed in the hope that it will be useful, but WITHOUT ANY
00014 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
00015 PARTICULAR PURPOSE. See the GNU General Public License for more details.
00016 
00017 You should have received a copy of the GNU General Public License along with
00018 libviso2; if not, write to the Free Software Foundation, Inc., 51 Franklin
00019 Street, Fifth Floor, Boston, MA 02110-1301, USA 
00020 */
00021 
00022 #include "matcher.h"
00023 #include "triangle.h"
00024 #include "filter.h"
00025 
00026 using namespace std;
00027 
00029 // PUBLIC FUNCTIONS //
00031 
00032 // constructor (with default parameters)
00033 Matcher::Matcher(parameters param) : param(param) {
00034 
00035   // init match ring buffer to zero
00036   m1p1 = 0; n1p1 = 0;
00037   m1p2 = 0; n1p2 = 0;
00038   m2p1 = 0; n2p1 = 0;
00039   m2p2 = 0; n2p2 = 0;
00040   m1c1 = 0; n1c1 = 0;
00041   m1c2 = 0; n1c2 = 0;
00042   m2c1 = 0; n2c1 = 0;
00043   m2c2 = 0; n2c2 = 0;
00044   I1p    = 0; I2p    = 0;
00045   I1c    = 0; I2c    = 0;
00046   I1p_du = 0; I1p_dv = 0;
00047   I2p_du = 0; I2p_dv = 0;
00048   I1c_du = 0; I1c_dv = 0;
00049   I2c_du = 0; I2c_dv = 0;
00050   I1p_du_full = 0; I1p_dv_full = 0;
00051   I2p_du_full = 0; I2p_dv_full = 0;
00052   I1c_du_full = 0; I1c_dv_full = 0;
00053   I2c_du_full = 0; I2c_dv_full = 0;
00054 
00055   // margin needed to compute descriptor + sobel responses
00056   margin = 5+1;
00057   
00058   // adjust match radius on half resolution
00059   if (param.half_resolution)
00060     this->param.match_radius /= 2;
00061 }
00062 
00063 // deconstructor
00064 Matcher::~Matcher() {
00065   if (I1p)          _mm_free(I1p);
00066   if (I2p)          _mm_free(I2p);
00067   if (I1c)          _mm_free(I1c);
00068   if (I2c)          _mm_free(I2c);
00069   if (m1p1)         _mm_free(m1p1);
00070   if (m1p2)         _mm_free(m1p2);
00071   if (I1p_du)       _mm_free(I1p_du);
00072   if (I1p_dv)       _mm_free(I1p_dv);
00073   if (I1p_du_full)  _mm_free(I1p_du_full);
00074   if (I1p_dv_full)  _mm_free(I1p_dv_full);
00075   if (m2p1)         _mm_free(m2p1);
00076   if (m2p2)         _mm_free(m2p2);
00077   if (I2p_du)       _mm_free(I2p_du);
00078   if (I2p_dv)       _mm_free(I2p_dv);
00079   if (I2p_du_full)  _mm_free(I2p_du_full);
00080   if (I2p_dv_full)  _mm_free(I2p_dv_full);
00081   if (m1c1)         _mm_free(m1c1);
00082   if (m1c2)         _mm_free(m1c2);
00083   if (I1c_du)       _mm_free(I1c_du);
00084   if (I1c_dv)       _mm_free(I1c_dv);
00085   if (I1c_du_full)  _mm_free(I1c_du_full);
00086   if (I1c_dv_full)  _mm_free(I1c_dv_full);
00087   if (m2c1)         _mm_free(m2c1);
00088   if (m2c2)         _mm_free(m2c2);
00089   if (I2c_du)       _mm_free(I2c_du);
00090   if (I2c_dv)       _mm_free(I2c_dv);
00091   if (I2c_du_full)  _mm_free(I2c_du_full);
00092   if (I2c_dv_full)  _mm_free(I2c_dv_full);
00093 }
00094 
00095 void Matcher::pushBack (uint8_t *I1,uint8_t* I2,int32_t* dims,const bool replace) {
00096 
00097   // image dimensions
00098   int32_t width  = dims[0];
00099   int32_t height = dims[1];
00100   int32_t bpl    = dims[2];
00101 
00102   // sanity check
00103   if (width<=0 || height<=0 || bpl<width || I1==0) {
00104     cerr << "ERROR: Image dimension mismatch!" << endl;
00105     return;
00106   }
00107 
00108   if (replace) {
00109     if (I1c)         _mm_free(I1c);
00110     if (I2c)         _mm_free(I2c);
00111     if (m1c1)        _mm_free(m1c1);
00112     if (m1c2)        _mm_free(m1c2);
00113     if (I1c_du)      _mm_free(I1c_du);
00114     if (I1c_dv)      _mm_free(I1c_dv);
00115     if (I1c_du_full) _mm_free(I1c_du_full);
00116     if (I1c_dv_full) _mm_free(I1c_dv_full);
00117     if (m2c1)        _mm_free(m2c1);
00118     if (m2c2)        _mm_free(m2c2);
00119     if (I2c_du)      _mm_free(I2c_du);
00120     if (I2c_dv)      _mm_free(I2c_dv);
00121     if (I2c_du_full) _mm_free(I2c_du_full);
00122     if (I2c_dv_full) _mm_free(I2c_dv_full);
00123   } else {
00124     if (I1p)         _mm_free(I1p);
00125     if (I2p)         _mm_free(I2p);
00126     if (m1p1)        _mm_free(m1p1);
00127     if (m1p2)        _mm_free(m1p2);
00128     if (I1p_du)      _mm_free(I1p_du);
00129     if (I1p_dv)      _mm_free(I1p_dv);
00130     if (I1p_du_full) _mm_free(I1p_du_full);
00131     if (I1p_dv_full) _mm_free(I1p_dv_full);
00132     if (m2p1)        _mm_free(m2p1);
00133     if (m2p2)        _mm_free(m2p2);
00134     if (I2p_du)      _mm_free(I2p_du);
00135     if (I2p_dv)      _mm_free(I2p_dv);
00136     if (I2p_du_full) _mm_free(I2p_du_full);
00137     if (I2p_dv_full) _mm_free(I2p_dv_full);
00138     m1p1 = m1c1; n1p1 = n1c1;
00139     m1p2 = m1c2; n1p2 = n1c2;
00140     m2p1 = m2c1; n2p1 = n2c1;
00141     m2p2 = m2c2; n2p2 = n2c2;
00142     I1p         = I1c;
00143     I2p         = I2c;
00144     I1p_du      = I1c_du;
00145     I1p_dv      = I1c_dv;
00146     I1p_du_full = I1c_du_full;
00147     I1p_dv_full = I1c_dv_full;
00148     I2p_du      = I2c_du;
00149     I2p_dv      = I2c_dv;
00150     I2p_du_full = I2c_du_full;
00151     I2p_dv_full = I2c_dv_full;
00152     dims_p[0]   = dims_c[0];
00153     dims_p[1]   = dims_c[1];
00154     dims_p[2]   = dims_c[2];
00155   }
00156 
00157   // set new dims (bytes per line must be multiple of 16)
00158   dims_c[0] = width;
00159   dims_c[1] = height;
00160   dims_c[2] = width + 15-(width-1)%16;
00161 
00162   // copy images to byte aligned memory
00163   I1c = (uint8_t*)_mm_malloc(dims_c[2]*dims_c[1]*sizeof(uint8_t),16);
00164   I2c = (uint8_t*)_mm_malloc(dims_c[2]*dims_c[1]*sizeof(uint8_t),16);
00165   if (dims_c[2]==bpl) {
00166     memcpy(I1c,I1,dims_c[2]*dims_c[1]*sizeof(uint8_t));
00167     if (I2!=0)
00168       memcpy(I2c,I2,dims_c[2]*dims_c[1]*sizeof(uint8_t));
00169   } else {
00170     for (int32_t v=0; v<height; v++) {
00171       memcpy(I1c+v*dims_c[2],I1+v*bpl,dims_c[0]*sizeof(uint8_t));
00172       if (I2!=0)
00173         memcpy(I2c+v*dims_c[2],I2+v*bpl,dims_c[0]*sizeof(uint8_t));
00174     }
00175   }
00176 
00177   // compute new features for current frame
00178   computeFeatures(I1c,dims_c,m1c1,n1c1,m1c2,n1c2,I1c_du,I1c_dv,I1c_du_full,I1c_dv_full);
00179   if (I2!=0)
00180     computeFeatures(I2c,dims_c,m2c1,n2c1,m2c2,n2c2,I2c_du,I2c_dv,I2c_du_full,I2c_dv_full);
00181 }
00182 
00183 void Matcher::matchFeatures(int32_t method, Matrix *Tr_delta) {
00184   
00186   // sanity check //
00188   
00189   // flow
00190   if (method==0) {
00191     if (m1p2==0 || n1p2==0 || m1c2==0 || n1c2==0)
00192       return;
00193     if (param.multi_stage)
00194       if (m1p1==0 || n1p1==0 || m1c1==0 || n1c1==0)
00195         return;
00196     
00197   // stereo
00198   } else if (method==1) {
00199     if (m1c2==0 || n1c2==0 || m2c2==0 || n2c2==0)
00200       return;
00201     if (param.multi_stage)
00202       if (m1c1==0 || n1c1==0 || m2c1==0 || n2c1==0)
00203         return;
00204     
00205   // quad matching
00206   } else {
00207     if (m1p2==0 || n1p2==0 || m2p2==0 || n2p2==0 || m1c2==0 || n1c2==0 || m2c2==0 || n2c2==0)
00208       return;
00209     if (param.multi_stage)
00210       if (m1p1==0 || n1p1==0 || m2p1==0 || n2p1==0 || m1c1==0 || n1c1==0 || m2c1==0 || n2c1==0)
00211         return;    
00212   }
00213 
00214   // clear old matches
00215   p_matched_1.clear();
00216   p_matched_2.clear();
00217 
00218   // double pass matching
00219   if (param.multi_stage) {
00220 
00221     // 1st pass (sparse matches)
00222     matching(m1p1,m2p1,m1c1,m2c1,n1p1,n2p1,n1c1,n2c1,p_matched_1,method,false,Tr_delta);
00223     removeOutliers(p_matched_1,method);
00224     
00225     // compute search range prior statistics (used for speeding up 2nd pass)
00226     computePriorStatistics(p_matched_1,method);      
00227 
00228     // 2nd pass (dense matches)
00229     matching(m1p2,m2p2,m1c2,m2c2,n1p2,n2p2,n1c2,n2c2,p_matched_2,method,true,Tr_delta);
00230     if (param.refinement>0)
00231       refinement(p_matched_2,method);
00232     removeOutliers(p_matched_2,method);
00233 
00234   // single pass matching
00235   } else {
00236     matching(m1p2,m2p2,m1c2,m2c2,n1p2,n2p2,n1c2,n2c2,p_matched_2,method,false,Tr_delta);
00237     if (param.refinement>0)
00238       refinement(p_matched_2,method);
00239     removeOutliers(p_matched_2,method);
00240   }
00241 }
00242 
00243 void Matcher::bucketFeatures(int32_t max_features,float bucket_width,float bucket_height) {
00244 
00245   // find max values
00246   float u_max = 0;
00247   float v_max = 0;
00248   for (vector<p_match>::iterator it = p_matched_2.begin(); it!=p_matched_2.end(); it++) {
00249     if (it->u1c>u_max) u_max=it->u1c;
00250     if (it->v1c>v_max) v_max=it->v1c;
00251   }
00252 
00253   // allocate number of buckets needed
00254   int32_t bucket_cols = (int32_t)floor(u_max/bucket_width)+1;
00255   int32_t bucket_rows = (int32_t)floor(v_max/bucket_height)+1;
00256   vector<p_match> *buckets = new vector<p_match>[bucket_cols*bucket_rows];
00257 
00258   // assign matches to their buckets
00259   for (vector<p_match>::iterator it=p_matched_2.begin(); it!=p_matched_2.end(); it++) {
00260     int32_t u = (int32_t)floor(it->u1c/bucket_width);
00261     int32_t v = (int32_t)floor(it->v1c/bucket_height);
00262     buckets[v*bucket_cols+u].push_back(*it);
00263   }
00264   
00265   // refill p_matched from buckets
00266   p_matched_2.clear();
00267   for (int32_t i=0; i<bucket_cols*bucket_rows; i++) {
00268     
00269     // shuffle bucket indices randomly
00270     std::random_shuffle(buckets[i].begin(),buckets[i].end());
00271     
00272     // add up to max_features features from this bucket to p_matched
00273     int32_t k=0;
00274     for (vector<p_match>::iterator it=buckets[i].begin(); it!=buckets[i].end(); it++) {
00275       p_matched_2.push_back(*it);
00276       k++;
00277       if (k>=max_features)
00278         break;
00279     }
00280   }
00281 
00282   // free buckets
00283   delete []buckets;
00284 }
00285 
00286 float Matcher::getGain (vector<int32_t> inliers) {
00287 
00288   // check if two images are provided and matched
00289   if (I1p==0 || I1c==0 || p_matched_2.size()==0 || inliers.size()==0)
00290     return 1;
00291 
00292   int32_t window_size = 3;
00293   float   gain        = 0;
00294   int32_t num         = 0;
00295   int32_t u_min,u_max,v_min,v_max;
00296   float   mean_prev,mean_curr;
00297 
00298   for (vector<int32_t>::iterator it=inliers.begin(); it!=inliers.end(); it++) {
00299     if (*it<(int32_t)p_matched_2.size()) {
00300 
00301       // mean in previous image
00302       u_min = min(max((int32_t)p_matched_2[*it].u1p-window_size,0),dims_p[0]);
00303       u_max = min(max((int32_t)p_matched_2[*it].u1p+window_size,0),dims_p[0]);
00304       v_min = min(max((int32_t)p_matched_2[*it].v1p-window_size,0),dims_p[1]);
00305       v_max = min(max((int32_t)p_matched_2[*it].v1p+window_size,0),dims_p[1]);
00306       mean_prev = mean(I1p,dims_p[2],u_min,u_max,v_min,v_max);
00307 
00308       // mean in current image
00309       u_min = min(max((int32_t)p_matched_2[*it].u1c-window_size,0),dims_p[0]);
00310       u_max = min(max((int32_t)p_matched_2[*it].u1c+window_size,0),dims_p[0]);
00311       v_min = min(max((int32_t)p_matched_2[*it].v1c-window_size,0),dims_p[1]);
00312       v_max = min(max((int32_t)p_matched_2[*it].v1c+window_size,0),dims_p[1]);
00313       mean_curr = mean(I1c,dims_c[2],u_min,u_max,v_min,v_max);
00314 
00315       if (mean_prev>10) {
00316         gain += mean_curr/mean_prev;
00317         num++;
00318       }
00319     }
00320   }
00321 
00322   if (num>0) return gain/=(float)num;
00323   else       return 1;
00324 }
00325 
00327 // PRIVATE FUNCTIONS //
00329 
00330 void Matcher::nonMaximumSuppression (int16_t* I_f1,int16_t* I_f2,const int32_t* dims,vector<Matcher::maximum> &maxima,int32_t nms_n) {
00331   
00332   // extract parameters
00333   int32_t width  = dims[0];
00334   int32_t height = dims[1];
00335   int32_t bpl    = dims[2];
00336   int32_t n      = nms_n;
00337   int32_t tau    = param.nms_tau;
00338   
00339   // loop variables
00340   int32_t f1mini,f1minj,f1maxi,f1maxj,f2mini,f2minj,f2maxi,f2maxj;
00341   int32_t f1minval,f1maxval,f2minval,f2maxval,currval;
00342   int32_t addr;
00343   
00344   for (int32_t i=n+margin; i<width-n-margin;i+=n+1) {
00345     for (int32_t j=n+margin; j<height-n-margin;j+=n+1) {
00346 
00347       f1mini = i; f1minj = j; f1maxi = i; f1maxj = j;
00348       f2mini = i; f2minj = j; f2maxi = i; f2maxj = j;
00349       
00350       addr     = getAddressOffsetImage(i,j,bpl);
00351       f1minval = *(I_f1+addr);
00352       f1maxval = f1minval;
00353       f2minval = *(I_f2+addr);
00354       f2maxval = f2minval;
00355 
00356       for (int32_t i2=i; i2<=i+n; i2++) {
00357         for (int32_t j2=j; j2<=j+n; j2++) {
00358           addr    = getAddressOffsetImage(i2,j2,bpl);
00359           currval = *(I_f1+addr);
00360           if (currval<f1minval) {
00361             f1mini   = i2;
00362             f1minj   = j2;
00363             f1minval = currval;
00364           } else if (currval>f1maxval) {
00365             f1maxi   = i2;
00366             f1maxj   = j2;
00367             f1maxval = currval;
00368           }
00369           currval = *(I_f2+addr);
00370           if (currval<f2minval) {
00371             f2mini   = i2;
00372             f2minj   = j2;
00373             f2minval = currval;
00374           } else if (currval>f2maxval) {
00375             f2maxi   = i2;
00376             f2maxj   = j2;
00377             f2maxval = currval;
00378           }
00379         }
00380       }
00381       
00382       // f1 minimum
00383       for (int32_t i2=f1mini-n; i2<=min(f1mini+n,width-1-margin); i2++) {
00384         for (int32_t j2=f1minj-n; j2<=min(f1minj+n,height-1-margin); j2++) {
00385           currval = *(I_f1+getAddressOffsetImage(i2,j2,bpl));
00386           if (currval<f1minval && (i2<i || i2>i+n || j2<j || j2>j+n))
00387             goto failed_f1min;            
00388         }
00389       }
00390       if (f1minval<=-tau)
00391         maxima.push_back(Matcher::maximum(f1mini,f1minj,f1minval,0));
00392       failed_f1min:;
00393 
00394       // f1 maximum
00395       for (int32_t i2=f1maxi-n; i2<=min(f1maxi+n,width-1-margin); i2++) {
00396         for (int32_t j2=f1maxj-n; j2<=min(f1maxj+n,height-1-margin); j2++) {
00397           currval = *(I_f1+getAddressOffsetImage(i2,j2,bpl));
00398           if (currval>f1maxval && (i2<i || i2>i+n || j2<j || j2>j+n))
00399             goto failed_f1max;
00400         }
00401       }
00402       if (f1maxval>=tau)
00403         maxima.push_back(Matcher::maximum(f1maxi,f1maxj,f1maxval,1));
00404       failed_f1max:;
00405       
00406       // f2 minimum
00407       for (int32_t i2=f2mini-n; i2<=min(f2mini+n,width-1-margin); i2++) {
00408         for (int32_t j2=f2minj-n; j2<=min(f2minj+n,height-1-margin); j2++) {
00409           currval = *(I_f2+getAddressOffsetImage(i2,j2,bpl));
00410           if (currval<f2minval && (i2<i || i2>i+n || j2<j || j2>j+n))
00411             goto failed_f2min;
00412         }
00413       }
00414       if (f2minval<=-tau)
00415         maxima.push_back(Matcher::maximum(f2mini,f2minj,f2minval,2));
00416       failed_f2min:;
00417 
00418       // f2 maximum
00419       for (int32_t i2=f2maxi-n; i2<=min(f2maxi+n,width-1-margin); i2++) {
00420         for (int32_t j2=f2maxj-n; j2<=min(f2maxj+n,height-1-margin); j2++) {
00421           currval = *(I_f2+getAddressOffsetImage(i2,j2,bpl));
00422           if (currval>f2maxval && (i2<i || i2>i+n || j2<j || j2>j+n))
00423             goto failed_f2max;
00424         }
00425       }
00426       if (f2maxval>=tau)
00427         maxima.push_back(Matcher::maximum(f2maxi,f2maxj,f2maxval,3));
00428       failed_f2max:;
00429     }
00430   }
00431 }
00432 
00433 inline void Matcher::computeDescriptor (const uint8_t* I_du,const uint8_t* I_dv,const int32_t &bpl,const int32_t &u,const int32_t &v,uint8_t *desc_addr) {
00434   
00435     // get address indices
00436   int32_t addr_m1 = getAddressOffsetImage(u,v-1,bpl);
00437   int32_t addr_m3 = addr_m1-2*bpl;
00438   int32_t addr_m5 = addr_m3-2*bpl;
00439   int32_t addr_p1 = addr_m1+2*bpl;
00440   int32_t addr_p3 = addr_p1+2*bpl;
00441   int32_t addr_p5 = addr_p3+2*bpl;
00442   
00443   // compute descriptor
00444   uint32_t k = 0;
00445   desc_addr[k++] = I_du[addr_m1-3];
00446   desc_addr[k++] = I_dv[addr_m1-3];
00447   desc_addr[k++] = I_du[addr_p1-3];
00448   desc_addr[k++] = I_dv[addr_p1-3];
00449   desc_addr[k++] = I_du[addr_m1-1];
00450   desc_addr[k++] = I_dv[addr_m1-1];
00451   desc_addr[k++] = I_du[addr_p1-1];
00452   desc_addr[k++] = I_dv[addr_p1-1];
00453   desc_addr[k++] = I_du[addr_m1+3];
00454   desc_addr[k++] = I_dv[addr_m1+3];
00455   desc_addr[k++] = I_du[addr_p1+3];
00456   desc_addr[k++] = I_dv[addr_p1+3];
00457   desc_addr[k++] = I_du[addr_m1+1];
00458   desc_addr[k++] = I_dv[addr_m1+1];
00459   desc_addr[k++] = I_du[addr_p1+1];
00460   desc_addr[k++] = I_dv[addr_p1+1];
00461   desc_addr[k++] = I_du[addr_m5-1];
00462   desc_addr[k++] = I_dv[addr_m5-1];
00463   desc_addr[k++] = I_du[addr_p5-1];
00464   desc_addr[k++] = I_dv[addr_p5-1];
00465   desc_addr[k++] = I_du[addr_m5+1];
00466   desc_addr[k++] = I_dv[addr_m5+1];
00467   desc_addr[k++] = I_du[addr_p5+1];
00468   desc_addr[k++] = I_dv[addr_p5+1];
00469   desc_addr[k++] = I_du[addr_m3-5];
00470   desc_addr[k++] = I_dv[addr_m3-5];
00471   desc_addr[k++] = I_du[addr_p3-5];
00472   desc_addr[k++] = I_dv[addr_p3-5];
00473   desc_addr[k++] = I_du[addr_m3+5];
00474   desc_addr[k++] = I_dv[addr_m3+5];
00475   desc_addr[k++] = I_du[addr_p3+5];
00476   desc_addr[k++] = I_dv[addr_p3+5];
00477 }
00478 
00479 inline void Matcher::computeSmallDescriptor (const uint8_t* I_du,const uint8_t* I_dv,const int32_t &bpl,const int32_t &u,const int32_t &v,uint8_t *desc_addr) {
00480   
00481   // get address indices
00482   int32_t addr2 = getAddressOffsetImage(u,v,bpl);
00483   int32_t addr1 = addr2-bpl;
00484   int32_t addr0 = addr1-bpl;
00485   int32_t addr3 = addr2+bpl;
00486   int32_t addr4 = addr3+bpl;
00487   
00488   // compute ELAS-descriptor
00489   uint32_t k = 0;
00490   desc_addr[k++] = I_du[addr0];
00491   desc_addr[k++] = I_du[addr1-2];
00492   desc_addr[k++] = I_du[addr1];
00493   desc_addr[k++] = I_du[addr1+2];
00494   desc_addr[k++] = I_du[addr2-1];
00495   desc_addr[k++] = I_du[addr2];
00496   desc_addr[k++] = I_du[addr2];
00497   desc_addr[k++] = I_du[addr2+1];
00498   desc_addr[k++] = I_du[addr3-2];
00499   desc_addr[k++] = I_du[addr3];
00500   desc_addr[k++] = I_du[addr3+2];
00501   desc_addr[k++] = I_du[addr4];
00502   desc_addr[k++] = I_dv[addr1];
00503   desc_addr[k++] = I_dv[addr2-1];
00504   desc_addr[k++] = I_dv[addr2+1];
00505   desc_addr[k++] = I_dv[addr3];
00506 }
00507 
00508 void Matcher::computeDescriptors (uint8_t* I_du,uint8_t* I_dv,const int32_t bpl,std::vector<Matcher::maximum> &maxima) {
00509   
00510   // loop variables
00511   int32_t u,v;
00512   uint8_t *desc_addr;
00513   
00514   // for all maxima do
00515   for (vector<Matcher::maximum>::iterator it=maxima.begin(); it!=maxima.end(); it++) {
00516     u = (*it).u;
00517     v = (*it).v;
00518     desc_addr = (uint8_t*)(&((*it).d1));
00519     computeDescriptor(I_du,I_dv,bpl,u,v,desc_addr);    
00520   }
00521 }
00522 
00523 inline uint8_t Matcher::saturate (int16_t in) {
00524   if (in<0)   return 0;
00525   if (in>255) return 255;
00526   return in;
00527 }
00528 
00529 void Matcher::filterImageAll (uint8_t* I,uint8_t* I_du,uint8_t* I_dv,int16_t* I_f1,int16_t* I_f2,const int* dims) {
00530   
00531   // get bpl and height  
00532   const int32_t height = dims[1];
00533   const int32_t bpl    = dims[2];
00534   
00535   // init sobel pointers
00536   const uint8_t* p00 = I + 0;
00537   const uint8_t* p01 = I + 1;
00538   const uint8_t* p02 = I + 2;
00539   const uint8_t* p03 = I + 3;
00540   const uint8_t* p04 = I + 4;
00541   const uint8_t* p10 = I + 0 + bpl;
00542   const uint8_t* p11 = I + 1 + bpl;
00543   const uint8_t* p12 = I + 2 + bpl;
00544   const uint8_t* p13 = I + 3 + bpl;
00545   const uint8_t* p14 = I + 4 + bpl;
00546   const uint8_t* p20 = I + 0 + 2*bpl;
00547   const uint8_t* p21 = I + 1 + 2*bpl;
00548   const uint8_t* p22 = I + 2 + 2*bpl;
00549   const uint8_t* p23 = I + 3 + 2*bpl;
00550   const uint8_t* p24 = I + 4 + 2*bpl;
00551   const uint8_t* p30 = I + 0 + 3*bpl;
00552   const uint8_t* p31 = I + 1 + 3*bpl;
00553   const uint8_t* p32 = I + 2 + 3*bpl;
00554   const uint8_t* p33 = I + 3 + 3*bpl;
00555   const uint8_t* p34 = I + 4 + 3*bpl;
00556   const uint8_t* p40 = I + 0 + 4*bpl;
00557   const uint8_t* p41 = I + 1 + 4*bpl;
00558   const uint8_t* p42 = I + 2 + 4*bpl;
00559   const uint8_t* p43 = I + 3 + 4*bpl;
00560   const uint8_t* p44 = I + 4 + 4*bpl;
00561 
00562   // init output pointers
00563   uint8_t* result_du = I_du +   bpl + 1;
00564   uint8_t* result_dv = I_dv +   bpl + 1;
00565   int16_t* result_f1 = I_f1 + 2*bpl + 2;
00566   int16_t* result_f2 = I_f2 + 2*bpl + 2;
00567 
00568   // stop here
00569   const uint8_t* end_input = I + bpl*height;
00570 
00571   // compute filter responses (border pixels are invalid)
00572   for( ; p44 != end_input; p00++, p01++, p02++, p03++, p04++,
00573                            p10++, p11++, p12++, p13++, p14++,
00574                            p20++, p21++, p22++, p23++, p24++,
00575                            p30++, p31++, p32++, p33++, p34++,
00576                            p40++, p41++, p42++, p43++, p44++,
00577                            result_du++, result_dv++, result_f1++, result_f2++ ) {
00578     int16_t temp_du = - *p00 - 2 * *p10 - *p20 + *p02 + 2 * *p12 + *p22;
00579     int16_t temp_dv = - *p00 - 2 * *p01 - *p02 + *p20 + 2 * *p21 + *p22;
00580     *result_du = saturate( temp_du/4 + 128 );
00581     *result_dv = saturate( temp_dv/4 + 128 );
00582     *result_f1 = - *p00 - *p01 -     *p02 - *p03 - *p04
00583                  - *p10 + *p11 +     *p12 + *p13 - *p14
00584                  - *p20 + *p21 + 8 * *p22 + *p23 - *p24
00585                  - *p30 + *p31 +     *p32 + *p33 - *p34
00586                  - *p40 - *p41 -     *p42 - *p43 - *p44;
00587     *result_f2 = - *p00 - *p01 + *p03 + *p04
00588                  - *p10 - *p11 + *p13 + *p14
00589                  + *p30 + *p31 - *p33 - *p34
00590                  + *p40 + *p41 - *p43 - *p44;
00591   }
00592 }
00593 
00594 void Matcher::filterImageSobel (uint8_t* I,uint8_t* I_du,uint8_t* I_dv,const int* dims) {
00595   
00596   // get image width and height  
00597   const int32_t height = dims[1];
00598   const int32_t bpl    = dims[2];
00599   
00600   // init sobel pointers
00601   const uint8_t* p00 = I + 0;
00602   const uint8_t* p01 = I + 1;
00603   const uint8_t* p02 = I + 2;
00604   const uint8_t* p10 = I + 0 + bpl;
00605   const uint8_t* p11 = I + 1 + bpl;
00606   const uint8_t* p12 = I + 2 + bpl;
00607   const uint8_t* p20 = I + 0 + 2*bpl;
00608   const uint8_t* p21 = I + 1 + 2*bpl;
00609   const uint8_t* p22 = I + 2 + 2*bpl;
00610 
00611   // init output pointers
00612   uint8_t* result_du = I_du + bpl + 1;
00613   uint8_t* result_dv = I_dv + bpl + 1;
00614 
00615   // stop here
00616   const uint8_t* end_input = I + bpl*height;
00617 
00618   // compute filter responses (border pixels are invalid)
00619   for( ; p22 != end_input; p00++, p01++, p02++,
00620                            p10++, p11++, p12++,
00621                            p20++, p21++, p22++,
00622                            result_du++, result_dv++) {
00623     int16_t temp_du = - *p00 - 2 * *p10 - *p20 + *p02 + 2 * *p12 + *p22;
00624     int16_t temp_dv = - *p00 - 2 * *p01 - *p02 + *p20 + 2 * *p21 + *p22;
00625     *result_du = saturate( temp_du/4 + 128 );
00626     *result_dv = saturate( temp_dv/4 + 128 );
00627   }
00628 }
00629 
00630 void Matcher::getHalfResolutionDimensions(const int32_t *dims,int32_t *dims_half) {
00631   dims_half[0] = dims[0]/2;
00632   dims_half[1] = dims[1]/2;
00633   dims_half[2] = dims_half[0]+15-(dims_half[0]-1)%16;
00634 }
00635 
00636 uint8_t* Matcher::createHalfResolutionImage(uint8_t *I,const int32_t* dims) {
00637   int32_t dims_half[3];
00638   getHalfResolutionDimensions(dims,dims_half);
00639   uint8_t* I_half = (uint8_t*)_mm_malloc(dims_half[2]*dims_half[1]*sizeof(uint8_t),16);
00640   for (int32_t v=0; v<dims_half[1]; v++)
00641     for (int32_t u=0; u<dims_half[0]; u++)
00642       I_half[v*dims_half[2]+u] =  (uint8_t)(((int32_t)I[(v*2+0)*dims[2]+u*2+0]+
00643                                              (int32_t)I[(v*2+0)*dims[2]+u*2+1]+
00644                                              (int32_t)I[(v*2+1)*dims[2]+u*2+0]+
00645                                              (int32_t)I[(v*2+1)*dims[2]+u*2+1])/4);
00646   return I_half;
00647 }
00648 
00649 void Matcher::computeFeatures (uint8_t *I,const int32_t* dims,int32_t* &max1,int32_t &num1,int32_t* &max2,int32_t &num2,uint8_t* &I_du,uint8_t* &I_dv,uint8_t* &I_du_full,uint8_t* &I_dv_full) {
00650   
00651   int16_t *I_f1;
00652   int16_t *I_f2;
00653   
00654   int32_t dims_matching[3];
00655   memcpy(dims_matching,dims,3*sizeof(int32_t));
00656   
00657   // allocate memory for sobel images and filter images
00658   if (!param.half_resolution) {
00659     I_du = (uint8_t*)_mm_malloc(dims[2]*dims[1]*sizeof(uint8_t*),16);
00660     I_dv = (uint8_t*)_mm_malloc(dims[2]*dims[1]*sizeof(uint8_t*),16);
00661     I_f1 = (int16_t*)_mm_malloc(dims[2]*dims[1]*sizeof(int16_t),16);
00662     I_f2 = (int16_t*)_mm_malloc(dims[2]*dims[1]*sizeof(int16_t),16);
00663     filter::sobel5x5(I,I_du,I_dv,dims[2],dims[1]);
00664     filter::blob5x5(I,I_f1,dims[2],dims[1]);
00665     filter::checkerboard5x5(I,I_f2,dims[2],dims[1]);
00666   } else {
00667     uint8_t* I_matching = createHalfResolutionImage(I,dims);
00668     getHalfResolutionDimensions(dims,dims_matching);
00669     I_du      = (uint8_t*)_mm_malloc(dims_matching[2]*dims_matching[1]*sizeof(uint8_t*),16);
00670     I_dv      = (uint8_t*)_mm_malloc(dims_matching[2]*dims_matching[1]*sizeof(uint8_t*),16);
00671     I_f1      = (int16_t*)_mm_malloc(dims_matching[2]*dims_matching[1]*sizeof(int16_t),16);
00672     I_f2      = (int16_t*)_mm_malloc(dims_matching[2]*dims_matching[1]*sizeof(int16_t),16);
00673     I_du_full = (uint8_t*)_mm_malloc(dims[2]*dims[1]*sizeof(uint8_t*),16);
00674     I_dv_full = (uint8_t*)_mm_malloc(dims[2]*dims[1]*sizeof(uint8_t*),16);
00675     filter::sobel5x5(I_matching,I_du,I_dv,dims_matching[2],dims_matching[1]);
00676     filter::sobel5x5(I,I_du_full,I_dv_full,dims[2],dims[1]);
00677     filter::blob5x5(I_matching,I_f1,dims_matching[2],dims_matching[1]);
00678     filter::checkerboard5x5(I_matching,I_f2,dims_matching[2],dims_matching[1]);
00679     _mm_free(I_matching);
00680   }
00681   
00682   // extract sparse maxima (1st pass) via non-maximum suppression
00683   vector<Matcher::maximum> maxima1;
00684   if (param.multi_stage) {
00685     int32_t nms_n_sparse = param.nms_n*3;
00686     if (nms_n_sparse>10)
00687       nms_n_sparse = max(param.nms_n,10);
00688     nonMaximumSuppression(I_f1,I_f2,dims_matching,maxima1,nms_n_sparse);
00689     computeDescriptors(I_du,I_dv,dims_matching[2],maxima1);
00690   }
00691   
00692   // extract dense maxima (2nd pass) via non-maximum suppression
00693   vector<Matcher::maximum> maxima2;
00694   nonMaximumSuppression(I_f1,I_f2,dims_matching,maxima2,param.nms_n);
00695   computeDescriptors(I_du,I_dv,dims_matching[2],maxima2);
00696 
00697   // release filter images
00698   _mm_free(I_f1);
00699   _mm_free(I_f2);  
00700   
00701   // get number of interest points and init maxima pointer to NULL
00702   num1 = maxima1.size();
00703   num2 = maxima2.size();
00704   max1 = 0;
00705   max2 = 0;
00706   
00707   int32_t s = 1;
00708   if (param.half_resolution)
00709     s = 2;
00710 
00711   // return sparse maxima as 16-bytes aligned memory
00712   if (num1!=0) {
00713     max1 = (int32_t*)_mm_malloc(sizeof(Matcher::maximum)*num1,16);
00714     int32_t k=0;
00715     for (vector<Matcher::maximum>::iterator it=maxima1.begin(); it!=maxima1.end(); it++) {
00716       *(max1+k++) = it->u*s;  *(max1+k++) = it->v*s;  *(max1+k++) = 0;        *(max1+k++) = it->c;
00717       *(max1+k++) = it->d1;   *(max1+k++) = it->d2;   *(max1+k++) = it->d3;   *(max1+k++) = it->d4;
00718       *(max1+k++) = it->d5;   *(max1+k++) = it->d6;   *(max1+k++) = it->d7;   *(max1+k++) = it->d8;
00719     }
00720   }
00721   
00722   // return dense maxima as 16-bytes aligned memory
00723   if (num2!=0) {
00724     max2 = (int32_t*)_mm_malloc(sizeof(Matcher::maximum)*num2,16);
00725     int32_t k=0;
00726     for (vector<Matcher::maximum>::iterator it=maxima2.begin(); it!=maxima2.end(); it++) {
00727       *(max2+k++) = it->u*s;  *(max2+k++) = it->v*s;  *(max2+k++) = 0;        *(max2+k++) = it->c;
00728       *(max2+k++) = it->d1;   *(max2+k++) = it->d2;   *(max2+k++) = it->d3;   *(max2+k++) = it->d4;
00729       *(max2+k++) = it->d5;   *(max2+k++) = it->d6;   *(max2+k++) = it->d7;   *(max2+k++) = it->d8;
00730     }
00731   }
00732 }
00733 
00734 void Matcher::computePriorStatistics (vector<Matcher::p_match> &p_matched,int32_t method) {
00735    
00736   // compute number of bins
00737   int32_t u_bin_num = (int32_t)ceil((float)dims_c[0]/(float)param.match_binsize);
00738   int32_t v_bin_num = (int32_t)ceil((float)dims_c[1]/(float)param.match_binsize);
00739   int32_t bin_num   = v_bin_num*u_bin_num;
00740   
00741   // number of matching stages
00742   int32_t num_stages = 2;
00743   if (method==2)
00744     num_stages = 4;
00745   
00746   // allocate bin accumulator memory
00747   vector<Matcher::delta> *delta_accu = new vector<Matcher::delta>[bin_num];
00748   
00749   // fill bin accumulator
00750   Matcher::delta delta_curr;
00751   for (vector<Matcher::p_match>::iterator it=p_matched.begin(); it!=p_matched.end(); it++) {
00752 
00753     // method flow: compute position delta
00754     if (method==0) {
00755       delta_curr.val[0] = it->u1p - it->u1c;
00756       delta_curr.val[1] = it->v1p - it->v1c;
00757       delta_curr.val[2] = it->u1c - it->u1p;
00758       delta_curr.val[3] = it->v1c - it->v1p;
00759       
00760     // method stereo: compute position delta
00761     } else if (method==1) {
00762       delta_curr.val[0] = it->u2c - it->u1c;
00763       delta_curr.val[1] = 0; 
00764       delta_curr.val[2] = it->u1c - it->u2c;
00765       delta_curr.val[3] = 0;     
00766       
00767     // method quad matching: compute position delta
00768     } else {
00769       /*
00770       delta_curr.val[0] = it->u1p - it->u1c;
00771       delta_curr.val[1] = it->v1p - it->v1c;
00772       delta_curr.val[2] = it->u2p - it->u1p;
00773       delta_curr.val[3] = 0;
00774       delta_curr.val[4] = it->u2c - it->u2p;
00775       delta_curr.val[5] = it->v2c - it->v2p;
00776       delta_curr.val[6] = it->u1c - it->u2c;
00777       delta_curr.val[7] = 0;*/
00778       delta_curr.val[0] = it->u2p - it->u1p;
00779       delta_curr.val[1] = 0;
00780       delta_curr.val[2] = it->u2c - it->u2p;
00781       delta_curr.val[3] = it->v2c - it->v2p;
00782       delta_curr.val[4] = it->u1c - it->u2c;
00783       delta_curr.val[5] = 0;
00784       delta_curr.val[6] = it->u1p - it->u1c;
00785       delta_curr.val[7] = it->v1p - it->v1c;
00786     }
00787     
00788     // compute row and column of bin to which this observation belongs
00789     int32_t u_bin_min,u_bin_max,v_bin_min,v_bin_max;
00790 
00791     // flow + stereo: use current left image as reference
00792     if (method<2) {
00793       u_bin_min = min(max((int32_t)floor(it->u1c/(float)param.match_binsize)-1,0),u_bin_num-1);
00794       u_bin_max = min(max((int32_t)floor(it->u1c/(float)param.match_binsize)+1,0),u_bin_num-1);
00795       v_bin_min = min(max((int32_t)floor(it->v1c/(float)param.match_binsize)-1,0),v_bin_num-1);
00796       v_bin_max = min(max((int32_t)floor(it->v1c/(float)param.match_binsize)+1,0),v_bin_num-1);
00797       
00798     // quad matching: use current previous image as reference
00799     } else {
00800       u_bin_min = min(max((int32_t)floor(it->u1p/(float)param.match_binsize)-1,0),u_bin_num-1);
00801       u_bin_max = min(max((int32_t)floor(it->u1p/(float)param.match_binsize)+1,0),u_bin_num-1);
00802       v_bin_min = min(max((int32_t)floor(it->v1p/(float)param.match_binsize)-1,0),v_bin_num-1);
00803       v_bin_max = min(max((int32_t)floor(it->v1p/(float)param.match_binsize)+1,0),v_bin_num-1);
00804     }
00805     
00806     // add to accumulator
00807     for (int32_t v_bin=v_bin_min; v_bin<=v_bin_max; v_bin++)
00808       for (int32_t u_bin=u_bin_min; u_bin<=u_bin_max; u_bin++)
00809         delta_accu[v_bin*u_bin_num+u_bin].push_back(delta_curr);
00810   }
00811   
00812   // clear ranges
00813   ranges.clear();
00814   
00815   // for all bins compute statistics
00816   for (int32_t v_bin=0; v_bin<v_bin_num; v_bin++) {
00817     for (int32_t u_bin=0; u_bin<u_bin_num; u_bin++) {
00818       
00819       // use full range in case there are no observations
00820       delta delta_min(-param.match_radius);
00821       delta delta_max(+param.match_radius);
00822       
00823       // otherwise determine delta min and delta max
00824       if (delta_accu[v_bin*u_bin_num+u_bin].size()>0) {
00825         
00826         // init displacements 'delta' to 'infinite'
00827         delta_min = delta(+1000000);
00828         delta_max = delta(-1000000);
00829         
00830         // find minimum and maximum displacements
00831         for (vector<Matcher::delta>::iterator it=delta_accu[v_bin*u_bin_num+u_bin].begin();
00832              it!=delta_accu[v_bin*u_bin_num+u_bin].end(); it++) {
00833           for (int32_t i=0; i<num_stages*2; i++) {
00834             if (it->val[i]<delta_min.val[i]) delta_min.val[i] = it->val[i];
00835             if (it->val[i]>delta_max.val[i]) delta_max.val[i] = it->val[i];
00836           }
00837         }
00838       }
00839       
00840       // set search range for this bin
00841       range r;
00842       for (int32_t i=0; i<num_stages; i++) {
00843         
00844         // bound minimum search range to 20x20
00845         float delta_u = delta_max.val[i*2+0]-delta_min.val[i*2+0];
00846         if (delta_u<20) {
00847           delta_min.val[i*2+0] -= ceil((20-delta_u)/2);
00848           delta_max.val[i*2+0] += ceil((20-delta_u)/2);
00849         }
00850         float delta_v = delta_max.val[i*2+1]-delta_min.val[i*2+1];
00851         if (delta_v<20) {
00852           delta_min.val[i*2+1] -= ceil((20-delta_v)/2);
00853           delta_max.val[i*2+1] += ceil((20-delta_v)/2);
00854         }
00855         
00856         // set range for this bin
00857         r.u_min[i] = delta_min.val[i*2+0];
00858         r.u_max[i] = delta_max.val[i*2+0];
00859         r.v_min[i] = delta_min.val[i*2+1];
00860         r.v_max[i] = delta_max.val[i*2+1];
00861       }
00862       ranges.push_back(r);      
00863     }
00864   }
00865   
00866   // free bin accumulator memory
00867   delete []delta_accu;
00868 }
00869 
00870 void Matcher::createIndexVector (int32_t* m,int32_t n,vector<int32_t> *k,const int32_t &u_bin_num,const int32_t &v_bin_num) {
00871 
00872   // descriptor step size
00873   int32_t step_size = sizeof(Matcher::maximum)/sizeof(int32_t);
00874   
00875   // for all points do
00876   for (int32_t i=0; i<n; i++) {
00877     
00878     // extract coordinates and class
00879     int32_t u = *(m+step_size*i+0); // u-coordinate
00880     int32_t v = *(m+step_size*i+1); // v-coordinate
00881     int32_t c = *(m+step_size*i+3); // class
00882     
00883     // compute row and column of bin to which this observation belongs
00884     int32_t u_bin = min((int32_t)floor((float)u/(float)param.match_binsize),u_bin_num-1);
00885     int32_t v_bin = min((int32_t)floor((float)v/(float)param.match_binsize),v_bin_num-1);
00886     
00887     // save index
00888     k[(c*v_bin_num+v_bin)*u_bin_num+u_bin].push_back(i);
00889   }
00890 }
00891 
00892 inline void Matcher::findMatch (int32_t* m1,const int32_t &i1,int32_t* m2,const int32_t &step_size,vector<int32_t> *k2,
00893                                 const int32_t &u_bin_num,const int32_t &v_bin_num,const int32_t &stat_bin,
00894                                 int32_t& min_ind,int32_t stage,bool flow,bool use_prior,double u_,double v_) {
00895   
00896   // init and load image coordinates + feature
00897   min_ind          = 0;
00898   double  min_cost = 10000000;
00899   int32_t u1       = *(m1+step_size*i1+0);
00900   int32_t v1       = *(m1+step_size*i1+1);
00901   int32_t c        = *(m1+step_size*i1+3);
00902   __m128i xmm1     = _mm_load_si128((__m128i*)(m1+step_size*i1+4));
00903   __m128i xmm2     = _mm_load_si128((__m128i*)(m1+step_size*i1+8));
00904   
00905   float u_min,u_max,v_min,v_max;
00906   
00907   // restrict search range with prior
00908   if (use_prior) {
00909     u_min = u1+ranges[stat_bin].u_min[stage];
00910     u_max = u1+ranges[stat_bin].u_max[stage];
00911     v_min = v1+ranges[stat_bin].v_min[stage];
00912     v_max = v1+ranges[stat_bin].v_max[stage];
00913     
00914   // otherwise: use full search space
00915   } else {
00916     u_min = u1-param.match_radius;
00917     u_max = u1+param.match_radius;
00918     v_min = v1-param.match_radius;
00919     v_max = v1+param.match_radius;
00920   }
00921   
00922   // if stereo search => constrain to 1d
00923   if (!flow) {
00924     v_min = v1-param.match_disp_tolerance;
00925     v_max = v1+param.match_disp_tolerance;
00926   }
00927   
00928   // bins of interest
00929   int32_t u_bin_min = min(max((int32_t)floor(u_min/(float)param.match_binsize),0),u_bin_num-1);
00930   int32_t u_bin_max = min(max((int32_t)floor(u_max/(float)param.match_binsize),0),u_bin_num-1);
00931   int32_t v_bin_min = min(max((int32_t)floor(v_min/(float)param.match_binsize),0),v_bin_num-1);
00932   int32_t v_bin_max = min(max((int32_t)floor(v_max/(float)param.match_binsize),0),v_bin_num-1);
00933   
00934   // for all bins of interest do
00935   for (int32_t u_bin=u_bin_min; u_bin<=u_bin_max; u_bin++) {
00936     for (int32_t v_bin=v_bin_min; v_bin<=v_bin_max; v_bin++) {
00937       int32_t k2_ind = (c*v_bin_num+v_bin)*u_bin_num+u_bin;
00938       for (vector<int32_t>::const_iterator i2_it=k2[k2_ind].begin(); i2_it!=k2[k2_ind].end(); i2_it++) {
00939         int32_t u2   = *(m2+step_size*(*i2_it)+0);
00940         int32_t v2   = *(m2+step_size*(*i2_it)+1);
00941         if (u2>=u_min && u2<=u_max && v2>=v_min && v2<=v_max) {
00942           __m128i xmm3 = _mm_load_si128((__m128i*)(m2+step_size*(*i2_it)+4));
00943           __m128i xmm4 = _mm_load_si128((__m128i*)(m2+step_size*(*i2_it)+8));                    
00944           xmm3 = _mm_sad_epu8 (xmm1,xmm3);
00945           xmm4 = _mm_sad_epu8 (xmm2,xmm4);
00946           xmm4 = _mm_add_epi16(xmm3,xmm4);
00947           double cost = (double)(_mm_extract_epi16(xmm4,0)+_mm_extract_epi16(xmm4,4));
00948           
00949           if (u_>=0 && v_>=0) {
00950             double du = (double)u2-u_;
00951             double dv = (double)v2-v_;
00952             double dist = sqrt(du*du+dv*dv);
00953             cost += 4*dist;
00954           }
00955           
00956           if (cost<min_cost) {
00957             min_ind  = *i2_it;
00958             min_cost = cost;
00959           }
00960         }
00961       }
00962     }
00963   }
00964 }
00965 
00966 void Matcher::matching (int32_t *m1p,int32_t *m2p,int32_t *m1c,int32_t *m2c,
00967                         int32_t n1p,int32_t n2p,int32_t n1c,int32_t n2c,
00968                         vector<Matcher::p_match> &p_matched,int32_t method,bool use_prior,Matrix *Tr_delta) {
00969 
00970   // descriptor step size (number of int32_t elements in struct)
00971   int32_t step_size = sizeof(Matcher::maximum)/sizeof(int32_t);
00972   
00973   // compute number of bins
00974   int32_t u_bin_num = (int32_t)ceil((float)dims_c[0]/(float)param.match_binsize);
00975   int32_t v_bin_num = (int32_t)ceil((float)dims_c[1]/(float)param.match_binsize);
00976   int32_t bin_num   = 4*v_bin_num*u_bin_num; // 4 classes
00977   
00978   // allocate memory for index vectors (needed for efficient search)
00979   vector<int32_t> *k1p = new vector<int32_t>[bin_num];
00980   vector<int32_t> *k2p = new vector<int32_t>[bin_num];
00981   vector<int32_t> *k1c = new vector<int32_t>[bin_num];
00982   vector<int32_t> *k2c = new vector<int32_t>[bin_num];
00983   
00984   // loop variables
00985   int32_t* M = (int32_t*)calloc(dims_c[0]*dims_c[1],sizeof(int32_t));
00986   int32_t i1p,i2p,i1c,i2c,i1c2,i1p2;
00987   int32_t u1p,v1p,u2p,v2p,u1c,v1c,u2c,v2c;
00988   
00989   double t00,t01,t02,t03,t10,t11,t12,t13,t20,t21,t22,t23;
00990   if (Tr_delta) {
00991     t00 = Tr_delta->val[0][0];
00992     t01 = Tr_delta->val[0][1];
00993     t02 = Tr_delta->val[0][2];
00994     t03 = Tr_delta->val[0][3];
00995     t10 = Tr_delta->val[1][0];
00996     t11 = Tr_delta->val[1][1];
00997     t12 = Tr_delta->val[1][2];
00998     t13 = Tr_delta->val[1][3];
00999     t20 = Tr_delta->val[2][0];
01000     t21 = Tr_delta->val[2][1];
01001     t22 = Tr_delta->val[2][2];
01002     t23 = Tr_delta->val[2][3];
01003   }
01004 
01006   // method: flow
01007   if (method==0) {
01008     
01009     // create position/class bin index vectors
01010     createIndexVector(m1p,n1p,k1p,u_bin_num,v_bin_num);
01011     createIndexVector(m1c,n1c,k1c,u_bin_num,v_bin_num);
01012     
01013     // for all points do
01014     for (i1c=0; i1c<n1c; i1c++) {
01015 
01016       // coordinates in previous left image
01017       u1c = *(m1c+step_size*i1c+0);
01018       v1c = *(m1c+step_size*i1c+1);
01019 
01020       // compute row and column of statistics bin to which this observation belongs
01021       int32_t u_bin = min((int32_t)floor((float)u1c/(float)param.match_binsize),u_bin_num-1);
01022       int32_t v_bin = min((int32_t)floor((float)v1c/(float)param.match_binsize),v_bin_num-1);
01023       int32_t stat_bin = v_bin*u_bin_num+u_bin;
01024 
01025       // match forward/backward
01026       findMatch(m1c,i1c,m1p,step_size,k1p,u_bin_num,v_bin_num,stat_bin,i1p, 0,true,use_prior);
01027       findMatch(m1p,i1p,m1c,step_size,k1c,u_bin_num,v_bin_num,stat_bin,i1c2,1,true,use_prior);
01028 
01029       // circle closure success?
01030       if (i1c2==i1c) {
01031 
01032         // extract coordinates
01033         u1p = *(m1p+step_size*i1p+0);
01034         v1p = *(m1p+step_size*i1p+1);
01035 
01036         // add match if this pixel isn't matched yet
01037         if (*(M+getAddressOffsetImage(u1c,v1c,dims_c[0]))==0) {
01038           p_matched.push_back(Matcher::p_match(u1p,v1p,i1p,-1,-1,-1,u1c,v1c,i1c,-1,-1,-1));
01039           *(M+getAddressOffsetImage(u1c,v1c,dims_c[0])) = 1;
01040         }
01041       }
01042     }
01043     
01045   // method: stereo
01046   } else if (method==1) {
01047     
01048     // create position/class bin index vectors
01049     createIndexVector(m1c,n1c,k1c,u_bin_num,v_bin_num);
01050     createIndexVector(m2c,n2c,k2c,u_bin_num,v_bin_num);
01051     
01052     // for all points do
01053     for (i1c=0; i1c<n1c; i1c++) {
01054 
01055       // coordinates in previous left image
01056       u1c = *(m1c+step_size*i1c+0);
01057       v1c = *(m1c+step_size*i1c+1);
01058 
01059       // compute row and column of statistics bin to which this observation belongs
01060       int32_t u_bin = min((int32_t)floor((float)u1c/(float)param.match_binsize),u_bin_num-1);
01061       int32_t v_bin = min((int32_t)floor((float)v1c/(float)param.match_binsize),v_bin_num-1);
01062       int32_t stat_bin = v_bin*u_bin_num+u_bin;
01063 
01064       // match left/right
01065       findMatch(m1c,i1c,m2c,step_size,k2c,u_bin_num,v_bin_num,stat_bin,i2c, 0,false,use_prior);
01066       findMatch(m2c,i2c,m1c,step_size,k1c,u_bin_num,v_bin_num,stat_bin,i1c2,1,false,use_prior);
01067 
01068       // circle closure success?
01069       if (i1c2==i1c) {
01070 
01071         // extract coordinates
01072         u2c = *(m2c+step_size*i2c+0);
01073         v2c = *(m2c+step_size*i2c+1);
01074 
01075         // if disparity is positive
01076         if (u1c>=u2c) {
01077 
01078           // add match if this pixel isn't matched yet
01079           if (*(M+getAddressOffsetImage(u1c,v1c,dims_c[0]))==0) {
01080             p_matched.push_back(Matcher::p_match(-1,-1,-1,-1,-1,-1,u1c,v1c,i1c,u2c,v2c,i2c));
01081             *(M+getAddressOffsetImage(u1c,v1c,dims_c[0])) = 1;
01082           }
01083         }
01084       }
01085     }
01086     
01088   // method: quad matching
01089   } else {
01090     
01091     // create position/class bin index vectors
01092     createIndexVector(m1p,n1p,k1p,u_bin_num,v_bin_num);
01093     createIndexVector(m2p,n2p,k2p,u_bin_num,v_bin_num);
01094     createIndexVector(m1c,n1c,k1c,u_bin_num,v_bin_num);
01095     createIndexVector(m2c,n2c,k2c,u_bin_num,v_bin_num);
01096     
01097     // for all points do
01098     for (i1p=0; i1p<n1p; i1p++) {
01099 
01100       // coordinates
01101       u1p = *(m1p+step_size*i1p+0);
01102       v1p = *(m1p+step_size*i1p+1);
01103 
01104       // compute row and column of statistics bin to which this observation belongs
01105       int32_t u_bin = min((int32_t)floor((float)u1p/(float)param.match_binsize),u_bin_num-1);
01106       int32_t v_bin = min((int32_t)floor((float)v1p/(float)param.match_binsize),v_bin_num-1);
01107       int32_t stat_bin = v_bin*u_bin_num+u_bin;
01108 
01109       // match in circle
01110       findMatch(m1p,i1p,m2p,step_size,k2p,u_bin_num,v_bin_num,stat_bin,i2p, 0,false,use_prior);
01111 
01112       u2p = *(m2p+step_size*i2p+0);
01113       v2p = *(m2p+step_size*i2p+1);
01114 
01115       if (Tr_delta) {
01116       
01117         double d = max((double)u1p-(double)u2p,1.0);
01118         double x1p = ((double)u1p-param.cu)*param.base/d;
01119         double y1p = ((double)v1p-param.cv)*param.base/d;
01120         double z1p = param.f*param.base/d;
01121 
01122         double x2c = t00*x1p + t01*y1p + t02*z1p + t03 - param.base;
01123         double y2c = t10*x1p + t11*y1p + t12*z1p + t13;
01124         double z2c = t20*x1p + t21*y1p + t22*z1p + t23;
01125 
01126         double u2c_ = param.f*x2c/z2c+param.cu;
01127         double v2c_ = param.f*y2c/z2c+param.cv;
01128 
01129         findMatch(m2p,i2p,m2c,step_size,k2c,u_bin_num,v_bin_num,stat_bin,i2c, 1,true ,use_prior,u2c_,v2c_);
01130       } else {
01131         findMatch(m2p,i2p,m2c,step_size,k2c,u_bin_num,v_bin_num,stat_bin,i2c, 1,true ,use_prior);
01132       }
01133       findMatch(m2c,i2c,m1c,step_size,k1c,u_bin_num,v_bin_num,stat_bin,i1c, 2,false,use_prior);
01134       if (Tr_delta)
01135         findMatch(m1c,i1c,m1p,step_size,k1p,u_bin_num,v_bin_num,stat_bin,i1p2,3,true ,use_prior,u1p,v1p);
01136       else
01137         findMatch(m1c,i1c,m1p,step_size,k1p,u_bin_num,v_bin_num,stat_bin,i1p2,3,true ,use_prior);
01138       
01139       // circle closure success?
01140       if (i1p2==i1p) {
01141 
01142         // extract coordinates
01143         u2c = *(m2c+step_size*i2c+0); v2c = *(m2c+step_size*i2c+1);
01144         u1c = *(m1c+step_size*i1c+0); v1c = *(m1c+step_size*i1c+1);
01145 
01146         // if disparities are positive
01147         if (u1p>=u2p && u1c>=u2c) {
01148           
01149           // add match
01150           p_matched.push_back(Matcher::p_match(u1p,v1p,i1p,u2p,v2p,i2p,
01151                                                u1c,v1c,i1c,u2c,v2c,i2c));
01152         }
01153       }
01154     }
01155     
01156     // old version:
01157     /*
01158     // for all points do
01159     for (i1c=0; i1c<n1c; i1c++) {
01160 
01161       // coordinates
01162       u1c = *(m1c+step_size*i1c+0);
01163       v1c = *(m1c+step_size*i1c+1);
01164 
01165       // compute row and column of statistics bin to which this observation belongs
01166       int32_t u_bin = min((int32_t)floor((float)u1c/(float)param.match_binsize),u_bin_num-1);
01167       int32_t v_bin = min((int32_t)floor((float)v1c/(float)param.match_binsize),v_bin_num-1);
01168       int32_t stat_bin = v_bin*u_bin_num+u_bin;
01169 
01170       // match in circle
01171       findMatch(m1c,i1c,m1p,step_size,k1p,u_bin_num,v_bin_num,stat_bin,i1p, 0,true ,use_prior,Tr_delta);
01172       findMatch(m1p,i1p,m2p,step_size,k2p,u_bin_num,v_bin_num,stat_bin,i2p, 1,false,use_prior,Tr_delta);
01173       findMatch(m2p,i2p,m2c,step_size,k2c,u_bin_num,v_bin_num,stat_bin,i2c, 2,true ,use_prior,Tr_delta);
01174       findMatch(m2c,i2c,m1c,step_size,k1c,u_bin_num,v_bin_num,stat_bin,i1c2,3,false,use_prior,Tr_delta);
01175       
01176       // circle closure success?
01177       if (i1c2==i1c) {
01178 
01179         // extract coordinates
01180         u1p = *(m1p+step_size*i1p+0); v1p = *(m1p+step_size*i1p+1);
01181         u2p = *(m2p+step_size*i2p+0); v2p = *(m2p+step_size*i2p+1);
01182         u2c = *(m2c+step_size*i2c+0); v2c = *(m2c+step_size*i2c+1);
01183 
01184         // if disparities are positive
01185         if (u1p>=u2p && u1c>=u2c) {
01186           
01187           // add match if this pixel isn't matched yet
01188           if (*(M+getAddressOffsetImage(u1c,v1c,dims_c[0]))==0) {
01189             p_matched.push_back(Matcher::p_match(u1p,v1p,i1p,u2p,v2p,i2p,
01190                                                  u1c,v1c,i1c,u2c,v2c,i2c));
01191             *(M+getAddressOffsetImage(u1c,v1c,dims_c[0])) = 1;
01192           }
01193           
01194         }
01195       }
01196     }
01197     */
01198   }
01199 
01200   // free memory
01201   free(M);
01202   delete []k1p;
01203   delete []k2p;
01204   delete []k1c;
01205   delete []k2c;
01206 }
01207 
01208 void Matcher::removeOutliers (vector<Matcher::p_match> &p_matched,int32_t method) {
01209   
01210   // do we have enough points for outlier removal?
01211   if (p_matched.size()<=3)
01212     return;
01213 
01214   // input/output structure for triangulation
01215   struct triangulateio in, out;
01216 
01217   // inputs
01218   in.numberofpoints = p_matched.size();
01219   in.pointlist = (float*)malloc(in.numberofpoints*2*sizeof(float));
01220   int32_t k=0;
01221   
01222   // create copy of p_matched, init vector with number of support points
01223   // and fill triangle point vector for delaunay triangulation
01224   vector<Matcher::p_match> p_matched_copy;  
01225   vector<int32_t> num_support;
01226   for (vector<Matcher::p_match>::iterator it=p_matched.begin(); it!=p_matched.end(); it++) {
01227     p_matched_copy.push_back(*it);
01228     num_support.push_back(0);
01229     in.pointlist[k++] = it->u1c;
01230     in.pointlist[k++] = it->v1c;
01231   }
01232   
01233   // input parameters
01234   in.numberofpointattributes = 0;
01235   in.pointattributelist      = NULL;
01236   in.pointmarkerlist         = NULL;
01237   in.numberofsegments        = 0;
01238   in.numberofholes           = 0;
01239   in.numberofregions         = 0;
01240   in.regionlist              = NULL;
01241   
01242   // outputs
01243   out.pointlist              = NULL;
01244   out.pointattributelist     = NULL;
01245   out.pointmarkerlist        = NULL;
01246   out.trianglelist           = NULL;
01247   out.triangleattributelist  = NULL;
01248   out.neighborlist           = NULL;
01249   out.segmentlist            = NULL;
01250   out.segmentmarkerlist      = NULL;
01251   out.edgelist               = NULL;
01252   out.edgemarkerlist         = NULL;
01253 
01254   // do triangulation (z=zero-based, n=neighbors, Q=quiet, B=no boundary markers)
01255   // attention: not using the B switch or using the n switch creates a memory leak (=> use valgrind!)
01256   char parameters[] = "zQB";
01257   triangulate(parameters, &in, &out, NULL);
01258   
01259   // for all triangles do
01260   for (int32_t i=0; i<out.numberoftriangles; i++) {
01261     
01262     // extract triangle corner points
01263     int32_t p1 = out.trianglelist[i*3+0];
01264     int32_t p2 = out.trianglelist[i*3+1];
01265     int32_t p3 = out.trianglelist[i*3+2];
01266     
01267     // method: flow
01268     if (method==0) {
01269       
01270       // 1. corner disparity and flow
01271       float p1_flow_u = p_matched_copy[p1].u1c-p_matched_copy[p1].u1p;
01272       float p1_flow_v = p_matched_copy[p1].v1c-p_matched_copy[p1].v1p;
01273 
01274       // 2. corner disparity and flow
01275       float p2_flow_u = p_matched_copy[p2].u1c-p_matched_copy[p2].u1p;
01276       float p2_flow_v = p_matched_copy[p2].v1c-p_matched_copy[p2].v1p;
01277 
01278       // 3. corner disparity and flow
01279       float p3_flow_u = p_matched_copy[p3].u1c-p_matched_copy[p3].u1p;
01280       float p3_flow_v = p_matched_copy[p3].v1c-p_matched_copy[p3].v1p;
01281 
01282       // consistency of 1. edge
01283       if (fabs(p1_flow_u-p2_flow_u)+fabs(p1_flow_v-p2_flow_v)<param.outlier_flow_tolerance) {
01284         num_support[p1]++;
01285         num_support[p2]++;
01286       }
01287 
01288       // consistency of 2. edge
01289       if (fabs(p2_flow_u-p3_flow_u)+fabs(p2_flow_v-p3_flow_v)<param.outlier_flow_tolerance) {
01290         num_support[p2]++;
01291         num_support[p3]++;
01292       }
01293 
01294       // consistency of 3. edge
01295       if (fabs(p1_flow_u-p3_flow_u)+fabs(p1_flow_v-p3_flow_v)<param.outlier_flow_tolerance) {
01296         num_support[p1]++;
01297         num_support[p3]++;
01298       }
01299       
01300     // method: stereo
01301     } else if (method==1) {
01302       
01303       // 1. corner disparity and flow
01304       float p1_disp   = p_matched_copy[p1].u1c-p_matched_copy[p1].u2c;
01305 
01306       // 2. corner disparity and flow
01307       float p2_disp   = p_matched_copy[p2].u1c-p_matched_copy[p2].u2c;
01308 
01309       // 3. corner disparity and flow
01310       float p3_disp   = p_matched_copy[p3].u1c-p_matched_copy[p3].u2c;
01311 
01312       // consistency of 1. edge
01313       if (fabs(p1_disp-p2_disp)<param.outlier_disp_tolerance) {
01314         num_support[p1]++;
01315         num_support[p2]++;
01316       }
01317 
01318       // consistency of 2. edge
01319       if (fabs(p2_disp-p3_disp)<param.outlier_disp_tolerance) {
01320         num_support[p2]++;
01321         num_support[p3]++;
01322       }
01323 
01324       // consistency of 3. edge
01325       if (fabs(p1_disp-p3_disp)<param.outlier_disp_tolerance) {
01326         num_support[p1]++;
01327         num_support[p3]++;
01328       }
01329       
01330     // method: quad matching
01331     } else {
01332       
01333       // 1. corner disparity and flow
01334       float p1_flow_u = p_matched_copy[p1].u1c-p_matched_copy[p1].u1p;
01335       float p1_flow_v = p_matched_copy[p1].v1c-p_matched_copy[p1].v1p;
01336       float p1_disp   = p_matched_copy[p1].u1p-p_matched_copy[p1].u2p;
01337 
01338       // 2. corner disparity and flow
01339       float p2_flow_u = p_matched_copy[p2].u1c-p_matched_copy[p2].u1p;
01340       float p2_flow_v = p_matched_copy[p2].v1c-p_matched_copy[p2].v1p;
01341       float p2_disp   = p_matched_copy[p2].u1p-p_matched_copy[p2].u2p;
01342 
01343       // 3. corner disparity and flow
01344       float p3_flow_u = p_matched_copy[p3].u1c-p_matched_copy[p3].u1p;
01345       float p3_flow_v = p_matched_copy[p3].v1c-p_matched_copy[p3].v1p;
01346       float p3_disp   = p_matched_copy[p3].u1p-p_matched_copy[p3].u2p;
01347 
01348       // consistency of 1. edge
01349       if (fabs(p1_disp-p2_disp)<param.outlier_disp_tolerance && fabs(p1_flow_u-p2_flow_u)+fabs(p1_flow_v-p2_flow_v)<param.outlier_flow_tolerance) {
01350         num_support[p1]++;
01351         num_support[p2]++;
01352       }
01353 
01354       // consistency of 2. edge
01355       if (fabs(p2_disp-p3_disp)<param.outlier_disp_tolerance && fabs(p2_flow_u-p3_flow_u)+fabs(p2_flow_v-p3_flow_v)<param.outlier_flow_tolerance) {
01356         num_support[p2]++;
01357         num_support[p3]++;
01358       }
01359 
01360       // consistency of 3. edge
01361       if (fabs(p1_disp-p3_disp)<param.outlier_disp_tolerance && fabs(p1_flow_u-p3_flow_u)+fabs(p1_flow_v-p3_flow_v)<param.outlier_flow_tolerance) {
01362         num_support[p1]++;
01363         num_support[p3]++;
01364       }
01365     }
01366   }
01367   
01368   // refill p_matched
01369   p_matched.clear();
01370   for (int i=0; i<in.numberofpoints; i++)
01371     if (num_support[i]>=4)
01372       p_matched.push_back(p_matched_copy[i]);
01373   
01374   // free memory used for triangulation
01375   free(in.pointlist);
01376   free(out.pointlist);
01377   free(out.trianglelist);
01378 }
01379 
01380 bool Matcher::parabolicFitting(const uint8_t* I1_du,const uint8_t* I1_dv,const int32_t* dims1,
01381                                const uint8_t* I2_du,const uint8_t* I2_dv,const int32_t* dims2,
01382                                const float &u1,const float &v1,
01383                                float       &u2,float       &v2,
01384                                Matrix At,Matrix AtA,
01385                                uint8_t* desc_buffer) {
01386 
01387   // check if parabolic fitting is feasible (descriptors are within margin)
01388   if (u2-3<margin || u2+3>dims2[0]-1-margin || v2-3<margin || v2+3>dims2[1]-1-margin)
01389     return false;
01390   
01391   // compute reference descriptor
01392   __m128i xmm1,xmm2;
01393   computeSmallDescriptor(I1_du,I1_dv,dims1[2],(int32_t)u1,(int32_t)v1,desc_buffer);
01394   xmm1 = _mm_load_si128((__m128i*)(desc_buffer));
01395   
01396   // compute cost matrix
01397   int32_t cost[49];
01398   for (int32_t dv=0; dv<7; dv++) {
01399     for (int32_t du=0; du<7; du++) {
01400       computeSmallDescriptor(I2_du,I2_dv,dims2[2],(int32_t)u2+du-3,(int32_t)v2+dv-3,desc_buffer);
01401       xmm2 = _mm_load_si128((__m128i*)(desc_buffer));
01402       xmm2 = _mm_sad_epu8(xmm1,xmm2);
01403       cost[dv*7+du] = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4);
01404     }
01405   }
01406   
01407   // compute minimum
01408   int32_t min_ind  = 0;
01409   int32_t min_cost = cost[0];
01410   for (int32_t i=1; i<49; i++) {
01411     if (cost[i]<min_cost) {
01412       min_ind   = i;
01413       min_cost  = cost[i];
01414     }
01415   }
01416   
01417   // get indices
01418   int32_t du = min_ind%7;
01419   int32_t dv = min_ind/7;
01420   
01421   // if minimum is at borders => remove this match
01422   if (du==0 || du==6 || dv==0 || dv==6)
01423     return false;
01424   
01425   // solve least squares system
01426   Matrix c(9,1);
01427   for (int32_t i=-1; i<=+1; i++) {
01428     for (int32_t j=-1; j<=+1; j++) {
01429       int32_t cost_curr = cost[(dv+i)*7+(du+j)];
01430       // if (i!=0 && j!=0 && cost_curr<=min_cost+150)
01431         // return false;
01432       c.val[(i+1)*3+(j+1)][0] = cost_curr;
01433     }
01434   }
01435   Matrix b = At*c;
01436   if (!b.solve(AtA))
01437     return false;
01438   
01439   // extract relative coordinates
01440   float divisor = (b.val[2][0]*b.val[2][0]-4.0*b.val[0][0]*b.val[1][0]);
01441   if (fabs(divisor)<1e-8 || fabs(b.val[2][0])<1e-8)
01442     return false;
01443   float ddv = (2.0*b.val[0][0]*b.val[4][0]-b.val[2][0]*b.val[3][0])/divisor;
01444   float ddu = -(b.val[4][0]+2.0*b.val[1][0]*ddv)/b.val[2][0];
01445   if (fabs(ddu)>=1.0 || fabs(ddv)>=1.0)
01446     return false;
01447   
01448   // update target
01449   u2 += (float)du-3.0+ddu;
01450   v2 += (float)dv-3.0+ddv;
01451   
01452   // return true on success
01453   return true;
01454 }
01455 
01456 void Matcher::relocateMinimum(const uint8_t* I1_du,const uint8_t* I1_dv,const int32_t* dims1,
01457                               const uint8_t* I2_du,const uint8_t* I2_dv,const int32_t* dims2,
01458                               const float &u1,const float &v1,
01459                               float       &u2,float       &v2,
01460                               uint8_t* desc_buffer) {
01461 
01462   // check if parabolic fitting is feasible (descriptors are within margin)
01463   if (u2-2<margin || u2+2>dims2[0]-1-margin || v2-2<margin || v2+2>dims2[1]-1-margin)
01464     return;
01465   
01466   // compute reference descriptor
01467   __m128i xmm1,xmm2;
01468   computeSmallDescriptor(I1_du,I1_dv,dims1[2],(int32_t)u1,(int32_t)v1,desc_buffer);
01469   xmm1 = _mm_load_si128((__m128i*)(desc_buffer));
01470   
01471   // compute cost matrix
01472   int32_t cost[25];
01473   for (int32_t dv=0; dv<5; dv++) {
01474     for (int32_t du=0; du<5; du++) {
01475       computeSmallDescriptor(I2_du,I2_dv,dims2[2],(int32_t)u2+du-2,(int32_t)v2+dv-2,desc_buffer);
01476       xmm2 = _mm_load_si128((__m128i*)(desc_buffer));
01477       xmm2 = _mm_sad_epu8(xmm1,xmm2);
01478       cost[dv*5+du] = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4);
01479     }
01480   }
01481   
01482   // compute minimum
01483   int32_t min_ind  = 0;
01484   int32_t min_cost = cost[0];
01485   for (int32_t i=1; i<25; i++) {
01486     if (cost[i]<min_cost) {
01487       min_ind   = i;
01488       min_cost  = cost[i];
01489     }
01490   }
01491   
01492   // update target
01493   u2 += (float)(min_ind%5)-2.0;
01494   v2 += (float)(min_ind/5)-2.0;
01495 }
01496 
01497 void Matcher::refinement (vector<Matcher::p_match> &p_matched,int32_t method) {
01498   
01499   // allocate aligned memory (32 bytes for 1 descriptors)
01500   uint8_t* desc_buffer = (uint8_t*)_mm_malloc(32*sizeof(uint8_t),16);
01501   
01502   // copy vector (for refill)
01503   vector<Matcher::p_match> p_matched_copy = p_matched;
01504   p_matched.clear();
01505   
01506   // create matrices for least square fitting
01507   FLOAT A_data[9*6] = { 1, 1, 1,-1,-1, 1,
01508                         0, 1, 0, 0,-1, 1,
01509                         1, 1,-1, 1,-1, 1,
01510                         1, 0, 0,-1, 0, 1,
01511                         0, 0, 0, 0, 0, 1,
01512                         1, 0, 0, 1, 0, 1,
01513                         1, 1,-1,-1, 1, 1,
01514                         0, 1, 0, 0, 1, 1,
01515                         1, 1, 1, 1, 1, 1};
01516   Matrix A(9,6,A_data);
01517   Matrix At  = ~A;
01518   Matrix AtA = At*A;
01519   
01520   uint8_t* I1p_du_fit = I1p_du;
01521   uint8_t* I1p_dv_fit = I1p_dv;
01522   uint8_t* I2p_du_fit = I2p_du;
01523   uint8_t* I2p_dv_fit = I2p_dv;
01524   uint8_t* I1c_du_fit = I1c_du;
01525   uint8_t* I1c_dv_fit = I1c_dv;
01526   uint8_t* I2c_du_fit = I2c_du;
01527   uint8_t* I2c_dv_fit = I2c_dv;
01528   if (param.half_resolution) {
01529     I1p_du_fit = I1p_du_full;
01530     I1p_dv_fit = I1p_dv_full;
01531     I2p_du_fit = I2p_du_full;
01532     I2p_dv_fit = I2p_dv_full;
01533     I1c_du_fit = I1c_du_full;
01534     I1c_dv_fit = I1c_dv_full;
01535     I2c_du_fit = I2c_du_full;
01536     I2c_dv_fit = I2c_dv_full;
01537   }
01538   
01539   // for all matches do
01540   for (vector<Matcher::p_match>::iterator it=p_matched_copy.begin(); it!=p_matched_copy.end(); it++) {
01541     
01542     // method: flow or quad matching
01543     if (method==0 || method==2) {
01544       if (param.refinement==2) {
01545         if (!parabolicFitting(I1c_du_fit,I1c_dv_fit,dims_c,I1p_du_fit,I1p_dv_fit,dims_p,
01546                               it->u1c,it->v1c,it->u1p,it->v1p,At,AtA,desc_buffer))
01547           continue;
01548       } else {
01549         relocateMinimum(I1c_du_fit,I1c_dv_fit,dims_c,I1p_du_fit,I1p_dv_fit,dims_p,
01550                         it->u1c,it->v1c,it->u1p,it->v1p,desc_buffer);
01551       }
01552     }
01553     
01554     // method: stereo or quad matching
01555     if (method==1 || method==2) {
01556       if (param.refinement==2) {
01557         if (!parabolicFitting(I1c_du_fit,I1c_dv_fit,dims_c,I2c_du_fit,I2c_dv_fit,dims_c,
01558                               it->u1c,it->v1c,it->u2c,it->v2c,At,AtA,desc_buffer))
01559           continue;
01560       } else {
01561         relocateMinimum(I1c_du_fit,I1c_dv_fit,dims_c,I2c_du_fit,I2c_dv_fit,dims_c,
01562                         it->u1c,it->v1c,it->u2c,it->v2c,desc_buffer);
01563       }
01564     }
01565     
01566     // method: quad matching
01567     if (method==2) {
01568       if (param.refinement==2) {
01569         if (!parabolicFitting(I1c_du_fit,I1c_dv_fit,dims_c,I2p_du_fit,I2p_dv_fit,dims_p,
01570                               it->u1c,it->v1c,it->u2p,it->v2p,At,AtA,desc_buffer))
01571           continue;
01572       } else {
01573         relocateMinimum(I1c_du_fit,I1c_dv_fit,dims_c,I2p_du_fit,I2p_dv_fit,dims_p,
01574                         it->u1c,it->v1c,it->u2p,it->v2p,desc_buffer);
01575       }
01576     }
01577 
01578     // add this match
01579     p_matched.push_back(*it);
01580   }
01581   
01582   // free memory
01583   _mm_free(desc_buffer);
01584 }
01585 
01586 float Matcher::mean(const uint8_t* I,const int32_t &bpl,const int32_t &u_min,const int32_t &u_max,const int32_t &v_min,const int32_t &v_max) {
01587   float mean = 0;
01588   for (int32_t v=v_min; v<=v_max; v++)
01589     for (int32_t u=u_min; u<=u_max; u++)
01590       mean += (float)*(I+getAddressOffsetImage(u,v,bpl));
01591   return
01592     mean /= (float)((u_max-u_min+1)*(v_max-v_min+1));
01593 }
01594