refine_feature_match.cpp
Go to the documentation of this file.
00001 #include "refine_feature_match.hpp"
00002 #include "internal_utils.hpp"
00003 
00004 #include "pyramid_level.hpp"
00005 
00006 #ifdef FOVIS_USE_SSE
00007 #include <emmintrin.h>
00008 #endif
00009 
00010 namespace fovis
00011 {
00012 
00013 static inline int dot_int16_aligned(const int16_t* a, const int16_t* b, int num_taps)
00014 {
00015 #ifdef FOVIS_USE_SSE
00016   assert(FOVIS_IS_ALIGNED16(a) && FOVIS_IS_ALIGNED16(b));
00017   const int16_t* ap = a;
00018   const int16_t* bp = b;
00019   __m128i tmp;
00020   __m128i ma = _mm_load_si128((const __m128i *) a);
00021   __m128i mb = _mm_load_si128((const __m128i *) b);
00022   __m128i hi = _mm_mulhi_epi16(ma, mb);
00023   __m128i lo = _mm_mullo_epi16(ma, mb);
00024   ma = _mm_unpackhi_epi16(lo, hi);
00025   mb = _mm_unpacklo_epi16(lo, hi);
00026   __m128i sums = _mm_add_epi32(ma, mb);
00027   ap += 8;
00028   bp += 8;
00029   for (int i = 1; i < num_taps; i++) {
00030     ma = _mm_load_si128((const __m128i *) ap);
00031     mb = _mm_load_si128((const __m128i *) bp);
00032     hi = _mm_mulhi_epi16(ma, mb);
00033     lo = _mm_mullo_epi16(ma, mb);
00034     ma = _mm_unpackhi_epi16(lo, hi);
00035     mb = _mm_unpacklo_epi16(lo, hi);
00036     sums = _mm_add_epi32(sums, ma);
00037     sums = _mm_add_epi32(sums, mb);
00038     ap += 8;
00039     bp += 8;
00040   }
00041   tmp = _mm_shuffle_epi32(sums, _MM_SHUFFLE(0, 1, 2, 3));
00042   sums = _mm_add_epi32(sums, tmp);
00043   tmp = _mm_shuffle_epi32(sums, _MM_SHUFFLE(1, 0, 0, 1));
00044   sums = _mm_add_epi32(sums, tmp);
00045   return _mm_cvtsi128_si32(sums);
00046   //  int check = 0;
00047   //  for(int i=0; i<num_taps * 8; i++)
00048   //    check += a[i] * b[i];
00049   //  assert(check == result);
00050 #else
00051   int result = 0;
00052   for(int i=0; i<num_taps * 8; i++) {
00053     result += a[i] * b[i];
00054   }
00055   return result;
00056 #endif
00057 }
00058 
00059 #ifdef DEBUG_SUBPIXEL_MATCHING
00060 static int
00061 bot_pgm_write_fname(const char *fname, const uint8_t * pixels,
00062     int width, int height, int rowstride)
00063 {
00064   FILE *fp = fopen(fname, "wb");
00065   if (!fp) return -1;
00066   fprintf(fp, "P5\n%d\n%d\n%d\n", width, height, 255);
00067   int i, count;
00068   for (i=0; i<height; i++) {
00069     count = fwrite(pixels + i*rowstride, width, 1, fp);
00070     if (1 != count) return -1;
00071   }
00072   fclose(fp);
00073   return 0;
00074 }
00075 #endif
00076 
00077 
00078 // sub-pixel refinement of feature matches, for more accurate depth
00079 // estimation.
00080 void refineFeatureMatch(PyramidLevel* ref_level,
00081                         PyramidLevel* target_level,
00082                         Eigen::Vector2d ref_uv,
00083                         Eigen::Vector2d init_target_uv,
00084                         Eigen::Vector2d * final_target_uv,
00085                         float *delta_sse)
00086 {
00087   // get the reference descriptor
00088   const uint8_t* ref_gray = ref_level->getGrayscaleImage();
00089   int ref_gray_stride = ref_level->getGrayscaleImageStride();
00090 
00091   int desc_len = ref_level->getDescriptorLength();
00092   int desc_stride = ref_level->getDescriptorStride();
00093 
00094   uint8_t ref_descriptor[desc_stride] __attribute__ ((aligned (16)));
00095   ref_level->populateDescriptorInterp(ref_uv.x(), ref_uv.y(), ref_descriptor);
00096 
00097   int buf_num_bytes = round_up_to_multiple(desc_len * sizeof(int16_t), 16);
00098   int buf_num_elements = buf_num_bytes / sizeof(int16_t);
00099   int buf_num_pad_bytes = (buf_num_elements - desc_len) * sizeof(int16_t);
00100 
00101   // how many SSE operations does it take to compute a dot product?  Each
00102   // operation works on 8 elements at a time.
00103   int dot_prod_num_taps = buf_num_elements / 8;
00104 
00105   // initialize the target descriptor
00106   uint8_t orig_target_desc[desc_stride] __attribute__ ((aligned (16)));
00107   target_level->populateDescriptorInterp(init_target_uv.x(), init_target_uv.y(),
00108                                          orig_target_desc);
00109   uint8_t tgt_desc[desc_stride] __attribute__ ((aligned (16)));
00110   memcpy(tgt_desc, orig_target_desc, desc_stride);
00111 
00112   float tx = init_target_uv.x();
00113   float ty = init_target_uv.y();
00114 
00115   const uint8_t* target_gray = target_level->getGrayscaleImage();
00116   int tgt_gray_stride = target_level->getGrayscaleImageStride();
00117 
00118   int16_t pix_errs[buf_num_elements] __attribute__ ((aligned (16)));
00119   memset(pix_errs + desc_len, 0, buf_num_pad_bytes);
00120 
00121   // compute an initial error
00122   for (int i = 0; i < desc_len; i++) {
00123     pix_errs[i] = tgt_desc[i] - ref_descriptor[i];
00124   }
00125   int initial_sse = dot_int16_aligned(pix_errs, pix_errs, dot_prod_num_taps);
00126 
00127   int32_t final_sse = initial_sse;
00128 
00129   // Minimization using Efficient Second-order Minimization (ESM) method
00130   // described in:
00131   //   Selim Benhimane and Ezio Malis, "Real-time image-based tracking of
00132   //   planes using Efficient Second-order Minimization", IROS 2004
00133 
00134   // compute reference image gradients
00135   int16_t ref_desc_dx[buf_num_elements] __attribute__ ((aligned (16)));
00136   int16_t ref_desc_dy[buf_num_elements] __attribute__ ((aligned (16)));
00137   memset(ref_desc_dx + desc_len, 0, buf_num_pad_bytes);
00138   memset(ref_desc_dy + desc_len, 0, buf_num_pad_bytes);
00139   int rdesc_offset = ref_uv.y() * ref_gray_stride + ref_uv.x();
00140   const int* ref_desc_offsets = ref_level->getDescriptorIndexOffsets();
00141   for (int i = 0; i < desc_len; i++) {
00142     int k = rdesc_offset + ref_desc_offsets[i];
00143     ref_desc_dx[i] = ref_gray[k + 1] - ref_gray[k];
00144     ref_desc_dy[i] = ref_gray[k + ref_gray_stride] - ref_gray[k];
00145     assert(k + ref_gray_stride < ref_level->getHeight() * ref_gray_stride);
00146   }
00147 
00148   int16_t Mx[buf_num_elements] __attribute__ ((aligned (16)));
00149   int16_t My[buf_num_elements] __attribute__ ((aligned (16)));
00150   memset(Mx + desc_len, 0, buf_num_pad_bytes);
00151   memset(My + desc_len, 0, buf_num_pad_bytes);
00152 
00153   int max_iterations = 6;
00154   const int* tgt_desc_offsets = target_level->getDescriptorIndexOffsets();
00155   for (int iter_num = 0; iter_num < max_iterations; iter_num++) {
00156     // compute target image gradients at current position and
00157     // M = sum of Jacobians at reference and current positions
00158     int tdesc_center = ((int) ty) * tgt_gray_stride + (int) tx;
00159     for (int i = 0; i < desc_len; i++) {
00160       int k = tdesc_center + tgt_desc_offsets[i];
00161       Mx[i] = ref_desc_dx[i] + (target_gray[k + 1] - target_gray[k]);
00162       My[i] = ref_desc_dy[i] + (target_gray[k + tgt_gray_stride] - target_gray[k]);
00163     }
00164 
00165     // S = M'*M
00166     double S_00 = dot_int16_aligned(Mx, Mx, dot_prod_num_taps);
00167     double S_01 = dot_int16_aligned(Mx, My, dot_prod_num_taps);
00168     double S_11 = dot_int16_aligned(My, My, dot_prod_num_taps);
00169 
00170     // S^{-1}
00171     double det = S_00 * S_11 - S_01 * S_01;
00172     if (det <= 1e-9)
00173       break;
00174     double Sinv_00 = S_11 / det;
00175     double Sinv_01 = -S_01 / det;
00176     double Sinv_11 = S_00 / det;
00177 
00178     // M'*err
00179     double gx = dot_int16_aligned(Mx, pix_errs, dot_prod_num_taps);
00180     double gy = dot_int16_aligned(My, pix_errs, dot_prod_num_taps);
00181 
00182     // M^{+} = pseudoinverse of M
00183     // delta = -2 * M^{+} * err
00184     double delta_x = -2 * (Sinv_00 * gx + Sinv_01 * gy);
00185     double delta_y = -2 * (Sinv_01 * gx + Sinv_11 * gy);
00186 
00187     float next_tx = tx + delta_x;
00188     float next_ty = ty + delta_y;
00189 
00190     // stop if the keypoint is about to go out of bounds
00191     if (!target_level->isLegalKeypointCoordinate(next_tx, next_ty))
00192       break;
00193 
00194     // compute shifted target descriptor and error
00195     target_level->populateDescriptorInterp(next_tx, next_ty, tgt_desc);
00196 
00197     for (int i = 0; i < desc_len; i++)
00198       pix_errs[i] = tgt_desc[i] - ref_descriptor[i];
00199     int sse = dot_int16_aligned(pix_errs, pix_errs, dot_prod_num_taps);
00200 
00201 #ifdef DEBUG_SUBPIXEL_MATCHING
00202     printf("sse: %d, delta : %f, %f\n", sse, delta_x, delta_y);
00203     char tfname[80];
00204     sprintf(tfname, "tgt-desc-%d.pgm", iter_num);
00205     bot_pgm_write_fname(tfname, tgt_desc, fwsize, fwsize, fwsize);
00206 #endif
00207 
00208     if (sse < final_sse) {
00209       // only step if the error decreases.
00210       tx = next_tx;
00211       ty = next_ty;
00212       final_sse = sse;
00213     } else {
00214       // if stepping would increase the error, then just give up.
00215       // TODO modify damping parameters instead?  Might not be worth it
00216 #ifdef DEBUG_SUBPIXEL_MATCHING
00217       printf("  Error is increasing! Abort optimization.\n");
00218 #endif
00219       break;
00220     }
00221 
00222     // stop if we're not moving that much
00223     if (fabs(delta_x) < 0.1 && fabs(delta_y) < 0.1)
00224       break;
00225   }
00226 
00227   *final_target_uv = Eigen::Vector2d(tx, ty);
00228   *delta_sse = final_sse - initial_sse;
00229 }
00230 
00231 } /*  */


libfovis
Author(s): Albert Huang, Maurice Fallon
autogenerated on Thu Jun 6 2019 20:16:12