00001 #include "refine_feature_match.hpp"
00002 #include "internal_utils.hpp"
00003
00004 #include "pyramid_level.hpp"
00005
00006 #ifdef FOVIS_USE_SSE
00007 #include <emmintrin.h>
00008 #endif
00009
00010 namespace fovis
00011 {
00012
00013 static inline int dot_int16_aligned(const int16_t* a, const int16_t* b, int num_taps)
00014 {
00015 #ifdef FOVIS_USE_SSE
00016 assert(FOVIS_IS_ALIGNED16(a) && FOVIS_IS_ALIGNED16(b));
00017 const int16_t* ap = a;
00018 const int16_t* bp = b;
00019 __m128i tmp;
00020 __m128i ma = _mm_load_si128((const __m128i *) a);
00021 __m128i mb = _mm_load_si128((const __m128i *) b);
00022 __m128i hi = _mm_mulhi_epi16(ma, mb);
00023 __m128i lo = _mm_mullo_epi16(ma, mb);
00024 ma = _mm_unpackhi_epi16(lo, hi);
00025 mb = _mm_unpacklo_epi16(lo, hi);
00026 __m128i sums = _mm_add_epi32(ma, mb);
00027 ap += 8;
00028 bp += 8;
00029 for (int i = 1; i < num_taps; i++) {
00030 ma = _mm_load_si128((const __m128i *) ap);
00031 mb = _mm_load_si128((const __m128i *) bp);
00032 hi = _mm_mulhi_epi16(ma, mb);
00033 lo = _mm_mullo_epi16(ma, mb);
00034 ma = _mm_unpackhi_epi16(lo, hi);
00035 mb = _mm_unpacklo_epi16(lo, hi);
00036 sums = _mm_add_epi32(sums, ma);
00037 sums = _mm_add_epi32(sums, mb);
00038 ap += 8;
00039 bp += 8;
00040 }
00041 tmp = _mm_shuffle_epi32(sums, _MM_SHUFFLE(0, 1, 2, 3));
00042 sums = _mm_add_epi32(sums, tmp);
00043 tmp = _mm_shuffle_epi32(sums, _MM_SHUFFLE(1, 0, 0, 1));
00044 sums = _mm_add_epi32(sums, tmp);
00045 return _mm_cvtsi128_si32(sums);
00046
00047
00048
00049
00050 #else
00051 int result = 0;
00052 for(int i=0; i<num_taps * 8; i++) {
00053 result += a[i] * b[i];
00054 }
00055 return result;
00056 #endif
00057 }
00058
00059 #ifdef DEBUG_SUBPIXEL_MATCHING
00060 static int
00061 bot_pgm_write_fname(const char *fname, const uint8_t * pixels,
00062 int width, int height, int rowstride)
00063 {
00064 FILE *fp = fopen(fname, "wb");
00065 if (!fp) return -1;
00066 fprintf(fp, "P5\n%d\n%d\n%d\n", width, height, 255);
00067 int i, count;
00068 for (i=0; i<height; i++) {
00069 count = fwrite(pixels + i*rowstride, width, 1, fp);
00070 if (1 != count) return -1;
00071 }
00072 fclose(fp);
00073 return 0;
00074 }
00075 #endif
00076
00077
00078
00079
00080 void refineFeatureMatch(PyramidLevel* ref_level,
00081 PyramidLevel* target_level,
00082 Eigen::Vector2d ref_uv,
00083 Eigen::Vector2d init_target_uv,
00084 Eigen::Vector2d * final_target_uv,
00085 float *delta_sse)
00086 {
00087
00088 const uint8_t* ref_gray = ref_level->getGrayscaleImage();
00089 int ref_gray_stride = ref_level->getGrayscaleImageStride();
00090
00091 int desc_len = ref_level->getDescriptorLength();
00092 int desc_stride = ref_level->getDescriptorStride();
00093
00094 uint8_t ref_descriptor[desc_stride] __attribute__ ((aligned (16)));
00095 ref_level->populateDescriptorInterp(ref_uv.x(), ref_uv.y(), ref_descriptor);
00096
00097 int buf_num_bytes = round_up_to_multiple(desc_len * sizeof(int16_t), 16);
00098 int buf_num_elements = buf_num_bytes / sizeof(int16_t);
00099 int buf_num_pad_bytes = (buf_num_elements - desc_len) * sizeof(int16_t);
00100
00101
00102
00103 int dot_prod_num_taps = buf_num_elements / 8;
00104
00105
00106 uint8_t orig_target_desc[desc_stride] __attribute__ ((aligned (16)));
00107 target_level->populateDescriptorInterp(init_target_uv.x(), init_target_uv.y(),
00108 orig_target_desc);
00109 uint8_t tgt_desc[desc_stride] __attribute__ ((aligned (16)));
00110 memcpy(tgt_desc, orig_target_desc, desc_stride);
00111
00112 float tx = init_target_uv.x();
00113 float ty = init_target_uv.y();
00114
00115 const uint8_t* target_gray = target_level->getGrayscaleImage();
00116 int tgt_gray_stride = target_level->getGrayscaleImageStride();
00117
00118 int16_t pix_errs[buf_num_elements] __attribute__ ((aligned (16)));
00119 memset(pix_errs + desc_len, 0, buf_num_pad_bytes);
00120
00121
00122 for (int i = 0; i < desc_len; i++) {
00123 pix_errs[i] = tgt_desc[i] - ref_descriptor[i];
00124 }
00125 int initial_sse = dot_int16_aligned(pix_errs, pix_errs, dot_prod_num_taps);
00126
00127 int32_t final_sse = initial_sse;
00128
00129
00130
00131
00132
00133
00134
00135 int16_t ref_desc_dx[buf_num_elements] __attribute__ ((aligned (16)));
00136 int16_t ref_desc_dy[buf_num_elements] __attribute__ ((aligned (16)));
00137 memset(ref_desc_dx + desc_len, 0, buf_num_pad_bytes);
00138 memset(ref_desc_dy + desc_len, 0, buf_num_pad_bytes);
00139 int rdesc_offset = ref_uv.y() * ref_gray_stride + ref_uv.x();
00140 const int* ref_desc_offsets = ref_level->getDescriptorIndexOffsets();
00141 for (int i = 0; i < desc_len; i++) {
00142 int k = rdesc_offset + ref_desc_offsets[i];
00143 ref_desc_dx[i] = ref_gray[k + 1] - ref_gray[k];
00144 ref_desc_dy[i] = ref_gray[k + ref_gray_stride] - ref_gray[k];
00145 assert(k + ref_gray_stride < ref_level->getHeight() * ref_gray_stride);
00146 }
00147
00148 int16_t Mx[buf_num_elements] __attribute__ ((aligned (16)));
00149 int16_t My[buf_num_elements] __attribute__ ((aligned (16)));
00150 memset(Mx + desc_len, 0, buf_num_pad_bytes);
00151 memset(My + desc_len, 0, buf_num_pad_bytes);
00152
00153 int max_iterations = 6;
00154 const int* tgt_desc_offsets = target_level->getDescriptorIndexOffsets();
00155 for (int iter_num = 0; iter_num < max_iterations; iter_num++) {
00156
00157
00158 int tdesc_center = ((int) ty) * tgt_gray_stride + (int) tx;
00159 for (int i = 0; i < desc_len; i++) {
00160 int k = tdesc_center + tgt_desc_offsets[i];
00161 Mx[i] = ref_desc_dx[i] + (target_gray[k + 1] - target_gray[k]);
00162 My[i] = ref_desc_dy[i] + (target_gray[k + tgt_gray_stride] - target_gray[k]);
00163 }
00164
00165
00166 double S_00 = dot_int16_aligned(Mx, Mx, dot_prod_num_taps);
00167 double S_01 = dot_int16_aligned(Mx, My, dot_prod_num_taps);
00168 double S_11 = dot_int16_aligned(My, My, dot_prod_num_taps);
00169
00170
00171 double det = S_00 * S_11 - S_01 * S_01;
00172 if (det <= 1e-9)
00173 break;
00174 double Sinv_00 = S_11 / det;
00175 double Sinv_01 = -S_01 / det;
00176 double Sinv_11 = S_00 / det;
00177
00178
00179 double gx = dot_int16_aligned(Mx, pix_errs, dot_prod_num_taps);
00180 double gy = dot_int16_aligned(My, pix_errs, dot_prod_num_taps);
00181
00182
00183
00184 double delta_x = -2 * (Sinv_00 * gx + Sinv_01 * gy);
00185 double delta_y = -2 * (Sinv_01 * gx + Sinv_11 * gy);
00186
00187 float next_tx = tx + delta_x;
00188 float next_ty = ty + delta_y;
00189
00190
00191 if (!target_level->isLegalKeypointCoordinate(next_tx, next_ty))
00192 break;
00193
00194
00195 target_level->populateDescriptorInterp(next_tx, next_ty, tgt_desc);
00196
00197 for (int i = 0; i < desc_len; i++)
00198 pix_errs[i] = tgt_desc[i] - ref_descriptor[i];
00199 int sse = dot_int16_aligned(pix_errs, pix_errs, dot_prod_num_taps);
00200
00201 #ifdef DEBUG_SUBPIXEL_MATCHING
00202 printf("sse: %d, delta : %f, %f\n", sse, delta_x, delta_y);
00203 char tfname[80];
00204 sprintf(tfname, "tgt-desc-%d.pgm", iter_num);
00205 bot_pgm_write_fname(tfname, tgt_desc, fwsize, fwsize, fwsize);
00206 #endif
00207
00208 if (sse < final_sse) {
00209
00210 tx = next_tx;
00211 ty = next_ty;
00212 final_sse = sse;
00213 } else {
00214
00215
00216 #ifdef DEBUG_SUBPIXEL_MATCHING
00217 printf(" Error is increasing! Abort optimization.\n");
00218 #endif
00219 break;
00220 }
00221
00222
00223 if (fabs(delta_x) < 0.1 && fabs(delta_y) < 0.1)
00224 break;
00225 }
00226
00227 *final_target_uv = Eigen::Vector2d(tx, ty);
00228 *delta_sse = final_sse - initial_sse;
00229 }
00230
00231 }