00001
00002 #include "PatchFinder.h"
00003 #include "SmallMatrixOpts.h"
00004 #include "KeyFrame.h"
00005
00006 #include <cvd/vision.h>
00007 #include <cvd/vector_image_ref.h>
00008 #include <cvd/image_interpolate.h>
00009 #include <TooN/Cholesky.h>
00010
00011
00012 #if CVD_HAVE_XMMINTRIN
00013 #include <tmmintrin.h>
00014 #endif
00015
00016 using namespace CVD;
00017 using namespace std;
00018
00019 PatchFinder::PatchFinder(int nPatchSize)
00020 : mimTemplate(ImageRef(nPatchSize,nPatchSize))
00021 {
00022 mnPatchSize = nPatchSize;
00023 mirCenter = ImageRef(nPatchSize/2, nPatchSize/2);
00024 int nMaxSSDPerPixel = 500;
00025 mnMaxSSD = mnPatchSize * mnPatchSize * nMaxSSDPerPixel;
00026
00027 mm2LastWarpMatrix = 9999.9 * Identity;
00028 mpLastTemplateMapPoint = NULL;
00029 };
00030
00031
00032
00033 int PatchFinder::CalcSearchLevelAndWarpMatrix(MapPoint &p,
00034 SE3<> se3CFromW,
00035 Matrix<2> &m2CamDerivs)
00036 {
00037
00038
00039 Vector<3> v3Cam = se3CFromW * p.v3WorldPos;
00040 double dOneOverCameraZ = 1.0 / v3Cam[2];
00041
00042 Vector<3> v3MotionRight = se3CFromW.get_rotation() * p.v3PixelRight_W;
00043 Vector<3> v3MotionDown = se3CFromW.get_rotation() * p.v3PixelDown_W;
00044
00045 mm2WarpInverse.T()[0] = m2CamDerivs * (v3MotionRight.slice<0,2>() - v3Cam.slice<0,2>() * v3MotionRight[2] * dOneOverCameraZ) * dOneOverCameraZ;
00046 mm2WarpInverse.T()[1] = m2CamDerivs * (v3MotionDown.slice<0,2>() - v3Cam.slice<0,2>() * v3MotionDown[2] * dOneOverCameraZ) * dOneOverCameraZ;
00047 double dDet = mm2WarpInverse[0][0] * mm2WarpInverse[1][1] - mm2WarpInverse[0][1] * mm2WarpInverse[1][0];
00048 mnSearchLevel = 0;
00049
00050
00051
00052
00053 while(dDet > 3 && mnSearchLevel < LEVELS-1)
00054 {
00055 mnSearchLevel++;
00056 dDet *= 0.25;
00057 };
00058
00059
00060
00061 if(dDet > 3 || dDet < 0.25)
00062 {
00063 mbTemplateBad = true;
00064 return -1;
00065 }
00066 else
00067 return mnSearchLevel;
00068 }
00069
00070
00071
00072 void PatchFinder::MakeTemplateCoarse(MapPoint &p,
00073 SE3<> se3CFromW,
00074 Matrix<2> &m2CamDerivs)
00075 {
00076 CalcSearchLevelAndWarpMatrix(p, se3CFromW, m2CamDerivs);
00077 MakeTemplateCoarseCont(p);
00078 };
00079
00080
00081 void PatchFinder::MakeTemplateCoarseCont(MapPoint &p)
00082 {
00083
00084 Matrix<2> m2 = M2Inverse(mm2WarpInverse) * LevelScale(mnSearchLevel);
00085
00086
00087
00088
00089
00090
00091
00092
00093 bool bNeedToRefreshTemplate = false;
00094 if(&p != mpLastTemplateMapPoint)
00095 bNeedToRefreshTemplate = true;
00096
00097 for(int i=0; !bNeedToRefreshTemplate && i<2; i++)
00098 {
00099 Vector<2> v2Diff = m2.T()[i] - mm2LastWarpMatrix.T()[i];
00100 const double dRefreshLimit = 0.07;
00101 if(v2Diff * v2Diff > dRefreshLimit * dRefreshLimit)
00102 bNeedToRefreshTemplate = true;
00103 }
00104
00105
00106 if(bNeedToRefreshTemplate)
00107 {
00108 int nOutside;
00109
00110 nOutside = CVD::transform(p.pPatchSourceKF->aLevels[p.nSourceLevel].im,
00111 mimTemplate,
00112 m2,
00113 vec(p.irCenter),
00114 vec(mirCenter));
00115
00116 if(nOutside)
00117 mbTemplateBad = true;
00118 else
00119 mbTemplateBad = false;
00120
00121 MakeTemplateSums();
00122
00123
00124
00125 mpLastTemplateMapPoint = &p;
00126 mm2LastWarpMatrix = m2;
00127 }
00128 };
00129
00130
00131
00132
00133 void PatchFinder::MakeTemplateCoarseNoWarp(KeyFrame &k, int nLevel, ImageRef irLevelPos)
00134 {
00135 mnSearchLevel = nLevel;
00136 Image<byte> &im = k.aLevels[nLevel].im;
00137 if(!im.in_image_with_border(irLevelPos, mnPatchSize / 2 + 1))
00138 {
00139 mbTemplateBad = true;
00140 return;
00141 }
00142 mbTemplateBad = false;
00143 copy(im,
00144 mimTemplate,
00145 mimTemplate.size(),
00146 irLevelPos - mirCenter);
00147
00148 MakeTemplateSums();
00149 }
00150
00151
00152 void PatchFinder::MakeTemplateCoarseNoWarp(MapPoint &p)
00153 {
00154 MakeTemplateCoarseNoWarp(*p.pPatchSourceKF, p.nSourceLevel, p.irCenter);
00155 };
00156
00157
00158
00159 inline void PatchFinder::MakeTemplateSums()
00160 {
00161 int nSum = 0;
00162 int nSumSq = 0;
00163 ImageRef ir;
00164 do
00165 {
00166 int b = mimTemplate[ir];
00167 nSum += b;
00168 nSumSq +=b * b;
00169 }
00170 while(ir.next(mimTemplate.size()));
00171 mnTemplateSum = nSum;
00172 mnTemplateSumSq = nSumSq;
00173 }
00174
00175
00176
00177
00178
00179 bool PatchFinder::FindPatchCoarse(ImageRef irPos, KeyFrame &kf, unsigned int nRange)
00180 {
00181 mbFound = false;
00182
00183
00184 int nLevelScale = LevelScale(mnSearchLevel);
00185 mirPredictedPos = irPos;
00186 irPos = irPos / nLevelScale;
00187 nRange = (nRange + nLevelScale - 1) / nLevelScale;
00188
00189
00190 int nTop = irPos.y - nRange;
00191 int nBottomPlusOne = irPos.y + nRange + 1;
00192 int nLeft = irPos.x - nRange;
00193 int nRight = irPos.x + nRange;
00194
00195
00196 Level &L = kf.aLevels[mnSearchLevel];
00197
00198
00199 if(nTop < 0)
00200 nTop = 0;
00201 if(nTop >= L.im.size().y)
00202 return false;
00203 if(nBottomPlusOne <= 0)
00204 return false;
00205
00206
00207
00208
00209
00210 vector<ImageRef>::iterator i;
00211 vector<ImageRef>::iterator i_end;
00212
00213 i = L.vCorners.begin() + L.vCornerRowLUT[nTop];
00214
00215 if(nBottomPlusOne >= L.im.size().y)
00216 i_end = L.vCorners.end();
00217 else
00218 i_end = L.vCorners.begin() + L.vCornerRowLUT[nBottomPlusOne];
00219
00220 ImageRef irBest;
00221 int nBestSSD = mnMaxSSD + 1;
00222
00223 for(; i<i_end; i++)
00224 {
00225 if( i->x < nLeft || i->x > nRight)
00226 continue;
00227 if((irPos - *i).mag_squared() > nRange * nRange)
00228 continue;
00229
00230 int nSSD;
00231 nSSD = ZMSSDAtPoint(L.im, *i);
00232 if(nSSD < nBestSSD)
00233 {
00234 irBest = *i;
00235 nBestSSD = nSSD;
00236 }
00237 }
00238
00239 if(nBestSSD < mnMaxSSD)
00240 {
00241 mv2CoarsePos= LevelZeroPos(irBest, mnSearchLevel);
00242 mbFound = true;
00243 }
00244 else
00245 mbFound = false;
00246 return mbFound;
00247 }
00248
00249
00250
00251
00252
00253
00254 void PatchFinder::MakeSubPixTemplate()
00255 {
00256 mimJacs.resize(mimTemplate.size() - ImageRef(2,2));
00257 Matrix<3> m3H = Zeros;
00258 ImageRef ir;
00259 for(ir.x = 1; ir.x < mnPatchSize - 1; ir.x++)
00260 for(ir.y = 1; ir.y < mnPatchSize - 1; ir.y++)
00261 {
00262 Vector<2> v2Grad;
00263 v2Grad[0] = 0.5 * (mimTemplate[ir + ImageRef(1,0)] - mimTemplate[ir - ImageRef(1,0)]);
00264 v2Grad[1] = 0.5 * (mimTemplate[ir + ImageRef(0,1)] - mimTemplate[ir - ImageRef(0,1)]);
00265 mimJacs[ir-ImageRef(1,1)].first = v2Grad[0];
00266 mimJacs[ir-ImageRef(1,1)].second = v2Grad[1];
00267 Vector<3> v3Grad = unproject(v2Grad);
00268 m3H += v3Grad.as_col() * v3Grad.as_row();
00269 }
00270
00271
00272 Cholesky<3> chol(m3H);
00273 mm3HInv = chol.get_inverse();
00274
00275
00276
00277
00278
00279 mv2SubPixPos = mv2CoarsePos;
00280 mdMeanDiff = 0.0;
00281 }
00282
00283
00284
00285
00286 bool PatchFinder::IterateSubPixToConvergence(KeyFrame &kf, int nMaxIts)
00287 {
00288 const double dConvLimit = 0.03;
00289 bool bConverged = false;
00290 int nIts;
00291 for(nIts = 0; nIts < nMaxIts && !bConverged; nIts++)
00292 {
00293 double dUpdateSquared = IterateSubPix(kf);
00294 if(dUpdateSquared < 0)
00295 return false;
00296 if(dUpdateSquared < dConvLimit*dConvLimit)
00297 return true;
00298 }
00299 return false;
00300 }
00301
00302
00303
00304
00305
00306 double PatchFinder::IterateSubPix(KeyFrame &kf)
00307 {
00308
00309 Vector<2> v2Center = LevelNPos(mv2SubPixPos, mnSearchLevel);
00310 BasicImage<byte> &im = kf.aLevels[mnSearchLevel].im;
00311 if(!im.in_image_with_border(ir_rounded(v2Center), mnPatchSize / 2 + 1))
00312 return -1.0;
00313
00314
00315 Vector<2> v2Base = v2Center - vec(mirCenter);
00316
00317
00318 Vector<3> v3Accum = Zeros;
00319
00320 ImageRef ir;
00321
00322 byte* pTopLeftPixel;
00323
00324
00325
00326
00327 double dX = v2Base[0]-floor(v2Base[0]);
00328 double dY = v2Base[1]-floor(v2Base[1]);
00329 float fMixTL = (1.0 - dX) * (1.0 - dY);
00330 float fMixTR = (dX) * (1.0 - dY);
00331 float fMixBL = (1.0 - dX) * (dY);
00332 float fMixBR = (dX) * (dY);
00333
00334
00335 unsigned long nRowOffset = &kf.aLevels[mnSearchLevel].im[ImageRef(0,1)] - &kf.aLevels[mnSearchLevel].im[ImageRef(0,0)];
00336 for(ir.y = 1; ir.y < mnPatchSize - 1; ir.y++)
00337 {
00338 pTopLeftPixel = &im[::ir(v2Base) + ImageRef(1,ir.y)];
00339 for(ir.x = 1; ir.x < mnPatchSize - 1; ir.x++)
00340 {
00341 float fPixel =
00342 fMixTL * pTopLeftPixel[0] + fMixTR * pTopLeftPixel[1] +
00343 fMixBL * pTopLeftPixel[nRowOffset] + fMixBR * pTopLeftPixel[nRowOffset + 1];
00344 pTopLeftPixel++;
00345 double dDiff = fPixel - mimTemplate[ir] + mdMeanDiff;
00346 v3Accum[0] += dDiff * mimJacs[ir - ImageRef(1,1)].first;
00347 v3Accum[1] += dDiff * mimJacs[ir - ImageRef(1,1)].second;
00348 v3Accum[2] += dDiff;
00349 };
00350 }
00351
00352
00353 Vector<3> v3Update = mm3HInv * v3Accum;
00354 mv2SubPixPos -= v3Update.slice<0,2>() * LevelScale(mnSearchLevel);
00355 mdMeanDiff -= v3Update[2];
00356
00357 double dPixelUpdateSquared = v3Update.slice<0,2>() * v3Update.slice<0,2>();
00358 return dPixelUpdateSquared;
00359 }
00360
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00379
00380 #if CVD_HAVE_XMMINTRIN
00381
00382 inline int SumXMM_16(__m128i &target)
00383 {
00384 unsigned short int sums_store[8];
00385 _mm_storeu_si128((__m128i*)sums_store, target);
00386 return sums_store[0] + sums_store[1] + sums_store[2] + sums_store[3] +
00387 sums_store[4] + sums_store[5] + sums_store[6] + sums_store[7];
00388 }
00389
00390 inline int SumXMM_32(__m128i &target)
00391 {
00392 unsigned int sums_store[4];
00393 _mm_storeu_si128((__m128i*)sums_store, target);
00394 return sums_store[0] + sums_store[1] + sums_store[2] + sums_store[3];
00395 }
00396 #endif
00397
00398
00399
00400 int PatchFinder::ZMSSDAtPoint(CVD::BasicImage<CVD::byte> &im, const CVD::ImageRef &ir)
00401 {
00402 if(!im.in_image_with_border(ir, mirCenter[0]))
00403 return mnMaxSSD + 1;
00404
00405 ImageRef irImgBase = ir - mirCenter;
00406 byte *imagepointer;
00407 byte *templatepointer;
00408
00409 int nImageSumSq = 0;
00410 int nImageSum = 0;
00411 int nCrossSum = 0;
00412
00413 #if CVD_HAVE_XMMINTRIN
00414 if(mnPatchSize == 8)
00415 {
00416 long unsigned int imagepointerincrement;
00417
00418 __m128i xImageAsEightBytes;
00419 __m128i xImageAsWords;
00420 __m128i xTemplateAsEightBytes;
00421 __m128i xTemplateAsWords;
00422 __m128i xZero;
00423 __m128i xImageSums;
00424 __m128i xImageSqSums;
00425 __m128i xCrossSums;
00426 __m128i xProduct;
00427
00428
00429 xImageSums = _mm_setzero_si128();
00430 xImageSqSums = _mm_setzero_si128();
00431 xCrossSums = _mm_setzero_si128();
00432 xZero = _mm_setzero_si128();
00433
00434 imagepointer = &im[irImgBase + ImageRef(0,0)];
00435 templatepointer = &mimTemplate[ImageRef(0,0)];
00436 imagepointerincrement = &im[irImgBase + ImageRef(0,1)] - imagepointer;
00437
00438 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00439 imagepointer += imagepointerincrement;
00440 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00441 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00442 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00443 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00444 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
00445 templatepointer += 16;
00446 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
00447 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00448 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00449 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00450 imagepointer += imagepointerincrement;
00451 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00452 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00453 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00454 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00455 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
00456 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00457 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00458
00459 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00460 imagepointer += imagepointerincrement;
00461 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00462 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00463 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00464 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00465 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
00466 templatepointer += 16;
00467 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
00468 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00469 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00470 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00471 imagepointer += imagepointerincrement;
00472 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00473 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00474 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00475 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00476 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
00477 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00478 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00479
00480 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00481 imagepointer += imagepointerincrement;
00482 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00483 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00484 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00485 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00486 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
00487 templatepointer += 16;
00488 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
00489 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00490 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00491 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00492 imagepointer += imagepointerincrement;
00493 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00494 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00495 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00496 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00497 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
00498 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00499 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00500
00501 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00502 imagepointer += imagepointerincrement;
00503 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00504 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00505 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00506 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00507 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
00508 templatepointer += 16;
00509 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
00510 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00511 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00512 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00513 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00514 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00515 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00516 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00517 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
00518 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00519 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00520
00521 nImageSum = SumXMM_16(xImageSums);
00522 nCrossSum = SumXMM_32(xCrossSums);
00523 nImageSumSq = SumXMM_32(xImageSqSums);
00524 }
00525 else
00526 #endif
00527 {
00528 for(int nRow = 0; nRow < mnPatchSize; nRow++)
00529 {
00530 imagepointer = &im[irImgBase + ImageRef(0,nRow)];
00531 templatepointer = &mimTemplate[ImageRef(0,nRow)];
00532 for(int nCol = 0; nCol < mnPatchSize; nCol++)
00533 {
00534 int n = imagepointer[nCol];
00535 nImageSum += n;
00536 nImageSumSq += n*n;
00537 nCrossSum += n * templatepointer[nCol];
00538 };
00539 }
00540 };
00541
00542 int SA = mnTemplateSum;
00543 int SB = nImageSum;
00544
00545 int N = mnPatchSize * mnPatchSize;
00546 return ((2*SA*SB - SA*SA - SB*SB)/N + nImageSumSq + mnTemplateSumSq - 2*nCrossSum);
00547 }
00548
00549
00550
00551
00552
00553