00001
00002 #include "ptam/PatchFinder.h"
00003 #include "ptam/SmallMatrixOpts.h"
00004 #include "ptam/KeyFrame.h"
00005
00006 #include <cvd/vision.h>
00007 #include <cvd/vector_image_ref.h>
00008 #include <cvd/image_interpolate.h>
00009 #include <TooN/Cholesky.h>
00010
00011
00012 #if CVD_HAVE_XMMINTRIN
00013 #include <tmmintrin.h>
00014 #endif
00015
00016 using namespace CVD;
00017 using namespace std;
00018
00019 PatchFinder::PatchFinder(int nPatchSize)
00020 : mimTemplate(ImageRef(nPatchSize,nPatchSize))
00021 {
00022 mnPatchSize = nPatchSize;
00023 mirCenter = ImageRef(nPatchSize/2, nPatchSize/2);
00024 int nMaxSSDPerPixel = 500;
00025 mnMaxSSD = mnPatchSize * mnPatchSize * nMaxSSDPerPixel;
00026
00027 mm2LastWarpMatrix = 9999.9 * Identity;
00028 };
00029
00030
00031
00032 int PatchFinder::CalcSearchLevelAndWarpMatrix(MapPoint::Ptr p,
00033 SE3<> se3CFromW,
00034 Matrix<2> &m2CamDerivs)
00035 {
00036
00037
00038 Vector<3> v3Cam = se3CFromW * p->v3WorldPos;
00039 double dOneOverCameraZ = 1.0 / v3Cam[2];
00040
00041 Vector<3> v3MotionRight = se3CFromW.get_rotation() * p->v3PixelRight_W;
00042 Vector<3> v3MotionDown = se3CFromW.get_rotation() * p->v3PixelDown_W;
00043
00044 mm2WarpInverse.T()[0] = m2CamDerivs * (v3MotionRight.slice<0,2>() - v3Cam.slice<0,2>() * v3MotionRight[2] * dOneOverCameraZ) * dOneOverCameraZ;
00045 mm2WarpInverse.T()[1] = m2CamDerivs * (v3MotionDown.slice<0,2>() - v3Cam.slice<0,2>() * v3MotionDown[2] * dOneOverCameraZ) * dOneOverCameraZ;
00046 double dDet = mm2WarpInverse[0][0] * mm2WarpInverse[1][1] - mm2WarpInverse[0][1] * mm2WarpInverse[1][0];
00047 mnSearchLevel = 0;
00048
00049
00050
00051
00052 while(dDet > 3 && mnSearchLevel < LEVELS-1)
00053 {
00054 mnSearchLevel++;
00055 dDet *= 0.25;
00056 };
00057
00058
00059
00060 if(dDet > 3 || dDet < 0.25)
00061 {
00062 mbTemplateBad = true;
00063 return -1;
00064 }
00065 else
00066 return mnSearchLevel;
00067 }
00068
00069
00070
00071 void PatchFinder::MakeTemplateCoarse(MapPoint::Ptr p,
00072 SE3<> se3CFromW,
00073 Matrix<2> &m2CamDerivs)
00074 {
00075 CalcSearchLevelAndWarpMatrix(p, se3CFromW, m2CamDerivs);
00076 MakeTemplateCoarseCont(p);
00077 };
00078
00079
00080 void PatchFinder::MakeTemplateCoarseCont(MapPoint::Ptr p)
00081 {
00082
00083 Matrix<2> m2 = M2Inverse(mm2WarpInverse) * LevelScale(mnSearchLevel);
00084
00085
00086
00087
00088
00089
00090
00091
00092 bool bNeedToRefreshTemplate = false;
00093 if(p != mpLastTemplateMapPoint)
00094 bNeedToRefreshTemplate = true;
00095
00096 for(int i=0; !bNeedToRefreshTemplate && i<2; i++)
00097 {
00098 Vector<2> v2Diff = m2.T()[i] - mm2LastWarpMatrix.T()[i];
00099 const double dRefreshLimit = 0.07;
00100 if(v2Diff * v2Diff > dRefreshLimit * dRefreshLimit)
00101 bNeedToRefreshTemplate = true;
00102 }
00103
00104
00105 if(bNeedToRefreshTemplate)
00106 {
00107 int nOutside;
00108
00109 nOutside = CVD::transform(p->pPatchSourceKF->aLevels[p->nSourceLevel].im,
00110 mimTemplate,
00111 m2,
00112 vec(p->irCenter),
00113 vec(mirCenter));
00114
00115 if(nOutside)
00116 mbTemplateBad = true;
00117 else
00118 mbTemplateBad = false;
00119
00120 MakeTemplateSums();
00121
00122
00123
00124 mpLastTemplateMapPoint = p;
00125 mm2LastWarpMatrix = m2;
00126 }
00127 };
00128
00129
00130
00131
00132 void PatchFinder::MakeTemplateCoarseNoWarp(KeyFrame::Ptr k, int nLevel, ImageRef irLevelPos)
00133 {
00134 mnSearchLevel = nLevel;
00135 Image<CVD::byte> &im = k->aLevels[nLevel].im;
00136 if(!im.in_image_with_border(irLevelPos, mnPatchSize / 2 + 1))
00137 {
00138 mbTemplateBad = true;
00139 return;
00140 }
00141 mbTemplateBad = false;
00142 copy(im,
00143 mimTemplate,
00144 mimTemplate.size(),
00145 irLevelPos - mirCenter);
00146
00147 MakeTemplateSums();
00148 }
00149
00150
00151 void PatchFinder::MakeTemplateCoarseNoWarp(MapPoint::Ptr p)
00152 {
00153 MakeTemplateCoarseNoWarp(p->pPatchSourceKF, p->nSourceLevel, p->irCenter);
00154 };
00155
00156
00157
00158 inline void PatchFinder::MakeTemplateSums()
00159 {
00160 int nSum = 0;
00161 int nSumSq = 0;
00162 ImageRef ir;
00163 do
00164 {
00165 int b = mimTemplate[ir];
00166 nSum += b;
00167 nSumSq +=b * b;
00168 }
00169 while(ir.next(mimTemplate.size()));
00170 mnTemplateSum = nSum;
00171 mnTemplateSumSq = nSumSq;
00172 }
00173
00174
00175
00176
00177
00178 bool PatchFinder::FindPatchCoarse(ImageRef irPos, KeyFrame::Ptr kf, unsigned int nRange)
00179 {
00180 mbFound = false;
00181
00182
00183 int nLevelScale = LevelScale(mnSearchLevel);
00184 mirPredictedPos = irPos;
00185 irPos = irPos / nLevelScale;
00186 nRange = (nRange + nLevelScale - 1) / nLevelScale;
00187
00188
00189 int nTop = irPos.y - nRange;
00190 int nBottomPlusOne = irPos.y + nRange + 1;
00191 int nLeft = irPos.x - nRange;
00192 int nRight = irPos.x + nRange;
00193
00194
00195 Level &L = kf->aLevels[mnSearchLevel];
00196
00197
00198 if(nTop < 0)
00199 nTop = 0;
00200 if(nTop >= L.im.size().y)
00201 return false;
00202 if(nBottomPlusOne <= 0)
00203 return false;
00204
00205
00206
00207
00208
00209 vector<ImageRef>::iterator i;
00210 vector<ImageRef>::iterator i_end;
00211
00212 i = L.vCorners.begin() + L.vCornerRowLUT[nTop];
00213
00214 if(nBottomPlusOne >= L.im.size().y)
00215 i_end = L.vCorners.end();
00216 else
00217 i_end = L.vCorners.begin() + L.vCornerRowLUT[nBottomPlusOne];
00218
00219 ImageRef irBest;
00220 int nBestSSD = mnMaxSSD + 1;
00221
00222 for(; i<i_end; i++)
00223 {
00224 if( i->x < nLeft || i->x > nRight)
00225 continue;
00226 if((irPos - *i).mag_squared() > nRange * nRange)
00227 continue;
00228
00229 int nSSD;
00230 nSSD = ZMSSDAtPoint(L.im, *i);
00231 if(nSSD < nBestSSD)
00232 {
00233 irBest = *i;
00234 nBestSSD = nSSD;
00235 }
00236 }
00237
00238 if(nBestSSD < mnMaxSSD)
00239 {
00240 mv2CoarsePos= LevelZeroPos(irBest, mnSearchLevel);
00241 mbFound = true;
00242 }
00243 else
00244 mbFound = false;
00245 return mbFound;
00246 }
00247
00248
00249
00250
00251
00252
00253 void PatchFinder::MakeSubPixTemplate()
00254 {
00255 mimJacs.resize(mimTemplate.size() - ImageRef(2,2));
00256 Matrix<3> m3H = Zeros;
00257 ImageRef ir;
00258 for(ir.x = 1; ir.x < mnPatchSize - 1; ir.x++)
00259 for(ir.y = 1; ir.y < mnPatchSize - 1; ir.y++)
00260 {
00261 Vector<2> v2Grad;
00262 v2Grad[0] = 0.5 * (mimTemplate[ir + ImageRef(1,0)] - mimTemplate[ir - ImageRef(1,0)]);
00263 v2Grad[1] = 0.5 * (mimTemplate[ir + ImageRef(0,1)] - mimTemplate[ir - ImageRef(0,1)]);
00264 mimJacs[ir-ImageRef(1,1)].first = v2Grad[0];
00265 mimJacs[ir-ImageRef(1,1)].second = v2Grad[1];
00266 Vector<3> v3Grad = unproject(v2Grad);
00267 m3H += v3Grad.as_col() * v3Grad.as_row();
00268 }
00269
00270
00271 Cholesky<3> chol(m3H);
00272 mm3HInv = chol.get_inverse();
00273
00274
00275
00276
00277
00278 mv2SubPixPos = mv2CoarsePos;
00279 mdMeanDiff = 0.0;
00280 }
00281
00282
00283
00284
00285 bool PatchFinder::IterateSubPixToConvergence(KeyFrame& kf, int nMaxIts)
00286 {
00287 const double dConvLimit = 0.03;
00288 bool bConverged = false;
00289 int nIts;
00290 for(nIts = 0; nIts < nMaxIts && !bConverged; nIts++)
00291 {
00292 double dUpdateSquared = IterateSubPix(kf);
00293 if(dUpdateSquared < 0)
00294 return false;
00295 if(dUpdateSquared < dConvLimit*dConvLimit)
00296 return true;
00297 }
00298 return false;
00299 }
00300
00301
00302
00303
00304
00305 double PatchFinder::IterateSubPix(KeyFrame& kf)
00306 {
00307
00308 Vector<2> v2Center = LevelNPos(mv2SubPixPos, mnSearchLevel);
00309 BasicImage<CVD::byte> &im = kf.aLevels[mnSearchLevel].im;
00310 if(!im.in_image_with_border(ir_rounded(v2Center), mnPatchSize / 2 + 1))
00311 return -1.0;
00312
00313
00314 Vector<2> v2Base = v2Center - vec(mirCenter);
00315
00316
00317 Vector<3> v3Accum = Zeros;
00318
00319 ImageRef ir;
00320
00321 CVD::byte* pTopLeftPixel;
00322
00323
00324
00325
00326 double dX = v2Base[0]-floor(v2Base[0]);
00327 double dY = v2Base[1]-floor(v2Base[1]);
00328 float fMixTL = (1.0 - dX) * (1.0 - dY);
00329 float fMixTR = (dX) * (1.0 - dY);
00330 float fMixBL = (1.0 - dX) * (dY);
00331 float fMixBR = (dX) * (dY);
00332
00333
00334 unsigned long nRowOffset = &kf.aLevels[mnSearchLevel].im[ImageRef(0,1)] - &kf.aLevels[mnSearchLevel].im[ImageRef(0,0)];
00335 for(ir.y = 1; ir.y < mnPatchSize - 1; ir.y++)
00336 {
00337 pTopLeftPixel = &im[::ir(v2Base) + ImageRef(1,ir.y)];
00338 for(ir.x = 1; ir.x < mnPatchSize - 1; ir.x++)
00339 {
00340 float fPixel =
00341 fMixTL * pTopLeftPixel[0] + fMixTR * pTopLeftPixel[1] +
00342 fMixBL * pTopLeftPixel[nRowOffset] + fMixBR * pTopLeftPixel[nRowOffset + 1];
00343 pTopLeftPixel++;
00344 double dDiff = fPixel - mimTemplate[ir] + mdMeanDiff;
00345 v3Accum[0] += dDiff * mimJacs[ir - ImageRef(1,1)].first;
00346 v3Accum[1] += dDiff * mimJacs[ir - ImageRef(1,1)].second;
00347 v3Accum[2] += dDiff;
00348 };
00349 }
00350
00351
00352 Vector<3> v3Update = mm3HInv * v3Accum;
00353 mv2SubPixPos -= v3Update.slice<0,2>() * LevelScale(mnSearchLevel);
00354 mdMeanDiff -= v3Update[2];
00355
00356 double dPixelUpdateSquared = v3Update.slice<0,2>() * v3Update.slice<0,2>();
00357 return dPixelUpdateSquared;
00358 }
00359
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00378
00379 #if CVD_HAVE_XMMINTRIN
00380
00381 inline int SumXMM_16(__m128i &target)
00382 {
00383 unsigned short int sums_store[8];
00384 _mm_storeu_si128((__m128i*)sums_store, target);
00385 return sums_store[0] + sums_store[1] + sums_store[2] + sums_store[3] +
00386 sums_store[4] + sums_store[5] + sums_store[6] + sums_store[7];
00387 }
00388
00389 inline int SumXMM_32(__m128i &target)
00390 {
00391 unsigned int sums_store[4];
00392 _mm_storeu_si128((__m128i*)sums_store, target);
00393 return sums_store[0] + sums_store[1] + sums_store[2] + sums_store[3];
00394 }
00395 #endif
00396
00397
00398
00399 int PatchFinder::ZMSSDAtPoint(CVD::BasicImage<CVD::byte> &im, const CVD::ImageRef &ir)
00400 {
00401 if(!im.in_image_with_border(ir, mirCenter[0]))
00402 return mnMaxSSD + 1;
00403
00404 ImageRef irImgBase = ir - mirCenter;
00405 CVD::byte *imagepointer;
00406 CVD::byte *templatepointer;
00407
00408 int nImageSumSq = 0;
00409 int nImageSum = 0;
00410 int nCrossSum = 0;
00411
00412 #if CVD_HAVE_XMMINTRIN
00413 if(mnPatchSize == 8)
00414 {
00415 long unsigned int imagepointerincrement;
00416
00417 __m128i xImageAsEightBytes;
00418 __m128i xImageAsWords;
00419 __m128i xTemplateAsEightBytes;
00420 __m128i xTemplateAsWords;
00421 __m128i xZero;
00422 __m128i xImageSums;
00423 __m128i xImageSqSums;
00424 __m128i xCrossSums;
00425 __m128i xProduct;
00426
00427
00428 xImageSums = _mm_setzero_si128();
00429 xImageSqSums = _mm_setzero_si128();
00430 xCrossSums = _mm_setzero_si128();
00431 xZero = _mm_setzero_si128();
00432
00433 imagepointer = &im[irImgBase + ImageRef(0,0)];
00434 templatepointer = &mimTemplate[ImageRef(0,0)];
00435 imagepointerincrement = &im[irImgBase + ImageRef(0,1)] - imagepointer;
00436
00437 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00438 imagepointer += imagepointerincrement;
00439 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00440 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00441 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00442 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00443 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
00444 templatepointer += 16;
00445 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
00446 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00447 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00448 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00449 imagepointer += imagepointerincrement;
00450 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00451 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00452 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00453 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00454 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
00455 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00456 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00457
00458 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00459 imagepointer += imagepointerincrement;
00460 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00461 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00462 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00463 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00464 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
00465 templatepointer += 16;
00466 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
00467 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00468 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00469 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00470 imagepointer += imagepointerincrement;
00471 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00472 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00473 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00474 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00475 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
00476 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00477 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00478
00479 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00480 imagepointer += imagepointerincrement;
00481 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00482 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00483 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00484 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00485 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
00486 templatepointer += 16;
00487 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
00488 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00489 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00490 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00491 imagepointer += imagepointerincrement;
00492 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00493 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00494 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00495 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00496 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
00497 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00498 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00499
00500 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00501 imagepointer += imagepointerincrement;
00502 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00503 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00504 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00505 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00506 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
00507 templatepointer += 16;
00508 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
00509 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00510 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00511 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
00512 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
00513 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
00514 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
00515 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
00516 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
00517 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
00518 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
00519
00520 nImageSum = SumXMM_16(xImageSums);
00521 nCrossSum = SumXMM_32(xCrossSums);
00522 nImageSumSq = SumXMM_32(xImageSqSums);
00523 }
00524 else
00525 #endif
00526 {
00527 for(int nRow = 0; nRow < mnPatchSize; nRow++)
00528 {
00529 imagepointer = &im[irImgBase + ImageRef(0,nRow)];
00530 templatepointer = &mimTemplate[ImageRef(0,nRow)];
00531 for(int nCol = 0; nCol < mnPatchSize; nCol++)
00532 {
00533 int n = imagepointer[nCol];
00534 nImageSum += n;
00535 nImageSumSq += n*n;
00536 nCrossSum += n * templatepointer[nCol];
00537 };
00538 }
00539 };
00540
00541 int SA = mnTemplateSum;
00542 int SB = nImageSum;
00543
00544 int N = mnPatchSize * mnPatchSize;
00545 return ((2*SA*SB - SA*SA - SB*SB)/N + nImageSumSq + mnTemplateSumSq - 2*nCrossSum);
00546 }
00547
00548
00549
00550
00551
00552