libvlfeat: dsift.c Source File

Go to the documentation of this file.
00001 
00007 /*
00008 Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson.
00009 All rights reserved.
00010 
00011 This file is part of the VLFeat library and is made available under
00012 the terms of the BSD license (see the COPYING file).
00013 */
00014 
00015 #include "dsift.h"
00016 #include "pgm.h"
00017 #include "mathop.h"
00018 #include "imopv.h"
00019 #include <math.h>
00020 #include <string.h>
00021 
00249 float *
00250 _vl_dsift_new_kernel (int binSize, int numBins, int binIndex, double windowSize)
00251 {
00252   int filtLen = 2 * binSize - 1 ;
00253   float * ker = vl_malloc (sizeof(float) * filtLen) ;
00254   float * kerIter = ker ;
00255   float delta = binSize * (binIndex - 0.5F * (numBins - 1)) ;
00256   /*
00257   float sigma = 0.5F * ((numBins - 1) * binSize + 1) ;
00258   float sigma = 0.5F * ((numBins) * binSize) ;
00259   */
00260   float sigma = (float) binSize * (float) windowSize ;
00261   int x ;
00262 
00263   for (x = - binSize + 1 ; x <= + binSize - 1 ; ++ x) {
00264     float z = (x - delta) / sigma ;
00265     *kerIter++ = (1.0F - fabsf(x) / binSize) *
00266       ((binIndex >= 0) ? expf(- 0.5F * z*z) : 1.0F) ;
00267   }
00268   return ker ;
00269 }
00270 
00271 static float
00272 _vl_dsift_get_bin_window_mean
00273 (int binSize, int numBins, int binIndex, double windowSize)
00274 {
00275   float delta = binSize * (binIndex - 0.5F * (numBins - 1)) ;
00276   /*float sigma = 0.5F * ((numBins - 1) * binSize + 1) ;*/
00277   float sigma = (float) binSize * (float) windowSize ;
00278   int x ;
00279 
00280   float acc = 0.0 ;
00281   for (x = - binSize + 1 ; x <= + binSize - 1 ; ++ x) {
00282     float z = (x - delta) / sigma ;
00283     acc += ((binIndex >= 0) ? expf(- 0.5F * z*z) : 1.0F) ;
00284   }
00285   return acc /= (2 * binSize - 1) ;
00286 }
00287 
00296 VL_INLINE float
00297 _vl_dsift_normalize_histogram (float * begin, float * end)
00298 {
00299   float * iter ;
00300   float  norm = 0.0F ;
00301 
00302   for (iter = begin ; iter < end ; ++ iter) {
00303     norm += (*iter) * (*iter) ;
00304   }
00305   norm = vl_fast_sqrt_f (norm) + VL_EPSILON_F ;
00306 
00307   for (iter = begin; iter < end ; ++ iter) {
00308     *iter /= norm ;
00309   }
00310   return norm ;
00311 }
00312 
00318 static void
00319 _vl_dsift_free_buffers (VlDsiftFilter* self)
00320 {
00321   if (self->frames) {
00322     vl_free(self->frames) ;
00323     self->frames = NULL ;
00324   }
00325   if (self->descrs) {
00326     vl_free(self->descrs) ;
00327     self->descrs = NULL ;
00328   }
00329   if (self->grads) {
00330     int t ;
00331     for (t = 0 ; t < self->numGradAlloc ; ++t)
00332       if (self->grads[t]) vl_free(self->grads[t]) ;
00333     vl_free(self->grads) ;
00334     self->grads = NULL ;
00335   }
00336   self->numFrameAlloc = 0 ;
00337   self->numBinAlloc = 0 ;
00338   self->numGradAlloc = 0 ;
00339 }
00340 
00345 VL_EXPORT void
00346 _vl_dsift_update_buffers (VlDsiftFilter * self)
00347 {
00348   int x1 = self->boundMinX ;
00349   int x2 = self->boundMaxX ;
00350   int y1 = self->boundMinY ;
00351   int y2 = self->boundMaxY ;
00352 
00353   int rangeX = x2 - x1 - (self->geom.numBinX - 1) * self->geom.binSizeX ;
00354   int rangeY = y2 - y1 - (self->geom.numBinY - 1) * self->geom.binSizeY ;
00355 
00356   int numFramesX = (rangeX >= 0) ? rangeX / self->stepX + 1 : 0 ;
00357   int numFramesY = (rangeY >= 0) ? rangeY / self->stepY + 1 : 0 ;
00358 
00359   self->numFrames = numFramesX * numFramesY ;
00360   self->descrSize = self->geom.numBinT *
00361                     self->geom.numBinX *
00362                     self->geom.numBinY ;
00363 }
00364 
00373 static void
00374 _vl_dsift_alloc_buffers (VlDsiftFilter* self)
00375 {
00376   _vl_dsift_update_buffers (self) ;
00377   {
00378     int numFrameAlloc = vl_dsift_get_keypoint_num (self) ;
00379     int numBinAlloc   = vl_dsift_get_descriptor_size (self) ;
00380     int numGradAlloc  = self->geom.numBinT ;
00381 
00382     /* see if we need to update the buffers */
00383     if (numBinAlloc != self->numBinAlloc ||
00384         numGradAlloc != self->numGradAlloc ||
00385         numFrameAlloc != self->numFrameAlloc) {
00386 
00387       int t ;
00388 
00389       _vl_dsift_free_buffers(self) ;
00390 
00391       self->frames = vl_malloc(sizeof(VlDsiftKeypoint) * numFrameAlloc) ;
00392       self->descrs = vl_malloc(sizeof(float) * numBinAlloc * numFrameAlloc) ;
00393       self->grads  = vl_malloc(sizeof(float*) * numGradAlloc) ;
00394       for (t = 0 ; t < numGradAlloc ; ++t) {
00395         self->grads[t] =
00396           vl_malloc(sizeof(float) * self->imWidth * self->imHeight) ;
00397       }
00398       self->numBinAlloc = numBinAlloc ;
00399       self->numGradAlloc = numGradAlloc ;
00400       self->numFrameAlloc = numFrameAlloc ;
00401     }
00402   }
00403 }
00404 
00414 VL_EXPORT VlDsiftFilter *
00415 vl_dsift_new (int imWidth, int imHeight)
00416 {
00417   VlDsiftFilter * self = vl_malloc (sizeof(VlDsiftFilter)) ;
00418   self->imWidth  = imWidth ;
00419   self->imHeight = imHeight ;
00420 
00421   self->stepX = 5 ;
00422   self->stepY = 5 ;
00423 
00424   self->boundMinX = 0 ;
00425   self->boundMinY = 0 ;
00426   self->boundMaxX = imWidth - 1 ;
00427   self->boundMaxY = imHeight - 1 ;
00428 
00429   self->geom.numBinX = 4 ;
00430   self->geom.numBinY = 4 ;
00431   self->geom.numBinT = 8 ;
00432   self->geom.binSizeX = 5 ;
00433   self->geom.binSizeY = 5 ;
00434 
00435   self->useFlatWindow = VL_FALSE ;
00436   self->windowSize = 2.0 ;
00437 
00438   self->convTmp1 = vl_malloc(sizeof(float) * self->imWidth * self->imHeight) ;
00439   self->convTmp2 = vl_malloc(sizeof(float) * self->imWidth * self->imHeight) ;
00440 
00441   self->numBinAlloc = 0 ;
00442   self->numFrameAlloc = 0 ;
00443   self->numGradAlloc = 0 ;
00444 
00445   self->descrSize = 0 ;
00446   self->numFrames = 0 ;
00447   self->grads = NULL ;
00448   self->frames = NULL ;
00449   self->descrs = NULL ;
00450 
00451   _vl_dsift_update_buffers(self) ;
00452   return self ;
00453 }
00454 
00466 VL_EXPORT VlDsiftFilter *
00467 vl_dsift_new_basic (int imWidth, int imHeight, int step, int binSize)
00468 {
00469   VlDsiftFilter* self = vl_dsift_new(imWidth, imHeight) ;
00470   VlDsiftDescriptorGeometry geom = *vl_dsift_get_geometry(self) ;
00471   geom.binSizeX = binSize ;
00472   geom.binSizeY = binSize ;
00473   vl_dsift_set_geometry(self, &geom) ;
00474   vl_dsift_set_steps(self, step, step) ;
00475   return self ;
00476 }
00477 
00483 VL_EXPORT void
00484 vl_dsift_delete (VlDsiftFilter * self)
00485 {
00486   _vl_dsift_free_buffers (self) ;
00487   if (self->convTmp2) vl_free (self->convTmp2) ;
00488   if (self->convTmp1) vl_free (self->convTmp1) ;
00489   vl_free (self) ;
00490 }
00491 
00492 
00498 VL_INLINE void
00499 _vl_dsift_with_gaussian_window (VlDsiftFilter * self)
00500 {
00501   int binx, biny, bint ;
00502   int framex, framey ;
00503   float *xker, *yker ;
00504 
00505   int Wx = self->geom.binSizeX - 1 ;
00506   int Wy = self->geom.binSizeY - 1 ;
00507 
00508   for (biny = 0 ; biny < self->geom.numBinY ; ++biny) {
00509 
00510     yker = _vl_dsift_new_kernel (self->geom.binSizeY,
00511                                  self->geom.numBinY,
00512                                  biny,
00513                                  self->windowSize) ;
00514 
00515     for (binx = 0 ; binx < self->geom.numBinX ; ++binx) {
00516 
00517       xker = _vl_dsift_new_kernel(self->geom.binSizeX,
00518                                   self->geom.numBinX,
00519                                   binx,
00520                                   self->windowSize) ;
00521 
00522       for (bint = 0 ; bint < self->geom.numBinT ; ++bint) {
00523 
00524         vl_imconvcol_vf (self->convTmp1, self->imHeight,
00525                          self->grads[bint], self->imWidth, self->imHeight,
00526                          self->imWidth,
00527                          yker, -Wy, +Wy, 1,
00528                          VL_PAD_BY_CONTINUITY|VL_TRANSPOSE) ;
00529 
00530         vl_imconvcol_vf (self->convTmp2, self->imWidth,
00531                          self->convTmp1, self->imHeight, self->imWidth,
00532                          self->imHeight,
00533                          xker, -Wx, +Wx, 1,
00534                          VL_PAD_BY_CONTINUITY|VL_TRANSPOSE) ;
00535 
00536         {
00537           float *dst = self->descrs
00538             + bint
00539             + binx * self->geom.numBinT
00540             + biny * (self->geom.numBinX * self->geom.numBinT)  ;
00541 
00542           float *src = self->convTmp2 ;
00543 
00544           int frameSizeX = self->geom.binSizeX * (self->geom.numBinX - 1) + 1 ;
00545           int frameSizeY = self->geom.binSizeY * (self->geom.numBinY - 1) + 1 ;
00546           int descrSize = vl_dsift_get_descriptor_size (self) ;
00547 
00548           for (framey  = self->boundMinY ;
00549                framey <= self->boundMaxY - frameSizeY + 1 ;
00550                framey += self->stepY) {
00551             for (framex  = self->boundMinX ;
00552                  framex <= self->boundMaxX - frameSizeX + 1 ;
00553                  framex += self->stepX) {
00554               *dst = src [(framex + binx * self->geom.binSizeX) * 1 +
00555                           (framey + biny * self->geom.binSizeY) * self->imWidth]  ;
00556               dst += descrSize ;
00557             } /* framex */
00558           } /* framey */
00559         }
00560 
00561       } /* for bint */
00562       vl_free (xker) ;
00563     } /* for binx */
00564     vl_free (yker) ;
00565   } /* for biny */
00566 }
00567 
00573 VL_INLINE void
00574 _vl_dsift_with_flat_window (VlDsiftFilter* self)
00575 {
00576   int binx, biny, bint ;
00577   int framex, framey ;
00578 
00579   /* for each orientation bin */
00580   for (bint = 0 ; bint < self->geom.numBinT ; ++bint) {
00581 
00582     vl_imconvcoltri_f (self->convTmp1, self->imHeight,
00583                        self->grads [bint], self->imWidth, self->imHeight,
00584                        self->imWidth,
00585                        self->geom.binSizeY, /* filt size */
00586                        1, /* subsampling step */
00587                        VL_PAD_BY_CONTINUITY|VL_TRANSPOSE) ;
00588 
00589     vl_imconvcoltri_f (self->convTmp2, self->imWidth,
00590                        self->convTmp1, self->imHeight, self->imWidth,
00591                        self->imHeight,
00592                        self->geom.binSizeX,
00593                        1,
00594                        VL_PAD_BY_CONTINUITY|VL_TRANSPOSE) ;
00595 
00596     for (biny = 0 ; biny < self->geom.numBinY ; ++biny) {
00597 
00598       /*
00599       This fast version of DSIFT does not use a proper Gaussian
00600       weighting scheme for the gradiens that are accumulated on the
00601       spatial bins. Instead each spatial bins is accumulated based on
00602       the triangular kernel only, equivalent to bilinear interpolation
00603       plus a flat, rather than Gaussian, window. Eventually, however,
00604       the magnitude of the spatial bins in the SIFT descriptor is
00605       reweighted by the average of the Gaussian window on each bin.
00606       */
00607 
00608       float wy = _vl_dsift_get_bin_window_mean
00609         (self->geom.binSizeY, self->geom.numBinY, biny,
00610          self->windowSize) ;
00611 
00612       /* The convolution functions vl_imconvcoltri_* convolve by a
00613        * triangular kernel with unit integral. Instead for SIFT the
00614        * triangular kernel should have unit height. This is
00615        * compensated for by multiplying by the bin size:
00616        */
00617 
00618       wy *= self->geom.binSizeY ;
00619 
00620       for (binx = 0 ; binx < self->geom.numBinX ; ++binx) {
00621         float w ;
00622         float wx = _vl_dsift_get_bin_window_mean (self->geom.binSizeX,
00623                                                   self->geom.numBinX,
00624                                                   binx,
00625                                                   self->windowSize) ;
00626 
00627         float *dst = self->descrs
00628           + bint
00629           + binx * self->geom.numBinT
00630           + biny * (self->geom.numBinX * self->geom.numBinT)  ;
00631 
00632         float *src = self->convTmp2 ;
00633 
00634         int frameSizeX = self->geom.binSizeX * (self->geom.numBinX - 1) + 1 ;
00635         int frameSizeY = self->geom.binSizeY * (self->geom.numBinY - 1) + 1 ;
00636         int descrSize = vl_dsift_get_descriptor_size (self) ;
00637 
00638         wx *= self->geom.binSizeX ;
00639         w = wx * wy ;
00640 
00641         for (framey  = self->boundMinY ;
00642              framey <= self->boundMaxY - frameSizeY + 1 ;
00643              framey += self->stepY) {
00644           for (framex  = self->boundMinX ;
00645                framex <= self->boundMaxX - frameSizeX + 1 ;
00646                framex += self->stepX) {
00647             *dst = w * src [(framex + binx * self->geom.binSizeX) * 1 +
00648                             (framey + biny * self->geom.binSizeY) * self->imWidth]  ;
00649             dst += descrSize ;
00650           } /* framex */
00651         } /* framey */
00652       } /* binx */
00653     } /* biny */
00654   } /* bint */
00655 }
00656 
00664 void vl_dsift_process (VlDsiftFilter* self, float const* im)
00665 {
00666   int t, x, y ;
00667 
00668   /* update buffers */
00669   _vl_dsift_alloc_buffers (self) ;
00670 
00671   /* clear integral images */
00672   for (t = 0 ; t < self->geom.numBinT ; ++t)
00673     memset (self->grads[t], 0,
00674             sizeof(float) * self->imWidth * self->imHeight) ;
00675 
00676 #undef at
00677 #define at(x,y) (im[(y)*self->imWidth+(x)])
00678 
00679   /* Compute gradients, their norm, and their angle */
00680 
00681   for (y = 0 ; y < self->imHeight ; ++ y) {
00682     for (x = 0 ; x < self->imWidth ; ++ x) {
00683       float gx, gy ;
00684       float angle, mod, nt, rbint ;
00685       int bint ;
00686 
00687       /* y derivative */
00688       if (y == 0) {
00689         gy = at(x,y+1) - at(x,y) ;
00690       } else if (y == self->imHeight - 1) {
00691         gy = at(x,y) - at(x,y-1) ;
00692       } else {
00693         gy = 0.5F * (at(x,y+1) - at(x,y-1)) ;
00694       }
00695 
00696       /* x derivative */
00697       if (x == 0) {
00698         gx = at(x+1,y) - at(x,y) ;
00699       } else if (x == self->imWidth - 1) {
00700         gx = at(x,y) - at(x-1,y) ;
00701       } else {
00702         gx = 0.5F * (at(x+1,y) - at(x-1,y)) ;
00703       }
00704 
00705       /* angle and modulus */
00706       angle = vl_fast_atan2_f (gy,gx) ;
00707       mod = vl_fast_sqrt_f (gx*gx + gy*gy) ;
00708 
00709       /* quantize angle */
00710       nt = vl_mod_2pi_f (angle) * (self->geom.numBinT / (2*VL_PI)) ;
00711       bint = (int) vl_floor_f (nt) ;
00712       rbint = nt - bint ;
00713 
00714       /* write it back */
00715       self->grads [(bint    ) % self->geom.numBinT][x + y * self->imWidth] = (1 - rbint) * mod ;
00716       self->grads [(bint + 1) % self->geom.numBinT][x + y * self->imWidth] = (    rbint) * mod ;
00717     }
00718   }
00719 
00720   if (self->useFlatWindow) {
00721     _vl_dsift_with_flat_window(self) ;
00722   } else {
00723     _vl_dsift_with_gaussian_window(self) ;
00724   }
00725 
00726   {
00727     VlDsiftKeypoint* frameIter = self->frames ;
00728     float * descrIter = self->descrs ;
00729     int framex, framey, bint ;
00730 
00731     int frameSizeX = self->geom.binSizeX * (self->geom.numBinX - 1) + 1 ;
00732     int frameSizeY = self->geom.binSizeY * (self->geom.numBinY - 1) + 1 ;
00733     int descrSize = vl_dsift_get_descriptor_size (self) ;
00734 
00735     float deltaCenterX = 0.5F * self->geom.binSizeX * (self->geom.numBinX - 1) ;
00736     float deltaCenterY = 0.5F * self->geom.binSizeY * (self->geom.numBinY - 1) ;
00737 
00738     float normConstant = frameSizeX * frameSizeY ;
00739 
00740     for (framey  = self->boundMinY ;
00741          framey <= self->boundMaxY - frameSizeY + 1 ;
00742          framey += self->stepY) {
00743 
00744       for (framex  = self->boundMinX ;
00745            framex <= self->boundMaxX - frameSizeX + 1 ;
00746            framex += self->stepX) {
00747 
00748         frameIter->x    = framex + deltaCenterX ;
00749         frameIter->y    = framey + deltaCenterY ;
00750 
00751         /* mass */
00752         {
00753           float mass = 0 ;
00754           for (bint = 0 ; bint < descrSize ; ++ bint)
00755             mass += descrIter[bint] ;
00756           mass /= normConstant ;
00757           frameIter->norm = mass ;
00758         }
00759 
00760         /* L2 normalize */
00761         _vl_dsift_normalize_histogram (descrIter, descrIter + descrSize) ;
00762 
00763         /* clamp */
00764         for(bint = 0 ; bint < descrSize ; ++ bint)
00765           if (descrIter[bint] > 0.2F) descrIter[bint] = 0.2F ;
00766 
00767         /* L2 normalize */
00768         _vl_dsift_normalize_histogram (descrIter, descrIter + descrSize) ;
00769 
00770         frameIter ++ ;
00771         descrIter += descrSize ;
00772       } /* for framex */
00773     } /* for framey */
00774   }
00775 }