iri_sift: dsift.c Source File

Go to the documentation of this file.
00001 
00007 /*
00008 Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson.
00009 All rights reserved.
00010 
00011 This file is part of the VLFeat library and is made available under
00012 the terms of the BSD license (see the COPYING file).
00013 */
00014 
00015 #include "dsift.h"
00016 //#include "mathop.h"
00017 //#include "imopv.h"
00018 #include <math.h>
00019 #include <string.h>
00020 #include <stdio.h>
00021 #include <stdlib.h>
00022 #include <stddef.h>
00023 #include <time.h>
00024 
00025 
00258 float *
00259 _vl_dsift_new_kernel (int binSize, int numBins, int binIndex, double windowSize)
00260 {
00261   int filtLen = 2 * binSize - 1 ;
00262   float * ker = (float*) malloc (sizeof(float) * filtLen) ;
00263   float * kerIter = ker ;
00264   float delta = binSize * (binIndex - 0.5F * (numBins - 1)) ;
00265   /*
00266   float sigma = 0.5F * ((numBins - 1) * binSize + 1) ;
00267   float sigma = 0.5F * ((numBins) * binSize) ;
00268   */
00269   float sigma = (float) binSize * (float) windowSize ;
00270   int x ;
00271 
00272   for (x = - binSize + 1 ; x <= + binSize - 1 ; ++ x) {
00273     float z = (x - delta) / sigma ;
00274     *kerIter++ = (1.0F - fabsf(x) / binSize) *
00275       ((binIndex >= 0) ? expf(- 0.5F * z*z) : 1.0F) ;
00276   }
00277   return ker ;
00278 }
00279 
00280 static float
00281 _vl_dsift_get_bin_window_mean
00282 (int binSize, int numBins, int binIndex, double windowSize)
00283 {
00284   float delta = binSize * (binIndex - 0.5F * (numBins - 1)) ;
00285   /*float sigma = 0.5F * ((numBins - 1) * binSize + 1) ;*/
00286   float sigma = (float) binSize * (float) windowSize ;
00287   int x ;
00288 
00289   float acc = 0.0 ;
00290   for (x = - binSize + 1 ; x <= + binSize - 1 ; ++ x) {
00291     float z = (x - delta) / sigma ;
00292     acc += ((binIndex >= 0) ? expf(- 0.5F * z*z) : 1.0F) ;
00293   }
00294   return acc /= (2 * binSize - 1) ;
00295 }
00296 
00305 VL_INLINE float
00306 _vl_dsift_normalize_histogram (float * begin, float * end)
00307 {
00308   float * iter ;
00309   float  norm = 0.0F ;
00310 
00311   for (iter = begin ; iter < end ; ++ iter) {
00312     norm += (*iter) * (*iter) ;
00313   }
00314   norm = vl_fast_sqrt_f (norm) + VL_EPSILON_F ;
00315 
00316   for (iter = begin; iter < end ; ++ iter) {
00317     *iter /= norm ;
00318   }
00319   return norm ;
00320 }
00321 
00327 static void
00328 _vl_dsift_free_buffers (VlDsiftFilter* self)
00329 {
00330   if (self->frames) {
00331     free(self->frames) ;
00332     self->frames = NULL ;
00333   }
00334   if (self->descrs) {
00335     free(self->descrs) ;
00336     self->descrs = NULL ;
00337   }
00338   if (self->grads) {
00339     int t ;
00340     for (t = 0 ; t < self->numGradAlloc ; ++t)
00341       if (self->grads[t]) free(self->grads[t]) ;
00342     free(self->grads) ;
00343     self->grads = NULL ;
00344   }
00345   self->numFrameAlloc = 0 ;
00346   self->numBinAlloc = 0 ;
00347   self->numGradAlloc = 0 ;
00348 }
00349 
00354 void
00355 _vl_dsift_update_buffers (VlDsiftFilter * self)
00356 {
00357   int x1 = self->boundMinX ;
00358   int x2 = self->boundMaxX ;
00359   int y1 = self->boundMinY ;
00360   int y2 = self->boundMaxY ;
00361 
00362   int rangeX = x2 - x1 - (self->geom.numBinX - 1) * self->geom.binSizeX ;
00363   int rangeY = y2 - y1 - (self->geom.numBinY - 1) * self->geom.binSizeY ;
00364 
00365   int numFramesX = (rangeX >= 0) ? rangeX / self->stepX + 1 : 0 ;
00366   int numFramesY = (rangeY >= 0) ? rangeY / self->stepY + 1 : 0 ;
00367 
00368   self->numFrames = numFramesX * numFramesY ;
00369   self->descrSize = self->geom.numBinT *
00370                     self->geom.numBinX *
00371                     self->geom.numBinY ;
00372 }
00373 
00382 static void
00383 _vl_dsift_alloc_buffers (VlDsiftFilter* self)
00384 {
00385   _vl_dsift_update_buffers (self) ;
00386   {
00387     int numFrameAlloc = vl_dsift_get_keypoint_num (self) ;
00388     int numBinAlloc   = vl_dsift_get_descriptor_size (self) ;
00389     int numGradAlloc  = self->geom.numBinT ;
00390 
00391     /* see if we need to update the buffers */
00392     if (numBinAlloc != self->numBinAlloc ||
00393         numGradAlloc != self->numGradAlloc ||
00394         numFrameAlloc != self->numFrameAlloc) {
00395 
00396       int t ;
00397 
00398       _vl_dsift_free_buffers(self) ;
00399 
00400       self->frames = (VlDsiftKeypoint*) malloc(sizeof(VlDsiftKeypoint) * numFrameAlloc) ;
00401       self->descrs = (float*) malloc(sizeof(float) * numBinAlloc * numFrameAlloc) ;
00402       self->grads  = (float**) malloc(sizeof(float*) * numGradAlloc) ;
00403       for (t = 0 ; t < numGradAlloc ; ++t) {
00404         self->grads[t] =
00405           (float*) malloc(sizeof(float) * self->imWidth * self->imHeight) ;
00406       }
00407       self->numBinAlloc = numBinAlloc ;
00408       self->numGradAlloc = numGradAlloc ;
00409       self->numFrameAlloc = numFrameAlloc ;
00410     }
00411   }
00412 }
00413 
00423 VlDsiftFilter *
00424 vl_dsift_new (int imWidth, int imHeight)
00425 {
00426   VlDsiftFilter * self = (VlDsiftFilter*) malloc (sizeof(VlDsiftFilter)) ;
00427   self->imWidth  = imWidth ;
00428   self->imHeight = imHeight ;
00429 
00430   self->stepX = 5 ;
00431   self->stepY = 5 ;
00432 
00433   self->boundMinX = 0 ;
00434   self->boundMinY = 0 ;
00435   self->boundMaxX = imWidth - 1 ;
00436   self->boundMaxY = imHeight - 1 ;
00437 
00438   self->geom.numBinX = 4 ;
00439   self->geom.numBinY = 4 ;
00440   self->geom.numBinT = 8 ;
00441   self->geom.binSizeX = 5 ;
00442   self->geom.binSizeY = 5 ;
00443 
00444   self->useFlatWindow = VL_FALSE ;
00445   self->windowSize = 2.0 ;
00446 
00447   self->convTmp1 = (float*) malloc(sizeof(float) * self->imWidth * self->imHeight) ;
00448   self->convTmp2 = (float*) malloc(sizeof(float) * self->imWidth * self->imHeight) ;
00449 
00450   self->numBinAlloc = 0 ;
00451   self->numFrameAlloc = 0 ;
00452   self->numGradAlloc = 0 ;
00453 
00454   self->descrSize = 0 ;
00455   self->numFrames = 0 ;
00456   self->grads = NULL ;
00457   self->frames = NULL ;
00458   self->descrs = NULL ;
00459 
00460   _vl_dsift_update_buffers(self) ;
00461   return self ;
00462 }
00463 
00475 VlDsiftFilter *
00476 vl_dsift_new_basic (int imWidth, int imHeight, int step, int binSize)
00477 {
00478   VlDsiftFilter* self = vl_dsift_new(imWidth, imHeight) ;
00479   VlDsiftDescriptorGeometry geom = *vl_dsift_get_geometry(self) ;
00480   geom.binSizeX = binSize ;
00481   geom.binSizeY = binSize ;
00482   vl_dsift_set_geometry(self, &geom) ;
00483   vl_dsift_set_steps(self, step, step) ;
00484   return self ;
00485 }
00486 
00492 void
00493 vl_dsift_delete (VlDsiftFilter * self)
00494 {
00495   _vl_dsift_free_buffers (self) ;
00496   if (self->convTmp2) free (self->convTmp2) ;
00497   if (self->convTmp1) free (self->convTmp1) ;
00498   free (self) ;
00499 }
00500 
00501 
00507 VL_INLINE void
00508 _vl_dsift_with_gaussian_window (VlDsiftFilter * self)
00509 {
00510   int binx, biny, bint ;
00511   int framex, framey ;
00512   float *xker, *yker ;
00513 
00514   int Wx = self->geom.binSizeX - 1 ;
00515   int Wy = self->geom.binSizeY - 1 ;
00516 
00517   for (biny = 0 ; biny < self->geom.numBinY ; ++biny) {
00518 
00519     yker = _vl_dsift_new_kernel (self->geom.binSizeY,
00520                                  self->geom.numBinY,
00521                                  biny,
00522                                  self->windowSize) ;
00523 
00524     for (binx = 0 ; binx < self->geom.numBinX ; ++binx) {
00525 
00526       xker = _vl_dsift_new_kernel(self->geom.binSizeX,
00527                                   self->geom.numBinX,
00528                                   binx,
00529                                   self->windowSize) ;
00530 
00531       for (bint = 0 ; bint < self->geom.numBinT ; ++bint) {
00532 
00533         vl_imconvcol_vf (self->convTmp1, self->imHeight,
00534                          self->grads[bint], self->imWidth, self->imHeight,
00535                          self->imWidth,
00536                          yker, -Wy, +Wy, 1,
00537                          VL_PAD_BY_CONTINUITY|VL_TRANSPOSE) ;
00538 
00539         vl_imconvcol_vf (self->convTmp2, self->imWidth,
00540                          self->convTmp1, self->imHeight, self->imWidth,
00541                          self->imHeight,
00542                          xker, -Wx, +Wx, 1,
00543                          VL_PAD_BY_CONTINUITY|VL_TRANSPOSE) ;
00544 
00545         {
00546           float *dst = self->descrs
00547             + bint
00548             + binx * self->geom.numBinT
00549             + biny * (self->geom.numBinX * self->geom.numBinT)  ;
00550 
00551           float *src = self->convTmp2 ;
00552 
00553           int frameSizeX = self->geom.binSizeX * (self->geom.numBinX - 1) + 1 ;
00554           int frameSizeY = self->geom.binSizeY * (self->geom.numBinY - 1) + 1 ;
00555           int descrSize = vl_dsift_get_descriptor_size (self) ;
00556 
00557           for (framey  = self->boundMinY ;
00558                framey <= self->boundMaxY - frameSizeY + 1 ;
00559                framey += self->stepY) {
00560             for (framex  = self->boundMinX ;
00561                  framex <= self->boundMaxX - frameSizeX + 1 ;
00562                  framex += self->stepX) {
00563               *dst = src [(framex + binx * self->geom.binSizeX) * 1 +
00564                           (framey + biny * self->geom.binSizeY) * self->imWidth]  ;
00565               dst += descrSize ;
00566             } /* framex */
00567           } /* framey */
00568         }
00569 
00570       } /* for bint */
00571       free (xker) ;
00572     } /* for binx */
00573     free (yker) ;
00574   } /* for biny */
00575 }
00576 
00582 VL_INLINE void
00583 _vl_dsift_with_flat_window (VlDsiftFilter* self)
00584 {
00585   int binx, biny, bint ;
00586   int framex, framey ;
00587 
00588   /* for each orientation bin */
00589   for (bint = 0 ; bint < self->geom.numBinT ; ++bint) {
00590 
00591     vl_imconvcoltri_f (self->convTmp1, self->imHeight,
00592                        self->grads [bint], self->imWidth, self->imHeight,
00593                        self->imWidth,
00594                        self->geom.binSizeY, /* filt size */
00595                        1, /* subsampling step */
00596                        VL_PAD_BY_CONTINUITY|VL_TRANSPOSE) ;
00597 
00598     vl_imconvcoltri_f (self->convTmp2, self->imWidth,
00599                        self->convTmp1, self->imHeight, self->imWidth,
00600                        self->imHeight,
00601                        self->geom.binSizeX,
00602                        1,
00603                        VL_PAD_BY_CONTINUITY|VL_TRANSPOSE) ;
00604 
00605     for (biny = 0 ; biny < self->geom.numBinY ; ++biny) {
00606 
00607       /*
00608       This fast version of DSIFT does not use a proper Gaussian
00609       weighting scheme for the gradiens that are accumulated on the
00610       spatial bins. Instead each spatial bins is accumulated based on
00611       the triangular kernel only, equivalent to bilinear interpolation
00612       plus a flat, rather than Gaussian, window. Eventually, however,
00613       the magnitude of the spatial bins in the SIFT descriptor is
00614       reweighted by the average of the Gaussian window on each bin.
00615       */
00616 
00617       float wy = _vl_dsift_get_bin_window_mean
00618         (self->geom.binSizeY, self->geom.numBinY, biny,
00619          self->windowSize) ;
00620 
00621       /* The convolution functions vl_imconvcoltri_* convolve by a
00622        * triangular kernel with unit integral. Instead for SIFT the
00623        * triangular kernel should have unit height. This is
00624        * compensated for by multiplying by the bin size:
00625        */
00626 
00627       wy *= self->geom.binSizeY ;
00628 
00629       for (binx = 0 ; binx < self->geom.numBinX ; ++binx) {
00630         float w ;
00631         float wx = _vl_dsift_get_bin_window_mean (self->geom.binSizeX,
00632                                                   self->geom.numBinX,
00633                                                   binx,
00634                                                   self->windowSize) ;
00635 
00636         float *dst = self->descrs
00637           + bint
00638           + binx * self->geom.numBinT
00639           + biny * (self->geom.numBinX * self->geom.numBinT)  ;
00640 
00641         float *src = self->convTmp2 ;
00642 
00643         int frameSizeX = self->geom.binSizeX * (self->geom.numBinX - 1) + 1 ;
00644         int frameSizeY = self->geom.binSizeY * (self->geom.numBinY - 1) + 1 ;
00645         int descrSize = vl_dsift_get_descriptor_size (self) ;
00646 
00647         wx *= self->geom.binSizeX ;
00648         w = wx * wy ;
00649 
00650         for (framey  = self->boundMinY ;
00651              framey <= self->boundMaxY - frameSizeY + 1 ;
00652              framey += self->stepY) {
00653           for (framex  = self->boundMinX ;
00654                framex <= self->boundMaxX - frameSizeX + 1 ;
00655                framex += self->stepX) {
00656             *dst = w * src [(framex + binx * self->geom.binSizeX) * 1 +
00657                             (framey + biny * self->geom.binSizeY) * self->imWidth]  ;
00658             dst += descrSize ;
00659           } /* framex */
00660         } /* framey */
00661       } /* binx */
00662     } /* biny */
00663   } /* bint */
00664 }
00665 
00673 void vl_dsift_process (VlDsiftFilter* self, float const* im)
00674 {
00675   int t, x, y ;
00676 
00677   /* update buffers */
00678   _vl_dsift_alloc_buffers (self) ;
00679 
00680   /* clear integral images */
00681   for (t = 0 ; t < self->geom.numBinT ; ++t)
00682     memset (self->grads[t], 0,
00683             sizeof(float) * self->imWidth * self->imHeight) ;
00684 
00685 #undef at
00686 #define at(x,y) (im[(y)*self->imWidth+(x)])
00687 
00688   /* Compute gradients, their norm, and their angle */
00689 
00690   for (y = 0 ; y < self->imHeight ; ++ y) {
00691     for (x = 0 ; x < self->imWidth ; ++ x) {
00692       float gx, gy ;
00693       float angle, mod, nt, rbint ;
00694       int bint ;
00695 
00696       /* y derivative */
00697       if (y == 0) {
00698         gy = at(x,y+1) - at(x,y) ;
00699       } else if (y == self->imHeight - 1) {
00700         gy = at(x,y) - at(x,y-1) ;
00701       } else {
00702         gy = 0.5F * (at(x,y+1) - at(x,y-1)) ;
00703       }
00704 
00705       /* x derivative */
00706       if (x == 0) {
00707         gx = at(x+1,y) - at(x,y) ;
00708       } else if (x == self->imWidth - 1) {
00709         gx = at(x,y) - at(x-1,y) ;
00710       } else {
00711         gx = 0.5F * (at(x+1,y) - at(x-1,y)) ;
00712       }
00713 
00714       /* angle and modulus */
00715       angle = vl_fast_atan2_f (gy,gx) ;
00716       mod = vl_fast_sqrt_f (gx*gx + gy*gy) ;
00717 
00718       /* quantize angle */
00719       nt = vl_mod_2pi_f (angle) * (self->geom.numBinT / (2*VL_PI)) ;
00720       bint = vl_floor_f (nt) ;
00721       rbint = nt - bint ;
00722 
00723       /* write it back */
00724       self->grads [(bint    ) % self->geom.numBinT][x + y * self->imWidth] = (1 - rbint) * mod ;
00725       self->grads [(bint + 1) % self->geom.numBinT][x + y * self->imWidth] = (    rbint) * mod ;
00726     }
00727   }
00728 
00729   if (self->useFlatWindow) {
00730     _vl_dsift_with_flat_window(self) ;
00731   } else {
00732     _vl_dsift_with_gaussian_window(self) ;
00733   }
00734 
00735   {
00736     VlDsiftKeypoint* frameIter = self->frames ;
00737     float * descrIter = self->descrs ;
00738     int framex, framey, bint ;
00739 
00740     int frameSizeX = self->geom.binSizeX * (self->geom.numBinX - 1) + 1 ;
00741     int frameSizeY = self->geom.binSizeY * (self->geom.numBinY - 1) + 1 ;
00742     int descrSize = vl_dsift_get_descriptor_size (self) ;
00743 
00744     float deltaCenterX = 0.5F * self->geom.binSizeX * (self->geom.numBinX - 1) ;
00745     float deltaCenterY = 0.5F * self->geom.binSizeY * (self->geom.numBinY - 1) ;
00746 
00747     float normConstant = frameSizeX * frameSizeY ;
00748 
00749     for (framey  = self->boundMinY ;
00750          framey <= self->boundMaxY - frameSizeY + 1 ;
00751          framey += self->stepY) {
00752 
00753       for (framex  = self->boundMinX ;
00754            framex <= self->boundMaxX - frameSizeX + 1 ;
00755            framex += self->stepX) {
00756 
00757         frameIter->x    = framex + deltaCenterX ;
00758         frameIter->y    = framey + deltaCenterY ;
00759 
00760         /* mass */
00761         {
00762           float mass = 0 ;
00763           for (bint = 0 ; bint < descrSize ; ++ bint)
00764             mass += descrIter[bint] ;
00765           mass /= normConstant ;
00766           frameIter->norm = mass ;
00767         }
00768 
00769         /* L2 normalize */
00770         _vl_dsift_normalize_histogram (descrIter, descrIter + descrSize) ;
00771 
00772         /* clamp */
00773         for(bint = 0 ; bint < descrSize ; ++ bint)
00774           if (descrIter[bint] > 0.2F) descrIter[bint] = 0.2F ;
00775 
00776         /* L2 normalize */
00777         _vl_dsift_normalize_histogram (descrIter, descrIter + descrSize) ;
00778 
00779         frameIter ++ ;
00780         descrIter += descrSize ;
00781       } /* for framex */
00782     } /* for framey */
00783   }
00784 }
00785 
00786 
00787 // aux functions imported from other modules
00788 
00789 // __ASSUMING SSE2 IS AVAILABLE EVERYWHERE 
00790 
00791 
00792 void
00793 vl_imconvcol_vf //_sse2
00794 (float* dst, int dst_stride,
00795  float const* src,
00796  int src_width, int src_height, int src_stride,
00797  float const* filt, int filt_begin, int filt_end,
00798  int step, unsigned int flags)
00799 {
00800   int x = 0 ;
00801   int y ;
00802   int dheight = (src_height - 1) / step + 1 ;
00803   vl_bool use_simd  = VALIGNED(src_stride) ;
00804   vl_bool transp    = flags & VL_TRANSPOSE ;
00805   vl_bool zeropad   = (flags & VL_PAD_MASK) == VL_PAD_BY_ZERO ;
00806   double totcol = 0 ;
00807   double simdcol = 0 ;
00808 
00809   /* let filt point to the last sample of the filter */
00810   filt += filt_end - filt_begin ;
00811 
00812   while (x < src_width) {
00813     /* Calculate dest[x,y] = sum_p image[x,p] filt[y - p]
00814      * where supp(filt) = [filt_begin, filt_end] = [fb,fe].
00815      *
00816      * CHUNK_A: y - fe <= p < 0
00817      *          completes VL_MAX(fe - y, 0) samples
00818      * CHUNK_B: VL_MAX(y - fe, 0) <= p < VL_MIN(y - fb, height - 1)
00819      *          completes fe - VL_MAX(fb, height - y) + 1 samples
00820      * CHUNK_C: completes all samples
00821      */
00822 
00823     float const *filti ;
00824     int stop ;
00825 
00826     if ((x + VSIZE < src_width) & VALIGNED(src + x) & use_simd)
00827     {
00828       /* ----------------------------------------------  Vectorized */
00829       for (y = 0 ; y < src_height ; y += step)  {
00830         union {VTYPE v ; float x [VSIZE] ; } acc ;
00831         VTYPE v, c ;
00832         float const *srci ;
00833         acc.v = VSTZ () ;
00834         v = VSTZ() ;
00835 
00836         filti = filt ;
00837         stop = filt_end - y ;
00838         srci = src + x - stop * src_stride ;
00839 
00840         if (stop > 0) {
00841           if (zeropad) {
00842             v = VSTZ () ;
00843           } else {
00844             v = * (VTYPE*) (src + x) ;
00845           }
00846           while (filti > filt - stop) {
00847             c = VLD1 (filti--) ;
00848             acc.v = VADD (acc.v,  VMUL (v, c)) ;
00849             srci += src_stride ;
00850           }
00851         }
00852 
00853         stop = filt_end - VL_MAX(filt_begin, y - src_height + 1) + 1 ;
00854         while (filti > filt - stop) {
00855           v = * (VTYPE*) srci ;
00856           c = VLD1 (filti--) ;
00857           acc.v = VADD (acc.v, VMUL (v, c)) ;
00858           srci += src_stride ;
00859         }
00860 
00861         if (zeropad) v = VSTZ () ;
00862 
00863         stop = filt_end - filt_begin + 1;
00864         while (filti > filt - stop) {
00865           c = VLD1 (filti--) ;
00866           acc.v = VADD (acc.v, VMUL (v, c)) ;
00867         }
00868 
00869         if (transp) {
00870           *dst = acc.x[0] ; dst += dst_stride ;
00871           *dst = acc.x[1] ; dst += dst_stride ;
00872 #if(VSIZE == 4)
00873           *dst = acc.x[2] ; dst += dst_stride ;
00874           *dst = acc.x[3] ; dst += dst_stride ;
00875 #endif
00876           dst += 1 * 1 - VSIZE * dst_stride ;
00877         } else {
00878           *dst = acc.x[0] ; dst += 1 ;
00879           *dst = acc.x[1] ; dst += 1 ;
00880 #if(VSIZE == 4)
00881           *dst = acc.x[2] ; dst += 1 ;
00882           *dst = acc.x[3] ; dst += 1 ;
00883 #endif
00884           dst += 1 * dst_stride - VSIZE * 1 ;
00885         }
00886       } /* next y */
00887       if (transp) {
00888         dst += VSIZE * dst_stride - dheight * 1 ;
00889       } else {
00890         dst += VSIZE * 1 - dheight * dst_stride ;
00891       }
00892       x       += VSIZE ;
00893       simdcol += VSIZE ;
00894       totcol  += VSIZE ;
00895     } else {
00896       /* -------------------------------------------------  Vanilla */
00897       for (y = 0 ; y < src_height ; y += step) {
00898         float acc = 0 ;
00899         float v = 0, c ;
00900         float const* srci ;
00901 
00902         filti = filt ;
00903         stop = filt_end - y ;
00904         srci = src + x - stop * src_stride ;
00905 
00906         if (stop > 0) {
00907           if (zeropad) {
00908             v = 0 ;
00909           } else {
00910             v = *(src + x) ;
00911           }
00912           while (filti > filt - stop) {
00913             c = *filti-- ;
00914             acc += v * c ;
00915             srci += src_stride ;
00916           }
00917         }
00918 
00919         stop = filt_end - VL_MAX(filt_begin, y - src_height + 1) + 1 ;
00920         while (filti > filt - stop) {
00921           v = *srci ;
00922           c = *filti-- ;
00923           acc += v * c ;
00924           srci += src_stride ;
00925         }
00926 
00927         if (zeropad) v = 0 ;
00928 
00929         stop = filt_end - filt_begin + 1 ;
00930         while (filti > filt - stop) {
00931           c = *filti-- ;
00932           acc += v * c ;
00933         }
00934 
00935         if (transp) {
00936           *dst = acc ; dst += 1 ;
00937         } else {
00938           *dst = acc ; dst += dst_stride ;
00939         }
00940       } /* next y */
00941       if (transp) {
00942         dst += 1 * dst_stride - dheight * 1 ;
00943       } else {
00944         dst += 1 * 1 - dheight * dst_stride ;
00945       }
00946       x      += 1 ;
00947       totcol += 1 ;
00948     } /* next x */
00949   }
00950 }
00951 
00952 
00954 void
00955 vl_imconvcoltri_f
00956 (float * dest, vl_size destStride,
00957  float const * image,
00958  vl_size imageWidth, vl_size imageHeight, vl_size imageStride,
00959  vl_size filterSize,
00960  vl_size step, unsigned int flags)
00961 {
00962   vl_index x, y, dheight ;
00963   vl_bool transp = flags & VL_TRANSPOSE ;
00964   vl_bool zeropad = (flags & VL_PAD_MASK) == VL_PAD_BY_ZERO ;
00965   float scale = (float) (1.0 / ((double)filterSize * (double)filterSize)) ;
00966   float * buffer = (float*) malloc (sizeof(float) * (imageHeight + filterSize)) ;
00967   buffer += filterSize ;
00968 
00969   if (imageHeight == 0) {
00970     return  ;
00971   }
00972 
00973   x = 0 ;
00974   dheight = (imageHeight - 1) / step + 1 ;
00975 
00976   while (x < (signed)imageWidth) {
00977     float const * imagei ;
00978     imagei = image + x + imageStride * (imageHeight - 1) ;
00979 
00980     /* We decompose the convolution by a triangluar signal as the convolution
00981      * by two rectangular signals. The rectangular convolutions are computed
00982      * quickly by computing the integral signals. Each rectangular convolution
00983      * introduces a delay, which is compensated by convolving each in opposite
00984      * directions.
00985      */
00986 
00987     /* integrate backward the column */
00988     buffer[imageHeight - 1] = *imagei ;
00989     for (y = (signed)imageHeight - 2 ; y >=  0 ; --y) {
00990       imagei -= imageStride ;
00991       buffer[y] = buffer[y + 1] + *imagei ;
00992     }
00993     if (zeropad) {
00994       for ( ; y >= - (signed)filterSize ; --y) {
00995         buffer[y] = buffer[y + 1] ;
00996       }
00997     } else {
00998       for ( ; y >= - (signed)filterSize ; --y) {
00999         buffer[y] = buffer[y + 1] + *imagei ;
01000       }
01001     }
01002 
01003     /* compute the filter forward */
01004     for (y = - (signed)filterSize ;
01005          y < (signed)imageHeight - (signed)filterSize ; ++y) {
01006       buffer[y] = buffer[y] - buffer[y + filterSize] ;
01007     }
01008     if (! zeropad) {
01009       for (y = (signed)imageHeight - (signed)filterSize ;
01010            y < (signed)imageHeight ;
01011            ++y) {
01012         buffer[y] = buffer[y] - buffer[imageHeight - 1]  *
01013         ((signed)imageHeight - (signed)filterSize - y) ;
01014       }
01015     }
01016 
01017     /* integrate forward the column */
01018     for (y = - (signed)filterSize + 1 ;
01019          y < (signed)imageHeight ; ++y) {
01020       buffer[y] += buffer[y - 1] ;
01021     }
01022 
01023     /* compute the filter backward */
01024     {
01025       vl_size stride = transp ? 1 : destStride ;
01026       dest += dheight * stride ;
01027       for (y = step * (dheight - 1) ; y >= 0 ; y -= step) {
01028         dest -= stride ;
01029         *dest = scale * (buffer[y] - buffer[y - (signed)filterSize]) ;
01030       }
01031       dest += transp ? destStride : 1 ;
01032     }
01033     x += 1 ;
01034   } /* next x */
01035   free (buffer - filterSize) ;
01036 }
01037 
01038 
01039 
01040 
01041 
01042 
01043 
01044 
01045 
01046 
01047 
01048 
01049 
01050 
01051