hog.cpp
Go to the documentation of this file.
00001 /*
00002  * Software License Agreement (Simplified BSD License)
00003  *
00004  * Point Cloud Library (PCL) - www.pointclouds.org
00005  * Copyright (c) 2013-, Open Perception, Inc.
00006  * Copyright (c) 2012, Piotr Dollar & Ron Appel.  [pdollar-at-caltech.edu]
00007  *
00008  * All rights reserved.
00009  *
00010  * Redistribution and use in source and binary forms, with or without
00011  * modification, are permitted provided that the following conditions are met: 
00012  *
00013  * 1. Redistributions of source code must retain the above copyright notice, this
00014  *    list of conditions and the following disclaimer.
00015  *
00016  * 2. Redistributions in binary form must reproduce the above copyright notice,
00017  *    this list of conditions and the following disclaimer in the documentation
00018  *    and/or other materials provided with the distribution. 
00019  * 
00020  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
00021  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00022  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00023  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
00024  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00025  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00026  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00027  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00029  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * The views and conclusions contained in the software and documentation are those
00032  * of the authors and should not be interpreted as representing official policies,
00033  * either expressed or implied, of the FreeBSD Project.
00034  *
00035  * Derived from Piotr Dollar's MATLAB Image&Video Toolbox      Version 3.00.
00036  *
00037  * hog.cpp
00038  * Created on: Nov 30, 2012
00039  * Authors: Matteo Munaro, Stefano Ghidoni, Stefano Michieletto
00040  */
00041 
00042 #include <pcl/people/hog.h>
00043 
00044 #define _USE_MATH_DEFINES
00045 #include <math.h>
00046 #include <string.h>
00047 
00048 #if defined(__SSE2__)
00049 #include <pcl/sse.h>
00050 #else
00051 #include <cstdlib>
00052 #endif
00053 
00055 pcl::people::HOG::HOG () 
00056 {
00057   // set default values for optional parameters:
00058   h_ = 128;
00059   w_ = 64;
00060   n_channels_ = 3;
00061   bin_size_ = 8;
00062   n_orients_ = 9;
00063   soft_bin_ = true;
00064   clip_ = 0.2;
00065 }  
00066 
00068 pcl::people::HOG::~HOG () {}
00069 
00070 void 
00071 pcl::people::HOG::gradMag( float *I, int h, int w, int d, float *M, float *O ) const
00072 {
00073 #if defined(__SSE2__)
00074   int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m;
00075   float *acost = acosTable(), acMult=25000/2.02f;
00076 
00077   // allocate memory for storing one column of output (padded so h4%4==0)
00078   h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float);
00079 
00080   M2=(float*) alMalloc(s,16);
00081   _M2=(__m128*) M2;
00082   Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx;
00083   Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy;
00084 
00085   // compute gradient magnitude and orientation for each column
00086   for( x=0; x<w; x++ ) {
00087   // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel
00088   for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x );
00089   for( y=0; y<d*h4/4; y++ ) _M2[y]=pcl::sse_add(pcl::sse_mul(_Gx[y],_Gx[y]),pcl::sse_mul(_Gy[y],_Gy[y]));
00090   // store gradients with maximum response in the first channel
00091   for(c=1; c<d; c++) {
00092     for( y=0; y<h4/4; y++ ) {
00093     y1=h4/4*c+y; _m = pcl::sse_cmpgt( _M2[y1], _M2[y] );
00094     _M2[y] = pcl::sse_or( pcl::sse_and(_m,_M2[y1]), pcl::sse_andnot(_m,_M2[y]) );
00095     _Gx[y] = pcl::sse_or( pcl::sse_and(_m,_Gx[y1]), pcl::sse_andnot(_m,_Gx[y]) );
00096     _Gy[y] = pcl::sse_or( pcl::sse_and(_m,_Gy[y1]), pcl::sse_andnot(_m,_Gy[y]) );
00097     }
00098   }
00099   // compute gradient magnitude (M) and normalize Gx
00100   for( y=0; y<h4/4; y++ ) {
00101     _m = pcl::sse_min( pcl::sse_rcpsqrt(_M2[y]), pcl::sse_set(1e10f) );
00102     _M2[y] = pcl::sse_rcp(_m);
00103     _Gx[y] = pcl::sse_mul( pcl::sse_mul(_Gx[y],_m), pcl::sse_set(acMult) );
00104     _Gx[y] = pcl::sse_xor( _Gx[y], pcl::sse_and(_Gy[y], pcl::sse_set(-0.f)) );
00105   };
00106 
00107   memcpy( M+x*h, M2, h*sizeof(float) );
00108   // compute and store gradient orientation (O) via table lookup
00109   if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]];
00110   }
00111   alFree(Gx); alFree(Gy); alFree(M2); 
00112 #else
00113   int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; 
00114   float *acost = acosTable(), acMult=25000/2.02f;
00115 
00116   // allocate memory for storing one column of output (padded so h4%4==0)
00117   h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float);
00118 
00119   M2=(float*) alMalloc(s,16);
00120   Gx=(float*) alMalloc(s,16); 
00121   Gy=(float*) alMalloc(s,16); 
00122   float m;
00123 
00124   // compute gradient magnitude and orientation for each column
00125   for( x=0; x<w; x++ ) {
00126   // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel
00127   for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x );
00128   for( y=0; y<d*h4; y++ ) 
00129   {
00130     M2[y] = Gx[y] * Gx[y] + Gy[y] * Gy[y];
00131   }  
00132   
00133   // store gradients with maximum response in the first channel
00134   for(c=1; c<d; c++) 
00135   {
00136   for( y=0; y<h4/4; y++ ) 
00137     {
00138       y1=h4/4*c+y; 
00139 
00140     for (int ii = 0; ii < 4; ++ii)
00141     {
00142       if (M2[y1 * 4 + ii] > M2[y * 4 + ii])
00143       {
00144         M2[y * 4 + ii] = M2[y1 * 4 + ii];
00145         Gx[y * 4 + ii] = Gx[y1 * 4 + ii];
00146         Gy[y * 4 + ii] = Gy[y1 * 4 + ii];
00147       }
00148     }
00149     }
00150   }
00151   // compute gradient magnitude (M) and normalize Gx
00152   for( y=0; y<h4; y++ ) 
00153   {
00154   m = 1.0f/sqrtf(M2[y]);
00155   m = m < 1e10f ? m : 1e10f;
00156     M2[y] = 1.0f / m;
00157     Gx[y] = ((Gx[y] * m) * acMult);
00158     if (Gy[y] < 0)
00159     Gx[y] = -Gx[y];
00160   }
00161   
00162   memcpy( M+x*h, M2, h*sizeof(float) );
00163   // compute and store gradient orientation (O) via table lookup
00164   if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]];
00165   }
00166   alFree(Gx); alFree(Gy); alFree(M2);
00167 #endif  
00168 }
00169 
00170 void 
00171 pcl::people::HOG::gradHist( float *M, float *O, int h, int w, int bin_size, int n_orients, bool soft_bin, float *H ) const
00172 {
00173   const int hb=h/bin_size, wb=w/bin_size, h0=hb*bin_size, w0=wb*bin_size, nb=wb*hb;
00174   const float s=(float)bin_size, sInv=1/s, sInv2=1/s/s;
00175   float *H0, *H1, *M0, *M1; int x, y; int *O0, *O1;
00176   O0=(int*)alMalloc(h*sizeof(int),16); M0=(float*) alMalloc(h*sizeof(float),16);
00177   O1=(int*)alMalloc(h*sizeof(int),16); M1=(float*) alMalloc(h*sizeof(float),16);
00178 
00179   // main loop
00180   float xb = 0;
00181   float init = 0;
00182   for( x=0; x<w0; x++ ) {
00183     // compute target orientation bins for entire column - very fast
00184     gradQuantize( O+x*h, M+x*h, O0, O1, M0, M1, n_orients, nb, h0, sInv2 );
00185 
00186     if( !soft_bin || bin_size==1 ) {
00187       // interpolate w.r.t. orientation only, not spatial bin_size
00188       H1=H+(x/bin_size)*hb;
00189       #define GH H1[O0[y]]+=M0[y]; H1[O1[y]]+=M1[y]; y++;
00190       if( bin_size==1 )      for(y=0; y<h0;) { GH; H1++; }
00191       else if( bin_size==2 ) for(y=0; y<h0;) { GH; GH; H1++; }
00192       else if( bin_size==3 ) for(y=0; y<h0;) { GH; GH; GH; H1++; }
00193       else if( bin_size==4 ) for(y=0; y<h0;) { GH; GH; GH; GH; H1++; }
00194       else for( y=0; y<h0;) { for( int y1=0; y1<bin_size; y1++ ) { GH; } H1++; }
00195       #undef GH
00196 
00197     } else {
00198       // interpolate using trilinear interpolation
00199 #if defined(__SSE2__)
00200       float ms[4], xyd, yb, xd, yd; __m128 _m, _m0, _m1;
00201       bool hasLf, hasRt; int xb0, yb0;
00202       if( x==0 ) { init=(0+.5f)*sInv-0.5f; xb=init; }
00203       hasLf = xb>=0; xb0 = hasLf?(int)xb:-1; hasRt = xb0 < wb-1;
00204       xd=xb-xb0; xb+=sInv; yb=init; y=0;
00205       // macros for code conciseness
00206       #define GHinit yd=yb-yb0; yb+=sInv; H0=H+xb0*hb+yb0; xyd=xd*yd; \
00207       ms[0]=1-xd-yd+xyd; ms[1]=yd-xyd; ms[2]=xd-xyd; ms[3]=xyd;
00208       #define GH(H,ma,mb) H1=H; pcl::sse_stru(*H1,pcl::sse_add(pcl::sse_ldu(*H1),pcl::sse_mul(ma,mb)));
00209       // leading rows, no top bin_size
00210       for( ; y<bin_size/2; y++ ) {
00211         yb0=-1; GHinit;
00212         if(hasLf) { H0[O0[y]+1]+=ms[1]*M0[y]; H0[O1[y]+1]+=ms[1]*M1[y]; }
00213         if(hasRt) { H0[O0[y]+hb+1]+=ms[3]*M0[y]; H0[O1[y]+hb+1]+=ms[3]*M1[y]; }
00214       }
00215       // main rows, has top and bottom bins, use SSE for minor speedup
00216       for( ; ; y++ ) {
00217         yb0 = (int) yb; if(yb0>=hb-1) break; GHinit;
00218         _m0=pcl::sse_set(M0[y]); _m1=pcl::sse_set(M1[y]);
00219         if(hasLf) { _m=pcl::sse_set(0,0,ms[1],ms[0]);
00220         GH(H0+O0[y],_m,_m0); GH(H0+O1[y],_m,_m1); }
00221         if(hasRt) { _m=pcl::sse_set(0,0,ms[3],ms[2]);
00222         GH(H0+O0[y]+hb,_m,_m0); GH(H0+O1[y]+hb,_m,_m1); }
00223       }      
00224       // final rows, no bottom bin_size
00225       for( ; y<h0; y++ ) {
00226         yb0 = (int) yb; GHinit;
00227         if(hasLf) { H0[O0[y]]+=ms[0]*M0[y]; H0[O1[y]]+=ms[0]*M1[y]; }
00228         if(hasRt) { H0[O0[y]+hb]+=ms[2]*M0[y]; H0[O1[y]+hb]+=ms[2]*M1[y]; }
00229       }       
00230       #undef GHinit
00231       #undef GH
00232 #else
00233       float ms[4], xyd, yb, xd, yd;  
00234       bool hasLf, hasRt; int xb0, yb0;
00235       if( x==0 ) { init=(0+.5f)*sInv-0.5f; xb=init; }
00236       hasLf = xb>=0; xb0 = hasLf?(int)xb:-1; hasRt = xb0 < wb-1;
00237       xd=xb-xb0; xb+=sInv; yb=init; y=0;
00238       // macros for code conciseness
00239       #define GHinit yd=yb-yb0; yb+=sInv; H0=H+xb0*hb+yb0; xyd=xd*yd; \
00240       ms[0]=1-xd-yd+xyd; ms[1]=yd-xyd; ms[2]=xd-xyd; ms[3]=xyd;
00241       // leading rows, no top bin_size
00242       for( ; y<bin_size/2; y++ ) {
00243         yb0=-1; GHinit;
00244         if(hasLf) { H0[O0[y]+1]+=ms[1]*M0[y]; H0[O1[y]+1]+=ms[1]*M1[y]; }
00245         if(hasRt) { H0[O0[y]+hb+1]+=ms[3]*M0[y]; H0[O1[y]+hb+1]+=ms[3]*M1[y]; }
00246       }
00247       // main rows, has top and bottom bins
00248       for( ; ; y++ ) {
00249         yb0 = (int) yb;
00250         if(yb0>=hb-1) 
00251           break; 
00252         GHinit;
00253   
00254         if(hasLf) 
00255         {
00256           H0[O0[y]+1]+=ms[1]*M0[y]; 
00257           H0[O1[y]+1]+=ms[1]*M1[y];
00258           H0[O0[y]]+=ms[0]*M0[y]; 
00259           H0[O1[y]]+=ms[0]*M1[y];
00260         }
00261             if(hasRt) 
00262         {
00263           H0[O0[y]+hb+1]+=ms[3]*M0[y];
00264           H0[O1[y]+hb+1]+=ms[3]*M1[y]; 
00265           H0[O0[y]+hb]+=ms[2]*M0[y];
00266           H0[O1[y]+hb]+=ms[2]*M1[y];
00267         }
00268       }      
00269       // final rows, no bottom bin_size
00270       for( ; y<h0; y++ ) {
00271         yb0 = (int) yb; GHinit;
00272         if(hasLf) { H0[O0[y]]+=ms[0]*M0[y]; H0[O1[y]]+=ms[0]*M1[y]; }
00273         if(hasRt) { H0[O0[y]+hb]+=ms[2]*M0[y]; H0[O1[y]+hb]+=ms[2]*M1[y]; }
00274       }       
00275       #undef GHinit
00276 #endif     
00277     }
00278   }
00279 
00280   alFree(O0); alFree(O1); alFree(M0); alFree(M1);
00281 }
00282       
00283 void 
00284 pcl::people::HOG::normalization (float *H, int h, int w, int bin_size, int n_orients, float clip, float *G) const
00285 {
00286   float *N, *N1, *H1; int o, x, y, hb=h/bin_size, wb=w/bin_size, nb=wb*hb;
00287   float eps = 1e-4f/4/bin_size/bin_size/bin_size/bin_size; // precise backward equality
00288   // compute 2x2 block normalization values
00289   N = (float*) calloc(nb,sizeof(float));
00290   for( o=0; o<n_orients; o++ ) for( x=0; x<nb; x++ ) N[x]+=H[x+o*nb]*H[x+o*nb];
00291   for( x=0; x<wb-1; x++ ) for( y=0; y<hb-1; y++ ) {
00292     N1=N+x*hb+y; *N1=1/float(sqrt( N1[0] + N1[1] + N1[hb] + N1[hb+1] +eps )); }
00293   // perform 4 normalizations per spatial block (handling boundary regions)
00294   #define U(a,b) Gs[a][y]=H1[y]*N1[y-(b)]; if(Gs[a][y]>clip) Gs[a][y]=clip;
00295   for( o=0; o<n_orients; o++ ) for( x=0; x<wb; x++ ) {
00296     H1=H+o*nb+x*hb; N1=N+x*hb; float *Gs[4]; Gs[0]=G+o*nb+x*hb;
00297     for( y=1; y<4; y++ ) Gs[y]=Gs[y-1]+nb*n_orients;
00298     bool lf, md, rt; lf=(x==0); rt=(x==wb-1); md=(!lf && !rt);
00299     y=0; if(!rt) U(0,0); if(!lf) U(2,hb);
00300     if(lf) for( y=1; y<hb-1; y++ ) { U(0,0); U(1,1); }
00301     if(md) for( y=1; y<hb-1; y++ ) { U(0,0); U(1,1); U(2,hb); U(3,hb+1); }
00302     if(rt) for( y=1; y<hb-1; y++ ) { U(2,hb); U(3,hb+1); }
00303     y=hb-1; if(!rt) U(1,1); if(!lf) U(3,hb+1);
00304   } free(N);
00305   #undef U
00306 }
00307       
00308 void
00309 pcl::people::HOG::compute (float *I, int h, int w, int n_channels, int bin_size, int n_orients, bool soft_bin, float *descriptor)
00310 {
00311   h_ = h;
00312   w_ = w;
00313   n_channels_ = n_channels;
00314   bin_size_ = bin_size;
00315   n_orients_ = n_orients;
00316   soft_bin_ = soft_bin;
00317   
00318   compute (I, descriptor);
00319 }
00320 
00321 void
00322 pcl::people::HOG::compute (float *I, float *descriptor) const
00323 {
00324   // HOG computation:
00325   float *M, *O, *G, *H;
00326   M = new float[h_ * w_];
00327   O = new float[h_ * w_];
00328   H = (float*) calloc((w_ / bin_size_) * (h_ / bin_size_) * n_orients_, sizeof(float));
00329   G = (float*) calloc((w_ / bin_size_) * (h_ / bin_size_) * n_orients_ *4, sizeof(float));
00330 
00331   // Compute gradient magnitude and orientation at each location (uses sse):
00332   gradMag (I, h_, w_, n_channels_, M, O );
00333  
00334   // Compute n_orients gradient histograms per bin_size x bin_size block of pixels:
00335   gradHist ( M, O, h_, w_, bin_size_, n_orients_, soft_bin_, H );
00336 
00337   // Apply normalizations:
00338   normalization ( H, h_, w_, bin_size_, n_orients_, clip_, G );
00339  
00340   // Select descriptor of internal part of the image (remove borders):
00341   int k = 0;    
00342   for (unsigned int l = 0; l < (n_orients_ * 4); l++)
00343   {
00344     for (unsigned int j = 1; j < (w_ / bin_size_ - 1); j++)
00345     {
00346       for (unsigned int i = 1; i < (h_ / bin_size_ - 1); i++)
00347       {
00348         descriptor[k] = G[i + j * h_ / bin_size_ + l * (h_ / bin_size_) * (w_ / bin_size_)];
00349         k++;
00350       }
00351     }
00352   }
00353   free(M); free(O); free(H); free(G);
00354 }
00355 
00356 void 
00357 pcl::people::HOG::grad1 (float *I, float *Gx, float *Gy, int h, int w, int x) const
00358 {
00359 #if defined(__SSE2__)
00360   int y, y1; float *Ip, *In, r; __m128 *_Ip, *_In, *_G, _r;
00361   // compute column of Gx
00362   Ip=I-h; In=I+h; r=.5f;
00363   if(x==0) { r=1; Ip+=h; } else if(x==w-1) { r=1; In-=h; }
00364   if( h<4 || h%4>0 || (size_t(I)&15) || (size_t(Gx)&15) ) {
00365   for( y=0; y<h; y++ ) *Gx++=(*In++-*Ip++)*r;
00366   } else {
00367   _G=(__m128*) Gx; _Ip=(__m128*) Ip; _In=(__m128*) In; _r = pcl::sse_set(r);
00368   for(y=0; y<h; y+=4) *_G++=pcl::sse_mul(pcl::sse_sub(*_In++,*_Ip++),_r);
00369   }
00370   // compute column of Gy
00371   #define GRADY(r) *Gy++=(*In++-*Ip++)*r;
00372   Ip=I; In=Ip+1;
00373   // GRADY(1); Ip--; for(y=1; y<h-1; y++) GRADY(.5f); In--; GRADY(1);
00374   y1=((~((size_t) Gy) + 1) & 15)/4; if(y1==0) y1=4; if(y1>h-1) y1=h-1;
00375   GRADY(1); Ip--; for(y=1; y<y1; y++) GRADY(.5f);
00376   _r = pcl::sse_set(.5f); _G=(__m128*) Gy;
00377   for(; y+4<h-1; y+=4, Ip+=4, In+=4, Gy+=4)
00378   *_G++=pcl::sse_mul(pcl::sse_sub(pcl::sse_ldu(*In),pcl::sse_ldu(*Ip)),_r);
00379   for(; y<h-1; y++) GRADY(.5f); In--; GRADY(1);
00380   #undef GRADY
00381 #else
00382   int y, y1;
00383   float *Ip, *In, r;
00384   
00385   // compute column of Gx
00386   Ip = I - h;
00387   In = I + h;
00388   r = .5f;
00389   
00390   if(x == 0)
00391   {
00392   r = 1;
00393   Ip += h;
00394   }
00395   else if (x == w-1)
00396   {
00397   r = 1;
00398   In -= h;
00399   }
00400 
00401   for (y = 0; y < h; y++)
00402   *Gx++=(*In++ - *Ip++)*r;
00403   
00404   // compute column of Gy
00405   #define GRADY(r) *Gy++=(*In++-*Ip++)*r;
00406   Ip=I; In=Ip+1;
00407   // GRADY(1); Ip--; for(y=1; y<h-1; y++) GRADY(.5f); In--; GRADY(1);
00408   y1=((~((size_t) Gy) + 1) & 15)/4; if(y1==0) y1=4; if(y1>h-1) y1=h-1;
00409   GRADY(1); Ip--; for(y=1; y<y1; y++) GRADY(.5f);
00410   
00411   r = 0.5f;
00412   for(; y<h-1; y++)
00413     GRADY(.5f); In--; GRADY(1);
00414   #undef GRADY
00415 #endif
00416 }
00417       
00418 float* 
00419 pcl::people::HOG::acosTable () const
00420 {
00421   int i, n=25000, n2=n/2; float t, ni;
00422   static float a[25000]; static bool init=false;
00423   if( init ) return a+n2; ni = 2.02f/(float) n;
00424   for( i=0; i<n; i++ ) {
00425     t = i*ni - 1.01f;
00426     t = t<-1 ? -1 : (t>1 ? 1 : t);
00427     t = (float) acos( t );
00428     a[i] = (t <= M_PI-1e-5f) ? t : 0;
00429   }
00430   init=true; return a+n2;
00431 }
00432       
00433 void 
00434 pcl::people::HOG::gradQuantize (float *O, float *M, int *O0, int *O1, float *M0, float *M1, int n_orients, int nb, int n, float norm) const
00435 {
00436 #if defined(__SSE2__)
00437   // assumes all *OUTPUT* matrices are 4-byte aligned
00438   int i, o0, o1; float o, od, m;
00439   __m128i _o0, _o1, *_O0, *_O1; __m128 _o, _o0f, _m, *_M0, *_M1;
00440   // define useful constants
00441   const float oMult=(float)n_orients/M_PI; const int oMax=n_orients*nb;
00442   const __m128 _norm=pcl::sse_set(norm), _oMult=pcl::sse_set(oMult), _nbf=pcl::sse_set((float)nb);
00443   const __m128i _oMax=pcl::sse_set(oMax), _nb=pcl::sse_set(nb);
00444 
00445   // perform the majority of the work with sse
00446   _O0=(__m128i*) O0; _O1=(__m128i*) O1; _M0=(__m128*) M0; _M1=(__m128*) M1;
00447   for( i=0; i<=n-4; i+=4 ) {
00448   _o=pcl::sse_mul(pcl::sse_ldu(O[i]),_oMult); _o0f=pcl::sse_cvt(pcl::sse_cvt(_o)); _o0=pcl::sse_cvt(pcl::sse_mul(_o0f,_nbf));
00449   _o1=pcl::sse_add(_o0,_nb); _o1=pcl::sse_and(pcl::sse_cmpgt(_oMax,_o1),_o1);
00450   *_O0++=_o0; *_O1++=_o1; _m=pcl::sse_mul(pcl::sse_ldu(M[i]),_norm);
00451   *_M1=pcl::sse_mul(pcl::sse_sub(_o,_o0f),_m); *_M0=pcl::sse_sub(_m,*_M1); _M0++; _M1++;
00452   }
00453 
00454   // compute trailing locations without sse
00455   for( ; i<n; i++ ) {
00456   o=O[i]*oMult; m=M[i]*norm; o0=(int) o; od=o-o0;
00457   o0*=nb; o1=o0+nb; if(o1==oMax) o1=0;
00458   O0[i]=o0; O1[i]=o1; M1[i]=od*m; M0[i]=m-M1[i];
00459   }
00460 #else
00461   int i, o0, o1;
00462   float o, od, m;
00463 
00464   // define useful constants
00465   const float oMult=(float)n_orients/M_PI; const int oMax=n_orients*nb;
00466 
00467   // compute trailing locations without sse
00468   for( i = 0; i<n; i++ )
00469   {
00470     o=O[i]*oMult; m=M[i]*norm; o0=(int) o; od=o-o0;
00471     o0*=nb; o1=o0+nb; if(o1==oMax) o1=0;
00472     O0[i]=o0; O1[i]=o1; M1[i]=od*m; M0[i]=m-M1[i];
00473   }
00474 #endif
00475 }
00476 
00477 inline void* 
00478 pcl::people::HOG::alMalloc (size_t size, int alignment) const
00479 {
00480   const size_t pSize = sizeof(void*), a = alignment-1;
00481   void *raw = malloc(size + a + pSize);
00482   void *aligned = (void*) (((size_t) raw + pSize + a) & ~a);
00483   *(void**) ((size_t) aligned-pSize) = raw;
00484   return aligned;
00485 }
00486 
00487 inline void 
00488 pcl::people::HOG::alFree (void* aligned) const
00489 {
00490   void* raw = *(void**)((char*)aligned-sizeof(void*));
00491   free(raw);
00492 }
00493 


pcl
Author(s): Open Perception
autogenerated on Wed Aug 26 2015 15:24:43