00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042 #include <pcl/people/hog.h>
00043
00044 #define _USE_MATH_DEFINES
00045 #include <math.h>
00046 #include <string.h>
00047
00048 #if defined(__SSE2__)
00049 #include <pcl/sse.h>
00050 #else
00051 #include <cstdlib>
00052 #endif
00053
00055 pcl::people::HOG::HOG ()
00056 {
00057
00058 h_ = 128;
00059 w_ = 64;
00060 n_channels_ = 3;
00061 bin_size_ = 8;
00062 n_orients_ = 9;
00063 soft_bin_ = true;
00064 clip_ = 0.2;
00065 }
00066
00068 pcl::people::HOG::~HOG () {}
00069
00070 void
00071 pcl::people::HOG::gradMag( float *I, int h, int w, int d, float *M, float *O ) const
00072 {
00073 #if defined(__SSE2__)
00074 int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m;
00075 float *acost = acosTable(), acMult=25000/2.02f;
00076
00077
00078 h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float);
00079
00080 M2=(float*) alMalloc(s,16);
00081 _M2=(__m128*) M2;
00082 Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx;
00083 Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy;
00084
00085
00086 for( x=0; x<w; x++ ) {
00087
00088 for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x );
00089 for( y=0; y<d*h4/4; y++ ) _M2[y]=pcl::sse_add(pcl::sse_mul(_Gx[y],_Gx[y]),pcl::sse_mul(_Gy[y],_Gy[y]));
00090
00091 for(c=1; c<d; c++) {
00092 for( y=0; y<h4/4; y++ ) {
00093 y1=h4/4*c+y; _m = pcl::sse_cmpgt( _M2[y1], _M2[y] );
00094 _M2[y] = pcl::sse_or( pcl::sse_and(_m,_M2[y1]), pcl::sse_andnot(_m,_M2[y]) );
00095 _Gx[y] = pcl::sse_or( pcl::sse_and(_m,_Gx[y1]), pcl::sse_andnot(_m,_Gx[y]) );
00096 _Gy[y] = pcl::sse_or( pcl::sse_and(_m,_Gy[y1]), pcl::sse_andnot(_m,_Gy[y]) );
00097 }
00098 }
00099
00100 for( y=0; y<h4/4; y++ ) {
00101 _m = pcl::sse_min( pcl::sse_rcpsqrt(_M2[y]), pcl::sse_set(1e10f) );
00102 _M2[y] = pcl::sse_rcp(_m);
00103 _Gx[y] = pcl::sse_mul( pcl::sse_mul(_Gx[y],_m), pcl::sse_set(acMult) );
00104 _Gx[y] = pcl::sse_xor( _Gx[y], pcl::sse_and(_Gy[y], pcl::sse_set(-0.f)) );
00105 };
00106
00107 memcpy( M+x*h, M2, h*sizeof(float) );
00108
00109 if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]];
00110 }
00111 alFree(Gx); alFree(Gy); alFree(M2);
00112 #else
00113 int x, y, y1, c, h4, s; float *Gx, *Gy, *M2;
00114 float *acost = acosTable(), acMult=25000/2.02f;
00115
00116
00117 h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float);
00118
00119 M2=(float*) alMalloc(s,16);
00120 Gx=(float*) alMalloc(s,16);
00121 Gy=(float*) alMalloc(s,16);
00122 float m;
00123
00124
00125 for( x=0; x<w; x++ ) {
00126
00127 for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x );
00128 for( y=0; y<d*h4; y++ )
00129 {
00130 M2[y] = Gx[y] * Gx[y] + Gy[y] * Gy[y];
00131 }
00132
00133
00134 for(c=1; c<d; c++)
00135 {
00136 for( y=0; y<h4/4; y++ )
00137 {
00138 y1=h4/4*c+y;
00139
00140 for (int ii = 0; ii < 4; ++ii)
00141 {
00142 if (M2[y1 * 4 + ii] > M2[y * 4 + ii])
00143 {
00144 M2[y * 4 + ii] = M2[y1 * 4 + ii];
00145 Gx[y * 4 + ii] = Gx[y1 * 4 + ii];
00146 Gy[y * 4 + ii] = Gy[y1 * 4 + ii];
00147 }
00148 }
00149 }
00150 }
00151
00152 for( y=0; y<h4; y++ )
00153 {
00154 m = 1.0f/sqrtf(M2[y]);
00155 m = m < 1e10f ? m : 1e10f;
00156 M2[y] = 1.0f / m;
00157 Gx[y] = ((Gx[y] * m) * acMult);
00158 if (Gy[y] < 0)
00159 Gx[y] = -Gx[y];
00160 }
00161
00162 memcpy( M+x*h, M2, h*sizeof(float) );
00163
00164 if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]];
00165 }
00166 alFree(Gx); alFree(Gy); alFree(M2);
00167 #endif
00168 }
00169
00170 void
00171 pcl::people::HOG::gradHist( float *M, float *O, int h, int w, int bin_size, int n_orients, bool soft_bin, float *H ) const
00172 {
00173 const int hb=h/bin_size, wb=w/bin_size, h0=hb*bin_size, w0=wb*bin_size, nb=wb*hb;
00174 const float s=(float)bin_size, sInv=1/s, sInv2=1/s/s;
00175 float *H0, *H1, *M0, *M1; int x, y; int *O0, *O1;
00176 O0=(int*)alMalloc(h*sizeof(int),16); M0=(float*) alMalloc(h*sizeof(float),16);
00177 O1=(int*)alMalloc(h*sizeof(int),16); M1=(float*) alMalloc(h*sizeof(float),16);
00178
00179
00180 float xb = 0;
00181 float init = 0;
00182 for( x=0; x<w0; x++ ) {
00183
00184 gradQuantize( O+x*h, M+x*h, O0, O1, M0, M1, n_orients, nb, h0, sInv2 );
00185
00186 if( !soft_bin || bin_size==1 ) {
00187
00188 H1=H+(x/bin_size)*hb;
00189 #define GH H1[O0[y]]+=M0[y]; H1[O1[y]]+=M1[y]; y++;
00190 if( bin_size==1 ) for(y=0; y<h0;) { GH; H1++; }
00191 else if( bin_size==2 ) for(y=0; y<h0;) { GH; GH; H1++; }
00192 else if( bin_size==3 ) for(y=0; y<h0;) { GH; GH; GH; H1++; }
00193 else if( bin_size==4 ) for(y=0; y<h0;) { GH; GH; GH; GH; H1++; }
00194 else for( y=0; y<h0;) { for( int y1=0; y1<bin_size; y1++ ) { GH; } H1++; }
00195 #undef GH
00196
00197 } else {
00198
00199 #if defined(__SSE2__)
00200 float ms[4], xyd, yb, xd, yd; __m128 _m, _m0, _m1;
00201 bool hasLf, hasRt; int xb0, yb0;
00202 if( x==0 ) { init=(0+.5f)*sInv-0.5f; xb=init; }
00203 hasLf = xb>=0; xb0 = hasLf?(int)xb:-1; hasRt = xb0 < wb-1;
00204 xd=xb-xb0; xb+=sInv; yb=init; y=0;
00205
00206 #define GHinit yd=yb-yb0; yb+=sInv; H0=H+xb0*hb+yb0; xyd=xd*yd; \
00207 ms[0]=1-xd-yd+xyd; ms[1]=yd-xyd; ms[2]=xd-xyd; ms[3]=xyd;
00208 #define GH(H,ma,mb) H1=H; pcl::sse_stru(*H1,pcl::sse_add(pcl::sse_ldu(*H1),pcl::sse_mul(ma,mb)));
00209
00210 for( ; y<bin_size/2; y++ ) {
00211 yb0=-1; GHinit;
00212 if(hasLf) { H0[O0[y]+1]+=ms[1]*M0[y]; H0[O1[y]+1]+=ms[1]*M1[y]; }
00213 if(hasRt) { H0[O0[y]+hb+1]+=ms[3]*M0[y]; H0[O1[y]+hb+1]+=ms[3]*M1[y]; }
00214 }
00215
00216 for( ; ; y++ ) {
00217 yb0 = (int) yb; if(yb0>=hb-1) break; GHinit;
00218 _m0=pcl::sse_set(M0[y]); _m1=pcl::sse_set(M1[y]);
00219 if(hasLf) { _m=pcl::sse_set(0,0,ms[1],ms[0]);
00220 GH(H0+O0[y],_m,_m0); GH(H0+O1[y],_m,_m1); }
00221 if(hasRt) { _m=pcl::sse_set(0,0,ms[3],ms[2]);
00222 GH(H0+O0[y]+hb,_m,_m0); GH(H0+O1[y]+hb,_m,_m1); }
00223 }
00224
00225 for( ; y<h0; y++ ) {
00226 yb0 = (int) yb; GHinit;
00227 if(hasLf) { H0[O0[y]]+=ms[0]*M0[y]; H0[O1[y]]+=ms[0]*M1[y]; }
00228 if(hasRt) { H0[O0[y]+hb]+=ms[2]*M0[y]; H0[O1[y]+hb]+=ms[2]*M1[y]; }
00229 }
00230 #undef GHinit
00231 #undef GH
00232 #else
00233 float ms[4], xyd, yb, xd, yd;
00234 bool hasLf, hasRt; int xb0, yb0;
00235 if( x==0 ) { init=(0+.5f)*sInv-0.5f; xb=init; }
00236 hasLf = xb>=0; xb0 = hasLf?(int)xb:-1; hasRt = xb0 < wb-1;
00237 xd=xb-xb0; xb+=sInv; yb=init; y=0;
00238
00239 #define GHinit yd=yb-yb0; yb+=sInv; H0=H+xb0*hb+yb0; xyd=xd*yd; \
00240 ms[0]=1-xd-yd+xyd; ms[1]=yd-xyd; ms[2]=xd-xyd; ms[3]=xyd;
00241
00242 for( ; y<bin_size/2; y++ ) {
00243 yb0=-1; GHinit;
00244 if(hasLf) { H0[O0[y]+1]+=ms[1]*M0[y]; H0[O1[y]+1]+=ms[1]*M1[y]; }
00245 if(hasRt) { H0[O0[y]+hb+1]+=ms[3]*M0[y]; H0[O1[y]+hb+1]+=ms[3]*M1[y]; }
00246 }
00247
00248 for( ; ; y++ ) {
00249 yb0 = (int) yb;
00250 if(yb0>=hb-1)
00251 break;
00252 GHinit;
00253
00254 if(hasLf)
00255 {
00256 H0[O0[y]+1]+=ms[1]*M0[y];
00257 H0[O1[y]+1]+=ms[1]*M1[y];
00258 H0[O0[y]]+=ms[0]*M0[y];
00259 H0[O1[y]]+=ms[0]*M1[y];
00260 }
00261 if(hasRt)
00262 {
00263 H0[O0[y]+hb+1]+=ms[3]*M0[y];
00264 H0[O1[y]+hb+1]+=ms[3]*M1[y];
00265 H0[O0[y]+hb]+=ms[2]*M0[y];
00266 H0[O1[y]+hb]+=ms[2]*M1[y];
00267 }
00268 }
00269
00270 for( ; y<h0; y++ ) {
00271 yb0 = (int) yb; GHinit;
00272 if(hasLf) { H0[O0[y]]+=ms[0]*M0[y]; H0[O1[y]]+=ms[0]*M1[y]; }
00273 if(hasRt) { H0[O0[y]+hb]+=ms[2]*M0[y]; H0[O1[y]+hb]+=ms[2]*M1[y]; }
00274 }
00275 #undef GHinit
00276 #endif
00277 }
00278 }
00279
00280 alFree(O0); alFree(O1); alFree(M0); alFree(M1);
00281 }
00282
00283 void
00284 pcl::people::HOG::normalization (float *H, int h, int w, int bin_size, int n_orients, float clip, float *G) const
00285 {
00286 float *N, *N1, *H1; int o, x, y, hb=h/bin_size, wb=w/bin_size, nb=wb*hb;
00287 float eps = 1e-4f/4/bin_size/bin_size/bin_size/bin_size;
00288
00289 N = (float*) calloc(nb,sizeof(float));
00290 for( o=0; o<n_orients; o++ ) for( x=0; x<nb; x++ ) N[x]+=H[x+o*nb]*H[x+o*nb];
00291 for( x=0; x<wb-1; x++ ) for( y=0; y<hb-1; y++ ) {
00292 N1=N+x*hb+y; *N1=1/float(sqrt( N1[0] + N1[1] + N1[hb] + N1[hb+1] +eps )); }
00293
00294 #define U(a,b) Gs[a][y]=H1[y]*N1[y-(b)]; if(Gs[a][y]>clip) Gs[a][y]=clip;
00295 for( o=0; o<n_orients; o++ ) for( x=0; x<wb; x++ ) {
00296 H1=H+o*nb+x*hb; N1=N+x*hb; float *Gs[4]; Gs[0]=G+o*nb+x*hb;
00297 for( y=1; y<4; y++ ) Gs[y]=Gs[y-1]+nb*n_orients;
00298 bool lf, md, rt; lf=(x==0); rt=(x==wb-1); md=(!lf && !rt);
00299 y=0; if(!rt) U(0,0); if(!lf) U(2,hb);
00300 if(lf) for( y=1; y<hb-1; y++ ) { U(0,0); U(1,1); }
00301 if(md) for( y=1; y<hb-1; y++ ) { U(0,0); U(1,1); U(2,hb); U(3,hb+1); }
00302 if(rt) for( y=1; y<hb-1; y++ ) { U(2,hb); U(3,hb+1); }
00303 y=hb-1; if(!rt) U(1,1); if(!lf) U(3,hb+1);
00304 } free(N);
00305 #undef U
00306 }
00307
00308 void
00309 pcl::people::HOG::compute (float *I, int h, int w, int n_channels, int bin_size, int n_orients, bool soft_bin, float *descriptor)
00310 {
00311 h_ = h;
00312 w_ = w;
00313 n_channels_ = n_channels;
00314 bin_size_ = bin_size;
00315 n_orients_ = n_orients;
00316 soft_bin_ = soft_bin;
00317
00318 compute (I, descriptor);
00319 }
00320
00321 void
00322 pcl::people::HOG::compute (float *I, float *descriptor) const
00323 {
00324
00325 float *M, *O, *G, *H;
00326 M = new float[h_ * w_];
00327 O = new float[h_ * w_];
00328 H = (float*) calloc((w_ / bin_size_) * (h_ / bin_size_) * n_orients_, sizeof(float));
00329 G = (float*) calloc((w_ / bin_size_) * (h_ / bin_size_) * n_orients_ *4, sizeof(float));
00330
00331
00332 gradMag (I, h_, w_, n_channels_, M, O );
00333
00334
00335 gradHist ( M, O, h_, w_, bin_size_, n_orients_, soft_bin_, H );
00336
00337
00338 normalization ( H, h_, w_, bin_size_, n_orients_, clip_, G );
00339
00340
00341 int k = 0;
00342 for (unsigned int l = 0; l < (n_orients_ * 4); l++)
00343 {
00344 for (unsigned int j = 1; j < (w_ / bin_size_ - 1); j++)
00345 {
00346 for (unsigned int i = 1; i < (h_ / bin_size_ - 1); i++)
00347 {
00348 descriptor[k] = G[i + j * h_ / bin_size_ + l * (h_ / bin_size_) * (w_ / bin_size_)];
00349 k++;
00350 }
00351 }
00352 }
00353 free(M); free(O); free(H); free(G);
00354 }
00355
00356 void
00357 pcl::people::HOG::grad1 (float *I, float *Gx, float *Gy, int h, int w, int x) const
00358 {
00359 #if defined(__SSE2__)
00360 int y, y1; float *Ip, *In, r; __m128 *_Ip, *_In, *_G, _r;
00361
00362 Ip=I-h; In=I+h; r=.5f;
00363 if(x==0) { r=1; Ip+=h; } else if(x==w-1) { r=1; In-=h; }
00364 if( h<4 || h%4>0 || (size_t(I)&15) || (size_t(Gx)&15) ) {
00365 for( y=0; y<h; y++ ) *Gx++=(*In++-*Ip++)*r;
00366 } else {
00367 _G=(__m128*) Gx; _Ip=(__m128*) Ip; _In=(__m128*) In; _r = pcl::sse_set(r);
00368 for(y=0; y<h; y+=4) *_G++=pcl::sse_mul(pcl::sse_sub(*_In++,*_Ip++),_r);
00369 }
00370
00371 #define GRADY(r) *Gy++=(*In++-*Ip++)*r;
00372 Ip=I; In=Ip+1;
00373
00374 y1=((~((size_t) Gy) + 1) & 15)/4; if(y1==0) y1=4; if(y1>h-1) y1=h-1;
00375 GRADY(1); Ip--; for(y=1; y<y1; y++) GRADY(.5f);
00376 _r = pcl::sse_set(.5f); _G=(__m128*) Gy;
00377 for(; y+4<h-1; y+=4, Ip+=4, In+=4, Gy+=4)
00378 *_G++=pcl::sse_mul(pcl::sse_sub(pcl::sse_ldu(*In),pcl::sse_ldu(*Ip)),_r);
00379 for(; y<h-1; y++) GRADY(.5f); In--; GRADY(1);
00380 #undef GRADY
00381 #else
00382 int y, y1;
00383 float *Ip, *In, r;
00384
00385
00386 Ip = I - h;
00387 In = I + h;
00388 r = .5f;
00389
00390 if(x == 0)
00391 {
00392 r = 1;
00393 Ip += h;
00394 }
00395 else if (x == w-1)
00396 {
00397 r = 1;
00398 In -= h;
00399 }
00400
00401 for (y = 0; y < h; y++)
00402 *Gx++=(*In++ - *Ip++)*r;
00403
00404
00405 #define GRADY(r) *Gy++=(*In++-*Ip++)*r;
00406 Ip=I; In=Ip+1;
00407
00408 y1=((~((size_t) Gy) + 1) & 15)/4; if(y1==0) y1=4; if(y1>h-1) y1=h-1;
00409 GRADY(1); Ip--; for(y=1; y<y1; y++) GRADY(.5f);
00410
00411 r = 0.5f;
00412 for(; y<h-1; y++)
00413 GRADY(.5f); In--; GRADY(1);
00414 #undef GRADY
00415 #endif
00416 }
00417
00418 float*
00419 pcl::people::HOG::acosTable () const
00420 {
00421 int i, n=25000, n2=n/2; float t, ni;
00422 static float a[25000]; static bool init=false;
00423 if( init ) return a+n2; ni = 2.02f/(float) n;
00424 for( i=0; i<n; i++ ) {
00425 t = i*ni - 1.01f;
00426 t = t<-1 ? -1 : (t>1 ? 1 : t);
00427 t = (float) acos( t );
00428 a[i] = (t <= M_PI-1e-5f) ? t : 0;
00429 }
00430 init=true; return a+n2;
00431 }
00432
00433 void
00434 pcl::people::HOG::gradQuantize (float *O, float *M, int *O0, int *O1, float *M0, float *M1, int n_orients, int nb, int n, float norm) const
00435 {
00436 #if defined(__SSE2__)
00437
00438 int i, o0, o1; float o, od, m;
00439 __m128i _o0, _o1, *_O0, *_O1; __m128 _o, _o0f, _m, *_M0, *_M1;
00440
00441 const float oMult=(float)n_orients/M_PI; const int oMax=n_orients*nb;
00442 const __m128 _norm=pcl::sse_set(norm), _oMult=pcl::sse_set(oMult), _nbf=pcl::sse_set((float)nb);
00443 const __m128i _oMax=pcl::sse_set(oMax), _nb=pcl::sse_set(nb);
00444
00445
00446 _O0=(__m128i*) O0; _O1=(__m128i*) O1; _M0=(__m128*) M0; _M1=(__m128*) M1;
00447 for( i=0; i<=n-4; i+=4 ) {
00448 _o=pcl::sse_mul(pcl::sse_ldu(O[i]),_oMult); _o0f=pcl::sse_cvt(pcl::sse_cvt(_o)); _o0=pcl::sse_cvt(pcl::sse_mul(_o0f,_nbf));
00449 _o1=pcl::sse_add(_o0,_nb); _o1=pcl::sse_and(pcl::sse_cmpgt(_oMax,_o1),_o1);
00450 *_O0++=_o0; *_O1++=_o1; _m=pcl::sse_mul(pcl::sse_ldu(M[i]),_norm);
00451 *_M1=pcl::sse_mul(pcl::sse_sub(_o,_o0f),_m); *_M0=pcl::sse_sub(_m,*_M1); _M0++; _M1++;
00452 }
00453
00454
00455 for( ; i<n; i++ ) {
00456 o=O[i]*oMult; m=M[i]*norm; o0=(int) o; od=o-o0;
00457 o0*=nb; o1=o0+nb; if(o1==oMax) o1=0;
00458 O0[i]=o0; O1[i]=o1; M1[i]=od*m; M0[i]=m-M1[i];
00459 }
00460 #else
00461 int i, o0, o1;
00462 float o, od, m;
00463
00464
00465 const float oMult=(float)n_orients/M_PI; const int oMax=n_orients*nb;
00466
00467
00468 for( i = 0; i<n; i++ )
00469 {
00470 o=O[i]*oMult; m=M[i]*norm; o0=(int) o; od=o-o0;
00471 o0*=nb; o1=o0+nb; if(o1==oMax) o1=0;
00472 O0[i]=o0; O1[i]=o1; M1[i]=od*m; M0[i]=m-M1[i];
00473 }
00474 #endif
00475 }
00476
00477 inline void*
00478 pcl::people::HOG::alMalloc (size_t size, int alignment) const
00479 {
00480 const size_t pSize = sizeof(void*), a = alignment-1;
00481 void *raw = malloc(size + a + pSize);
00482 void *aligned = (void*) (((size_t) raw + pSize + a) & ~a);
00483 *(void**) ((size_t) aligned-pSize) = raw;
00484 return aligned;
00485 }
00486
00487 inline void
00488 pcl::people::HOG::alFree (void* aligned) const
00489 {
00490 void* raw = *(void**)((char*)aligned-sizeof(void*));
00491 free(raw);
00492 }
00493