00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include "pcl/common/fft/_kiss_fft_guts.h"
00017
00018
00019
00020
00021 static void kf_bfly2(
00022 kiss_fft_cpx * Fout,
00023 const size_t fstride,
00024 const kiss_fft_cfg st,
00025 int m
00026 )
00027 {
00028 kiss_fft_cpx * Fout2;
00029 kiss_fft_cpx * tw1 = st->twiddles;
00030 kiss_fft_cpx t;
00031 Fout2 = Fout + m;
00032 do{
00033 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
00034
00035 C_MUL (t, *Fout2 , *tw1);
00036 tw1 += fstride;
00037 C_SUB( *Fout2 , *Fout , t );
00038 C_ADDTO( *Fout , t );
00039 ++Fout2;
00040 ++Fout;
00041 }while (--m);
00042 }
00043
00044 static void kf_bfly4(
00045 kiss_fft_cpx * Fout,
00046 const size_t fstride,
00047 const kiss_fft_cfg st,
00048 const size_t m
00049 )
00050 {
00051 kiss_fft_cpx *tw1,*tw2,*tw3;
00052 kiss_fft_cpx scratch[6];
00053 size_t k=m;
00054 const size_t m2=2*m;
00055 const size_t m3=3*m;
00056
00057
00058 tw3 = tw2 = tw1 = st->twiddles;
00059
00060 do {
00061 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
00062
00063 C_MUL(scratch[0],Fout[m] , *tw1 );
00064 C_MUL(scratch[1],Fout[m2] , *tw2 );
00065 C_MUL(scratch[2],Fout[m3] , *tw3 );
00066
00067 C_SUB( scratch[5] , *Fout, scratch[1] );
00068 C_ADDTO(*Fout, scratch[1]);
00069 C_ADD( scratch[3] , scratch[0] , scratch[2] );
00070 C_SUB( scratch[4] , scratch[0] , scratch[2] );
00071 C_SUB( Fout[m2], *Fout, scratch[3] );
00072 tw1 += fstride;
00073 tw2 += fstride*2;
00074 tw3 += fstride*3;
00075 C_ADDTO( *Fout , scratch[3] );
00076
00077 if(st->inverse) {
00078 Fout[m].r = scratch[5].r - scratch[4].i;
00079 Fout[m].i = scratch[5].i + scratch[4].r;
00080 Fout[m3].r = scratch[5].r + scratch[4].i;
00081 Fout[m3].i = scratch[5].i - scratch[4].r;
00082 }else{
00083 Fout[m].r = scratch[5].r + scratch[4].i;
00084 Fout[m].i = scratch[5].i - scratch[4].r;
00085 Fout[m3].r = scratch[5].r - scratch[4].i;
00086 Fout[m3].i = scratch[5].i + scratch[4].r;
00087 }
00088 ++Fout;
00089 }while(--k);
00090 }
00091
00092 static void kf_bfly3(
00093 kiss_fft_cpx * Fout,
00094 const size_t fstride,
00095 const kiss_fft_cfg st,
00096 size_t m
00097 )
00098 {
00099 size_t k=m;
00100 const size_t m2 = 2*m;
00101 kiss_fft_cpx *tw1,*tw2;
00102 kiss_fft_cpx scratch[5];
00103 kiss_fft_cpx epi3;
00104 epi3 = st->twiddles[fstride*m];
00105
00106 tw1=tw2=st->twiddles;
00107
00108 do{
00109 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
00110
00111 C_MUL(scratch[1],Fout[m] , *tw1);
00112 C_MUL(scratch[2],Fout[m2] , *tw2);
00113
00114 C_ADD(scratch[3],scratch[1],scratch[2]);
00115 C_SUB(scratch[0],scratch[1],scratch[2]);
00116 tw1 += fstride;
00117 tw2 += fstride*2;
00118
00119 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
00120 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
00121
00122 C_MULBYSCALAR( scratch[0] , epi3.i );
00123
00124 C_ADDTO(*Fout,scratch[3]);
00125
00126 Fout[m2].r = Fout[m].r + scratch[0].i;
00127 Fout[m2].i = Fout[m].i - scratch[0].r;
00128
00129 Fout[m].r -= scratch[0].i;
00130 Fout[m].i += scratch[0].r;
00131
00132 ++Fout;
00133 }while(--k);
00134 }
00135
00136 static void kf_bfly5(
00137 kiss_fft_cpx * Fout,
00138 const size_t fstride,
00139 const kiss_fft_cfg st,
00140 int m
00141 )
00142 {
00143 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
00144 int u;
00145 kiss_fft_cpx scratch[13];
00146 kiss_fft_cpx * twiddles = st->twiddles;
00147 kiss_fft_cpx *tw;
00148 kiss_fft_cpx ya,yb;
00149 ya = twiddles[fstride*m];
00150 yb = twiddles[fstride*2*m];
00151
00152 Fout0=Fout;
00153 Fout1=Fout0+m;
00154 Fout2=Fout0+2*m;
00155 Fout3=Fout0+3*m;
00156 Fout4=Fout0+4*m;
00157
00158 tw=st->twiddles;
00159 for ( u=0; u<m; ++u ) {
00160 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
00161 scratch[0] = *Fout0;
00162
00163 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
00164 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
00165 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
00166 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
00167
00168 C_ADD( scratch[7],scratch[1],scratch[4]);
00169 C_SUB( scratch[10],scratch[1],scratch[4]);
00170 C_ADD( scratch[8],scratch[2],scratch[3]);
00171 C_SUB( scratch[9],scratch[2],scratch[3]);
00172
00173 Fout0->r += scratch[7].r + scratch[8].r;
00174 Fout0->i += scratch[7].i + scratch[8].i;
00175
00176 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
00177 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
00178
00179 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
00180 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
00181
00182 C_SUB(*Fout1,scratch[5],scratch[6]);
00183 C_ADD(*Fout4,scratch[5],scratch[6]);
00184
00185 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
00186 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
00187 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
00188 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
00189
00190 C_ADD(*Fout2,scratch[11],scratch[12]);
00191 C_SUB(*Fout3,scratch[11],scratch[12]);
00192
00193 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
00194 }
00195 }
00196
00197
00198 static void kf_bfly_generic(
00199 kiss_fft_cpx * Fout,
00200 const size_t fstride,
00201 const kiss_fft_cfg st,
00202 int m,
00203 int p
00204 )
00205 {
00206 int u,k,q1,q;
00207 kiss_fft_cpx * twiddles = st->twiddles;
00208 kiss_fft_cpx t;
00209 int Norig = st->nfft;
00210
00211 kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
00212
00213 for ( u=0; u<m; ++u ) {
00214 k=u;
00215 for ( q1=0 ; q1<p ; ++q1 ) {
00216 scratch[q1] = Fout[ k ];
00217 C_FIXDIV(scratch[q1],p);
00218 k += m;
00219 }
00220
00221 k=u;
00222 for ( q1=0 ; q1<p ; ++q1 ) {
00223 int twidx=0;
00224 Fout[ k ] = scratch[0];
00225 for (q=1;q<p;++q ) {
00226 twidx += fstride * k;
00227 if (twidx>=Norig) twidx-=Norig;
00228 C_MUL(t,scratch[q] , twiddles[twidx] );
00229 C_ADDTO( Fout[ k ] ,t);
00230 }
00231 k += m;
00232 }
00233 }
00234 KISS_FFT_TMP_FREE(scratch);
00235 }
00236
00237 static
00238 void kf_work(
00239 kiss_fft_cpx * Fout,
00240 const kiss_fft_cpx * f,
00241 const size_t fstride,
00242 int in_stride,
00243 int * factors,
00244 const kiss_fft_cfg st
00245 )
00246 {
00247 kiss_fft_cpx * Fout_beg=Fout;
00248 const int p=*factors++;
00249 const int m=*factors++;
00250 const kiss_fft_cpx * Fout_end = Fout + p*m;
00251
00252 #ifdef _OPENMP
00253
00254
00255 if (fstride==1 && p<=5)
00256 {
00257 int k;
00258
00259
00260 # pragma omp parallel for
00261 for (k=0;k<p;++k)
00262 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
00263
00264
00265 switch (p) {
00266 case 2: kf_bfly2(Fout,fstride,st,m); break;
00267 case 3: kf_bfly3(Fout,fstride,st,m); break;
00268 case 4: kf_bfly4(Fout,fstride,st,m); break;
00269 case 5: kf_bfly5(Fout,fstride,st,m); break;
00270 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
00271 }
00272 return;
00273 }
00274 #endif
00275
00276 if (m==1) {
00277 do{
00278 *Fout = *f;
00279 f += fstride*in_stride;
00280 }while(++Fout != Fout_end );
00281 }else{
00282 do{
00283
00284
00285
00286
00287 kf_work( Fout , f, fstride*p, in_stride, factors,st);
00288 f += fstride*in_stride;
00289 }while( (Fout += m) != Fout_end );
00290 }
00291
00292 Fout=Fout_beg;
00293
00294
00295 switch (p) {
00296 case 2: kf_bfly2(Fout,fstride,st,m); break;
00297 case 3: kf_bfly3(Fout,fstride,st,m); break;
00298 case 4: kf_bfly4(Fout,fstride,st,m); break;
00299 case 5: kf_bfly5(Fout,fstride,st,m); break;
00300 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
00301 }
00302 }
00303
00304
00305
00306
00307
00308 static
00309 void kf_factor(int n,int * facbuf)
00310 {
00311 int p=4;
00312 double floor_sqrt;
00313 floor_sqrt = floor( sqrt((double)n) );
00314
00315
00316 do {
00317 while (n % p) {
00318 switch (p) {
00319 case 4: p = 2; break;
00320 case 2: p = 3; break;
00321 default: p += 2; break;
00322 }
00323 if (p > floor_sqrt)
00324 p = n;
00325 }
00326 n /= p;
00327 *facbuf++ = p;
00328 *facbuf++ = n;
00329 } while (n > 1);
00330 }
00331
00332
00333
00334
00335
00336
00337
00338
00339 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
00340 {
00341 kiss_fft_cfg st=NULL;
00342 size_t memneeded = sizeof(struct kiss_fft_state)
00343 + sizeof(kiss_fft_cpx)*(nfft-1);
00344
00345 if ( lenmem==NULL ) {
00346 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
00347 }else{
00348 if (mem != NULL && *lenmem >= memneeded)
00349 st = (kiss_fft_cfg)mem;
00350 *lenmem = memneeded;
00351 }
00352 if (st) {
00353 int i;
00354 st->nfft=nfft;
00355 st->inverse = inverse_fft;
00356
00357 for (i=0;i<nfft;++i) {
00358 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
00359 double phase = -2*pi*i / nfft;
00360 if (st->inverse)
00361 phase *= -1;
00362 kf_cexp(st->twiddles+i, phase );
00363 }
00364
00365 kf_factor(nfft,st->factors);
00366 }
00367 return st;
00368 }
00369
00370
00371 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
00372 {
00373 if (fin == fout) {
00374
00375
00376 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
00377 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
00378 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
00379 KISS_FFT_TMP_FREE(tmpbuf);
00380 }else{
00381 kf_work( fout, fin, 1,in_stride, st->factors,st );
00382 }
00383 }
00384
00385 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
00386 {
00387 kiss_fft_stride(cfg,fin,fout,1);
00388 }
00389
00390
00391 void kiss_fft_cleanup(void)
00392 {
00393
00394 }
00395
00396 int kiss_fft_next_fast_size(int n)
00397 {
00398 while(1) {
00399 int m=n;
00400 while ( (m%2) == 0 ) m/=2;
00401 while ( (m%3) == 0 ) m/=3;
00402 while ( (m%5) == 0 ) m/=5;
00403 if (m<=1)
00404 break;
00405 n++;
00406 }
00407 return n;
00408 }