kiss_fft.c
Go to the documentation of this file.
00001 /*
00002 Copyright (c) 2003-2010, Mark Borgerding
00003 
00004 All rights reserved.
00005 
00006 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
00007 
00008     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
00009     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
00010     * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
00011 
00012 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00013 */
00014 
00015 
00016 #include "pcl/common/fft/_kiss_fft_guts.h"
00017 /* The guts header contains all the multiplication and addition macros that are defined for
00018  fixed or floating point complex numbers.  It also delares the kf_ internal functions.
00019  */
00020 
00021 static void kf_bfly2(
00022         kiss_fft_cpx * Fout,
00023         const size_t fstride,
00024         const kiss_fft_cfg st,
00025         int m
00026         )
00027 {
00028     kiss_fft_cpx * Fout2;
00029     kiss_fft_cpx * tw1 = st->twiddles;
00030     kiss_fft_cpx t;
00031     Fout2 = Fout + m;
00032     do{
00033         C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
00034 
00035         C_MUL (t,  *Fout2 , *tw1);
00036         tw1 += fstride;
00037         C_SUB( *Fout2 ,  *Fout , t );
00038         C_ADDTO( *Fout ,  t );
00039         ++Fout2;
00040         ++Fout;
00041     }while (--m);
00042 }
00043 
00044 static void kf_bfly4(
00045         kiss_fft_cpx * Fout,
00046         const size_t fstride,
00047         const kiss_fft_cfg st,
00048         const size_t m
00049         )
00050 {
00051     kiss_fft_cpx *tw1,*tw2,*tw3;
00052     kiss_fft_cpx scratch[6];
00053     size_t k=m;
00054     const size_t m2=2*m;
00055     const size_t m3=3*m;
00056 
00057 
00058     tw3 = tw2 = tw1 = st->twiddles;
00059 
00060     do {
00061         C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
00062 
00063         C_MUL(scratch[0],Fout[m] , *tw1 );
00064         C_MUL(scratch[1],Fout[m2] , *tw2 );
00065         C_MUL(scratch[2],Fout[m3] , *tw3 );
00066 
00067         C_SUB( scratch[5] , *Fout, scratch[1] );
00068         C_ADDTO(*Fout, scratch[1]);
00069         C_ADD( scratch[3] , scratch[0] , scratch[2] );
00070         C_SUB( scratch[4] , scratch[0] , scratch[2] );
00071         C_SUB( Fout[m2], *Fout, scratch[3] );
00072         tw1 += fstride;
00073         tw2 += fstride*2;
00074         tw3 += fstride*3;
00075         C_ADDTO( *Fout , scratch[3] );
00076 
00077         if(st->inverse) {
00078             Fout[m].r = scratch[5].r - scratch[4].i;
00079             Fout[m].i = scratch[5].i + scratch[4].r;
00080             Fout[m3].r = scratch[5].r + scratch[4].i;
00081             Fout[m3].i = scratch[5].i - scratch[4].r;
00082         }else{
00083             Fout[m].r = scratch[5].r + scratch[4].i;
00084             Fout[m].i = scratch[5].i - scratch[4].r;
00085             Fout[m3].r = scratch[5].r - scratch[4].i;
00086             Fout[m3].i = scratch[5].i + scratch[4].r;
00087         }
00088         ++Fout;
00089     }while(--k);
00090 }
00091 
00092 static void kf_bfly3(
00093          kiss_fft_cpx * Fout,
00094          const size_t fstride,
00095          const kiss_fft_cfg st,
00096          size_t m
00097          )
00098 {
00099      size_t k=m;
00100      const size_t m2 = 2*m;
00101      kiss_fft_cpx *tw1,*tw2;
00102      kiss_fft_cpx scratch[5];
00103      kiss_fft_cpx epi3;
00104      epi3 = st->twiddles[fstride*m];
00105 
00106      tw1=tw2=st->twiddles;
00107 
00108      do{
00109          C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
00110 
00111          C_MUL(scratch[1],Fout[m] , *tw1);
00112          C_MUL(scratch[2],Fout[m2] , *tw2);
00113 
00114          C_ADD(scratch[3],scratch[1],scratch[2]);
00115          C_SUB(scratch[0],scratch[1],scratch[2]);
00116          tw1 += fstride;
00117          tw2 += fstride*2;
00118 
00119          Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
00120          Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
00121 
00122          C_MULBYSCALAR( scratch[0] , epi3.i );
00123 
00124          C_ADDTO(*Fout,scratch[3]);
00125 
00126          Fout[m2].r = Fout[m].r + scratch[0].i;
00127          Fout[m2].i = Fout[m].i - scratch[0].r;
00128 
00129          Fout[m].r -= scratch[0].i;
00130          Fout[m].i += scratch[0].r;
00131 
00132          ++Fout;
00133      }while(--k);
00134 }
00135 
00136 static void kf_bfly5(
00137         kiss_fft_cpx * Fout,
00138         const size_t fstride,
00139         const kiss_fft_cfg st,
00140         int m
00141         )
00142 {
00143     kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
00144     int u;
00145     kiss_fft_cpx scratch[13];
00146     kiss_fft_cpx * twiddles = st->twiddles;
00147     kiss_fft_cpx *tw;
00148     kiss_fft_cpx ya,yb;
00149     ya = twiddles[fstride*m];
00150     yb = twiddles[fstride*2*m];
00151 
00152     Fout0=Fout;
00153     Fout1=Fout0+m;
00154     Fout2=Fout0+2*m;
00155     Fout3=Fout0+3*m;
00156     Fout4=Fout0+4*m;
00157 
00158     tw=st->twiddles;
00159     for ( u=0; u<m; ++u ) {
00160         C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
00161         scratch[0] = *Fout0;
00162 
00163         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
00164         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
00165         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
00166         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
00167 
00168         C_ADD( scratch[7],scratch[1],scratch[4]);
00169         C_SUB( scratch[10],scratch[1],scratch[4]);
00170         C_ADD( scratch[8],scratch[2],scratch[3]);
00171         C_SUB( scratch[9],scratch[2],scratch[3]);
00172 
00173         Fout0->r += scratch[7].r + scratch[8].r;
00174         Fout0->i += scratch[7].i + scratch[8].i;
00175 
00176         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
00177         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
00178 
00179         scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
00180         scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
00181 
00182         C_SUB(*Fout1,scratch[5],scratch[6]);
00183         C_ADD(*Fout4,scratch[5],scratch[6]);
00184 
00185         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
00186         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
00187         scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
00188         scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
00189 
00190         C_ADD(*Fout2,scratch[11],scratch[12]);
00191         C_SUB(*Fout3,scratch[11],scratch[12]);
00192 
00193         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
00194     }
00195 }
00196 
00197 /* perform the butterfly for one stage of a mixed radix FFT */
00198 static void kf_bfly_generic(
00199         kiss_fft_cpx * Fout,
00200         const size_t fstride,
00201         const kiss_fft_cfg st,
00202         int m,
00203         int p
00204         )
00205 {
00206     int u,k,q1,q;
00207     kiss_fft_cpx * twiddles = st->twiddles;
00208     kiss_fft_cpx t;
00209     int Norig = st->nfft;
00210 
00211     kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
00212 
00213     for ( u=0; u<m; ++u ) {
00214         k=u;
00215         for ( q1=0 ; q1<p ; ++q1 ) {
00216             scratch[q1] = Fout[ k  ];
00217             C_FIXDIV(scratch[q1],p);
00218             k += m;
00219         }
00220 
00221         k=u;
00222         for ( q1=0 ; q1<p ; ++q1 ) {
00223             int twidx=0;
00224             Fout[ k ] = scratch[0];
00225             for (q=1;q<p;++q ) {
00226                 twidx += fstride * k;
00227                 if (twidx>=Norig) twidx-=Norig;
00228                 C_MUL(t,scratch[q] , twiddles[twidx] );
00229                 C_ADDTO( Fout[ k ] ,t);
00230             }
00231             k += m;
00232         }
00233     }
00234     KISS_FFT_TMP_FREE(scratch);
00235 }
00236 
00237 static
00238 void kf_work(
00239         kiss_fft_cpx * Fout,
00240         const kiss_fft_cpx * f,
00241         const size_t fstride,
00242         int in_stride,
00243         int * factors,
00244         const kiss_fft_cfg st
00245         )
00246 {
00247     kiss_fft_cpx * Fout_beg=Fout;
00248     const int p=*factors++; /* the radix  */
00249     const int m=*factors++; /* stage's fft length/p */
00250     const kiss_fft_cpx * Fout_end = Fout + p*m;
00251 
00252 #ifdef _OPENMP
00253     // use openmp extensions at the 
00254     // top-level (not recursive)
00255     if (fstride==1 && p<=5)
00256     {
00257         int k;
00258 
00259         // execute the p different work units in different threads
00260 #       pragma omp parallel for
00261         for (k=0;k<p;++k) 
00262             kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
00263         // all threads have joined by this point
00264 
00265         switch (p) {
00266             case 2: kf_bfly2(Fout,fstride,st,m); break;
00267             case 3: kf_bfly3(Fout,fstride,st,m); break; 
00268             case 4: kf_bfly4(Fout,fstride,st,m); break;
00269             case 5: kf_bfly5(Fout,fstride,st,m); break; 
00270             default: kf_bfly_generic(Fout,fstride,st,m,p); break;
00271         }
00272         return;
00273     }
00274 #endif
00275 
00276     if (m==1) {
00277         do{
00278             *Fout = *f;
00279             f += fstride*in_stride;
00280         }while(++Fout != Fout_end );
00281     }else{
00282         do{
00283             // recursive call:
00284             // DFT of size m*p performed by doing
00285             // p instances of smaller DFTs of size m, 
00286             // each one takes a decimated version of the input
00287             kf_work( Fout , f, fstride*p, in_stride, factors,st);
00288             f += fstride*in_stride;
00289         }while( (Fout += m) != Fout_end );
00290     }
00291 
00292     Fout=Fout_beg;
00293 
00294     // recombine the p smaller DFTs 
00295     switch (p) {
00296         case 2: kf_bfly2(Fout,fstride,st,m); break;
00297         case 3: kf_bfly3(Fout,fstride,st,m); break; 
00298         case 4: kf_bfly4(Fout,fstride,st,m); break;
00299         case 5: kf_bfly5(Fout,fstride,st,m); break; 
00300         default: kf_bfly_generic(Fout,fstride,st,m,p); break;
00301     }
00302 }
00303 
00304 /*  facbuf is populated by p1,m1,p2,m2, ...
00305     where 
00306     p[i] * m[i] = m[i-1]
00307     m0 = n                  */
00308 static 
00309 void kf_factor(int n,int * facbuf)
00310 {
00311     int p=4;
00312     double floor_sqrt;
00313     floor_sqrt = floor( sqrt((double)n) );
00314 
00315     /*factor out powers of 4, powers of 2, then any remaining primes */
00316     do {
00317         while (n % p) {
00318             switch (p) {
00319                 case 4: p = 2; break;
00320                 case 2: p = 3; break;
00321                 default: p += 2; break;
00322             }
00323             if (p > floor_sqrt)
00324                 p = n;          /* no more factors, skip to end */
00325         }
00326         n /= p;
00327         *facbuf++ = p;
00328         *facbuf++ = n;
00329     } while (n > 1);
00330 }
00331 
00332 /*
00333  *
00334  * User-callable function to allocate all necessary storage space for the fft.
00335  *
00336  * The return value is a contiguous block of memory, allocated with malloc.  As such,
00337  * It can be freed with free(), rather than a kiss_fft-specific function.
00338  * */
00339 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
00340 {
00341     kiss_fft_cfg st=NULL;
00342     size_t memneeded = sizeof(struct kiss_fft_state)
00343         + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
00344 
00345     if ( lenmem==NULL ) {
00346         st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
00347     }else{
00348         if (mem != NULL && *lenmem >= memneeded)
00349             st = (kiss_fft_cfg)mem;
00350         *lenmem = memneeded;
00351     }
00352     if (st) {
00353         int i;
00354         st->nfft=nfft;
00355         st->inverse = inverse_fft;
00356 
00357         for (i=0;i<nfft;++i) {
00358             const double pi=3.141592653589793238462643383279502884197169399375105820974944;
00359             double phase = -2*pi*i / nfft;
00360             if (st->inverse)
00361                 phase *= -1;
00362             kf_cexp(st->twiddles+i, phase );
00363         }
00364 
00365         kf_factor(nfft,st->factors);
00366     }
00367     return st;
00368 }
00369 
00370 
00371 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
00372 {
00373     if (fin == fout) {
00374         //NOTE: this is not really an in-place FFT algorithm.
00375         //It just performs an out-of-place FFT into a temp buffer
00376         kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
00377         kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
00378         memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
00379         KISS_FFT_TMP_FREE(tmpbuf);
00380     }else{
00381         kf_work( fout, fin, 1,in_stride, st->factors,st );
00382     }
00383 }
00384 
00385 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
00386 {
00387     kiss_fft_stride(cfg,fin,fout,1);
00388 }
00389 
00390 
00391 void kiss_fft_cleanup(void)
00392 {
00393     // nothing needed any more
00394 }
00395 
00396 int kiss_fft_next_fast_size(int n)
00397 {
00398     while(1) {
00399         int m=n;
00400         while ( (m%2) == 0 ) m/=2;
00401         while ( (m%3) == 0 ) m/=3;
00402         while ( (m%5) == 0 ) m/=5;
00403         if (m<=1)
00404             break; /* n is completely factorable by twos, threes, and fives */
00405         n++;
00406     }
00407     return n;
00408 }


pcl
Author(s): Open Perception
autogenerated on Wed Aug 26 2015 15:25:07