acado: ei_kissfft_impl.h Source File

Go to the documentation of this file.
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2009 Mark Borgerding mark a borgerding net
00005 //
00006 // This Source Code Form is subject to the terms of the Mozilla
00007 // Public License v. 2.0. If a copy of the MPL was not distributed
00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00009 
00010 namespace Eigen { 
00011 
00012 namespace internal {
00013 
00014   // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft
00015   // Copyright 2003-2009 Mark Borgerding
00016 
00017 template <typename _Scalar>
00018 struct kiss_cpx_fft
00019 {
00020   typedef _Scalar Scalar;
00021   typedef std::complex<Scalar> Complex;
00022   std::vector<Complex> m_twiddles;
00023   std::vector<int> m_stageRadix;
00024   std::vector<int> m_stageRemainder;
00025   std::vector<Complex> m_scratchBuf;
00026   bool m_inverse;
00027 
00028   inline
00029     void make_twiddles(int nfft,bool inverse)
00030     {
00031       using std::acos;
00032       m_inverse = inverse;
00033       m_twiddles.resize(nfft);
00034       Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
00035       for (int i=0;i<nfft;++i)
00036         m_twiddles[i] = exp( Complex(0,i*phinc) );
00037     }
00038 
00039   void factorize(int nfft)
00040   {
00041     //start factoring out 4's, then 2's, then 3,5,7,9,...
00042     int n= nfft;
00043     int p=4;
00044     do {
00045       while (n % p) {
00046         switch (p) {
00047           case 4: p = 2; break;
00048           case 2: p = 3; break;
00049           default: p += 2; break;
00050         }
00051         if (p*p>n)
00052           p=n;// impossible to have a factor > sqrt(n)
00053       }
00054       n /= p;
00055       m_stageRadix.push_back(p);
00056       m_stageRemainder.push_back(n);
00057       if ( p > 5 )
00058         m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic
00059     }while(n>1);
00060   }
00061 
00062   template <typename _Src>
00063     inline
00064     void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
00065     {
00066       int p = m_stageRadix[stage];
00067       int m = m_stageRemainder[stage];
00068       Complex * Fout_beg = xout;
00069       Complex * Fout_end = xout + p*m;
00070 
00071       if (m>1) {
00072         do{
00073           // recursive call:
00074           // DFT of size m*p performed by doing
00075           // p instances of smaller DFTs of size m, 
00076           // each one takes a decimated version of the input
00077           work(stage+1, xout , xin, fstride*p,in_stride);
00078           xin += fstride*in_stride;
00079         }while( (xout += m) != Fout_end );
00080       }else{
00081         do{
00082           *xout = *xin;
00083           xin += fstride*in_stride;
00084         }while(++xout != Fout_end );
00085       }
00086       xout=Fout_beg;
00087 
00088       // recombine the p smaller DFTs 
00089       switch (p) {
00090         case 2: bfly2(xout,fstride,m); break;
00091         case 3: bfly3(xout,fstride,m); break;
00092         case 4: bfly4(xout,fstride,m); break;
00093         case 5: bfly5(xout,fstride,m); break;
00094         default: bfly_generic(xout,fstride,m,p); break;
00095       }
00096     }
00097 
00098   inline
00099     void bfly2( Complex * Fout, const size_t fstride, int m)
00100     {
00101       for (int k=0;k<m;++k) {
00102         Complex t = Fout[m+k] * m_twiddles[k*fstride];
00103         Fout[m+k] = Fout[k] - t;
00104         Fout[k] += t;
00105       }
00106     }
00107 
00108   inline
00109     void bfly4( Complex * Fout, const size_t fstride, const size_t m)
00110     {
00111       Complex scratch[6];
00112       int negative_if_inverse = m_inverse * -2 +1;
00113       for (size_t k=0;k<m;++k) {
00114         scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
00115         scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
00116         scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
00117         scratch[5] = Fout[k] - scratch[1];
00118 
00119         Fout[k] += scratch[1];
00120         scratch[3] = scratch[0] + scratch[2];
00121         scratch[4] = scratch[0] - scratch[2];
00122         scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
00123 
00124         Fout[k+2*m]  = Fout[k] - scratch[3];
00125         Fout[k] += scratch[3];
00126         Fout[k+m] = scratch[5] + scratch[4];
00127         Fout[k+3*m] = scratch[5] - scratch[4];
00128       }
00129     }
00130 
00131   inline
00132     void bfly3( Complex * Fout, const size_t fstride, const size_t m)
00133     {
00134       size_t k=m;
00135       const size_t m2 = 2*m;
00136       Complex *tw1,*tw2;
00137       Complex scratch[5];
00138       Complex epi3;
00139       epi3 = m_twiddles[fstride*m];
00140 
00141       tw1=tw2=&m_twiddles[0];
00142 
00143       do{
00144         scratch[1]=Fout[m] * *tw1;
00145         scratch[2]=Fout[m2] * *tw2;
00146 
00147         scratch[3]=scratch[1]+scratch[2];
00148         scratch[0]=scratch[1]-scratch[2];
00149         tw1 += fstride;
00150         tw2 += fstride*2;
00151         Fout[m] = Complex( Fout->real() - Scalar(.5)*scratch[3].real() , Fout->imag() - Scalar(.5)*scratch[3].imag() );
00152         scratch[0] *= epi3.imag();
00153         *Fout += scratch[3];
00154         Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
00155         Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
00156         ++Fout;
00157       }while(--k);
00158     }
00159 
00160   inline
00161     void bfly5( Complex * Fout, const size_t fstride, const size_t m)
00162     {
00163       Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
00164       size_t u;
00165       Complex scratch[13];
00166       Complex * twiddles = &m_twiddles[0];
00167       Complex *tw;
00168       Complex ya,yb;
00169       ya = twiddles[fstride*m];
00170       yb = twiddles[fstride*2*m];
00171 
00172       Fout0=Fout;
00173       Fout1=Fout0+m;
00174       Fout2=Fout0+2*m;
00175       Fout3=Fout0+3*m;
00176       Fout4=Fout0+4*m;
00177 
00178       tw=twiddles;
00179       for ( u=0; u<m; ++u ) {
00180         scratch[0] = *Fout0;
00181 
00182         scratch[1]  = *Fout1 * tw[u*fstride];
00183         scratch[2]  = *Fout2 * tw[2*u*fstride];
00184         scratch[3]  = *Fout3 * tw[3*u*fstride];
00185         scratch[4]  = *Fout4 * tw[4*u*fstride];
00186 
00187         scratch[7] = scratch[1] + scratch[4];
00188         scratch[10] = scratch[1] - scratch[4];
00189         scratch[8] = scratch[2] + scratch[3];
00190         scratch[9] = scratch[2] - scratch[3];
00191 
00192         *Fout0 +=  scratch[7];
00193         *Fout0 +=  scratch[8];
00194 
00195         scratch[5] = scratch[0] + Complex(
00196             (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
00197             (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
00198             );
00199 
00200         scratch[6] = Complex(
00201             (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
00202             -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
00203             );
00204 
00205         *Fout1 = scratch[5] - scratch[6];
00206         *Fout4 = scratch[5] + scratch[6];
00207 
00208         scratch[11] = scratch[0] +
00209           Complex(
00210               (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
00211               (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
00212               );
00213 
00214         scratch[12] = Complex(
00215             -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
00216             (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
00217             );
00218 
00219         *Fout2=scratch[11]+scratch[12];
00220         *Fout3=scratch[11]-scratch[12];
00221 
00222         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
00223       }
00224     }
00225 
00226   /* perform the butterfly for one stage of a mixed radix FFT */
00227   inline
00228     void bfly_generic(
00229         Complex * Fout,
00230         const size_t fstride,
00231         int m,
00232         int p
00233         )
00234     {
00235       int u,k,q1,q;
00236       Complex * twiddles = &m_twiddles[0];
00237       Complex t;
00238       int Norig = static_cast<int>(m_twiddles.size());
00239       Complex * scratchbuf = &m_scratchBuf[0];
00240 
00241       for ( u=0; u<m; ++u ) {
00242         k=u;
00243         for ( q1=0 ; q1<p ; ++q1 ) {
00244           scratchbuf[q1] = Fout[ k  ];
00245           k += m;
00246         }
00247 
00248         k=u;
00249         for ( q1=0 ; q1<p ; ++q1 ) {
00250           int twidx=0;
00251           Fout[ k ] = scratchbuf[0];
00252           for (q=1;q<p;++q ) {
00253             twidx += static_cast<int>(fstride) * k;
00254             if (twidx>=Norig) twidx-=Norig;
00255             t=scratchbuf[q] * twiddles[twidx];
00256             Fout[ k ] += t;
00257           }
00258           k += m;
00259         }
00260       }
00261     }
00262 };
00263 
00264 template <typename _Scalar>
00265 struct kissfft_impl
00266 {
00267   typedef _Scalar Scalar;
00268   typedef std::complex<Scalar> Complex;
00269 
00270   void clear() 
00271   {
00272     m_plans.clear();
00273     m_realTwiddles.clear();
00274   }
00275 
00276   inline
00277     void fwd( Complex * dst,const Complex *src,int nfft)
00278     {
00279       get_plan(nfft,false).work(0, dst, src, 1,1);
00280     }
00281 
00282   inline
00283     void fwd2( Complex * dst,const Complex *src,int n0,int n1)
00284     {
00285         EIGEN_UNUSED_VARIABLE(dst);
00286         EIGEN_UNUSED_VARIABLE(src);
00287         EIGEN_UNUSED_VARIABLE(n0);
00288         EIGEN_UNUSED_VARIABLE(n1);
00289     }
00290 
00291   inline
00292     void inv2( Complex * dst,const Complex *src,int n0,int n1)
00293     {
00294         EIGEN_UNUSED_VARIABLE(dst);
00295         EIGEN_UNUSED_VARIABLE(src);
00296         EIGEN_UNUSED_VARIABLE(n0);
00297         EIGEN_UNUSED_VARIABLE(n1);
00298     }
00299 
00300   // real-to-complex forward FFT
00301   // perform two FFTs of src even and src odd
00302   // then twiddle to recombine them into the half-spectrum format
00303   // then fill in the conjugate symmetric half
00304   inline
00305     void fwd( Complex * dst,const Scalar * src,int nfft) 
00306     {
00307       if ( nfft&3  ) {
00308         // use generic mode for odd
00309         m_tmpBuf1.resize(nfft);
00310         get_plan(nfft,false).work(0, &m_tmpBuf1[0], src, 1,1);
00311         std::copy(m_tmpBuf1.begin(),m_tmpBuf1.begin()+(nfft>>1)+1,dst );
00312       }else{
00313         int ncfft = nfft>>1;
00314         int ncfft2 = nfft>>2;
00315         Complex * rtw = real_twiddles(ncfft2);
00316 
00317         // use optimized mode for even real
00318         fwd( dst, reinterpret_cast<const Complex*> (src), ncfft);
00319         Complex dc = dst[0].real() +  dst[0].imag();
00320         Complex nyquist = dst[0].real() -  dst[0].imag();
00321         int k;
00322         for ( k=1;k <= ncfft2 ; ++k ) {
00323           Complex fpk = dst[k];
00324           Complex fpnk = conj(dst[ncfft-k]);
00325           Complex f1k = fpk + fpnk;
00326           Complex f2k = fpk - fpnk;
00327           Complex tw= f2k * rtw[k-1];
00328           dst[k] =  (f1k + tw) * Scalar(.5);
00329           dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
00330         }
00331         dst[0] = dc;
00332         dst[ncfft] = nyquist;
00333       }
00334     }
00335 
00336   // inverse complex-to-complex
00337   inline
00338     void inv(Complex * dst,const Complex  *src,int nfft)
00339     {
00340       get_plan(nfft,true).work(0, dst, src, 1,1);
00341     }
00342 
00343   // half-complex to scalar
00344   inline
00345     void inv( Scalar * dst,const Complex * src,int nfft) 
00346     {
00347       if (nfft&3) {
00348         m_tmpBuf1.resize(nfft);
00349         m_tmpBuf2.resize(nfft);
00350         std::copy(src,src+(nfft>>1)+1,m_tmpBuf1.begin() );
00351         for (int k=1;k<(nfft>>1)+1;++k)
00352           m_tmpBuf1[nfft-k] = conj(m_tmpBuf1[k]);
00353         inv(&m_tmpBuf2[0],&m_tmpBuf1[0],nfft);
00354         for (int k=0;k<nfft;++k)
00355           dst[k] = m_tmpBuf2[k].real();
00356       }else{
00357         // optimized version for multiple of 4
00358         int ncfft = nfft>>1;
00359         int ncfft2 = nfft>>2;
00360         Complex * rtw = real_twiddles(ncfft2);
00361         m_tmpBuf1.resize(ncfft);
00362         m_tmpBuf1[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() );
00363         for (int k = 1; k <= ncfft / 2; ++k) {
00364           Complex fk = src[k];
00365           Complex fnkc = conj(src[ncfft-k]);
00366           Complex fek = fk + fnkc;
00367           Complex tmp = fk - fnkc;
00368           Complex fok = tmp * conj(rtw[k-1]);
00369           m_tmpBuf1[k] = fek + fok;
00370           m_tmpBuf1[ncfft-k] = conj(fek - fok);
00371         }
00372         get_plan(ncfft,true).work(0, reinterpret_cast<Complex*>(dst), &m_tmpBuf1[0], 1,1);
00373       }
00374     }
00375 
00376   protected:
00377   typedef kiss_cpx_fft<Scalar> PlanData;
00378   typedef std::map<int,PlanData> PlanMap;
00379 
00380   PlanMap m_plans;
00381   std::map<int, std::vector<Complex> > m_realTwiddles;
00382   std::vector<Complex> m_tmpBuf1;
00383   std::vector<Complex> m_tmpBuf2;
00384 
00385   inline
00386     int PlanKey(int nfft, bool isinverse) const { return (nfft<<1) | int(isinverse); }
00387 
00388   inline
00389     PlanData & get_plan(int nfft, bool inverse)
00390     {
00391       // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles
00392       PlanData & pd = m_plans[ PlanKey(nfft,inverse) ];
00393       if ( pd.m_twiddles.size() == 0 ) {
00394         pd.make_twiddles(nfft,inverse);
00395         pd.factorize(nfft);
00396       }
00397       return pd;
00398     }
00399 
00400   inline
00401     Complex * real_twiddles(int ncfft2)
00402     {
00403       using std::acos;
00404       std::vector<Complex> & twidref = m_realTwiddles[ncfft2];// creates new if not there
00405       if ( (int)twidref.size() != ncfft2 ) {
00406         twidref.resize(ncfft2);
00407         int ncfft= ncfft2<<1;
00408         Scalar pi =  acos( Scalar(-1) );
00409         for (int k=1;k<=ncfft2;++k) 
00410           twidref[k-1] = exp( Complex(0,-pi * (Scalar(k) / ncfft + Scalar(.5)) ) );
00411       }
00412       return &twidref[0];
00413     }
00414 };
00415 
00416 } // end namespace internal
00417 
00418 } // end namespace Eigen
00419 
00420 /* vim: set filetype=cpp et sw=2 ts=2 ai: */