kiss_fft.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
3  * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
4  *
5  * SPDX-License-Identifier: BSD-3-Clause
6  * See COPYING file for more information.
7  */
8 
9 
10 #include "_kiss_fft_guts.h"
11 /* The guts header contains all the multiplication and addition macros that are defined for
12  fixed or floating point complex numbers. It also delares the kf_ internal functions.
13  */
14 
15 static void kf_bfly2(
16  kiss_fft_cpx * Fout,
17  const size_t fstride,
18  const kiss_fft_cfg st,
19  int m
20  )
21 {
22  kiss_fft_cpx * Fout2;
23  kiss_fft_cpx * tw1 = st->twiddles;
24  kiss_fft_cpx t;
25  Fout2 = Fout + m;
26  do{
27  C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
28 
29  C_MUL (t, *Fout2 , *tw1);
30  tw1 += fstride;
31  C_SUB( *Fout2 , *Fout , t );
32  C_ADDTO( *Fout , t );
33  ++Fout2;
34  ++Fout;
35  }while (--m);
36 }
37 
38 static void kf_bfly4(
39  kiss_fft_cpx * Fout,
40  const size_t fstride,
41  const kiss_fft_cfg st,
42  const size_t m
43  )
44 {
45  kiss_fft_cpx *tw1,*tw2,*tw3;
46  kiss_fft_cpx scratch[6];
47  size_t k=m;
48  const size_t m2=2*m;
49  const size_t m3=3*m;
50 
51 
52  tw3 = tw2 = tw1 = st->twiddles;
53 
54  do {
55  C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
56 
57  C_MUL(scratch[0],Fout[m] , *tw1 );
58  C_MUL(scratch[1],Fout[m2] , *tw2 );
59  C_MUL(scratch[2],Fout[m3] , *tw3 );
60 
61  C_SUB( scratch[5] , *Fout, scratch[1] );
62  C_ADDTO(*Fout, scratch[1]);
63  C_ADD( scratch[3] , scratch[0] , scratch[2] );
64  C_SUB( scratch[4] , scratch[0] , scratch[2] );
65  C_SUB( Fout[m2], *Fout, scratch[3] );
66  tw1 += fstride;
67  tw2 += fstride*2;
68  tw3 += fstride*3;
69  C_ADDTO( *Fout , scratch[3] );
70 
71  if(st->inverse) {
72  Fout[m].r = scratch[5].r - scratch[4].i;
73  Fout[m].i = scratch[5].i + scratch[4].r;
74  Fout[m3].r = scratch[5].r + scratch[4].i;
75  Fout[m3].i = scratch[5].i - scratch[4].r;
76  }else{
77  Fout[m].r = scratch[5].r + scratch[4].i;
78  Fout[m].i = scratch[5].i - scratch[4].r;
79  Fout[m3].r = scratch[5].r - scratch[4].i;
80  Fout[m3].i = scratch[5].i + scratch[4].r;
81  }
82  ++Fout;
83  }while(--k);
84 }
85 
86 static void kf_bfly3(
87  kiss_fft_cpx * Fout,
88  const size_t fstride,
89  const kiss_fft_cfg st,
90  size_t m
91  )
92 {
93  size_t k=m;
94  const size_t m2 = 2*m;
95  kiss_fft_cpx *tw1,*tw2;
96  kiss_fft_cpx scratch[5];
97  kiss_fft_cpx epi3;
98  epi3 = st->twiddles[fstride*m];
99 
100  tw1=tw2=st->twiddles;
101 
102  do{
103  C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
104 
105  C_MUL(scratch[1],Fout[m] , *tw1);
106  C_MUL(scratch[2],Fout[m2] , *tw2);
107 
108  C_ADD(scratch[3],scratch[1],scratch[2]);
109  C_SUB(scratch[0],scratch[1],scratch[2]);
110  tw1 += fstride;
111  tw2 += fstride*2;
112 
113  Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
114  Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
115 
116  C_MULBYSCALAR( scratch[0] , epi3.i );
117 
118  C_ADDTO(*Fout,scratch[3]);
119 
120  Fout[m2].r = Fout[m].r + scratch[0].i;
121  Fout[m2].i = Fout[m].i - scratch[0].r;
122 
123  Fout[m].r -= scratch[0].i;
124  Fout[m].i += scratch[0].r;
125 
126  ++Fout;
127  }while(--k);
128 }
129 
130 static void kf_bfly5(
131  kiss_fft_cpx * Fout,
132  const size_t fstride,
133  const kiss_fft_cfg st,
134  int m
135  )
136 {
137  kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
138  int u;
139  kiss_fft_cpx scratch[13];
140  kiss_fft_cpx * twiddles = st->twiddles;
141  kiss_fft_cpx *tw;
142  kiss_fft_cpx ya,yb;
143  ya = twiddles[fstride*m];
144  yb = twiddles[fstride*2*m];
145 
146  Fout0=Fout;
147  Fout1=Fout0+m;
148  Fout2=Fout0+2*m;
149  Fout3=Fout0+3*m;
150  Fout4=Fout0+4*m;
151 
152  tw=st->twiddles;
153  for ( u=0; u<m; ++u ) {
154  C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
155  scratch[0] = *Fout0;
156 
157  C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
158  C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
159  C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
160  C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
161 
162  C_ADD( scratch[7],scratch[1],scratch[4]);
163  C_SUB( scratch[10],scratch[1],scratch[4]);
164  C_ADD( scratch[8],scratch[2],scratch[3]);
165  C_SUB( scratch[9],scratch[2],scratch[3]);
166 
167  Fout0->r += scratch[7].r + scratch[8].r;
168  Fout0->i += scratch[7].i + scratch[8].i;
169 
170  scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
171  scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
172 
173  scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
174  scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
175 
176  C_SUB(*Fout1,scratch[5],scratch[6]);
177  C_ADD(*Fout4,scratch[5],scratch[6]);
178 
179  scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
180  scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
181  scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
182  scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
183 
184  C_ADD(*Fout2,scratch[11],scratch[12]);
185  C_SUB(*Fout3,scratch[11],scratch[12]);
186 
187  ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
188  }
189 }
190 
191 /* perform the butterfly for one stage of a mixed radix FFT */
192 static void kf_bfly_generic(
193  kiss_fft_cpx * Fout,
194  const size_t fstride,
195  const kiss_fft_cfg st,
196  int m,
197  int p
198  )
199 {
200  int u,k,q1,q;
201  kiss_fft_cpx * twiddles = st->twiddles;
202  kiss_fft_cpx t;
203  int Norig = st->nfft;
204 
205  kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
206  if (scratch == NULL){
207  KISS_FFT_ERROR("Memory allocation failed.");
208  return;
209  }
210 
211  for ( u=0; u<m; ++u ) {
212  k=u;
213  for ( q1=0 ; q1<p ; ++q1 ) {
214  scratch[q1] = Fout[ k ];
215  C_FIXDIV(scratch[q1],p);
216  k += m;
217  }
218 
219  k=u;
220  for ( q1=0 ; q1<p ; ++q1 ) {
221  int twidx=0;
222  Fout[ k ] = scratch[0];
223  for (q=1;q<p;++q ) {
224  twidx += fstride * k;
225  if (twidx>=Norig) twidx-=Norig;
226  C_MUL(t,scratch[q] , twiddles[twidx] );
227  C_ADDTO( Fout[ k ] ,t);
228  }
229  k += m;
230  }
231  }
232  KISS_FFT_TMP_FREE(scratch);
233 }
234 
235 static
236 void kf_work(
237  kiss_fft_cpx * Fout,
238  const kiss_fft_cpx * f,
239  const size_t fstride,
240  int in_stride,
241  int * factors,
242  const kiss_fft_cfg st
243  )
244 {
245  kiss_fft_cpx * Fout_beg=Fout;
246  const int p=*factors++; /* the radix */
247  const int m=*factors++; /* stage's fft length/p */
248  const kiss_fft_cpx * Fout_end = Fout + p*m;
249 
250 #ifdef _OPENMP
251  // use openmp extensions at the
252  // top-level (not recursive)
253  if (fstride==1 && p<=5 && m!=1)
254  {
255  int k;
256 
257  // execute the p different work units in different threads
258 # pragma omp parallel for
259  for (k=0;k<p;++k)
260  kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
261  // all threads have joined by this point
262 
263  switch (p) {
264  case 2: kf_bfly2(Fout,fstride,st,m); break;
265  case 3: kf_bfly3(Fout,fstride,st,m); break;
266  case 4: kf_bfly4(Fout,fstride,st,m); break;
267  case 5: kf_bfly5(Fout,fstride,st,m); break;
268  default: kf_bfly_generic(Fout,fstride,st,m,p); break;
269  }
270  return;
271  }
272 #endif
273 
274  if (m==1) {
275  do{
276  *Fout = *f;
277  f += fstride*in_stride;
278  }while(++Fout != Fout_end );
279  }else{
280  do{
281  // recursive call:
282  // DFT of size m*p performed by doing
283  // p instances of smaller DFTs of size m,
284  // each one takes a decimated version of the input
285  kf_work( Fout , f, fstride*p, in_stride, factors,st);
286  f += fstride*in_stride;
287  }while( (Fout += m) != Fout_end );
288  }
289 
290  Fout=Fout_beg;
291 
292  // recombine the p smaller DFTs
293  switch (p) {
294  case 2: kf_bfly2(Fout,fstride,st,m); break;
295  case 3: kf_bfly3(Fout,fstride,st,m); break;
296  case 4: kf_bfly4(Fout,fstride,st,m); break;
297  case 5: kf_bfly5(Fout,fstride,st,m); break;
298  default: kf_bfly_generic(Fout,fstride,st,m,p); break;
299  }
300 }
301 
302 /* facbuf is populated by p1,m1,p2,m2, ...
303  where
304  p[i] * m[i] = m[i-1]
305  m0 = n */
306 static
307 void kf_factor(int n,int * facbuf)
308 {
309  int p=4;
310  double floor_sqrt;
311  floor_sqrt = floor( sqrt((double)n) );
312 
313  /*factor out powers of 4, powers of 2, then any remaining primes */
314  do {
315  while (n % p) {
316  switch (p) {
317  case 4: p = 2; break;
318  case 2: p = 3; break;
319  default: p += 2; break;
320  }
321  if (p > floor_sqrt)
322  p = n; /* no more factors, skip to end */
323  }
324  n /= p;
325  *facbuf++ = p;
326  *facbuf++ = n;
327  } while (n > 1);
328 }
329 
330 /*
331  *
332  * User-callable function to allocate all necessary storage space for the fft.
333  *
334  * The return value is a contiguous block of memory, allocated with malloc. As such,
335  * It can be freed with free(), rather than a kiss_fft-specific function.
336  * */
337 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
338 {
340 
341  kiss_fft_cfg st=NULL;
342  size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fft_state)
343  + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/
344 
345  if ( lenmem==NULL ) {
346  st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
347  }else{
348  if (mem != NULL && *lenmem >= memneeded)
349  st = (kiss_fft_cfg)mem;
350  *lenmem = memneeded;
351  }
352  if (st) {
353  int i;
354  st->nfft=nfft;
355  st->inverse = inverse_fft;
356 
357  for (i=0;i<nfft;++i) {
358  const double pi=3.141592653589793238462643383279502884197169399375105820974944;
359  double phase = -2*pi*i / nfft;
360  if (st->inverse)
361  phase *= -1;
362  kf_cexp(st->twiddles+i, phase );
363  }
364 
365  kf_factor(nfft,st->factors);
366  }
367  return st;
368 }
369 
370 
371 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
372 {
373  if (fin == fout) {
374  //NOTE: this is not really an in-place FFT algorithm.
375  //It just performs an out-of-place FFT into a temp buffer
376  if (fout == NULL){
377  KISS_FFT_ERROR("fout buffer NULL.");
378  return;
379  }
380 
381  kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
382  if (tmpbuf == NULL){
383  KISS_FFT_ERROR("Memory allocation error.");
384  return;
385  }
386 
387 
388 
389  kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
390  memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
391  KISS_FFT_TMP_FREE(tmpbuf);
392  }else{
393  kf_work( fout, fin, 1,in_stride, st->factors,st );
394  }
395 }
396 
398 {
399  kiss_fft_stride(cfg,fin,fout,1);
400 }
401 
402 
404 {
405  // nothing needed any more
406 }
407 
409 {
410  while(1) {
411  int m=n;
412  while ( (m%2) == 0 ) m/=2;
413  while ( (m%3) == 0 ) m/=3;
414  while ( (m%5) == 0 ) m/=5;
415  if (m<=1)
416  break; /* n is completely factorable by twos, threes, and fives */
417  n++;
418  }
419  return n;
420 }
C_MUL
#define C_MUL(m, a, b)
Definition: _kiss_fft_guts.h:99
kf_bfly_generic
static void kf_bfly_generic(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m, int p)
Definition: kiss_fft.c:192
kf_factor
static void kf_factor(int n, int *facbuf)
Definition: kiss_fft.c:307
cached_fft::cfg
kiss_fft_cfg cfg
Definition: kfc.c:17
KISS_FFT_TMP_ALLOC
#define KISS_FFT_TMP_ALLOC(nbytes)
Definition: _kiss_fft_guts.h:186
kf_bfly4
static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const size_t m)
Definition: kiss_fft.c:38
KISS_FFT_MALLOC
#define KISS_FFT_MALLOC
Definition: kiss_fft.h:65
kiss_fft_cpx::r
kiss_fft_scalar r
Definition: kiss_fft.h:88
kiss_fft_cleanup
void kiss_fft_cleanup(void)
Definition: kiss_fft.c:403
KISS_FFT_ALIGN_SIZE_UP
#define KISS_FFT_ALIGN_SIZE_UP(size)
Definition: kiss_fft.h:63
C_SUB
#define C_SUB(res, a, b)
Definition: _kiss_fft_guts.h:126
kiss_fft_state::inverse
int inverse
Definition: _kiss_fft_guts.h:30
kiss_fft_cfg
struct kiss_fft_state * kiss_fft_cfg
Definition: kiss_fft.h:92
f
f
C_MULBYSCALAR
#define C_MULBYSCALAR(c, s)
Definition: _kiss_fft_guts.h:106
_kiss_fft_guts.h
kiss_fft_state::factors
int factors[2 *MAXFACTORS]
Definition: _kiss_fft_guts.h:31
kiss_fft_next_fast_size
int kiss_fft_next_fast_size(int n)
Definition: kiss_fft.c:408
kf_bfly3
static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, size_t m)
Definition: kiss_fft.c:86
kiss_fft_alloc
kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem)
Definition: kiss_fft.c:337
kiss_fft_state::twiddles
kiss_fft_cpx twiddles[1]
Definition: _kiss_fft_guts.h:32
kf_bfly2
static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m)
Definition: kiss_fft.c:15
C_ADDTO
#define C_ADDTO(res, a)
Definition: _kiss_fft_guts.h:134
C_ADD
#define C_ADD(res, a, b)
Definition: _kiss_fft_guts.h:118
kiss_fft_cpx::i
kiss_fft_scalar i
Definition: kiss_fft.h:89
C_FIXDIV
#define C_FIXDIV(c, div)
Definition: _kiss_fft_guts.h:105
KISS_FFT_ALIGN_CHECK
#define KISS_FFT_ALIGN_CHECK(ptr)
Definition: kiss_fft.h:62
kf_cexp
#define kf_cexp(x, phase)
Definition: _kiss_fft_guts.h:166
S_MUL
#define S_MUL(a, b)
Definition: _kiss_fft_guts.h:98
kiss_fft_state
Definition: _kiss_fft_guts.h:27
kiss_fft_stride
void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx *fin, kiss_fft_cpx *fout, int in_stride)
Definition: kiss_fft.c:371
kiss_fft_state::nfft
int nfft
Definition: _kiss_fft_guts.h:29
kiss_fft
void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout)
Definition: kiss_fft.c:397
HALF_OF
#define HALF_OF(x)
Definition: _kiss_fft_guts.h:163
kiss_fft_cpx
Definition: kiss_fft.h:86
kf_work
static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f, const size_t fstride, int in_stride, int *factors, const kiss_fft_cfg st)
Definition: kiss_fft.c:236
kf_bfly5
static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m)
Definition: kiss_fft.c:130
KISS_FFT_ERROR
#define KISS_FFT_ERROR(...)
Definition: kiss_fft_log.h:29
cached_fft::nfft
int nfft
Definition: kfc.c:15
KISS_FFT_TMP_FREE
#define KISS_FFT_TMP_FREE(ptr)
Definition: _kiss_fft_guts.h:187


plotjuggler
Author(s): Davide Faconti
autogenerated on Tue Nov 26 2024 03:24:08