29 C_MUL (t, *Fout2 , *tw1);
31 C_SUB( *Fout2 , *Fout , t );
57 C_MUL(scratch[0],Fout[m] , *tw1 );
58 C_MUL(scratch[1],Fout[m2] , *tw2 );
59 C_MUL(scratch[2],Fout[m3] , *tw3 );
61 C_SUB( scratch[5] , *Fout, scratch[1] );
63 C_ADD( scratch[3] , scratch[0] , scratch[2] );
64 C_SUB( scratch[4] , scratch[0] , scratch[2] );
65 C_SUB( Fout[m2], *Fout, scratch[3] );
72 Fout[m].
r = scratch[5].
r - scratch[4].
i;
73 Fout[m].
i = scratch[5].
i + scratch[4].
r;
74 Fout[m3].
r = scratch[5].
r + scratch[4].
i;
75 Fout[m3].
i = scratch[5].
i - scratch[4].
r;
77 Fout[m].
r = scratch[5].
r + scratch[4].
i;
78 Fout[m].
i = scratch[5].
i - scratch[4].
r;
79 Fout[m3].
r = scratch[5].
r - scratch[4].
i;
80 Fout[m3].
i = scratch[5].
i + scratch[4].
r;
94 const size_t m2 = 2*m;
105 C_MUL(scratch[1],Fout[m] , *tw1);
106 C_MUL(scratch[2],Fout[m2] , *tw2);
108 C_ADD(scratch[3],scratch[1],scratch[2]);
109 C_SUB(scratch[0],scratch[1],scratch[2]);
113 Fout[m].
r = Fout->
r -
HALF_OF(scratch[3].r);
114 Fout[m].
i = Fout->
i -
HALF_OF(scratch[3].i);
120 Fout[m2].
r = Fout[m].
r + scratch[0].
i;
121 Fout[m2].
i = Fout[m].
i - scratch[0].
r;
123 Fout[m].
r -= scratch[0].
i;
124 Fout[m].
i += scratch[0].
r;
132 const size_t fstride,
143 ya = twiddles[fstride*m];
144 yb = twiddles[fstride*2*m];
153 for ( u=0; u<m; ++u ) {
157 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
158 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
159 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
160 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
162 C_ADD( scratch[7],scratch[1],scratch[4]);
163 C_SUB( scratch[10],scratch[1],scratch[4]);
164 C_ADD( scratch[8],scratch[2],scratch[3]);
165 C_SUB( scratch[9],scratch[2],scratch[3]);
167 Fout0->
r += scratch[7].
r + scratch[8].
r;
168 Fout0->
i += scratch[7].
i + scratch[8].
i;
170 scratch[5].
r = scratch[0].
r +
S_MUL(scratch[7].r,ya.
r) +
S_MUL(scratch[8].r,yb.
r);
171 scratch[5].
i = scratch[0].
i +
S_MUL(scratch[7].i,ya.
r) +
S_MUL(scratch[8].i,yb.
r);
173 scratch[6].
r =
S_MUL(scratch[10].i,ya.
i) +
S_MUL(scratch[9].i,yb.
i);
174 scratch[6].
i = -
S_MUL(scratch[10].r,ya.
i) -
S_MUL(scratch[9].r,yb.
i);
176 C_SUB(*Fout1,scratch[5],scratch[6]);
177 C_ADD(*Fout4,scratch[5],scratch[6]);
179 scratch[11].
r = scratch[0].
r +
S_MUL(scratch[7].r,yb.
r) +
S_MUL(scratch[8].r,ya.
r);
180 scratch[11].
i = scratch[0].
i +
S_MUL(scratch[7].i,yb.
r) +
S_MUL(scratch[8].i,ya.
r);
181 scratch[12].
r = -
S_MUL(scratch[10].i,yb.
i) +
S_MUL(scratch[9].i,ya.
i);
182 scratch[12].
i =
S_MUL(scratch[10].r,yb.
i) -
S_MUL(scratch[9].r,ya.
i);
184 C_ADD(*Fout2,scratch[11],scratch[12]);
185 C_SUB(*Fout3,scratch[11],scratch[12]);
187 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
194 const size_t fstride,
203 int Norig = st->
nfft;
206 if (scratch == NULL){
211 for ( u=0; u<m; ++u ) {
213 for ( q1=0 ; q1<p ; ++q1 ) {
214 scratch[q1] = Fout[ k ];
220 for ( q1=0 ; q1<p ; ++q1 ) {
222 Fout[ k ] = scratch[0];
224 twidx += fstride * k;
225 if (twidx>=Norig) twidx-=Norig;
226 C_MUL(t,scratch[q] , twiddles[twidx] );
239 const size_t fstride,
246 const int p=*factors++;
247 const int m=*factors++;
253 if (fstride==1 && p<=5 && m!=1)
258 # pragma omp parallel for
260 kf_work( Fout +k*m,
f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
264 case 2:
kf_bfly2(Fout,fstride,st,m);
break;
265 case 3:
kf_bfly3(Fout,fstride,st,m);
break;
266 case 4:
kf_bfly4(Fout,fstride,st,m);
break;
267 case 5:
kf_bfly5(Fout,fstride,st,m);
break;
277 f += fstride*in_stride;
278 }
while(++Fout != Fout_end );
285 kf_work( Fout ,
f, fstride*p, in_stride, factors,st);
286 f += fstride*in_stride;
287 }
while( (Fout += m) != Fout_end );
294 case 2:
kf_bfly2(Fout,fstride,st,m);
break;
295 case 3:
kf_bfly3(Fout,fstride,st,m);
break;
296 case 4:
kf_bfly4(Fout,fstride,st,m);
break;
297 case 5:
kf_bfly5(Fout,fstride,st,m);
break;
311 floor_sqrt = floor( sqrt((
double)n) );
317 case 4: p = 2;
break;
318 case 2: p = 3;
break;
319 default: p += 2;
break;
345 if ( lenmem==NULL ) {
348 if (mem != NULL && *lenmem >= memneeded)
357 for (i=0;i<
nfft;++i) {
358 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
359 double phase = -2*pi*i /
nfft;
412 while ( (m%2) == 0 ) m/=2;
413 while ( (m%3) == 0 ) m/=3;
414 while ( (m%5) == 0 ) m/=5;