#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H

namespace Eigen {

/** \class TensorFFT
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor FFT class.
  */

// Promotes a real scalar to std::complex; leaves complex input unchanged.
template <bool NeedUprade> struct MakeComplex {
  template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  T operator()(const T& val) const { return val; }
};

template <> struct MakeComplex<true> {
  template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  std::complex<T> operator()(const T& val) const { return std::complex<T>(val, 0); }
};

template <> struct MakeComplex<false> {
  template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  std::complex<T> operator()(const std::complex<T>& val) const { return val; }
};
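// For example, MakeComplex<true>()(1.0f) yields std::complex<float>(1, 0),
// while MakeComplex<false>() forwards an already-complex value unchanged.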
// Extracts the requested component of a complex value; the unspecialized
// version passes the value through untouched.
template <int ResultType> struct PartOf {
  template <typename T> T operator()(const T& val) const { return val; }
};

template <> struct PartOf<RealPart> {
  template <typename T> T operator()(const std::complex<T>& val) const { return val.real(); }
};

template <> struct PartOf<ImagPart> {
  template <typename T> T operator()(const std::complex<T>& val) const { return val.imag(); }
};
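// For example, PartOf<RealPart>()(std::complex<float>(3, 4)) yields 3.0f and
// PartOf<ImagPart>()(std::complex<float>(3, 4)) yields 4.0f.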
namespace internal {

template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> {
  typedef traits<XprType> XprTraits;
  typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef typename XprTraits::Scalar InputScalar;
  typedef typename conditional<FFTResultType == RealPart || FFTResultType == ImagPart,
                               RealScalar, ComplexScalar>::type OutputScalar;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename traits<XprType>::PointerType PointerType;
};
template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> {
  typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type;
};

template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1,
              typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> {
  typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type;
};

}  // end namespace internal
template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart,
                                         RealScalar, ComplexScalar>::type OutputScalar;
  typedef OutputScalar CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft)
      : m_xpr(expr), m_fft(fft) {}

  EIGEN_DEVICE_FUNC const FFT& fft() const { return m_fft; }

  EIGEN_DEVICE_FUNC const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

 protected:
  typename XprType::Nested m_xpr;
  const FFT m_fft;
};
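// Illustrative usage (a sketch, not part of this header): the op is normally
// created through the tensor module's fft() method rather than constructed
// directly. The call below assumes the public Tensor API of this module:
//
//   Eigen::Tensor<float, 2> input(4, 8);
//   input.setRandom();
//   Eigen::array<int, 1> fft_dims = {{1}};  // transform along dimension 1
//   Eigen::Tensor<std::complex<float>, 2> output =
//       input.fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft_dims);
//
// Requesting RealPart or ImagPart instead of BothParts yields a real result.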
// Evaluates the FFT expression (as an rvalue) into a temporary buffer.
template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir>
struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> {
  typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
  typedef internal::traits<XprType> XprTraits;
  typedef typename XprTraits::Scalar InputScalar;
  typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart,
                                         RealScalar, ComplexScalar>::type OutputScalar;
  typedef OutputScalar CoeffReturnType;
  typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned         = false,
    PacketAccess      = true,
    BlockAccess       = false,
    PreferBlockAccess = false,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,
    RawAccess         = false
  };

  typedef internal::TensorBlockNotImplemented TensorBlock;
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      eigen_assert(input_dims[i] > 0);
      m_dimensions[i] = input_dims[i];
    }

    // Precompute the stride of every dimension for the index arithmetic below.
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
      }
    } else {
      m_strides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
      }
    }
    m_size = m_dimensions.TotalSize();
  }
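  // For example, with ColMajor dimensions {2, 3, 4} the strides above come out
  // as {1, 2, 6} and m_size is 24; with RowMajor they would be {12, 4, 1}.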
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return m_dimensions;
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (data) {
      evalToBuf(data);
      return false;
    } else {
      m_data = (EvaluatorPointerType)m_device.get(
          (CoeffReturnType*)(m_device.allocate_temp(sizeof(CoeffReturnType) * m_size)));
      evalToBuf(m_data);
      return true;
    }
  }

  EIGEN_STRONG_INLINE void cleanup() {
    if (m_data) {
      m_device.deallocate(m_data);
      m_data = NULL;
    }
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    return m_data[index];
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

#ifdef EIGEN_USE_SYCL
  // Binds the placeholder accessor to a SYCL command-group handler.
  EIGEN_STRONG_INLINE void bind(cl::sycl::handler& cgh) const { m_data.bind(cgh); }
#endif

 private:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(EvaluatorPointerType data) {
    const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value;
    ComplexScalar* buf = write_to_out
        ? (ComplexScalar*)data
        : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size);
    // Copy the input into the working buffer, promoting real scalars to complex.
    for (Index i = 0; i < m_size; ++i) {
      buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i));
    }

    // Transform the requested dimensions one at a time.
    for (size_t i = 0; i < m_fft.size(); ++i) {
      Index dim = m_fft[i];
      eigen_assert(dim >= 0 && dim < NumDims);
      Index line_len = m_dimensions[dim];
      eigen_assert(line_len >= 1);
      ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len);
      const bool is_power_of_two = isPowerOfTwo(line_len);
      const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len);
      const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite);

      // Scratch buffers for Bluestein's algorithm (unused on the power-of-two path).
      ComplexScalar* a = is_power_of_two ? NULL :
          (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
      ComplexScalar* b = is_power_of_two ? NULL :
          (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
      ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL :
          (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1));
      if (!is_power_of_two) {
        // Compute the chirp factors exp(i*pi*j^2/line_len) directly. The
        // obvious recurrence is cheaper but drifts numerically for long
        // lines, especially in single precision.
        for (int j = 0; j < line_len + 1; ++j) {
          const double arg = ((EIGEN_PI * j) * j) / line_len;
          const std::complex<double> tmp(numext::cos(arg), numext::sin(arg));
          pos_j_base_powered[j] = static_cast<ComplexScalar>(tmp);
        }
      }
      for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) {
        const Index base_offset = getBaseOffsetFromIndex(partial_index, dim);

        // Gather one line into line_buf: contiguous lines can be memcpy'd,
        // strided lines are copied element by element.
        const Index stride = m_strides[dim];
        if (stride == 1) {
          m_device.memcpy(line_buf, &buf[base_offset], line_len * sizeof(ComplexScalar));
        } else {
          Index offset = base_offset;
          for (int j = 0; j < line_len; ++j, offset += stride) {
            line_buf[j] = buf[offset];
          }
        }
        // Transform the line in place.
        if (is_power_of_two) {
          processDataLineCooleyTukey(line_buf, line_len, log_len);
        } else {
          processDataLineBluestein(line_buf, line_len, good_composite, log_len,
                                   a, b, pos_j_base_powered);
        }

        // Scatter the result back; the reverse transform is scaled by 1/line_len.
        if (FFTDir == FFT_FORWARD && stride == 1) {
          m_device.memcpy(&buf[base_offset], line_buf, line_len * sizeof(ComplexScalar));
        } else {
          Index offset = base_offset;
          const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);
          for (int j = 0; j < line_len; ++j, offset += stride) {
            buf[offset] = (FFTDir == FFT_FORWARD) ? line_buf[j] : line_buf[j] * div_factor;
          }
        }
      }
      m_device.deallocate(line_buf);
      if (!is_power_of_two) {
        m_device.deallocate(a);
        m_device.deallocate(b);
        m_device.deallocate(pos_j_base_powered);
      }
    }

    if (!write_to_out) {
      // Extract the requested real or imaginary part into the output buffer.
      for (Index i = 0; i < m_size; ++i) {
        data[i] = PartOf<FFTResultType>()(buf[i]);
      }
      m_device.deallocate(buf);
    }
  }
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE bool isPowerOfTwo(Index x) {
    eigen_assert(x > 0);
    return !(x & (x - 1));
  }
  // Bluestein padding: the smallest power of two >= 2 * n - 1.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index findGoodComposite(Index n) {
    Index i = 2;
    while (i < 2 * n - 1) i *= 2;
    return i;
  }
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index getLog2(Index m) {
    Index log2m = 0;
    while (m >>= 1) log2m++;
    return log2m;
  }
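  // Worked example for a non-power-of-two line: line_len = 100 fails
  // isPowerOfTwo (100 & 99 != 0), findGoodComposite doubles 2 -> 4 -> ... ->
  // 256 (the first power of two >= 2*100 - 1 = 199), and getLog2(256) == 8,
  // so the Bluestein path runs size-256 power-of-two transforms.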
  // Radix-2 Cooley-Tukey; valid only when line_len is a power of two.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(
      ComplexScalar* line_buf, Index line_len, Index log_len) {
    eigen_assert(isPowerOfTwo(line_len));
    scramble_FFT(line_buf, line_len);
    compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len);
  }
  // Bluestein's algorithm: expresses an FFT of arbitrary length n as a
  // circular convolution of power-of-two length m = good_composite >= 2n - 1,
  // evaluated with three power-of-two FFTs.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(
      ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len,
      ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) {
    Index n = line_len;
    Index m = good_composite;
    ComplexScalar* data = line_buf;

    // a = input premultiplied by the chirp, zero-padded to length m.
    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        a[i] = data[i] * numext::conj(pos_j_base_powered[i]);
      } else {
        a[i] = data[i] * pos_j_base_powered[i];
      }
    }
    for (Index i = n; i < m; ++i) {
      a[i] = ComplexScalar(0, 0);
    }

    // b = the chirp itself, wrapped around so that the circular convolution
    // of a and b matches the linear convolution on the first n entries.
    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        b[i] = pos_j_base_powered[i];
      } else {
        b[i] = numext::conj(pos_j_base_powered[i]);
      }
    }
    for (Index i = n; i < m - n; ++i) {
      b[i] = ComplexScalar(0, 0);
    }
    for (Index i = m - n; i < m; ++i) {
      if (FFTDir == FFT_FORWARD) {
        b[i] = pos_j_base_powered[m - i];
      } else {
        b[i] = numext::conj(pos_j_base_powered[m - i]);
      }
    }

    // Convolve via FFT: a = IFFT(FFT(a) .* FFT(b)).
    scramble_FFT(a, m);
    compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len);

    scramble_FFT(b, m);
    compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len);

    for (Index i = 0; i < m; ++i) {
      a[i] *= b[i];
    }

    scramble_FFT(a, m);
    compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len);

    // Do the scaling after the inverse FFT.
    for (Index i = 0; i < m; ++i) {
      a[i] /= m;
    }

    // Postmultiply by the chirp to recover the transform of the line.
    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        data[i] = a[i] * numext::conj(pos_j_base_powered[i]);
      } else {
        data[i] = a[i] * pos_j_base_powered[i];
      }
    }
  }
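  // The identity behind processDataLineBluestein: writing j*k as
  // (j^2 + k^2 - (k - j)^2) / 2 turns the forward DFT into
  //   X_k = exp(-i*pi*k^2/n) * sum_j [x_j * exp(-i*pi*j^2/n)] * exp(i*pi*(k-j)^2/n),
  // i.e. a chirp-premultiplied input circularly convolved with the chirp and
  // then chirp-postmultiplied. The convolution length m is a power of two, so
  // it reuses the Cooley-Tukey kernels above regardless of n.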
  // In-place bit-reversal permutation, the standard pre-pass for an iterative
  // radix-2 FFT: j tracks the bit-reversed counterpart of i.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void scramble_FFT(ComplexScalar* data, Index n) {
    eigen_assert(isPowerOfTwo(n));
    Index j = 1;
    for (Index i = 1; i < n; ++i) {
      if (j > i) {
        std::swap(data[j - 1], data[i - 1]);
      }
      Index m = n >> 1;
      while (m >= 2 && j > m) {
        j -= m;
        m >>= 1;
      }
      j += m;
    }
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) {
    ComplexScalar tmp = data[1];
    data[1] = data[0] - data[1];
    data[0] += tmp;
  }
  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) {
    ComplexScalar tmp[4];
    tmp[0] = data[0] + data[1];
    tmp[1] = data[0] - data[1];
    tmp[2] = data[2] + data[3];
    if (Dir == FFT_FORWARD) {
      tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]);
    } else {
      tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]);
    }
    data[0] = tmp[0] + tmp[2];
    data[1] = tmp[1] + tmp[3];
    data[2] = tmp[0] - tmp[2];
    data[3] = tmp[1] - tmp[3];
  }
  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) {
    ComplexScalar tmp_1[8];
    ComplexScalar tmp_2[8];

    tmp_1[0] = data[0] + data[1];
    tmp_1[1] = data[0] - data[1];
    tmp_1[2] = data[2] + data[3];
    if (Dir == FFT_FORWARD) {
      tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1);
    } else {
      tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1);
    }
    tmp_1[4] = data[4] + data[5];
    tmp_1[5] = data[4] - data[5];
    tmp_1[6] = data[6] + data[7];
    if (Dir == FFT_FORWARD) {
      tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1);
    } else {
      tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1);
    }

    tmp_2[0] = tmp_1[0] + tmp_1[2];
    tmp_2[1] = tmp_1[1] + tmp_1[3];
    tmp_2[2] = tmp_1[0] - tmp_1[2];
    tmp_2[3] = tmp_1[1] - tmp_1[3];
    tmp_2[4] = tmp_1[4] + tmp_1[6];
// SQRT2DIV2 = sqrt(2) / 2
#define SQRT2DIV2 0.7071067811865476
    if (Dir == FFT_FORWARD) {
      tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2);
      tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1);
      tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2);
    } else {
      tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2);
      tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1);
      tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2);
    }

    data[0] = tmp_2[0] + tmp_2[4];
    data[1] = tmp_2[1] + tmp_2[5];
    data[2] = tmp_2[2] + tmp_2[6];
    data[3] = tmp_2[3] + tmp_2[7];
    data[4] = tmp_2[0] - tmp_2[4];
    data[5] = tmp_2[1] - tmp_2[5];
    data[6] = tmp_2[2] - tmp_2[6];
    data[7] = tmp_2[3] - tmp_2[7];
  }
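  // The constants in butterfly_8 are exactly the 8th roots of unity: -/+i for
  // the radix-4 stage and (+/-sqrt(2)/2 +/- i*sqrt(2)/2) for the odd twiddles,
  // conjugated between the forward and reverse directions.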
  // Merges two half-size transforms into a full one. Twiddle factors are
  // generated from the lookup tables below, and the loop is unrolled by four.
  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge(
      ComplexScalar* data, Index n, Index n_power_of_2) {
    const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2];
    const RealScalar wpi = (Dir == FFT_FORWARD)
                               ? m_minus_sin_2_PI_div_n_LUT[n_power_of_2]
                               : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2];

    const ComplexScalar wp(wtemp, wpi);
    const ComplexScalar wp_one = wp + ComplexScalar(1, 0);
    const ComplexScalar wp_one_2 = wp_one * wp_one;
    const ComplexScalar wp_one_3 = wp_one_2 * wp_one;
    const ComplexScalar wp_one_4 = wp_one_3 * wp_one;
    const Index n2 = n / 2;
    ComplexScalar w(1.0, 0.0);
    for (Index i = 0; i < n2; i += 4) {
      ComplexScalar temp0(data[i + n2] * w);
      ComplexScalar temp1(data[i + 1 + n2] * w * wp_one);
      ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2);
      ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3);
      w = w * wp_one_4;

      data[i + n2] = data[i] - temp0;
      data[i] += temp0;

      data[i + 1 + n2] = data[i + 1] - temp1;
      data[i + 1] += temp1;

      data[i + 2 + n2] = data[i + 2] - temp2;
      data[i + 2] += temp2;

      data[i + 3 + n2] = data[i + 3] - temp3;
      data[i + 3] += temp3;
    }
  }
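  // Design note: wtemp = -2*sin^2(pi/n) and wpi = -/+sin(2*pi/n), so wp_one =
  // wp + 1 = (cos(2*pi/n), -/+sin(2*pi/n)) = exp(-/+2*pi*i/n), the unit
  // twiddle step. Stepping w by wp_one_4 matches the 4x-unrolled loop and
  // avoids evaluating sin/cos inside the butterfly.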
  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(
      ComplexScalar* data, Index n, Index n_power_of_2) {
    eigen_assert(isPowerOfTwo(n));
    if (n > 8) {
      compute_1D_Butterfly<Dir>(data, n / 2, n_power_of_2 - 1);
      compute_1D_Butterfly<Dir>(data + n / 2, n / 2, n_power_of_2 - 1);
      butterfly_1D_merge<Dir>(data, n, n_power_of_2);
    } else if (n == 8) {
      butterfly_8<Dir>(data);
    } else if (n == 4) {
      butterfly_4<Dir>(data);
    } else if (n == 2) {
      butterfly_2<Dir>(data);
    }
  }
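  // For example, a length-32 line recurses into two length-16 halves, which
  // bottom out in butterfly_8; butterfly_1D_merge then combines the halves at
  // sizes 16 and 32 using the lookup tables declared below.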
  // Maps a line index to the buffer offset of that line's first element,
  // skipping the dimension being transformed.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const {
    Index result = 0;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > omitted_dim; --i) {
        const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
        const Index idx = index / partial_m_stride;
        index -= idx * partial_m_stride;
        result += idx * m_strides[i];
      }
      result += index;
    } else {
      for (Index i = 0; i < omitted_dim; ++i) {
        const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
        const Index idx = index / partial_m_stride;
        index -= idx * partial_m_stride;
        result += idx * m_strides[i];
      }
      result += index;
    }

    return result;
  }
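  // Worked example (ColMajor dims {2, 3, 4}, strides {1, 2, 6}, omitted_dim =
  // 1): partial_index 5 encodes coordinates (i0, i2) = (1, 2). The i = 2 step
  // uses partial_m_stride = 6 / 3 = 2, so idx = 2 and result = 12; adding the
  // leftover index 1 puts the line's first element at offset 13, with the
  // remaining elements at offsets 15 and 17 (stride m_strides[1] = 2).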
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const {
    return base + offset * m_strides[omitted_dim];
  }
 protected:
  Index m_size;
  const FFT EIGEN_DEVICE_REF m_fft;
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  TensorEvaluator<ArgType, Device> m_impl;
  EvaluatorPointerType m_data;
  const Device EIGEN_DEVICE_REF m_device;

  // These tables support a maximum FFT size of 2^32 per dimension:
  //   m_sin_PI_div_n_LUT[i]         = (-2) * std::sin(M_PI / std::pow(2, i)) ^ 2
  //   m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2, i))
  const RealScalar m_sin_PI_div_n_LUT[32] = { /* 32 precomputed entries elided */ };
  const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = { /* 32 precomputed entries elided */ };
};

}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_FFT_H