1 #ifndef THIRD_PARTY_EIGEN3_TENSOR_BENCHMARKS_H_ 2 #define THIRD_PARTY_EIGEN3_TENSOR_BENCHMARKS_H_ 5 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int 7 #include "unsupported/Eigen/CXX11/Tensor" 10 #define BENCHMARK_RANGE(bench, lo, hi) \ 11 BENCHMARK(bench)->Range(lo, hi) 43 #ifdef EIGEN_USE_SYCL // warmup for sycl 59 if (
sizeof(
T) >=
sizeof(
int)) {
63 sizes[0] =
m_ *
sizeof(
T) /
sizeof(
int);
64 sizes[1] =
k_ *
sizeof(
T) /
sizeof(
int);
68 #ifdef EIGEN_USE_SYCL // warmup for sycl 87 #ifdef EIGEN_USE_SYCL // warmup for sycl 114 #ifdef EIGEN_USE_SYCL // warmup for sycl 116 C.
slice(first_quadrant, quarter_sizes).device(
device_) =
117 A.
slice(first_quadrant, quarter_sizes);
118 C.
slice(second_quadrant, quarter_sizes).device(
device_) =
119 B.
slice(second_quadrant, quarter_sizes);
120 C.
slice(third_quadrant, quarter_sizes).device(
device_) =
121 A.
slice(third_quadrant, quarter_sizes);
122 C.
slice(fourth_quadrant, quarter_sizes).device(
device_) =
123 B.
slice(fourth_quadrant, quarter_sizes);
128 C.
slice(first_quadrant, quarter_sizes).device(
device_) =
129 A.
slice(first_quadrant, quarter_sizes);
130 C.
slice(second_quadrant, quarter_sizes).device(
device_) =
131 B.
slice(second_quadrant, quarter_sizes);
132 C.
slice(third_quadrant, quarter_sizes).device(
device_) =
133 A.
slice(third_quadrant, quarter_sizes);
134 C.
slice(fourth_quadrant, quarter_sizes).device(
device_) =
135 B.
slice(fourth_quadrant, quarter_sizes);
150 #ifdef EIGEN_USE_SYCL // warmup for sycl 171 #ifdef EIGEN_USE_SYCL // warmup for sycl 198 #ifdef EIGEN_USE_SYCL // warmup for sycl 222 #if defined(EIGEN_HAS_INDEX_LIST) 223 Eigen::IndexPairList<Eigen::type2indexpair<0, 0>,
224 Eigen::type2indexpair<2, 1> > paddings;
230 #ifdef EIGEN_USE_SYCL // warmup for sycl 254 #ifndef EIGEN_HAS_INDEX_LIST 261 Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >
strides;
264 #ifdef EIGEN_USE_SYCL // warmup for sycl 288 #ifndef EIGEN_HAS_INDEX_LIST 295 Eigen::IndexList<Eigen::type2index<1>,
int>
broadcast;
296 broadcast.set(1,
n_);
299 #ifdef EIGEN_USE_SYCL // warmup for sycl 320 #ifdef EIGEN_USE_SYCL // warmup for sycl 322 C.
device(
device_) = A * A.constant(static_cast<T>(3.14)) + B * B.constant(static_cast<T>(2.7));
327 C.
device(
device_) = A * A.constant(static_cast<T>(3.14)) + B * B.constant(static_cast<T>(2.7));
343 #ifdef EIGEN_USE_SYCL // warmup for sycl 365 #ifdef EIGEN_USE_SYCL // warmup for sycl 389 #ifndef EIGEN_HAS_INDEX_LIST 391 sum_along_dim[0] = 0;
395 Eigen::IndexList<Eigen::type2index<0>> sum_along_dim;
397 #ifdef EIGEN_USE_SYCL // warmup for sycl 423 #ifndef EIGEN_HAS_INDEX_LIST 425 sum_along_dim[0] = 1;
429 Eigen::IndexList<Eigen::type2index<1>> sum_along_dim;
431 #ifdef EIGEN_USE_SYCL // warmup for sycl 455 #ifdef EIGEN_USE_SYCL // warmup for sycl 473 contraction<static_cast<int>(
Eigen::ColMajor)>(num_iters,
false,
false);
477 contraction<static_cast<int>(
Eigen::RowMajor)>(num_iters,
false,
false);
481 contraction<static_cast<int>(
Eigen::RowMajor)>(num_iters,
true,
false);
485 contraction<static_cast<int>(
Eigen::RowMajor)>(num_iters,
false,
true);
498 kernel_sizes[0] = kernel_x;
499 kernel_sizes[1] = kernel_y;
502 result_sizes[0] =
m_ - kernel_x + 1;
503 result_sizes[1] =
n_ - kernel_y + 1;
508 #ifdef EIGEN_USE_SYCL // warmup for sycl 520 (
m_ - kernel_x + 1) * (
n_ - kernel_y + 1) * kernel_x * kernel_y * num_iters);
528 sizeA[0] = (trans_a ?
k_:
m_);
529 sizeA[1] = (trans_a ?
m_:
k_);
531 sizeB[0] = (trans_b ?
n_:
k_);
532 sizeB[1] = (trans_b ?
k_:
n_);
545 dims[0] =
DimPair(a_contract_dim, b_contract_dim);
546 #ifdef EIGEN_USE_SYCL // warmup for sycl 574 #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) 578 #elif defined(EIGEN_USE_SYCL) 597 #endif // THIRD_PARTY_EIGEN3_TENSOR_BENCHMARKS_H_
void algebraicFunc(int num_iters)
void colChip(int num_iters)
void transcendentalFunc(int num_iters)
void convolution(int num_iters, int kernel_x, int kernel_y)
void contractionRowMajorBT(int num_iters)
std::vector< Array2i > sizes
void fullReduction(int num_iters)
void rowChip(int num_iters)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorChippingOp< DimId, const TensorMap< PlainObjectType, Options_, MakePointer_ > > chip(const Index offset) const
iterator iter(handle obj)
void memcpy(int num_iters)
void slicing(int num_iters)
void contractionRowMajor(int num_iters)
BenchmarkSuite(const Device &device, size_t m)
void contractionRowMajorABT(int num_iters)
void contraction(int num_iters, bool trans_a, bool trans_b)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorShufflingOp< const Shuffle, const TensorMap< PlainObjectType, Options_, MakePointer_ > > shuffle(const Shuffle &shfl) const
void typeCasting(int num_iters)
void finalizeBenchmark(int64_t num_items)
BenchmarkSuite(const Device &device, size_t m, size_t k, size_t n)
void striding(int num_iters)
A tensor expression mapping an existing array of data.
Tensor< float, 1 >::DimensionPair DimPair
void contraction(int num_iters)
void contractionRowMajorAT(int num_iters)
EIGEN_ALWAYS_INLINE DSizes< IndexType, NumDims > strides(const DSizes< IndexType, NumDims > &dimensions)
Eigen::Triplet< double > T
void padding(int num_iters)
TensorDevice< TensorMap< PlainObjectType, Options_, MakePointer_ >, DeviceType > device(const DeviceType &dev)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorSlicingOp< const StartIndices, const Sizes, const TensorMap< PlainObjectType, Options_, MakePointer_ > > slice(const StartIndices &startIndices, const Sizes &sizes) const
void colReduction(int num_iters)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorStridingOp< const Strides, const TensorMap< PlainObjectType, Options_, MakePointer_ > > stride(const Strides &strides) const
Matrix< Scalar, Dynamic, Dynamic > C
void random(int num_iters)
void StartBenchmarkTiming()
void StopBenchmarkTiming()
void rowReduction(int num_iters)
void shuffling(int num_iters)
void SetBenchmarkFlopsProcessed(int64_t)
BenchmarkSuite(const Device &device, size_t m, size_t k)
void coeffWiseOp(int num_iters)
void broadcasting(int num_iters)
broadcast_trivial broadcast(const std::array< buffer_info, N > &buffers, ssize_t &ndim, std::vector< ssize_t > &shape)