10 #define EIGEN_USE_THREADS 15 #include <Eigen/CXX11/Tensor> 29 Eigen::ThreadPool tp(internal::random<int>(3, 11));
30 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
31 out.
device(thread_pool_device) = in1 + in2 * 3.14f;
33 for (
int i = 0; i < 2; ++i) {
34 for (
int j = 0; j < 3; ++j) {
35 for (
int k = 0; k < 7; ++k) {
36 VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14
f);
52 Eigen::ThreadPool tp(internal::random<int>(3, 11));
53 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
54 out.
device(thread_pool_device) = in1;
55 out.
device(thread_pool_device) += in2 * 3.14f;
57 for (
int i = 0; i < 2; ++i) {
58 for (
int j = 0; j < 3; ++j) {
59 for (
int k = 0; k < 7; ++k) {
60 VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14
f);
66 template<
int DataLayout>
80 typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
81 MapXf m_left(t_left.
data(), 1500, 1147);
82 MapXf m_right(t_right.
data(), 1147, 1400);
83 Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);
85 Eigen::ThreadPool tp(4);
86 Eigen::ThreadPoolDevice thread_pool_device(&tp, 4);
89 t_result.
device(thread_pool_device) = t_left.contract(t_right, dims);
90 m_result = m_left * m_right;
92 for (ptrdiff_t i = 0; i < t_result.
size(); i++) {
93 VERIFY(&t_result.
data()[i] != &m_result.data()[i]);
94 if (fabsf(t_result(i) - m_result(i)) < 1e-4
f) {
97 if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4
f)) {
100 std::cout <<
"mismatch detected at index " << i <<
": " << t_result(i)
101 <<
" vs " << m_result(i) << std::endl;
106 template<
int DataLayout>
113 t_left = (t_left.constant(-0.5
f) + t_left.random()) * 2.0
f;
114 t_right = (t_right.constant(-0.6
f) + t_right.random()) * 2.0
f;
115 t_result = t_result.constant(NAN);
121 typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
122 MapXf m_left(t_left.
data(), 32, 500);
123 MapXf m_right(t_right.
data(), 32, 28*28);
124 Matrix<float, Dynamic, Dynamic, DataLayout> m_result(500, 28*28);
126 Eigen::ThreadPool tp(12);
127 Eigen::ThreadPoolDevice thread_pool_device(&tp, 12);
130 t_result.
device(thread_pool_device) = t_left.contract(t_right, dims);
131 m_result = m_left.transpose() * m_right;
133 for (ptrdiff_t i = 0; i < t_result.
size(); i++) {
135 if (fabsf(t_result.
data()[i] - m_result.data()[i]) >= 1e-4
f) {
136 std::cout <<
"mismatch detected at index " << i <<
" : " << t_result.
data()[i] <<
" vs " << m_result.data()[i] << std::endl;
142 t_left = (t_left.constant(-0.5
f) + t_left.random()) * 2.0
f;
143 t_result.
resize (1, 28*28);
144 t_result = t_result.constant(NAN);
145 t_result.
device(thread_pool_device) = t_left.contract(t_right, dims);
146 new(&m_left) MapXf(t_left.
data(), 32, 1);
147 m_result = m_left.transpose() * m_right;
148 for (ptrdiff_t i = 0; i < t_result.
size(); i++) {
150 if (fabsf(t_result.
data()[i] - m_result.data()[i]) >= 1e-4
f) {
151 std::cout <<
"mismatch detected: " << t_result.
data()[i] <<
" vs " << m_result.data()[i] << std::endl;
158 t_left = (t_left.constant(-0.5
f) + t_left.random()) * 2.0
f;
159 t_right = (t_right.constant(-0.6
f) + t_right.random()) * 2.0
f;
161 t_result = t_result.constant(NAN);
162 t_result.
device(thread_pool_device) = t_left.contract(t_right, dims);
163 new(&m_left) MapXf(t_left.
data(), 32, 500);
164 new(&m_right) MapXf(t_right.
data(), 32, 4);
165 m_result = m_left.transpose() * m_right;
166 for (ptrdiff_t i = 0; i < t_result.
size(); i++) {
168 if (fabsf(t_result.
data()[i] - m_result.data()[i]) >= 1e-4
f) {
169 std::cout <<
"mismatch detected: " << t_result.
data()[i] <<
" vs " << m_result.data()[i] << std::endl;
176 t_left = (t_left.constant(-0.5
f) + t_left.random()) * 2.0
f;
177 t_right = (t_right.constant(-0.6
f) + t_right.random()) * 2.0
f;
179 t_result = t_result.constant(NAN);
180 t_result.
device(thread_pool_device) = t_left.contract(t_right, dims);
181 new(&m_left) MapXf(t_left.
data(), 32, 1);
182 new(&m_right) MapXf(t_right.
data(), 32, 4);
183 m_result = m_left.transpose() * m_right;
184 for (ptrdiff_t i = 0; i < t_result.
size(); i++) {
186 if (fabsf(t_result.
data()[i] - m_result.data()[i]) >= 1e-4
f) {
187 std::cout <<
"mismatch detected: " << t_result.
data()[i] <<
" vs " << m_result.data()[i] << std::endl;
193 template<
int DataLayout>
195 int contract_size = internal::random<int>(1, 5000);
199 internal::random<int>(1, 100));
202 internal::random<int>(1, 37),
204 internal::random<int>(1, 51));
210 left += left.constant(1.5
f);
211 right += right.constant(1.5
f);
216 Eigen::ThreadPool tp(internal::random<int>(2, 11));
217 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
220 st_result = left.contract(right, dims);
223 tp_result.
device(thread_pool_device) = left.contract(right, dims);
226 for (ptrdiff_t i = 0; i < st_result.
size(); i++) {
230 VERIFY_IS_APPROX(st_result.
data()[i], tp_result.data()[i]);
236 template<
int DataLayout>
238 int contract_size1 = internal::random<int>(1, 500);
239 int contract_size2 = internal::random<int>(1, 500);
249 left += left.constant(1.5
f);
250 right += right.constant(1.5
f);
255 Eigen::ThreadPool tp(internal::random<int>(2, 11));
256 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
259 st_result = left.contract(right, dims);
262 tp_result.
device(thread_pool_device) = left.contract(right, dims);
268 VERIFY_IS_APPROX(st_result(), tp_result());
272 template<
int DataLayout>
274 const int num_threads = internal::random<int>(3, 11);
275 ThreadPool thread_pool(num_threads);
276 Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);
278 const int num_rows = internal::random<int>(13, 732);
279 const int num_cols = internal::random<int>(13, 732);
284 full_redux = t1.sum();
287 full_redux_tp.
device(thread_pool_device) = t1.sum();
291 VERIFY_IS_APPROX(full_redux(), full_redux_tp());
297 for (
int i = 0; i < 5; ++i) {
298 const int num_threads = internal::random<int>(3, 11);
299 Eigen::ThreadPool tp(num_threads);
300 Eigen::ThreadPoolDevice thread_pool_device(&tp, num_threads);
302 const int size = internal::random<int>(13, 7632);
305 std::vector<float> result(size);
306 thread_pool_device.memcpy(&result[0], t1.data(), size*
sizeof(float));
307 for (
int j = 0; j <
size; j++) {
308 VERIFY_IS_EQUAL(t1(j), result[j]);
316 Eigen::ThreadPool tp(2);
317 Eigen::ThreadPoolDevice device(&tp, 2);
322 template<
int DataLayout>
328 const int num_threads = internal::random<int>(2, 11);
329 ThreadPool threads(num_threads);
330 Eigen::ThreadPoolDevice device(&threads, num_threads);
333 array<ptrdiff_t, 4> shuffles = {{2,1,3,0}};
336 for (
int i = 0; i < 17; ++i) {
337 for (
int j = 0; j < 5; ++j) {
338 for (
int k = 0; k < 7; ++k) {
339 for (
int l = 0; l < 11; ++l) {
340 VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,j,l,i));
353 CALL_SUBTEST_2(test_multithread_contraction<ColMajor>());
354 CALL_SUBTEST_2(test_multithread_contraction<RowMajor>());
356 CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<ColMajor>());
357 CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>());
360 CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>());
361 CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>());
363 CALL_SUBTEST_4(test_full_contraction<ColMajor>());
364 CALL_SUBTEST_4(test_full_contraction<RowMajor>());
366 CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>());
367 CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>());
371 CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>());
372 CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>());
void test_contraction_corner_cases()
void test_multithread_contraction()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorShufflingOp< const Shuffle, const Tensor< Scalar_, NumIndices_, Options_, IndexType_ > > shuffle(const Shuffle &shuffle) const
static int f(const TensorMap< Tensor< int, 3 > > &tensor)
void test_multithread_random()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor< Scalar_, NumIndices_, Options_, IndexType_ > & setRandom()
static constexpr size_t size(Tuple< Args... > &)
Provides access to the number of elements in a tuple as a compile-time constant expression.
TensorDevice< Tensor< Scalar_, NumIndices_, Options_, IndexType_ >, DeviceType > device(const DeviceType &device)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const AbsReturnType abs() const
Tensor< float, 1 >::DimensionPair DimPair
void test_multithread_contraction_agrees_with_singlethread()
EIGEN_DEVICE_FUNC bool dimensions_match(Dims1 &dims1, Dims2 &dims2)
void test_multithread_compound_assignment()
EIGEN_DEVICE_FUNC void resize(const array< Index, NumIndices > &dimensions)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
void test_multithread_shuffle()
void test_multithreaded_reductions()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool() isnan(const half &a)
void test_full_contraction()
void test_multithread_elementwise()
void test_cxx11_tensor_thread_pool()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const