cxx11_tensor_thread_pool.cpp
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_USE_THREADS


#include "main.h"
#include <iostream>
#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;


void test_multithread_elementwise()
{
  Tensor<float, 3> in1(2,3,7);
  Tensor<float, 3> in2(2,3,7);
  Tensor<float, 3> out(2,3,7);

  in1.setRandom();
  in2.setRandom();

  Eigen::ThreadPool tp(internal::random<int>(3, 11));
  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
  out.device(thread_pool_device) = in1 + in2 * 3.14f;

  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      for (int k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f);
      }
    }
  }
}


void test_multithread_compound_assignment()
{
  Tensor<float, 3> in1(2,3,7);
  Tensor<float, 3> in2(2,3,7);
  Tensor<float, 3> out(2,3,7);

  in1.setRandom();
  in2.setRandom();

  Eigen::ThreadPool tp(internal::random<int>(3, 11));
  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
  out.device(thread_pool_device) = in1;
  out.device(thread_pool_device) += in2 * 3.14f;

  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      for (int k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f);
      }
    }
  }
}

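// Contracting index pairs (2,0) and (3,1) collapses t_left (30x50x37x31) into a
// 1500x1147 matrix and t_right (37x31x70x2x10) into an 1147x1400 matrix, so the
// threaded tensor contraction can be checked against a plain matrix product.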
template<int DataLayout>
void test_multithread_contraction()
{
  Tensor<float, 4, DataLayout> t_left(30, 50, 37, 31);
  Tensor<float, 5, DataLayout> t_right(37, 31, 70, 2, 10);
  Tensor<float, 5, DataLayout> t_result(30, 50, 70, 2, 10);

  t_left.setRandom();
  t_right.setRandom();

  // this contraction should be equivalent to a single matrix multiplication
  typedef Tensor<float, 1>::DimensionPair DimPair;
  Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});

  typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
  MapXf m_left(t_left.data(), 1500, 1147);
  MapXf m_right(t_right.data(), 1147, 1400);
  Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);

  Eigen::ThreadPool tp(4);
  Eigen::ThreadPoolDevice thread_pool_device(&tp, 4);

  // compute results by separate methods
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  m_result = m_left * m_right;

  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    VERIFY(&t_result.data()[i] != &m_result.data()[i]);
    if (fabsf(t_result(i) - m_result(i)) < 1e-4f) {
      continue;
    }
    if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) {
      continue;
    }
    std::cout << "mismatch detected at index " << i << ": " << t_result(i)
              << " vs " << m_result(i) << std::endl;
    assert(false);
  }
}

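// Corner cases: contractions whose output degenerates to a single row, a single
// column, or a single coefficient. The result is pre-filled with NaN so that any
// coefficient the threaded kernel fails to write is caught immediately.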
template<int DataLayout>
void test_contraction_corner_cases()
{
  Tensor<float, 2, DataLayout> t_left(32, 500);
  Tensor<float, 2, DataLayout> t_right(32, 28*28);
  Tensor<float, 2, DataLayout> t_result(500, 28*28);

  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
  t_result = t_result.constant(NAN);

  // this contraction should be equivalent to a single matrix multiplication
  typedef Tensor<float, 1>::DimensionPair DimPair;
  Eigen::array<DimPair, 1> dims{{DimPair(0, 0)}};

  typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
  MapXf m_left(t_left.data(), 32, 500);
  MapXf m_right(t_right.data(), 32, 28*28);
  Matrix<float, Dynamic, Dynamic, DataLayout> m_result(500, 28*28);

  Eigen::ThreadPool tp(12);
  Eigen::ThreadPoolDevice thread_pool_device(&tp, 12);

  // compute results by separate methods
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  m_result = m_left.transpose() * m_right;

  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!(numext::isnan)(t_result.data()[i]));
    if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
      std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
      assert(false);
    }
  }

  t_left.resize(32, 1);
  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_result.resize(1, 28*28);
  t_result = t_result.constant(NAN);
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  new(&m_left) MapXf(t_left.data(), 32, 1);
  m_result = m_left.transpose() * m_right;
  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!(numext::isnan)(t_result.data()[i]));
    if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
      std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
      assert(false);
    }
  }

  t_left.resize(32, 500);
  t_right.resize(32, 4);
  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
  t_result.resize(500, 4);
  t_result = t_result.constant(NAN);
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  new(&m_left) MapXf(t_left.data(), 32, 500);
  new(&m_right) MapXf(t_right.data(), 32, 4);
  m_result = m_left.transpose() * m_right;
  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!(numext::isnan)(t_result.data()[i]));
    if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
      std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
      assert(false);
    }
  }

  t_left.resize(32, 1);
  t_right.resize(32, 4);
  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
  t_result.resize(1, 4);
  t_result = t_result.constant(NAN);
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  new(&m_left) MapXf(t_left.data(), 32, 1);
  new(&m_right) MapXf(t_right.data(), 32, 4);
  m_result = m_left.transpose() * m_right;
  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!(numext::isnan)(t_result.data()[i]));
    if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
      std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
      assert(false);
    }
  }
}

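// Contract randomly sized tensors once on the thread pool and once on the default
// (single threaded) device; both evaluations must agree. Coefficients whose
// difference is tiny are skipped, matching the precision note inside the loop.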
template<int DataLayout>
void test_multithread_contraction_agrees_with_singlethread() {
  int contract_size = internal::random<int>(1, 5000);

  Tensor<float, 3, DataLayout> left(internal::random<int>(1, 80),
                                    contract_size,
                                    internal::random<int>(1, 100));

  Tensor<float, 4, DataLayout> right(internal::random<int>(1, 25),
                                     internal::random<int>(1, 37),
                                     contract_size,
                                     internal::random<int>(1, 51));

  left.setRandom();
  right.setRandom();

  // add constants to shift values away from 0 for more precision
  left += left.constant(1.5f);
  right += right.constant(1.5f);

  typedef Tensor<float, 1>::DimensionPair DimPair;
  Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}});

  Eigen::ThreadPool tp(internal::random<int>(2, 11));
  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));

  Tensor<float, 5, DataLayout> st_result;
  st_result = left.contract(right, dims);

  Tensor<float, 5, DataLayout> tp_result(st_result.dimensions());
  tp_result.device(thread_pool_device) = left.contract(right, dims);

  VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
  for (ptrdiff_t i = 0; i < st_result.size(); i++) {
    // if both of the values are very small, then do nothing (because the test will fail
    // due to numerical precision issues when values are small)
    if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
      VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
    }
  }
}


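// Full contraction: both dimensions of the 2-D operands are contracted away,
// leaving a rank-0 (scalar) result that is again compared against the
// single-threaded evaluation.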
template<int DataLayout>
void test_full_contraction() {
  int contract_size1 = internal::random<int>(1, 500);
  int contract_size2 = internal::random<int>(1, 500);

  Tensor<float, 2, DataLayout> left(contract_size1,
                                    contract_size2);
  Tensor<float, 2, DataLayout> right(contract_size1,
                                     contract_size2);
  left.setRandom();
  right.setRandom();

  // add constants to shift values away from 0 for more precision
  left += left.constant(1.5f);
  right += right.constant(1.5f);

  typedef Tensor<float, 1>::DimensionPair DimPair;
  Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}});

  Eigen::ThreadPool tp(internal::random<int>(2, 11));
  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));

  Tensor<float, 0, DataLayout> st_result;
  st_result = left.contract(right, dims);

  Tensor<float, 0, DataLayout> tp_result;
  tp_result.device(thread_pool_device) = left.contract(right, dims);

  VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
  // if both of the values are very small, then do nothing (because the test will fail
  // due to numerical precision issues when values are small)
  if (numext::abs(st_result() - tp_result()) >= 1e-4f) {
    VERIFY_IS_APPROX(st_result(), tp_result());
  }
}

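// Sum-reduce a random matrix to a rank-0 tensor on the thread pool and verify it
// matches the single-threaded t1.sum().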
template<int DataLayout>
void test_multithreaded_reductions() {
  const int num_threads = internal::random<int>(3, 11);
  ThreadPool thread_pool(num_threads);
  Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);

  const int num_rows = internal::random<int>(13, 732);
  const int num_cols = internal::random<int>(13, 732);
  Tensor<float, 2, DataLayout> t1(num_rows, num_cols);
  t1.setRandom();

  Tensor<float, 0, DataLayout> full_redux;
  full_redux = t1.sum();

  Tensor<float, 0, DataLayout> full_redux_tp;
  full_redux_tp.device(thread_pool_device) = t1.sum();

  // Check that the single threaded and the multi threaded reductions return
  // the same result.
  VERIFY_IS_APPROX(full_redux(), full_redux_tp());
}


void test_memcpy() {

  for (int i = 0; i < 5; ++i) {
    const int num_threads = internal::random<int>(3, 11);
    Eigen::ThreadPool tp(num_threads);
    Eigen::ThreadPoolDevice thread_pool_device(&tp, num_threads);

    const int size = internal::random<int>(13, 7632);
    Tensor<float, 1> t1(size);
    t1.setRandom();
    std::vector<float> result(size);
    thread_pool_device.memcpy(&result[0], t1.data(), size*sizeof(float));
    for (int j = 0; j < size; j++) {
      VERIFY_IS_EQUAL(t1(j), result[j]);
    }
  }
}


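// Fill a large 1-D tensor with device-generated random values; there is no
// numerical check here, the test only verifies that a random expression can be
// evaluated on the thread pool.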
void test_multithread_random()
{
  Eigen::ThreadPool tp(2);
  Eigen::ThreadPoolDevice device(&tp, 2);
  Tensor<float, 1> t(1 << 20);
  t.device(device) = t.random<Eigen::internal::NormalRandomGenerator<float>>();
}

template<int DataLayout>
void test_multithread_shuffle()
{
  Tensor<float, 4, DataLayout> tensor(17,5,7,11);
  tensor.setRandom();

  const int num_threads = internal::random<int>(2, 11);
  ThreadPool threads(num_threads);
  Eigen::ThreadPoolDevice device(&threads, num_threads);

  Tensor<float, 4, DataLayout> shuffle(7,5,11,17);
  array<ptrdiff_t, 4> shuffles = {{2,1,3,0}};
  shuffle.device(device) = tensor.shuffle(shuffles);

  for (int i = 0; i < 17; ++i) {
    for (int j = 0; j < 5; ++j) {
      for (int k = 0; k < 7; ++k) {
        for (int l = 0; l < 11; ++l) {
          VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,j,l,i));
        }
      }
    }
  }
}


void test_cxx11_tensor_thread_pool()
{
  CALL_SUBTEST_1(test_multithread_elementwise());
  CALL_SUBTEST_1(test_multithread_compound_assignment());

  CALL_SUBTEST_2(test_multithread_contraction<ColMajor>());
  CALL_SUBTEST_2(test_multithread_contraction<RowMajor>());

  CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<ColMajor>());
  CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>());

  // Exercise various cases that have been problematic in the past.
  CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>());
  CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>());

  CALL_SUBTEST_4(test_full_contraction<ColMajor>());
  CALL_SUBTEST_4(test_full_contraction<RowMajor>());

  CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>());
  CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>());

  CALL_SUBTEST_6(test_memcpy());
  CALL_SUBTEST_6(test_multithread_random());
  CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>());
  CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>());
}