10 #include <Eigen/CXX11/Tensor>
16 template <
int NumDims>
19 for (
int i = 0;
i < NumDims; ++
i) {
20 dims[
i] = internal::random<Index>(
min,
max);
26 return internal::random<bool>()
27 ? internal::TensorBlockShapeType::kUniformAllDims
28 : internal::TensorBlockShapeType::kSkewedInnerDims;
31 template <
int NumDims>
33 return internal::random<size_t>(1, dims.
TotalSize());
36 template <
int Layout,
int NumDims>
43 for (
int i = NumDims - 1;
i > 0; --
i) {
44 const Index idx = output_index / output_strides[
i];
45 input_index += idx * input_strides[output_to_input_dim_map[
i]];
46 output_index -= idx * output_strides[
i];
49 output_index * input_strides[output_to_input_dim_map[0]];
51 for (
int i = 0;
i < NumDims - 1; ++
i) {
52 const Index idx = output_index / output_strides[
i];
53 input_index += idx * input_strides[output_to_input_dim_map[
i]];
54 output_index -= idx * output_strides[
i];
57 output_index * input_strides[output_to_input_dim_map[NumDims - 1]];
61 template <
typename T,
int NumDims,
int Layout>
63 using TensorBlockIO = internal::TensorBlockIO<T, Index, NumDims, Layout>;
64 using IODst =
typename TensorBlockIO::Dst;
65 using IOSrc =
typename TensorBlockIO::Src;
76 using TensorBlockMapper =
77 internal::TensorBlockMapper<NumDims, Layout, Index>;
78 TensorBlockMapper block_mapper(
85 auto input_strides = internal::strides<Layout>(dims);
86 auto output_strides = internal::strides<Layout>(dims);
88 const T* input_data = input.
data();
89 T* output_data = output.
data();
90 T* block_data =
block.data();
92 for (
int i = 0;
i < block_mapper.blockCount(); ++
i) {
93 auto desc = block_mapper.blockDescriptor(
i);
95 auto blk_dims =
desc.dimensions();
96 auto blk_strides = internal::strides<Layout>(blk_dims);
100 IODst dst(blk_dims, blk_strides, block_data, 0);
101 IOSrc src(input_strides, input_data,
desc.offset());
103 TensorBlockIO::Copy(dst, src);
108 IODst dst(blk_dims, output_strides, output_data,
desc.offset());
109 IOSrc src(blk_strides, block_data, 0);
111 TensorBlockIO::Copy(dst, src);
120 template <
typename T,
int NumDims,
int Layout>
128 std::vector<int> shuffle;
130 for (
int i = 0;
i < NumDims; ++
i) shuffle.push_back(
i);
131 std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937(
g_seed));
136 for (
Index i = 0;
i < NumDims; ++
i) {
137 output_tensor_dims[shuffle[
i]] = dims[
i];
138 input_to_output_dim_map[
i] = shuffle[
i];
139 output_to_input_dim_map[shuffle[
i]] =
i;
147 using TensorBlockMapper =
148 internal::TensorBlockMapper<NumDims, Layout, Index>;
149 TensorBlockMapper block_mapper(output_tensor_dims,
158 auto input_strides = internal::strides<Layout>(dims);
159 auto output_strides = internal::strides<Layout>(output_tensor_dims);
161 const T* input_data = input.
data();
162 T* output_data = output.
data();
163 T* block_data =
block.data();
165 for (
Index i = 0;
i < block_mapper.blockCount(); ++
i) {
166 auto desc = block_mapper.blockDescriptor(
i);
168 const Index first_coeff_index = GetInputIndex<Layout, NumDims>(
169 desc.offset(), output_to_input_dim_map, input_strides,
174 using TensorBlockIO = internal::TensorBlockIO<T, Index, NumDims, Layout>;
175 using IODst =
typename TensorBlockIO::Dst;
176 using IOSrc =
typename TensorBlockIO::Src;
178 auto blk_dims =
desc.dimensions();
179 auto blk_strides = internal::strides<Layout>(blk_dims);
183 IODst dst(blk_dims, blk_strides, block_data, 0);
184 IOSrc src(input_strides, input_data, first_coeff_index);
188 for (
int j = 0;
j < NumDims; ++
j)
189 dim_map[
j] =
static_cast<int>(output_to_input_dim_map[
j]);
190 TensorBlockIO::Copy(dst, src, dim_map);
195 auto dst_dims = blk_dims;
196 for (
int out_dim = 0; out_dim < NumDims; ++out_dim) {
197 dst_dims[output_to_input_dim_map[out_dim]] = blk_dims[out_dim];
201 IODst dst(dst_dims, input_strides, output_data, first_coeff_index);
202 IOSrc src(blk_strides, block_data, 0);
206 for (
int j = 0;
j < NumDims; ++
j)
207 dim_map[
j] =
static_cast<int>(input_to_output_dim_map[
j]);
208 TensorBlockIO::Copy(dst, src, dim_map);
220 template <
int Layout>
226 block_to_tensor_dim[0] = 2;
227 block_to_tensor_dim[1] = 1;
228 block_to_tensor_dim[2] = 0;
230 auto tensor_strides = internal::strides<Layout>(tensor_dims);
231 auto block_strides = internal::strides<Layout>(block_dims);
237 float* tensor_data = tensor.
data();
238 float* block_data =
block.data();
240 using TensorBlockIO = internal::TensorBlockIO<float, Index, 3, Layout>;
241 using IODst =
typename TensorBlockIO::Dst;
242 using IOSrc =
typename TensorBlockIO::Src;
245 IODst dst(block_dims, block_strides, block_data, 0);
246 IOSrc src(tensor_strides, tensor_data, 0);
248 TensorBlockIO::Copy(dst, src, block_to_tensor_dim);
253 for (
Index d0 = 0; d0 < tensor_dims[0]; ++d0) {
254 for (
Index d1 = 0; d1 < tensor_dims[1]; ++d1) {
256 float block_value = block_tensor(
d2, d1, d0);
257 float tensor_value = tensor_tensor(d0, d1,
d2);
267 template <
int Layout>
273 block_to_tensor_dim[0] = 0;
274 block_to_tensor_dim[1] = 1;
275 block_to_tensor_dim[2] = 3;
276 block_to_tensor_dim[3] = 2;
278 auto tensor_strides = internal::strides<Layout>(tensor_dims);
279 auto block_strides = internal::strides<Layout>(block_dims);
285 float* tensor_data = tensor.
data();
286 float* block_data =
block.data();
288 using TensorBlockIO = internal::TensorBlockIO<float, Index, 4, Layout>;
289 using IODst =
typename TensorBlockIO::Dst;
290 using IOSrc =
typename TensorBlockIO::Src;
293 IODst dst(block_dims, block_strides, block_data, 0);
294 IOSrc src(tensor_strides, tensor_data, 0);
296 TensorBlockIO::Copy(dst, src, block_to_tensor_dim);
301 for (
Index d0 = 0; d0 < tensor_dims[0]; ++d0) {
302 for (
Index d1 = 0; d1 < tensor_dims[1]; ++d1) {
304 for (
Index d3 = 0; d3 < tensor_dims[3]; ++d3) {
305 float block_value = block_tensor(d0, d1, d3,
d2);
306 float tensor_value = tensor_tensor(d0, d1,
d2, d3);
314 template <
int Layout>
319 input_tensor_dims[0] = 1;
320 input_tensor_dims[2] = 1;
321 input_tensor_dims[4] = 1;
328 auto input_tensor_strides = internal::strides<Layout>(input_tensor_dims);
329 auto output_tensor_strides = internal::strides<Layout>(output_tensor_dims);
331 auto input_tensor_strides_with_zeros = input_tensor_strides;
332 input_tensor_strides_with_zeros[0] = 0;
333 input_tensor_strides_with_zeros[2] = 0;
334 input_tensor_strides_with_zeros[4] = 0;
339 using TensorBlockIO = internal::TensorBlockIO<float, Index, 5, Layout>;
340 using IODst =
typename TensorBlockIO::Dst;
341 using IOSrc =
typename TensorBlockIO::Src;
344 IODst dst(output_tensor_dims, output_tensor_strides, output.
data(), 0);
345 IOSrc src(input_tensor_strides_with_zeros, input.
data(), 0);
346 TensorBlockIO::Copy(dst, src);
348 for (
int i = 0;
i < output_tensor_dims[0]; ++
i) {
349 for (
int j = 0;
j < output_tensor_dims[1]; ++
j) {
350 for (
int k = 0; k < output_tensor_dims[2]; ++k) {
351 for (
int l = 0;
l < output_tensor_dims[3]; ++
l) {
352 for (
int m = 0;
m < output_tensor_dims[4]; ++
m) {
353 float input_value = input(0,
j, 0,
l, 0);
354 float output_value = output(
i,
j, k,
l,
m);
363 template <
int Layout>
365 using TensorBlockIO = internal::TensorBlockIO<float, Index, 5, Layout>;
366 using IODst =
typename TensorBlockIO::Dst;
367 using IOSrc =
typename TensorBlockIO::Src;
372 auto strides = internal::strides<Layout>(block_sizes);
382 TensorBlockIO::Copy(dst, src);
392 auto strides = internal::strides<Layout>(block_sizes);
402 TensorBlockIO::Copy(dst, src);
// CALL_SUBTESTS(NAME): runs one templated test over a fixed grid of
// instantiations.
//
// NAME must name a template that accepts the arguments
// <scalar type, number of dimensions, layout> in that order and is callable
// with no arguments. The macro expands to 16 CALL_SUBTEST invocations
// covering:
//   - scalar types: float, bool
//   - ranks:        1, 2, 4, 5  (rank 3 is not part of this list)
//   - layouts:      RowMajor, ColMajor
//
// Each instantiation is wrapped in an extra pair of parentheses so that the
// commas inside the template argument list are not split into separate macro
// arguments of CALL_SUBTEST.
//
// The final line has no trailing semicolon; the call site supplies it.
// NOTE(review): CALL_SUBTEST, RowMajor and ColMajor are not defined in this
// part of the file -- presumably they come from the test harness / Eigen
// headers; confirm before moving this macro.
410 #define CALL_SUBTESTS(NAME) \
411 CALL_SUBTEST((NAME<float, 1, RowMajor>())); \
412 CALL_SUBTEST((NAME<float, 2, RowMajor>())); \
413 CALL_SUBTEST((NAME<float, 4, RowMajor>())); \
414 CALL_SUBTEST((NAME<float, 5, RowMajor>())); \
415 CALL_SUBTEST((NAME<float, 1, ColMajor>())); \
416 CALL_SUBTEST((NAME<float, 2, ColMajor>())); \
417 CALL_SUBTEST((NAME<float, 4, ColMajor>())); \
418 CALL_SUBTEST((NAME<float, 5, ColMajor>())); \
419 CALL_SUBTEST((NAME<bool, 1, RowMajor>())); \
420 CALL_SUBTEST((NAME<bool, 2, RowMajor>())); \
421 CALL_SUBTEST((NAME<bool, 4, RowMajor>())); \
422 CALL_SUBTEST((NAME<bool, 5, RowMajor>())); \
423 CALL_SUBTEST((NAME<bool, 1, ColMajor>())); \
424 CALL_SUBTEST((NAME<bool, 2, ColMajor>())); \
425 CALL_SUBTEST((NAME<bool, 4, ColMajor>())); \
426 CALL_SUBTEST((NAME<bool, 5, ColMajor>()))
433 CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_do_not_squeeze<RowMajor>());
434 CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_do_not_squeeze<ColMajor>());
436 CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_squeeze<RowMajor>());
437 CALL_SUBTEST(test_block_io_copy_using_reordered_dimensions_squeeze<ColMajor>());