#ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
#define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H

// Forward declaration.
template <typename Scalar, typename IndexType, int NumDims, int Layout>
class TensorBlockIO;

// Helper to compute strides for a densely stored buffer of the given
// dimensions.
template <int Layout, typename IndexType, int NumDims>
EIGEN_ALWAYS_INLINE DSizes<IndexType, NumDims> strides(
    const DSizes<IndexType, NumDims>& dimensions) {
  DSizes<IndexType, NumDims> strides;
  if (NumDims == 0) return strides;

  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    strides[0] = 1;
    for (int i = 1; i < NumDims; ++i) {
      strides[i] = strides[i - 1] * dimensions[i - 1];
    }
  } else {
    strides[NumDims - 1] = 1;
    for (int i = NumDims - 2; i >= 0; --i) {
      strides[i] = strides[i + 1] * dimensions[i + 1];
    }
  }

  return strides;
}

template <int Layout, typename IndexType, size_t NumDims>
EIGEN_ALWAYS_INLINE DSizes<IndexType, NumDims> strides(
    const Eigen::array<IndexType, NumDims>& dimensions) {
  return strides<Layout>(DSizes<IndexType, NumDims>(dimensions));
}

template <int Layout, std::ptrdiff_t... Indices>
EIGEN_STRONG_INLINE DSizes<std::ptrdiff_t, sizeof...(Indices)> strides(
    const Sizes<Indices...>& sizes) {
  return strides<Layout>(DSizes<std::ptrdiff_t, sizeof...(Indices)>(sizes));
}
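// A minimal usage sketch (not part of the library): dense column-major strides
// for a 2x3x4 shape computed with the helper above.
//
//   DSizes<Eigen::Index, 3> dims(2, 3, 4);
//   DSizes<Eigen::Index, 3> dense_strides = strides<ColMajor>(dims);
//   // dense_strides == {1, 2, 6}; with RowMajor it would be {12, 4, 1}.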
enum class TensorBlockShapeType { kUniformAllDims, kSkewedInnerDims };

// TensorBlockResourceRequirements: target block shape, target block size (in
// coefficients) and the cost of computing a single block coefficient.
struct TensorBlockResourceRequirements {
  TensorBlockShapeType shape_type;
  size_t size;
  TensorOpCost cost_per_coeff;

  TensorBlockResourceRequirements(TensorBlockShapeType shape_type_,
                                  size_t size_, TensorOpCost cost_)
      : shape_type(shape_type_), size(size_), cost_per_coeff(cost_) {}

  template <typename Scalar>
  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements withShapeAndSize(
      TensorBlockShapeType shape_type, size_t size_in_bytes,
      TensorOpCost cost) {
    const size_t size = numext::maxi(size_t(1), size_in_bytes / sizeof(Scalar));
    return {shape_type, size, cost};
  }

  template <typename Scalar>
  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements withShapeAndSize(
      TensorBlockShapeType shape_type, size_t size_in_bytes) {
    // Default cost: one load and one store per output coefficient.
    return withShapeAndSize<Scalar>(shape_type, size_in_bytes,
                                    {/*bytes_loaded=*/sizeof(Scalar),
                                     /*bytes_stored=*/sizeof(Scalar),
                                     /*compute_cycles=*/0});
  }

  template <typename Scalar>
  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements skewed(
      size_t size_in_bytes) {
    return withShapeAndSize<Scalar>(TensorBlockShapeType::kSkewedInnerDims,
                                    size_in_bytes);
  }

  template <typename Scalar>
  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements uniform(
      size_t size_in_bytes) {
    return withShapeAndSize<Scalar>(TensorBlockShapeType::kUniformAllDims,
                                    size_in_bytes);
  }

  EIGEN_DEVICE_FUNC TensorBlockResourceRequirements& addCostPerCoeff(
      TensorOpCost cost) {
    cost_per_coeff += cost;
    return *this;
  }

  // ...

  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost merge(
      TensorOpCost lhs_cost, TensorOpCost rhs_cost) {
    return lhs_cost + rhs_cost;
  }
};
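// Usage sketch (a hypothetical evaluator, not library code): request roughly
// 48KB blocks skewed towards the inner dimension and account for extra
// per-coefficient work; requirements coming from different sub-expressions
// are combined with TensorBlockResourceRequirements::merge().
//
//   TensorBlockResourceRequirements req =
//       TensorBlockResourceRequirements::skewed<float>(48 * 1024);
//   req.addCostPerCoeff({/*bytes_loaded=*/0, /*bytes_stored=*/0,
//                        /*compute_cycles=*/8});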
// TensorBlockDescriptor specifies a block offset within a tensor and the block
// sizes along each of the tensor dimensions.
template <int NumDims, typename IndexType = Eigen::Index>
class TensorBlockDescriptor {
 public:
  typedef DSizes<IndexType, NumDims> Dimensions;

  // Destination buffer that a block can (optionally) be materialized into.
  class DestinationBuffer {
   public:
    enum DestinationBufferKind : int { kEmpty, kContiguous, kStrided };

    template <typename Scalar>
    Scalar* data() const {
      eigen_assert(m_data_type_size == sizeof(Scalar));
      return static_cast<Scalar*>(m_data);
    }

    const Dimensions& strides() const { return m_strides; }
    const DestinationBufferKind& kind() const { return m_kind; }

   private:
    friend class TensorBlockDescriptor;

    DestinationBuffer() : m_data(NULL), m_data_type_size(0), m_kind(kEmpty) {}

    template <typename Scalar>
    DestinationBuffer(Scalar* data, const Dimensions& strides,
                      DestinationBufferKind kind)
        : m_data(static_cast<void*>(data)),
          m_data_type_size(sizeof(Scalar)),
          m_strides(strides),
          m_kind(kind) {}

    template <int Layout, typename Scalar>
    static DestinationBuffer make(const TensorBlockDescriptor& desc,
                                  Scalar* data, const Dimensions& strides) {
      return DestinationBuffer(data, strides, kind<Layout>(desc, strides));
    }

    template <int Layout>
    static DestinationBufferKind kind(const TensorBlockDescriptor& desc,
                                      const Dimensions& strides) {
      const Dimensions& desc_dims = desc.dimensions();
      const Dimensions& desc_strides = internal::strides<Layout>(desc_dims);
      for (int i = 0; i < NumDims; ++i) {
        if (desc_dims[i] == 1) continue;
        if (desc_strides[i] != strides[i]) return kStrided;
      }
      return kContiguous;
    }

    void* m_data;
    size_t m_data_type_size;
    Dimensions m_strides;
    DestinationBufferKind m_kind;
  };

  TensorBlockDescriptor(const IndexType offset, const Dimensions& dimensions,
                        const DestinationBuffer& destination)
      : m_offset(offset),
        m_dimensions(dimensions),
        m_destination(destination) {}

  TensorBlockDescriptor(const IndexType offset, const Dimensions& dimensions)
      : m_offset(offset),
        m_dimensions(dimensions),
        m_destination(DestinationBuffer()) {}

  IndexType offset() const { return m_offset; }
  const Dimensions& dimensions() const { return m_dimensions; }
  IndexType dimension(int index) const { return m_dimensions[index]; }
  IndexType size() const { return array_prod<IndexType>(m_dimensions); }

  const DestinationBuffer& destination() const { return m_destination; }

  template <int Layout, typename Scalar>
  void AddDestinationBuffer(Scalar* dst_base, const Dimensions& dst_strides) {
    eigen_assert(dst_base != NULL);
    m_destination =
        DestinationBuffer::template make<Layout>(*this, dst_base, dst_strides);
  }

  template <int Layout, typename Scalar, typename DstStridesIndexType>
  void AddDestinationBuffer(
      Scalar* dst_base,
      const DSizes<DstStridesIndexType, NumDims>& dst_strides) {
    // DSizes constructor will do index type promotion if it is safe.
    AddDestinationBuffer<Layout>(dst_base, Dimensions(dst_strides));
  }

  TensorBlockDescriptor& DropDestinationBuffer() {
    m_destination.m_data = NULL;
    m_destination.m_kind = DestinationBuffer::kEmpty;
    return *this;
  }

  bool HasDestinationBuffer() const {
    return m_destination.kind() != DestinationBuffer::kEmpty;
  }

  // ...

 private:
  const IndexType m_offset;
  const Dimensions m_dimensions;
  DestinationBuffer m_destination;
};
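// Usage sketch (hypothetical names): an evaluator that already owns an output
// buffer `out` with strides `out_strides` can attach it to a block descriptor
// so the block may be materialized directly into that buffer.
//
//   desc.template AddDestinationBuffer<Layout>(out + desc.offset(),
//                                              out_strides);
//   if (desc.HasDestinationBuffer()) {
//     // prepareStorage() below may now reuse `out` instead of scratch memory.
//   }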
// TensorBlockMapper is responsible for iterating over the blocks of a tensor.
template <int NumDims, int Layout, typename IndexType = Eigen::Index>
class TensorBlockMapper {
  typedef TensorBlockDescriptor<NumDims, IndexType> BlockDescriptor;

 public:
  typedef DSizes<IndexType, NumDims> Dimensions;

  TensorBlockMapper(const DSizes<IndexType, NumDims>& dimensions,
                    const TensorBlockResourceRequirements& requirements)
      : m_tensor_dimensions(dimensions), m_requirements(requirements) {
    // Compute block dimensions and the total number of blocks.
    InitializeBlockDimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockCount() const {
    return m_total_block_count;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockTotalSize() const {
    return m_block_dimensions.TotalSize();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DSizes<IndexType, NumDims>&
  blockDimensions() const {
    return m_block_dimensions;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockDescriptor
  blockDescriptor(IndexType block_index) const {
    static const bool isColMajor = Layout == static_cast<int>(ColMajor);

    IndexType offset = 0;
    DSizes<IndexType, NumDims> dimensions;

    if (NumDims == 0) return BlockDescriptor(offset, dimensions);

    // Iterate outer -> inner dimensions.
    for (int i = NumDims - 1; i >= 0; --i) {
      const int dim = isColMajor ? i : NumDims - i - 1;

      const IndexType idx = block_index / m_block_strides[dim];
      block_index -= idx * m_block_strides[dim];

      const IndexType coord = idx * m_block_dimensions[dim];
      dimensions[dim] = numext::mini(m_tensor_dimensions[dim] - coord,
                                     m_block_dimensions[dim]);
      offset += coord * m_tensor_strides[dim];
    }

    return {offset, dimensions};
  }
 private:
  void InitializeBlockDimensions() {
    // Requested block shape and size.
    const TensorBlockShapeType shape_type = m_requirements.shape_type;
    IndexType target_block_size =
        numext::maxi<IndexType>(1, static_cast<IndexType>(m_requirements.size));

    IndexType tensor_size = m_tensor_dimensions.TotalSize();

    // Corner case: one of the dimensions is zero. Use 1x1x...x1 blocks.
    if (tensor_size == 0) {
      for (int i = 0; i < NumDims; ++i) {
        m_block_dimensions[i] = 1;
      }
      m_total_block_count = 0;
      return;
    }

    // If the tensor fits into the target block size, evaluate it as a single
    // block.
    if (tensor_size <= target_block_size) {
      m_block_dimensions = m_tensor_dimensions;
      m_total_block_count = 1;
      // The only valid block index is `0`, so real strides are not needed.
      for (int i = 0; i < NumDims; ++i) {
        m_tensor_strides[i] = 0;
        m_block_strides[i] = 1;
      }
      return;
    }

    static const bool isColMajor = Layout == static_cast<int>(ColMajor);

    if (shape_type == TensorBlockShapeType::kSkewedInnerDims) {
      // Block shape skewed towards the inner dimension.
      IndexType coeff_to_allocate = target_block_size;

      for (int i = 0; i < NumDims; ++i) {
        const int dim = isColMajor ? i : NumDims - i - 1;
        m_block_dimensions[dim] =
            numext::mini(coeff_to_allocate, m_tensor_dimensions[dim]);
        coeff_to_allocate = divup(
            coeff_to_allocate,
            numext::maxi(static_cast<IndexType>(1), m_block_dimensions[dim]));
      }

    } else if (shape_type == TensorBlockShapeType::kUniformAllDims) {
      // The tensor does not fit within the target block size: compute block
      // dimension sizes based on a "square" dimension size target.
      const IndexType dim_size_target = convert_index<IndexType>(
          std::pow(static_cast<float>(target_block_size),
                   1.0f / static_cast<float>(m_block_dimensions.rank())));

      for (int i = 0; i < NumDims; ++i) {
        m_block_dimensions[i] =
            numext::mini(dim_size_target, m_tensor_dimensions[i]);
      }

      // Add any unallocated coefficients to the inner dimension(s).
      IndexType total_size = m_block_dimensions.TotalSize();
      for (int i = 0; i < NumDims; ++i) {
        const int dim = isColMajor ? i : NumDims - i - 1;

        if (m_block_dimensions[dim] < m_tensor_dimensions[dim]) {
          const IndexType total_size_other_dims =
              total_size / m_block_dimensions[dim];
          const IndexType alloc_avail =
              divup<IndexType>(target_block_size, total_size_other_dims);
          if (alloc_avail == m_block_dimensions[dim]) {
            // Insufficient excess coefficients to allocate.
            break;
          }
          m_block_dimensions[dim] =
              numext::mini(m_tensor_dimensions[dim], alloc_avail);
          total_size = total_size_other_dims * m_block_dimensions[dim];
        }
      }

    } else {
      eigen_assert(false);  // unknown block shape type
    }

    eigen_assert(m_block_dimensions.TotalSize() >=
                 numext::mini<IndexType>(target_block_size,
                                         m_tensor_dimensions.TotalSize()));

    // Calculate block counts by dimension and the total block count.
    DSizes<IndexType, NumDims> block_count;
    for (int i = 0; i < NumDims; ++i) {
      block_count[i] = divup(m_tensor_dimensions[i], m_block_dimensions[i]);
    }
    m_total_block_count = array_prod(block_count);

    // Calculate block strides (used for enumerating blocks).
    m_tensor_strides = strides<Layout>(m_tensor_dimensions);
    m_block_strides = strides<Layout>(block_count);
  }

  DSizes<IndexType, NumDims> m_tensor_dimensions;
  TensorBlockResourceRequirements m_requirements;

  DSizes<IndexType, NumDims> m_block_dimensions;
  IndexType m_total_block_count;

  DSizes<IndexType, NumDims> m_tensor_strides;
  DSizes<IndexType, NumDims> m_block_strides;
};
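// Usage sketch (hypothetical evaluator code): enumerate the blocks of a
// 3-dimensional float tensor and visit each block descriptor.
//
//   TensorBlockMapper<3, ColMajor> mapper(
//       tensor_dims,
//       TensorBlockResourceRequirements::skewed<float>(48 * 1024));
//   for (Eigen::Index b = 0; b < mapper.blockCount(); ++b) {
//     TensorBlockDescriptor<3> desc = mapper.blockDescriptor(b);
//     // desc.offset() and desc.dimensions() identify this block.
//   }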
// TensorBlockScratchAllocator owns temporary scratch buffers shared between
// block evaluations on the same device.
template <typename Device>
class TensorBlockScratchAllocator {
 public:
  explicit TensorBlockScratchAllocator(const Device& device)
      : m_device(device), m_allocation_index(0) {}

  ~TensorBlockScratchAllocator() {
    for (size_t i = 0; i < m_allocations.size(); ++i) {
      m_device.deallocate(m_allocations[i].ptr);
    }
  }

  void* allocate(size_t size) {
    if (m_allocations.capacity() == 0) m_allocations.reserve(8);

    // Check if we already have an existing allocation at the current index.
    const int num_allocations = static_cast<int>(m_allocations.size());
    const bool has_allocation = m_allocation_index < num_allocations;

    // If the current allocation cannot fit the requested size, replace it
    // with a larger allocation.
    if (has_allocation && m_allocations[m_allocation_index].size < size) {
      m_device.deallocate(m_allocations[m_allocation_index].ptr);
      m_allocations[m_allocation_index].ptr = m_device.allocate(size);
      m_allocations[m_allocation_index].size = size;
    }

    // Make a new allocation if we do not have an existing one.
    if (!has_allocation) {
      Allocation allocation;
      allocation.ptr = m_device.allocate(size);
      allocation.size = size;
      m_allocations.push_back(allocation);
    }

    eigen_assert(m_allocations[m_allocation_index].size >= size);
    return m_allocations[m_allocation_index++].ptr;
  }

  void reset() { m_allocation_index = 0; }

 private:
  struct Allocation {
    void* ptr;
    size_t size;
  };

  const Device& m_device;
  int m_allocation_index;
  std::vector<Allocation> m_allocations;
};
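// Usage sketch: scratch memory is requested per block evaluation and reused
// across blocks on the same device (names here are illustrative only).
//
//   DefaultDevice device;
//   TensorBlockScratchAllocator<DefaultDevice> scratch(device);
//   void* tmp = scratch.allocate(block_size_in_bytes);
//   // ... evaluate one block into `tmp` ...
//   scratch.reset();  // reuse the same allocations for the next block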
enum TensorBlockKind {
  kExpr,                   // lazy expression block, must be assigned to output
  kView,                   // view into an existing memory buffer
  kMaterializedInScratch,  // materialized in scratch memory
  kMaterializedInOutput    // materialized directly in the output buffer
};

// Helper to get the Scalar type of a block expression (void maps to void).
template <typename XprType>
struct XprScalar {
  typedef typename XprType::Scalar type;
};
template <>
struct XprScalar<void> {
  typedef void type;
};

// TensorMaterializedBlock is a fully evaluated block of the original tensor,
// and XprType is just a TensorMap over the data.
template <typename Scalar, int NumDims, int Layout,
          typename IndexType = Eigen::Index>
class TensorMaterializedBlock {
 public:
  typedef DSizes<IndexType, NumDims> Dimensions;
  typedef TensorMap<const Tensor<Scalar, NumDims, Layout> > XprType;
  typedef internal::TensorBlockDescriptor<NumDims, IndexType> TensorBlockDesc;

  TensorMaterializedBlock(TensorBlockKind kind, const Scalar* data,
                          const Dimensions& dimensions, bool valid_expr = true)
      : m_kind(kind),
        m_data(data),
        m_dimensions(dimensions),
        m_expr(m_data, m_dimensions),
        m_valid_expr(valid_expr) {
    eigen_assert(m_kind == internal::TensorBlockKind::kView ||
                 m_kind == internal::TensorBlockKind::kMaterializedInScratch ||
                 m_kind == internal::TensorBlockKind::kMaterializedInOutput);
  }

  TensorBlockKind kind() const { return m_kind; }
  const XprType& expr() const {
    eigen_assert(m_valid_expr);
    return m_expr;
  }
  const Scalar* data() const { return m_data; }
  const Dimensions& dimensions() const { return m_dimensions; }

  // TensorMaterializedBlock can be backed by different types of storage:
  // destination buffer memory or scratch (temporary) memory.
  class Storage {
   public:
    Scalar* data() const { return m_data; }
    const Dimensions& dimensions() const { return m_dimensions; }
    const Dimensions& strides() const { return m_strides; }

    TensorMaterializedBlock AsTensorMaterializedBlock() const {
      return TensorMaterializedBlock(
          m_materialized_in_output
              ? internal::TensorBlockKind::kMaterializedInOutput
              : internal::TensorBlockKind::kMaterializedInScratch,
          m_data, m_dimensions, !m_strided_storage);
    }

   private:
    friend class TensorMaterializedBlock;

    Storage(Scalar* data, const Dimensions& dimensions,
            const Dimensions& strides, bool materialized_in_output,
            bool strided_storage)
        : m_data(data),
          m_dimensions(dimensions),
          m_strides(strides),
          m_materialized_in_output(materialized_in_output),
          m_strided_storage(strided_storage) {}

    Scalar* m_data;
    Dimensions m_dimensions;
    Dimensions m_strides;
    bool m_materialized_in_output;
    bool m_strided_storage;
  };
  // Creates a storage for the materialized block: either reuses the block
  // descriptor destination buffer, or allocates a new buffer with the scratch
  // allocator.
  template <typename TensorBlockScratch>
  EIGEN_STRONG_INLINE static Storage prepareStorage(
      TensorBlockDesc& desc, TensorBlockScratch& scratch,
      bool allow_strided_storage = false) {
    // Try to reuse the destination as an output block buffer.
    typedef typename TensorBlockDesc::DestinationBuffer DestinationBuffer;

    if (desc.destination().kind() == DestinationBuffer::kContiguous) {
      Scalar* buffer = desc.destination().template data<Scalar>();
      desc.DropDestinationBuffer();
      return Storage(buffer, desc.dimensions(),
                     internal::strides<Layout>(desc.dimensions()),
                     /*materialized_in_output=*/true,
                     /*strided_storage=*/false);

    } else if (desc.destination().kind() == DestinationBuffer::kStrided &&
               allow_strided_storage) {
      Scalar* buffer = desc.destination().template data<Scalar>();
      const Dimensions buffer_strides = desc.destination().strides();
      desc.DropDestinationBuffer();
      return Storage(buffer, desc.dimensions(), buffer_strides,
                     /*materialized_in_output=*/true,
                     /*strided_storage=*/true);

    } else {
      // Allocate a temporary buffer with the scratch allocator.
      void* mem = scratch.allocate(desc.size() * sizeof(Scalar));
      return Storage(static_cast<Scalar*>(mem), desc.dimensions(),
                     internal::strides<Layout>(desc.dimensions()),
                     /*materialized_in_output=*/false,
                     /*strided_storage=*/false);
    }
  }
  // Creates a materialized block for the given descriptor from a memory
  // buffer.
  template <typename DataDimensions, typename TensorBlockScratch>
  EIGEN_STRONG_INLINE static TensorMaterializedBlock materialize(
      const Scalar* data, const DataDimensions& data_dims,
      TensorBlockDesc& desc, TensorBlockScratch& scratch) {
    eigen_assert(array_size<DataDimensions>::value == desc.dimensions().size());

    // If the block covers a contiguous region of the underlying memory, we
    // can skip the block buffer allocation and construct a view into `data`.
    static const bool is_col_major = Layout == ColMajor;

    // Find out how many inner dimensions have a matching size.
    int num_matching_inner_dims = 0;
    for (int i = 0; i < NumDims; ++i) {
      int dim = is_col_major ? i : NumDims - i - 1;
      if (data_dims[dim] != desc.dimensions()[dim]) break;
      ++num_matching_inner_dims;
    }

    // All the outer dimensions must be of size `1`, except a single dimension
    // right before the matching inner dimensions.
    bool can_use_direct_access = true;
    for (int i = num_matching_inner_dims + 1; i < NumDims; ++i) {
      int dim = is_col_major ? i : NumDims - i - 1;
      if (desc.dimension(dim) != 1) {
        can_use_direct_access = false;
        break;
      }
    }

    if (can_use_direct_access) {
      const Scalar* block_start = data + desc.offset();
      return TensorMaterializedBlock(internal::TensorBlockKind::kView,
                                     block_start, desc.dimensions());

    } else {
      // Reuse the destination buffer or allocate new scratch storage.
      const Storage storage = prepareStorage(desc, scratch);

      typedef internal::TensorBlockIO<Scalar, IndexType, NumDims, Layout>
          TensorBlockIO;
      typedef typename TensorBlockIO::Dst TensorBlockIODst;
      typedef typename TensorBlockIO::Src TensorBlockIOSrc;

      TensorBlockIOSrc src(internal::strides<Layout>(Dimensions(data_dims)),
                           data, desc.offset());
      TensorBlockIODst dst(storage.dimensions(), storage.strides(),
                           storage.data());

      TensorBlockIO::Copy(dst, src);
      return storage.AsTensorMaterializedBlock();
    }
  }

 private:
  TensorBlockKind m_kind;
  const Scalar* m_data;
  Dimensions m_dimensions;
  XprType m_expr;
  bool m_valid_expr;
};
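// Usage sketch: block evaluators that wrap a raw coefficient buffer typically
// implement their block() method in terms of materialize(), which either
// returns a view into the buffer or copies the block into destination/scratch
// storage (m_data and m_dims here are illustrative member names).
//
//   return TensorMaterializedBlock::materialize(m_data, m_dims, desc, scratch);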
// TensorCwiseUnaryBlock is a lazy tensor expression block that applies a
// UnaryOp functor to the block produced by the underlying tensor expression.
template <typename UnaryOp, typename ArgTensorBlock>
class TensorCwiseUnaryBlock {
  static const bool NoArgBlockAccess =
      internal::is_void<typename ArgTensorBlock::XprType>::value;

 public:
  typedef typename conditional<
      NoArgBlockAccess, void,
      TensorCwiseUnaryOp<UnaryOp, const typename ArgTensorBlock::XprType> >::
      type XprType;

  typedef typename XprScalar<XprType>::type Scalar;

  TensorCwiseUnaryBlock(const ArgTensorBlock& arg_block, const UnaryOp& functor)
      : m_arg_block(arg_block), m_functor(functor) {}

  TensorBlockKind kind() const { return internal::TensorBlockKind::kExpr; }

  XprType expr() const { return XprType(m_arg_block.expr(), m_functor); }
  const Scalar* data() const { return NULL; }
  void cleanup() { m_arg_block.cleanup(); }

 private:
  ArgTensorBlock m_arg_block;
  UnaryOp m_functor;
};

// TensorCwiseBinaryBlock is a lazy tensor expression block that applies a
// BinaryOp functor to the blocks produced by the LHS and RHS expressions.
template <typename BinaryOp, typename LhsTensorBlock, typename RhsTensorBlock>
class TensorCwiseBinaryBlock {
  static const bool NoArgBlockAccess =
      internal::is_void<typename LhsTensorBlock::XprType>::value ||
      internal::is_void<typename RhsTensorBlock::XprType>::value;

 public:
  typedef typename conditional<
      NoArgBlockAccess, void,
      TensorCwiseBinaryOp<BinaryOp, const typename LhsTensorBlock::XprType,
                          const typename RhsTensorBlock::XprType> >::type
      XprType;

  typedef typename XprScalar<XprType>::type Scalar;

  TensorCwiseBinaryBlock(const LhsTensorBlock& left_block,
                         const RhsTensorBlock& right_block,
                         const BinaryOp& functor)
      : m_left_block(left_block),
        m_right_block(right_block),
        m_functor(functor) {}

  TensorBlockKind kind() const { return internal::TensorBlockKind::kExpr; }

  XprType expr() const {
    return XprType(m_left_block.expr(), m_right_block.expr(), m_functor);
  }

  const Scalar* data() const { return NULL; }

  void cleanup() {
    m_left_block.cleanup();
    m_right_block.cleanup();
  }

 private:
  LhsTensorBlock m_left_block;
  RhsTensorBlock m_right_block;
  BinaryOp m_functor;
};
// TensorUnaryExprBlock is a lazy tensor expression block that can construct
// an arbitrary tensor expression from a block of the underlying type (a
// generalization of TensorCwiseUnaryBlock for arbitrary expressions).
template <typename BlockFactory, typename ArgTensorBlock>
class TensorUnaryExprBlock {
  typedef typename ArgTensorBlock::XprType ArgXprType;
  static const bool NoArgBlockAccess = internal::is_void<ArgXprType>::value;

 public:
  typedef typename conditional<
      NoArgBlockAccess, void,
      typename BlockFactory::template XprType<ArgXprType>::type>::type XprType;

  typedef typename XprScalar<XprType>::type Scalar;

  TensorUnaryExprBlock(const ArgTensorBlock& arg_block,
                       const BlockFactory& factory)
      : m_arg_block(arg_block), m_factory(factory) {}

  TensorBlockKind kind() const { return internal::TensorBlockKind::kExpr; }
  XprType expr() const { return m_factory.expr(m_arg_block.expr()); }
  const Scalar* data() const { return NULL; }
  void cleanup() { m_arg_block.cleanup(); }

 private:
  ArgTensorBlock m_arg_block;
  BlockFactory m_factory;
};

// TensorTernaryExprBlock is a lazy tensor expression block that can construct
// an arbitrary tensor expression from three blocks of the underlying type.
template <typename BlockFactory, typename Arg1TensorBlock,
          typename Arg2TensorBlock, typename Arg3TensorBlock>
class TensorTernaryExprBlock {
  typedef typename Arg1TensorBlock::XprType Arg1XprType;
  typedef typename Arg2TensorBlock::XprType Arg2XprType;
  typedef typename Arg3TensorBlock::XprType Arg3XprType;

  static const bool NoArgBlockAccess = internal::is_void<Arg1XprType>::value ||
                                       internal::is_void<Arg2XprType>::value ||
                                       internal::is_void<Arg3XprType>::value;

 public:
  typedef typename conditional<
      NoArgBlockAccess, void,
      typename BlockFactory::template XprType<Arg1XprType, Arg2XprType,
                                              Arg3XprType>::type>::type XprType;

  typedef typename XprScalar<XprType>::type Scalar;

  TensorTernaryExprBlock(const Arg1TensorBlock& arg1_block,
                         const Arg2TensorBlock& arg2_block,
                         const Arg3TensorBlock& arg3_block,
                         const BlockFactory& factory)
      : m_arg1_block(arg1_block),
        m_arg2_block(arg2_block),
        m_arg3_block(arg3_block),
        m_factory(factory) {}

  TensorBlockKind kind() const { return internal::TensorBlockKind::kExpr; }
  XprType expr() const {
    return m_factory.expr(m_arg1_block.expr(), m_arg2_block.expr(),
                          m_arg3_block.expr());
  }
  const Scalar* data() const { return NULL; }

  void cleanup() {
    m_arg1_block.cleanup();
    m_arg2_block.cleanup();
    m_arg3_block.cleanup();
  }

 private:
  Arg1TensorBlock m_arg1_block;
  Arg2TensorBlock m_arg2_block;
  Arg3TensorBlock m_arg3_block;
  BlockFactory m_factory;
};
// StridedLinearBufferCopy copies data between two linear buffers with
// different strides, with vectorized fast paths for each stride combination.
template <typename Scalar, typename IndexType>
class StridedLinearBufferCopy {
  typedef typename packet_traits<Scalar>::type Packet;
  enum {
    Vectorizable = packet_traits<Scalar>::Vectorizable,
    PacketSize = packet_traits<Scalar>::size
  };

 public:
  // Specifying the copy kind statically lets the compiler specialize the
  // inner loop for each stride combination.
  enum class Kind {
    Linear = 0,       // src_stride == 1 && dst_stride == 1
    Scatter = 1,      // src_stride == 1 && dst_stride != 1
    FillLinear = 2,   // src_stride == 0 && dst_stride == 1
    FillScatter = 3,  // src_stride == 0 && dst_stride != 1
    Gather = 4,       // dst_stride == 1
    Random = 5        // everything else
  };

  struct Dst {
    Dst(IndexType o, IndexType s, Scalar* d) : offset(o), stride(s), data(d) {}

    IndexType offset;
    IndexType stride;
    Scalar* data;
  };

  struct Src {
    Src(IndexType o, IndexType s, const Scalar* d)
        : offset(o), stride(s), data(d) {}

    IndexType offset;
    IndexType stride;
    const Scalar* data;
  };

  template <typename StridedLinearBufferCopy::Kind kind>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(const Dst& dst,
                                                        const Src& src,
                                                        const size_t count) {
    Run<kind>(count, dst.offset, dst.stride, dst.data, src.offset, src.stride,
              src.data);
  }

 private:
  template <typename StridedLinearBufferCopy::Kind kind>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
      const IndexType count, const IndexType dst_offset,
      const IndexType dst_stride, Scalar* EIGEN_RESTRICT dst_data,
      const IndexType src_offset, const IndexType src_stride,
      const Scalar* EIGEN_RESTRICT src_data) {
    const Scalar* src = &src_data[src_offset];
    Scalar* dst = &dst_data[dst_offset];

    if (!Vectorizable) {
      for (Index i = 0; i < count; ++i) {
        dst[i * dst_stride] = src[i * src_stride];
      }
      return;
    }

    const IndexType vectorized_size = count - PacketSize;
    IndexType i = 0;

    if (kind == StridedLinearBufferCopy::Kind::Linear) {
      // Linear copy from `src` to `dst`.
      const IndexType unrolled_size = count - 4 * PacketSize;
      for (; i <= unrolled_size; i += 4 * PacketSize) {
        for (int j = 0; j < 4; ++j) {
          Packet p = ploadu<Packet>(src + i + j * PacketSize);
          pstoreu<Scalar, Packet>(dst + i + j * PacketSize, p);
        }
      }
      for (; i <= vectorized_size; i += PacketSize) {
        Packet p = ploadu<Packet>(src + i);
        pstoreu<Scalar, Packet>(dst + i, p);
      }
      for (; i < count; ++i) {
        dst[i] = src[i];
      }

    } else if (kind == StridedLinearBufferCopy::Kind::Scatter) {
      // Scatter from a linear `src` into a strided `dst`.
      for (; i <= vectorized_size; i += PacketSize) {
        Packet p = ploadu<Packet>(src + i);
        pscatter<Scalar, Packet>(dst + i * dst_stride, p, dst_stride);
      }
      for (; i < count; ++i) {
        dst[i * dst_stride] = src[i];
      }

    } else if (kind == StridedLinearBufferCopy::Kind::FillLinear) {
      // Fill a linear `dst` with the value at `*src`.
      const IndexType unrolled_size = count - 4 * PacketSize;
      Packet p = pload1<Packet>(src);
      for (; i <= unrolled_size; i += 4 * PacketSize) {
        for (int j = 0; j < 4; ++j) {
          pstoreu<Scalar, Packet>(dst + i + j * PacketSize, p);
        }
      }
      for (; i <= vectorized_size; i += PacketSize) {
        pstoreu<Scalar, Packet>(dst + i, p);
      }
      for (; i < count; ++i) {
        dst[i] = *src;
      }

    } else if (kind == StridedLinearBufferCopy::Kind::FillScatter) {
      // Scatter the value at `*src` into a strided `dst`.
      Packet p = pload1<Packet>(src);
      for (; i <= vectorized_size; i += PacketSize) {
        pscatter<Scalar, Packet>(dst + i * dst_stride, p, dst_stride);
      }
      for (; i < count; ++i) {
        dst[i * dst_stride] = *src;
      }

    } else if (kind == StridedLinearBufferCopy::Kind::Gather) {
      // Gather from a strided `src` into a linear `dst`.
      for (; i <= vectorized_size; i += PacketSize) {
        Packet p = pgather<Scalar, Packet>(src + i * src_stride, src_stride);
        pstoreu<Scalar, Packet>(dst + i, p);
      }
      for (; i < count; ++i) {
        dst[i] = src[i * src_stride];
      }

    } else if (kind == StridedLinearBufferCopy::Kind::Random) {
      // Arbitrary strides on both sides.
      for (; i < count; ++i) {
        dst[i * dst_stride] = src[i * src_stride];
      }
    } else {
      eigen_assert(false);
    }
  }
};
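// Usage sketch: copy `n` coefficients from a contiguous source into a strided
// destination, for example a column of a row-major matrix (dst_ptr, src_ptr,
// cols and n are illustrative names).
//
//   typedef StridedLinearBufferCopy<float, Eigen::Index> LinCopy;
//   LinCopy::Run<LinCopy::Kind::Scatter>(
//       LinCopy::Dst(/*offset=*/0, /*stride=*/cols, dst_ptr),
//       LinCopy::Src(/*offset=*/0, /*stride=*/1, src_ptr),
//       /*count=*/n);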
// TensorBlockIO copies data from a `src` tensor block to a `dst` tensor block,
// using the given strides on both sides and an optional dst->src dimension
// map.
template <typename Scalar, typename IndexType, int NumDims, int Layout>
class TensorBlockIO {
  static const bool IsColMajor = (Layout == ColMajor);

  typedef StridedLinearBufferCopy<Scalar, IndexType> LinCopy;

 public:
  typedef DSizes<IndexType, NumDims> Dimensions;
  typedef DSizes<int, NumDims> DimensionsMap;

  struct Dst {
    Dst(const Dimensions& dst_dims, const Dimensions& dst_strides, Scalar* dst,
        IndexType dst_offset = 0)
        : dims(dst_dims), strides(dst_strides), data(dst), offset(dst_offset) {}

    Dimensions dims;
    Dimensions strides;
    Scalar* data;
    IndexType offset;
  };

  struct Src {
    Src(const Dimensions& src_strides, const Scalar* src,
        IndexType src_offset = 0)
        : strides(src_strides), data(src), offset(src_offset) {}

    Dimensions strides;
    const Scalar* data;
    IndexType offset;
  };

  // Copies data to `dst` from `src`, using the given dst and src strides.
  // `dst_to_src_dim_map` maps dst dimensions to src dimensions.
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType Copy(
      const Dst& dst, const Src& src,
      const DimensionsMap& dst_to_src_dim_map) {
    // Copy a single scalar value from `src` to `dst`.
    if (NumDims == 0) {
      *(dst.data + dst.offset) = *(src.data + src.offset);
      return 1;
    }

    // Both `dst` and `src` must have a contiguous innermost dimension. We also
    // accept the special case of stride '0', because it is used as a trick to
    // implement broadcasting.
    {
      int inner_dim = IsColMajor ? 0 : NumDims - 1;
      EIGEN_UNUSED_VARIABLE(inner_dim);
      eigen_assert(dst.strides[inner_dim] == 1 || dst.strides[inner_dim] == 0);
      eigen_assert(src.strides[inner_dim] == 1 || src.strides[inner_dim] == 0);
    }

    // Give a shorter name to `dst_to_src_dim_map`.
    const DimensionsMap& dim_map = dst_to_src_dim_map;

    // Do not squeeze reordered inner dimensions.
    int num_squeezable_dims = NumSqueezableInnerDims(dim_map);

    // Find the innermost dimension in the dst whose size is not 1: this is the
    // effective inner dimension.
    int num_size_one_inner_dims = 0;
    for (int i = 0; i < num_squeezable_dims; ++i) {
      const int dst_dim = IsColMajor ? i : NumDims - i - 1;
      if (dst.dims[dst_dim] != 1) break;
      num_size_one_inner_dims++;
    }

    // If all dimensions are of size 1, just copy a scalar from `src` to `dst`.
    if (num_size_one_inner_dims == NumDims) {
      *(dst.data + dst.offset) = *(src.data + src.offset);
      return 1;
    }

    // Outermost dimension in the dst with `stride == 1` (contiguous in
    // memory).
    const int dst_stride1_dim = IsColMajor
                                    ? num_size_one_inner_dims
                                    : NumDims - num_size_one_inner_dims - 1;

    // Dimension in the src that corresponds to the dst innermost dimension.
    const int src_dim_for_dst_stride1_dim =
        NumDims == 0 ? 1 : dim_map[dst_stride1_dim];

    // Size of the innermost dimension (length of contiguous blocks of memory).
    IndexType dst_inner_dim_size = NumDims == 0 ? 1 : dst.dims[dst_stride1_dim];

    // Squeeze multiple inner dims into one if they are contiguous in `dst` and
    // `src` memory, so we can do fewer linear copy calls.
    for (int i = num_size_one_inner_dims + 1; i < num_squeezable_dims; ++i) {
      const int dst_dim = IsColMajor ? i : NumDims - i - 1;
      const IndexType dst_stride = dst.strides[dst_dim];
      const IndexType src_stride = src.strides[dim_map[dst_dim]];
      if (dst_inner_dim_size == dst_stride && dst_stride == src_stride) {
        dst_inner_dim_size *= dst.dims[dst_dim];
        ++num_size_one_inner_dims;
      } else {
        break;
      }
    }

    // Set up offsets and strides to read data from `src` and write to `dst`.
    IndexType input_offset = src.offset;
    IndexType output_offset = dst.offset;
    IndexType input_stride =
        NumDims == 0 ? 1 : src.strides[src_dim_for_dst_stride1_dim];
    IndexType output_stride = NumDims == 0 ? 1 : dst.strides[dst_stride1_dim];

    const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
    array<BlockIteratorState, at_least_1_dim> it;

    // Initialize the block iterator state. Squeeze away any dimension of size 1.
    int idx = 0;  // currently initialized iterator state index
    for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
      const int dst_dim = IsColMajor ? i + 1 : NumDims - i - 2;
      if (dst.dims[dst_dim] == 1) continue;

      it[idx].size = dst.dims[dst_dim];
      it[idx].input_stride = src.strides[dim_map[dst_dim]];
      it[idx].output_stride = dst.strides[dst_dim];

      it[idx].input_span = it[idx].input_stride * (it[idx].size - 1);
      it[idx].output_span = it[idx].output_stride * (it[idx].size - 1);

      idx++;
    }

    // Iterate copying data from src to dst.
    const IndexType block_total_size = NumDims == 0 ? 1 : dst.dims.TotalSize();

#define COPY_INNER_DIM(KIND)                                           \
  IndexType num_copied = 0;                                            \
  for (num_copied = 0; num_copied < block_total_size;                  \
       num_copied += dst_inner_dim_size) {                             \
    LinCopy::template Run<KIND>(                                       \
        typename LinCopy::Dst(output_offset, output_stride, dst.data), \
        typename LinCopy::Src(input_offset, input_stride, src.data),   \
        dst_inner_dim_size);                                           \
                                                                       \
    for (int j = 0; j < idx; ++j) {                                    \
      if (++it[j].count < it[j].size) {                                \
        input_offset += it[j].input_stride;                            \
        output_offset += it[j].output_stride;                          \
        break;                                                         \
      }                                                                \
      it[j].count = 0;                                                 \
      input_offset -= it[j].input_span;                                \
      output_offset -= it[j].output_span;                              \
    }                                                                  \
  }                                                                    \
  return num_copied;

    if (input_stride == 1 && output_stride == 1) {
      COPY_INNER_DIM(LinCopy::Kind::Linear);
    } else if (input_stride == 1 && output_stride != 1) {
      COPY_INNER_DIM(LinCopy::Kind::Scatter);
    } else if (input_stride == 0 && output_stride == 1) {
      COPY_INNER_DIM(LinCopy::Kind::FillLinear);
    } else if (input_stride == 0 && output_stride != 1) {
      COPY_INNER_DIM(LinCopy::Kind::FillScatter);
    } else if (output_stride == 1) {
      COPY_INNER_DIM(LinCopy::Kind::Gather);
    } else {
      COPY_INNER_DIM(LinCopy::Kind::Random);
    }

#undef COPY_INNER_DIM
  }

  // Copy from `src` to `dst` with an identity dst->src dimension map.
  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexType Copy(const Dst& dst,
                                                              const Src& src) {
    DimensionsMap dst_to_src_map;
    for (int i = 0; i < NumDims; ++i) dst_to_src_map[i] = i;
    return Copy(dst, src, dst_to_src_map);
  }

 private:
  struct BlockIteratorState {
    BlockIteratorState()
        : size(0),
          count(0),
          input_stride(0),
          output_stride(0),
          input_span(0),
          output_span(0) {}

    IndexType size;
    IndexType count;
    IndexType input_stride;
    IndexType output_stride;
    IndexType input_span;
    IndexType output_span;
  };

  // Compute how many inner dimensions may be squeezed when doing IO between
  // two tensor blocks: it is safe to squeeze inner dimensions only if they are
  // not reordered.
  static int NumSqueezableInnerDims(const DimensionsMap& dim_map) {
    int num_squeezable_dims = 0;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      if (dim_map[dim] != dim) break;
      num_squeezable_dims++;
    }
    return num_squeezable_dims;
  }
};
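// Usage sketch: copy a block out of a larger tensor buffer into a dense
// destination buffer of the block's own dimensions (pointer and dimension
// names are illustrative).
//
//   typedef TensorBlockIO<float, Eigen::Index, 3, ColMajor> BlockIO;
//   BlockIO::Dst dst(block_dims, internal::strides<ColMajor>(block_dims),
//                    dst_ptr);
//   BlockIO::Src src(internal::strides<ColMajor>(tensor_dims), tensor_ptr,
//                    desc.offset());
//   BlockIO::Copy(dst, src);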
// TensorBlockAssignment assigns a block expression to a memory buffer defined
// by the target dimensions, strides and offset.
template <typename Scalar, int NumDims, typename TensorBlockExpr,
          typename IndexType = Eigen::Index>
class TensorBlockAssignment {
  // We will use the coeff/packet path to evaluate block expressions.
  typedef TensorEvaluator<const TensorBlockExpr, DefaultDevice>
      TensorBlockEvaluator;

  typedef DSizes<IndexType, NumDims> Dimensions;

  enum {
    Vectorizable = packet_traits<Scalar>::Vectorizable,
    PacketSize = packet_traits<Scalar>::size
  };

  template <bool Vectorizable, typename Evaluator>
  struct InnerDimAssign {
    static EIGEN_ALWAYS_INLINE void Run(Scalar* target, IndexType count,
                                        const Evaluator& eval,
                                        IndexType eval_offset) {
      for (IndexType i = 0; i < count; ++i) {
        target[i] = eval.coeff(eval_offset + i);
      }
    }
  };

  template <typename Evaluator>
  struct InnerDimAssign<true, Evaluator> {
    static EIGEN_ALWAYS_INLINE void Run(Scalar* target, IndexType count,
                                        const Evaluator& eval,
                                        IndexType eval_offset) {
      typedef typename packet_traits<Scalar>::type Packet;

      const IndexType unrolled_size = count - 4 * PacketSize;
      const IndexType vectorized_size = count - PacketSize;
      IndexType i = 0;

      for (; i <= unrolled_size; i += 4 * PacketSize) {
        for (int j = 0; j < 4; ++j) {
          const IndexType idx = eval_offset + i + j * PacketSize;
          Packet p = eval.template packet<Unaligned>(idx);
          pstoreu<Scalar>(target + i + j * PacketSize, p);
        }
      }

      for (; i <= vectorized_size; i += PacketSize) {
        Packet p = eval.template packet<Unaligned>(eval_offset + i);
        pstoreu<Scalar>(target + i, p);
      }

      for (; i < count; ++i) {
        target[i] = eval.coeff(eval_offset + i);
      }
    }
  };
 public:
  struct Target {
    Target(const Dimensions& target_dims, const Dimensions& target_strides,
           Scalar* target_data, IndexType target_offset = 0)
        : dims(target_dims),
          strides(target_strides),
          data(target_data),
          offset(target_offset) {}

    Dimensions dims;
    Dimensions strides;
    Scalar* data;
    IndexType offset;
  };

  static Target target(const Dimensions& target_dims,
                       const Dimensions& target_strides, Scalar* target_data,
                       IndexType target_offset = 0) {
    return Target(target_dims, target_strides, target_data, target_offset);
  }

  template <typename TargetDimsIndexType, typename TargetStridesIndexType>
  static Target target(
      const DSizes<TargetDimsIndexType, NumDims>& target_dims,
      const DSizes<TargetStridesIndexType, NumDims>& target_strides,
      Scalar* target_data, IndexType target_offset = 0) {
    // DSizes constructor will do index type promotion if it is safe.
    return Target(Dimensions(target_dims), Dimensions(target_strides),
                  target_data, target_offset);
  }

  static EIGEN_STRONG_INLINE void Run(const Target& target,
                                      const TensorBlockExpr& expr) {
    // Prepare an evaluator for the block expression.
    DefaultDevice default_device;
    TensorBlockEvaluator eval(expr, default_device);

    // Block expression dimensions should match the destination dimensions.
    eigen_assert(dimensions_match(target.dims, eval.dimensions()));

    static const int Layout = TensorBlockEvaluator::Layout;
    static const bool is_col_major = Layout == ColMajor;

    // Initialize the output inner dimension size based on the layout.
    const IndexType output_size = NumDims == 0 ? 1 : target.dims.TotalSize();
    const int inner_dim_idx = is_col_major ? 0 : NumDims - 1;
    IndexType output_inner_dim_size = target.dims[inner_dim_idx];

    // The target inner dimension stride must be '1'.
    eigen_assert(target.strides[inner_dim_idx] == 1);

    // Squeeze multiple inner dims into one if they are contiguous in `target`.
    IndexType num_squeezed_dims = 0;
    for (Index i = 1; i < NumDims; ++i) {
      const Index dim = is_col_major ? i : NumDims - i - 1;
      const IndexType target_stride = target.strides[dim];

      if (output_inner_dim_size == target_stride) {
        output_inner_dim_size *= target.dims[dim];
        num_squeezed_dims++;
      } else {
        break;
      }
    }

    // Initialize the output block iterator state. Dimensions in this array are
    // always in inner_most -> outer_most order (column-major layout).
    array<BlockIteratorState, NumDims> it;

    int idx = 0;  // currently initialized iterator state index
    for (Index i = num_squeezed_dims; i < NumDims - 1; ++i) {
      const Index dim = is_col_major ? i + 1 : NumDims - i - 2;

      it[idx].count = 0;
      it[idx].size = target.dims[dim];
      it[idx].output_stride = target.strides[dim];
      it[idx].output_span = it[idx].output_stride * (it[idx].size - 1);
      idx++;
    }

    // We read the block expression from the beginning, and start writing data
    // to `target` at the given offset.
    IndexType input_offset = 0;
    IndexType output_offset = target.offset;

    // Iterate copying data from `eval` to `target`.
    for (IndexType i = 0; i < output_size; i += output_inner_dim_size) {
      // Assign to `target` at the current offset.
      InnerDimAssign<Vectorizable && TensorBlockEvaluator::PacketAccess,
                     TensorBlockEvaluator>::Run(target.data + output_offset,
                                                output_inner_dim_size, eval,
                                                input_offset);

      // Move the input offset forward by the number of assigned coefficients.
      input_offset += output_inner_dim_size;

      // Update the iteration index.
      for (int j = 0; j < idx; ++j) {
        if (++it[j].count < it[j].size) {
          output_offset += it[j].output_stride;
          break;
        }
        it[j].count = 0;
        output_offset -= it[j].output_span;
      }
    }
  }

 private:
  struct BlockIteratorState {
    BlockIteratorState()
        : count(0), size(0), output_stride(0), output_span(0) {}

    IndexType count;
    IndexType size;
    IndexType output_stride;
    IndexType output_span;
  };
};
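// Usage sketch (illustrative types and names): evaluate a block expression
// directly into a strided output buffer described by a block descriptor.
//
//   typedef TensorBlockAssignment<float, 3, BlockExpr> BlockAssign;
//   BlockAssign::Run(
//       BlockAssign::target(desc.dimensions(),
//                           internal::strides<ColMajor>(output_dims),
//                           output_ptr, desc.offset()),
//       block_expr);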
#endif  // EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H