TensorScan.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Igor Babuschkin <igor@babuschk.in>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
#define EIGEN_CXX11_TENSOR_TENSOR_SCAN_H

namespace Eigen {

namespace internal {

template <typename Op, typename XprType>
struct traits<TensorScanOp<Op, XprType> >
    : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename Op, typename XprType>
struct eval<TensorScanOp<Op, XprType>, Eigen::Dense>
{
  typedef const TensorScanOp<Op, XprType>& type;
};

template<typename Op, typename XprType>
struct nested<TensorScanOp<Op, XprType>, 1,
              typename eval<TensorScanOp<Op, XprType> >::type>
{
  typedef TensorScanOp<Op, XprType> type;
};
} // end namespace internal

/** \class TensorScanOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor scan class.
  */
template <typename Op, typename XprType>
class TensorScanOp
    : public TensorBase<TensorScanOp<Op, XprType>, ReadOnlyAccessors> {
public:
  typedef typename Eigen::internal::traits<TensorScanOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorScanOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorScanOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorScanOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp(
      const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op())
      : m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {}

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Index axis() const { return m_axis; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const XprType& expression() const { return m_expr; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Op accumulator() const { return m_accumulator; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  bool exclusive() const { return m_exclusive; }

protected:
  typename XprType::Nested m_expr;
  const Index m_axis;
  const Op m_accumulator;
  const bool m_exclusive;
};
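
// Usage sketch: TensorScanOp is the expression type behind the cumulative-sum
// and cumulative-product helpers on TensorBase (a minimal example, assuming
// the unsupported CXX11 Tensor module is included):
//
//   Eigen::Tensor<float, 2> t(3, 4);
//   t.setRandom();
//   Eigen::Tensor<float, 2> inclusive = t.cumsum(/*axis=*/0);
//   Eigen::Tensor<float, 2> exclusive = t.cumsum(0, /*exclusive=*/true);
//
// Both calls evaluate a TensorScanOp whose Op parameter is a sum reducer.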

namespace internal {

template <typename Self>
EIGEN_STRONG_INLINE void ReduceScalar(Self& self, Index offset,
                                      typename Self::CoeffReturnType* data) {
  // Compute the scan along the axis, starting at the given offset
  typename Self::CoeffReturnType accum = self.accumulator().initialize();
  if (self.stride() == 1) {
    if (self.exclusive()) {
      for (Index curr = offset; curr < offset + self.size(); ++curr) {
        data[curr] = self.accumulator().finalize(accum);
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
      }
    } else {
      for (Index curr = offset; curr < offset + self.size(); ++curr) {
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
        data[curr] = self.accumulator().finalize(accum);
      }
    }
  } else {
    if (self.exclusive()) {
      for (Index idx3 = 0; idx3 < self.size(); idx3++) {
        Index curr = offset + idx3 * self.stride();
        data[curr] = self.accumulator().finalize(accum);
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
      }
    } else {
      for (Index idx3 = 0; idx3 < self.size(); idx3++) {
        Index curr = offset + idx3 * self.stride();
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
        data[curr] = self.accumulator().finalize(accum);
      }
    }
  }
}
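
// For a sum reducer over the values [1, 2, 3] along the scan axis, the
// inclusive branches above write [1, 3, 6], while the exclusive branches
// write the running total before each element, [0, 1, 3], starting from the
// reducer's identity value.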

template <typename Self>
EIGEN_STRONG_INLINE void ReducePacket(Self& self, Index offset,
                                      typename Self::CoeffReturnType* data) {
  using Scalar = typename Self::CoeffReturnType;
  using Packet = typename Self::PacketReturnType;
  // Compute the scan along the axis, starting at the calculated offset
  Packet accum = self.accumulator().template initializePacket<Packet>();
  if (self.stride() == 1) {
    if (self.exclusive()) {
      for (Index curr = offset; curr < offset + self.size(); ++curr) {
        internal::pstoreu<Scalar, Packet>(data + curr, self.accumulator().finalizePacket(accum));
        self.accumulator().reducePacket(self.inner().template packet<Unaligned>(curr), &accum);
      }
    } else {
      for (Index curr = offset; curr < offset + self.size(); ++curr) {
        self.accumulator().reducePacket(self.inner().template packet<Unaligned>(curr), &accum);
        internal::pstoreu<Scalar, Packet>(data + curr, self.accumulator().finalizePacket(accum));
      }
    }
  } else {
    if (self.exclusive()) {
      for (Index idx3 = 0; idx3 < self.size(); idx3++) {
        const Index curr = offset + idx3 * self.stride();
        internal::pstoreu<Scalar, Packet>(data + curr, self.accumulator().finalizePacket(accum));
        self.accumulator().reducePacket(self.inner().template packet<Unaligned>(curr), &accum);
      }
    } else {
      for (Index idx3 = 0; idx3 < self.size(); idx3++) {
        const Index curr = offset + idx3 * self.stride();
        self.accumulator().reducePacket(self.inner().template packet<Unaligned>(curr), &accum);
        internal::pstoreu<Scalar, Packet>(data + curr, self.accumulator().finalizePacket(accum));
      }
    }
  }
}

template <typename Self, bool Vectorize, bool Parallel>
struct ReduceBlock {
  EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1,
                                      typename Self::CoeffReturnType* data) {
    for (Index idx2 = 0; idx2 < self.stride(); idx2++) {
      // Calculate the starting offset for the scan
      Index offset = idx1 + idx2;
      ReduceScalar(self, offset, data);
    }
  }
};

// Specialization for vectorized reduction.
template <typename Self>
struct ReduceBlock<Self, /*Vectorize=*/true, /*Parallel=*/false> {
  EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1,
                                      typename Self::CoeffReturnType* data) {
    using Packet = typename Self::PacketReturnType;
    const int PacketSize = internal::unpacket_traits<Packet>::size;
    Index idx2 = 0;
    for (; idx2 + PacketSize <= self.stride(); idx2 += PacketSize) {
      // Calculate the starting offset for the packet scan
      Index offset = idx1 + idx2;
      ReducePacket(self, offset, data);
    }
    for (; idx2 < self.stride(); idx2++) {
      // Calculate the starting offset for the scan
      Index offset = idx1 + idx2;
      ReduceScalar(self, offset, data);
    }
  }
};
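
// The vectorized specialization relies on the scan lanes being independent:
// when the packet loop runs, idx2 + PacketSize <= stride(), so the PacketSize
// adjacent offsets are distinct lanes of the same block and ReducePacket can
// advance all of them in lockstep with packet loads and stores; any leftover
// lanes fall back to ReduceScalar.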

// Single-threaded CPU implementation of scan
template <typename Self, typename Reducer, typename Device,
          bool Vectorize =
              (TensorEvaluator<typename Self::ChildTypeNoConst, Device>::PacketAccess &&
               internal::reducer_traits<Reducer, Device>::PacketAccess)>
struct ScanLauncher {
  void operator()(Self& self, typename Self::CoeffReturnType* data) {
    Index total_size = internal::array_prod(self.dimensions());

    // We fix the index along the scan axis to 0 and perform a
    // scan per remaining entry. The iteration is split into two nested
    // loops to avoid an integer division by keeping track of each idx1 and
    // idx2.
    for (Index idx1 = 0; idx1 < total_size; idx1 += self.stride() * self.size()) {
      ReduceBlock<Self, Vectorize, /*Parallel=*/false> block_reducer;
      block_reducer(self, idx1, data);
    }
  }
};
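
// Index decomposition example: for a ColMajor 4 x 5 tensor scanned along
// axis 1 (size() == 5, stride() == 4), idx1 advances in steps of
// stride() * size() == 20 (a single step covers the whole tensor here), idx2
// inside ReduceBlock runs over the 4 inner offsets, and each individual scan
// then visits offset, offset + 4, ..., offset + 16.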

#ifdef EIGEN_USE_THREADS

// Adjust block_size to avoid false sharing of cachelines among
// threads. Currently set to twice the cache line size on Intel and ARM
// processors.
EIGEN_STRONG_INLINE Index AdjustBlockSize(Index item_size, Index block_size) {
  EIGEN_CONSTEXPR Index kBlockAlignment = 128;
  const Index items_per_cacheline =
      numext::maxi<Index>(1, kBlockAlignment / item_size);
  return items_per_cacheline * divup(block_size, items_per_cacheline);
}
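
// Worked example: for item_size == sizeof(float) == 4 and a requested
// block_size of 100, items_per_cacheline == 128 / 4 == 32 and the adjusted
// block size is 32 * divup(100, 32) == 32 * 4 == 128 items.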

template <typename Self>
struct ReduceBlock<Self, /*Vectorize=*/true, /*Parallel=*/true> {
  EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1,
                                      typename Self::CoeffReturnType* data) {
    using Scalar = typename Self::CoeffReturnType;
    using Packet = typename Self::PacketReturnType;
    const int PacketSize = internal::unpacket_traits<Packet>::size;
    Index num_scalars = self.stride();
    Index num_packets = 0;
    if (self.stride() >= PacketSize) {
      num_packets = self.stride() / PacketSize;
      self.device().parallelFor(
          num_packets,
          TensorOpCost(PacketSize * self.size(), PacketSize * self.size(),
                       16 * PacketSize * self.size(), true, PacketSize),
          // Make the shard size large enough that two neighboring threads
          // won't write to the same cacheline of `data`.
          [=](Index blk_size) {
            return AdjustBlockSize(PacketSize * sizeof(Scalar), blk_size);
          },
          [&](Index first, Index last) {
            for (Index packet = first; packet < last; ++packet) {
              const Index idx2 = packet * PacketSize;
              ReducePacket(self, idx1 + idx2, data);
            }
          });
      num_scalars -= num_packets * PacketSize;
    }
    self.device().parallelFor(
        num_scalars, TensorOpCost(self.size(), self.size(), 16 * self.size()),
        // Make the shard size large enough that two neighboring threads
        // won't write to the same cacheline of `data`.
        [=](Index blk_size) {
          return AdjustBlockSize(sizeof(Scalar), blk_size);
        },
        [&](Index first, Index last) {
          for (Index scalar = first; scalar < last; ++scalar) {
            const Index idx2 = num_packets * PacketSize + scalar;
            ReduceScalar(self, idx1 + idx2, data);
          }
        });
  }
};

template <typename Self>
struct ReduceBlock<Self, /*Vectorize=*/false, /*Parallel=*/true> {
  EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1,
                                      typename Self::CoeffReturnType* data) {
    using Scalar = typename Self::CoeffReturnType;
    self.device().parallelFor(
        self.stride(), TensorOpCost(self.size(), self.size(), 16 * self.size()),
        // Make the shard size large enough that two neighboring threads
        // won't write to the same cacheline of `data`.
        [=](Index blk_size) {
          return AdjustBlockSize(sizeof(Scalar), blk_size);
        },
        [&](Index first, Index last) {
          for (Index idx2 = first; idx2 < last; ++idx2) {
            ReduceScalar(self, idx1 + idx2, data);
          }
        });
  }
};

// Specialization for multi-threaded execution.
template <typename Self, typename Reducer, bool Vectorize>
struct ScanLauncher<Self, Reducer, ThreadPoolDevice, Vectorize> {
  void operator()(Self& self, typename Self::CoeffReturnType* data) {
    using Scalar = typename Self::CoeffReturnType;
    using Packet = typename Self::PacketReturnType;
    const int PacketSize = internal::unpacket_traits<Packet>::size;
    const Index total_size = internal::array_prod(self.dimensions());
    const Index inner_block_size = self.stride() * self.size();
    bool parallelize_by_outer_blocks = (total_size >= (self.stride() * inner_block_size));
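    // Worked example: for a ColMajor 1024 x 32 tensor scanned along axis 0
    // (stride() == 1, size() == 1024), total_size == 32768 >=
    // stride() * inner_block_size == 1024, so the 32 independent scans are
    // distributed across threads as outer blocks. Scanning the same tensor
    // along axis 1 (stride() == 1024, size() == 32) fails the test, and the
    // work is instead split across the 1024 inner lanes of the block.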

    if ((parallelize_by_outer_blocks && total_size <= 4096) ||
        (!parallelize_by_outer_blocks && self.stride() < PacketSize)) {
      ScanLauncher<Self, Reducer, DefaultDevice, Vectorize> launcher;
      launcher(self, data);
      return;
    }

    if (parallelize_by_outer_blocks) {
      // Parallelize over outer blocks.
      const Index num_outer_blocks = total_size / inner_block_size;
      self.device().parallelFor(
          num_outer_blocks,
          TensorOpCost(inner_block_size, inner_block_size,
                       16 * PacketSize * inner_block_size, Vectorize,
                       PacketSize),
          [=](Index blk_size) {
            return AdjustBlockSize(inner_block_size * sizeof(Scalar), blk_size);
          },
          [&](Index first, Index last) {
            for (Index idx1 = first; idx1 < last; ++idx1) {
              ReduceBlock<Self, Vectorize, /*Parallel=*/false> block_reducer;
              block_reducer(self, idx1 * inner_block_size, data);
            }
          });
    } else {
      // Parallelize over inner packets/scalars dimensions when the reduction
      // axis is not an inner dimension.
      ReduceBlock<Self, Vectorize, /*Parallel=*/true> block_reducer;
      for (Index idx1 = 0; idx1 < total_size;
           idx1 += self.stride() * self.size()) {
        block_reducer(self, idx1, data);
      }
    }
  }
};
#endif // EIGEN_USE_THREADS

#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))

// GPU implementation of scan
// TODO(ibab) This placeholder implementation performs multiple scans in
// parallel, but it would be better to use a parallel scan algorithm and
// optimize memory access.
template <typename Self, typename Reducer>
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ScanKernel(Self self, Index total_size, typename Self::CoeffReturnType* data) {
  // Compute offset as in the CPU version
  Index val = threadIdx.x + blockIdx.x * blockDim.x;
  Index offset = (val / self.stride()) * self.stride() * self.size() + val % self.stride();

  if (offset + (self.size() - 1) * self.stride() < total_size) {
    // Compute the scan along the axis, starting at the calculated offset
    typename Self::CoeffReturnType accum = self.accumulator().initialize();
    for (Index idx = 0; idx < self.size(); idx++) {
      Index curr = offset + idx * self.stride();
      if (self.exclusive()) {
        data[curr] = self.accumulator().finalize(accum);
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
      } else {
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
        data[curr] = self.accumulator().finalize(accum);
      }
    }
  }
  __syncthreads();
}
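
// Launch geometry: each GPU thread performs one complete scan. For a 128 x 32
// tensor scanned along the axis of length 32 there are
// total_size / size() == 4096 / 32 == 128 independent scans, so the launcher
// below issues num_blocks == (128 + 63) / 64 == 2 blocks of 64 threads each;
// threads whose offset falls past the end are filtered out by the bounds
// check above.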

template <typename Self, typename Reducer, bool Vectorize>
struct ScanLauncher<Self, Reducer, GpuDevice, Vectorize> {
  void operator()(const Self& self, typename Self::CoeffReturnType* data) {
    Index total_size = internal::array_prod(self.dimensions());
    Index num_blocks = (total_size / self.size() + 63) / 64;
    Index block_size = 64;

    LAUNCH_GPU_KERNEL((ScanKernel<Self, Reducer>), num_blocks, block_size, 0, self.device(), self, total_size, data);
  }
};
#endif // EIGEN_USE_GPU && (EIGEN_GPUCC)

} // namespace internal

// Eval as rvalue
template <typename Op, typename ArgType, typename Device>
struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {

  typedef TensorScanOp<Op, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef const ArgType ChildTypeNoConst;
  typedef const ArgType ChildType;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self;
  typedef StorageMemory<Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = false,
    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = true
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device),
        m_device(device),
        m_exclusive(op.exclusive()),
        m_accumulator(op.accumulator()),
        m_size(m_impl.dimensions()[op.axis()]),
        m_stride(1), m_consume_dim(op.axis()),
        m_output(NULL) {

    // Accumulating a scalar isn't supported.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(op.axis() >= 0 && op.axis() < NumDims);

    // Compute stride of scan axis
    const Dimensions& dims = m_impl.dimensions();
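    // For example, with ColMajor dimensions (2, 3, 4) and op.axis() == 2 the
    // loop below gives m_stride = 2 * 3 = 6; for the same dimensions in
    // RowMajor order and op.axis() == 0 it runs over the trailing dimensions
    // instead, giving m_stride = 4 * 3 = 12.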
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < op.axis(); ++i) {
        m_stride = m_stride * dims[i];
      }
    } else {
      // dims can only be indexed through unsigned integers, so use an
      // unsigned type to let the compiler know. This prevents spurious
      // warnings such as "'*((void*)(& evaluator)+64)[18446744073709551615]'
      // may be used uninitialized in this function".
      unsigned int axis = internal::convert_index<unsigned int>(op.axis());
      for (unsigned int i = NumDims - 1; i > axis; --i) {
        m_stride = m_stride * dims[i];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return m_impl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& stride() const {
    return m_stride;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& consume_dim() const {
    return m_consume_dim;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const {
    return m_size;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Op& accumulator() const {
    return m_accumulator;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool exclusive() const {
    return m_exclusive;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& inner() const {
    return m_impl;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const {
    return m_device;
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    internal::ScanLauncher<Self, Op, Device> launcher;
    if (data) {
      launcher(*this, data);
      return false;
    }

    const Index total_size = internal::array_prod(dimensions());
    m_output = static_cast<EvaluatorPointerType>(m_device.get((Scalar*) m_device.allocate_temp(total_size * sizeof(Scalar))));
    launcher(*this, m_output);
    return true;
  }
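
  // Evaluation strategy: the whole scan is materialized up front, either into
  // the caller-supplied buffer (returning false) or into the temporary
  // m_output buffer (returning true); coeff() and packet() below are then
  // plain reads from that buffer, which is why RawAccess is set above.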

  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
    return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const
  {
    return m_output;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_output[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0);
  }

  EIGEN_STRONG_INLINE void cleanup() {
    if (m_output) {
      m_device.deallocate_temp(m_output);
      m_output = NULL;
    }
    m_impl.cleanup();
  }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
    m_output.bind(cgh);
  }
#endif

protected:
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  const bool m_exclusive;
  Op m_accumulator;
  const Index m_size;
  Index m_stride;
  Index m_consume_dim;
  EvaluatorPointerType m_output;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_SCAN_H