#ifndef EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
#define EIGEN_CXX11_TENSOR_TENSOR_SCAN_H

namespace Eigen {

namespace internal {

template <typename Op, typename XprType>
struct traits<TensorScanOp<Op, XprType> >
    : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
};

template<typename Op, typename XprType>
struct eval<TensorScanOp<Op, XprType>, Eigen::Dense>
{
  typedef const TensorScanOp<Op, XprType>& type;
};

template<typename Op, typename XprType>
struct nested<TensorScanOp<Op, XprType>, 1,
              typename eval<TensorScanOp<Op, XprType> >::type>
{
  typedef TensorScanOp<Op, XprType> type;
};

}  // end namespace internal

/** \class TensorScan
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor scan class.
  */
template <typename Op, typename XprType>
class TensorScanOp
    : public TensorBase<TensorScanOp<Op, XprType>, ReadOnlyAccessors> {
public:
  typedef typename Eigen::internal::traits<TensorScanOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorScanOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorScanOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorScanOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp(
      const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op())
      : m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {}

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Index axis() const { return m_axis; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const XprType& expression() const { return m_expr; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Op accumulator() const { return m_accumulator; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  bool exclusive() const { return m_exclusive; }

protected:
  typename XprType::Nested m_expr;
  const Index m_axis;
  const Op m_accumulator;
  const bool m_exclusive;
};
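
// Usage sketch (illustrative only, not part of the original header): a
// TensorScanOp is normally created through the scan helpers on TensorBase,
// assuming the usual cumsum()/cumprod() helpers of the Tensor module:
//
//   Eigen::Tensor<float, 3> t(2, 3, 4);
//   t.setRandom();
//   Eigen::Tensor<float, 3> inclusive = t.cumsum(1);       // running sum along axis 1
//   Eigen::Tensor<float, 3> exclusive = t.cumsum(1, true);  // exclusive scan along axis 1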

template <typename Self, typename Reducer, typename Device>
struct ScanLauncher;

// Eval as rvalue
template <typename Op, typename ArgType, typename Device>
struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {

  typedef TensorScanOp<Op, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self;

  enum {
    IsAligned = false,
    PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = true
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
                                                        const Device& device)
      : m_impl(op.expression(), device),
        m_device(device),
        m_exclusive(op.exclusive()),
        m_accumulator(op.accumulator()),
        m_size(m_impl.dimensions()[op.axis()]),
        m_stride(1),
        m_output(NULL) {
    // Accumulating a scalar isn't supported.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(op.axis() >= 0 && op.axis() < NumDims);

    // Compute stride of scan axis
    const Dimensions& dims = m_impl.dimensions();
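    // Worked example (illustrative): for a ColMajor tensor of dimensions
    // (2, 3, 4) scanned along axis 1, m_stride becomes 2 (the product of the
    // dimensions before the axis) and m_size is 3; for RowMajor the stride
    // would instead be 4 (the product of the dimensions after the axis).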
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < op.axis(); ++i) {
        m_stride = m_stride * dims[i];
      }
    } else {
      for (int i = NumDims - 1; i > op.axis(); --i) {
        m_stride = m_stride * dims[i];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return m_impl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& stride() const {
    return m_stride;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const {
    return m_size;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Op& accumulator() const {
    return m_accumulator;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool exclusive() const {
    return m_exclusive;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& inner() const {
    return m_impl;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const {
    return m_device;
  }
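
  // Runs the scan. If the caller already provides a destination buffer, the
  // result is written directly into it and false is returned; otherwise a
  // temporary buffer is allocated through the device and true is returned so
  // that coeff()/packet() can later read from it (it is freed in cleanup()).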
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    ScanLauncher<Self, Op, Device> launcher;
    if (data) {
      launcher(*this, data);
      return false;
    }

    const Index total_size = internal::array_prod(dimensions());
    m_output = static_cast<CoeffReturnType*>(
        m_device.allocate(total_size * sizeof(Scalar)));
    launcher(*this, m_output);
    return true;
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
    return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const {
    return m_output;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    return m_output[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    if (m_output != NULL) {
      m_device.deallocate(m_output);
      m_output = NULL;
    }
    m_impl.cleanup();
  }

protected:
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  const bool m_exclusive;
  Op m_accumulator;
  const Index m_size;
  Index m_stride;
  CoeffReturnType* m_output;
};

// CPU implementation of scan
template <typename Self, typename Reducer, typename Device>
struct ScanLauncher {
  void operator()(Self& self, typename Self::CoeffReturnType* data) {
    Index total_size = internal::array_prod(self.dimensions());

    // Fix the index along the scan axis to 0 and perform one scan per
    // remaining entry. The iteration is split into two nested loops to avoid
    // an integer division, by keeping track of idx1 and idx2 separately.
    for (Index idx1 = 0; idx1 < total_size; idx1 += self.stride() * self.size()) {
      for (Index idx2 = 0; idx2 < self.stride(); idx2++) {
        // Calculate the starting offset for the scan
        Index offset = idx1 + idx2;

        // Compute the scan along the axis, starting at the calculated offset
        typename Self::CoeffReturnType accum = self.accumulator().initialize();
        for (Index idx3 = 0; idx3 < self.size(); idx3++) {
          Index curr = offset + idx3 * self.stride();
          if (self.exclusive()) {
            data[curr] = self.accumulator().finalize(accum);
            self.accumulator().reduce(self.inner().coeff(curr), &accum);
          } else {
            self.accumulator().reduce(self.inner().coeff(curr), &accum);
            data[curr] = self.accumulator().finalize(accum);
          }
        }
      }
    }
  }
};
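
// Illustrative note (not from the original source): with a sum reducer and the
// 1-D input [1, 2, 3], the inclusive scan above writes [1, 3, 6], while the
// exclusive scan writes the accumulator value before each element, i.e. [0, 1, 3].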

#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)

// GPU implementation of scan
template <typename Self, typename Reducer>
__global__ void ScanKernel(Self self, Index total_size,
                           typename Self::CoeffReturnType* data) {
  // Compute the offset as in the CPU version
  Index val = threadIdx.x + blockIdx.x * blockDim.x;
  Index offset = (val / self.stride()) * self.stride() * self.size() + val % self.stride();
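  // Illustrative example (not from the original source): with stride() == 2 and
  // size() == 3 (e.g. a ColMajor (2, 3, 4) tensor scanned along axis 1), thread
  // val == 5 starts its scan at offset (5 / 2) * 2 * 3 + 5 % 2 == 13 and touches
  // the coefficients 13, 15 and 17.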
  if (offset + (self.size() - 1) * self.stride() < total_size) {
    // Compute the scan along the axis, starting at the calculated offset
    typename Self::CoeffReturnType accum = self.accumulator().initialize();
    for (Index idx = 0; idx < self.size(); idx++) {
      Index curr = offset + idx * self.stride();
      if (self.exclusive()) {
        data[curr] = self.accumulator().finalize(accum);
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
      } else {
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
        data[curr] = self.accumulator().finalize(accum);
      }
    }
  }
  __syncthreads();
}

template <typename Self, typename Reducer>
struct ScanLauncher<Self, Reducer, GpuDevice> {
  void operator()(const Self& self, typename Self::CoeffReturnType* data) {
    Index total_size = internal::array_prod(self.dimensions());
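    // Each CUDA thread scans one full line along the axis, so total_size / size()
    // threads are needed; they are grouped into blocks of 64 threads below.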
    Index num_blocks = (total_size / self.size() + 63) / 64;
    Index block_size = 64;
    LAUNCH_CUDA_KERNEL((ScanKernel<Self, Reducer>), num_blocks, block_size, 0,
                       self.device(), self, total_size, data);
  }
};
#endif  // EIGEN_USE_GPU && __CUDACC__

}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_SCAN_H