TensorEvaluator.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H

namespace Eigen {

/** \class TensorEvaluator
  * \ingroup CXX11_Tensor_Module
  *
  * \brief The tensor evaluator classes.
  *
  * These classes are responsible for the evaluation of the tensor expression.
  */

// Generic evaluator
template<typename Derived, typename Device>
struct TensorEvaluator
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef Derived XprType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
  typedef StorageMemory<Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  // NumDimensions is -1 for variable dim tensors
  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;

  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
    PreferBlockAccess = false,
    Layout            = Derived::Layout,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                            Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get(const_cast<TensorPointerType>(m.data()))),
        m_dims(m.dimensions()),
        m_device(device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
    // Trivially copyable scalars can be materialized into `dest` with a single
    // device memcpy; returning false tells the caller that no further
    // evaluation is needed.
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
      m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {}

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }

  // Return a packet starting at `index` where `umask` specifies which elements
  // have to be loaded. The type and size of the mask depend on PacketReturnType;
  // e.g. for Packet16f, `umask` is a uint16_t: where a bit is 1 the corresponding
  // float element is loaded, otherwise 0 is loaded.
  // The function is a template so that SFINAE can disable it when masked loads
  // are not available for the packet type.
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available,
                               PacketReturnTypeT>::type
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }
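
  // Usage sketch (illustrative only; the Packet16f path assumes AVX512, and
  // `evaluator`/`index` are hypothetical names): loading the 3 tail elements
  // of a buffer whose size is not a multiple of the packet width could look like
  //
  //   uint16_t umask = (1u << 3) - 1;  // select elements 0..2, zero the rest
  //   Packet16f p =
  //       evaluator.template partialPacket<Packet16f>(index, umask);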

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
  coeffRef(const array<DenseIndex, NumCoords>& coords) {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements
  getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }

  template<typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(m_data != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
                                            Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(desc.dimensions(),
                                  internal::strides<Layout>(m_dims), m_data,
                                  desc.offset()),
        block.expr());
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif
 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};
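
// Usage sketch (not part of this header; a minimal illustration): a
// TensorEvaluator drives evaluation of a tensor expression coefficient by
// coefficient, or packet by packet when PacketAccess is set. Assuming a
// concrete tensor `t` and the default device:
//
//   Eigen::Tensor<float, 2> t(3, 4);
//   t.setConstant(2.0f);
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<Eigen::Tensor<float, 2>, Eigen::DefaultDevice>
//       eval(t, device);
//   if (eval.evalSubExprsIfNeeded(NULL)) {
//     float first = eval.coeff(0);  // direct access into t's storage
//     eval.cleanup();
//   }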

namespace {

template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
  return *address;
}
// Use the texture cache on CUDA devices whenever possible
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float loadConstant(const float* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double loadConstant(const double* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
Eigen::half loadConstant(const Eigen::half* address) {
  return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
}
#endif
#ifdef EIGEN_USE_SYCL
// An overload of loadConstant for SYCL, implemented in terms of range accessors.
template <cl::sycl::access::mode AcMd, typename T>
T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
  return *address;
}
#endif
}  // namespace
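
// Illustration (assumes a CUDA device of compute capability >= 3.5): inside a
// kernel, the specializations above route reads of read-only evaluator inputs
// through the read-only data cache, i.e. they compile to
//
//   float v = __ldg(ptr);   // instead of: float v = *ptr;
//
// which can improve bandwidth for data that is read many times but never
// written during the evaluation.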


// Default evaluator for rvalues
template<typename Derived, typename Device>
struct TensorEvaluator<const Derived, Device>
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef const Derived XprType;
  typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
  typedef StorageMemory<const Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  // NumDimensions is -1 for variable dim tensors
  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<ScalarNoConst>::value,
    PreferBlockAccess = false,
    Layout            = Derived::Layout,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                            Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
      m_device.memcpy((void*)(m_device.get(data)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {}

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return loadConstant(m_data + index);
  }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
  }

  // Return a packet starting at `index` where `umask` specifies which elements
  // have to be loaded. The type and size of the mask depend on PacketReturnType;
  // e.g. for Packet16f, `umask` is a uint16_t: where a bit is 1 the corresponding
  // float element is loaded, otherwise 0 is loaded.
  // The function is a template so that SFINAE can disable it when masked loads
  // are not available for the packet type.
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available,
                               PacketReturnTypeT>::type
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
                        : m_dims.IndexOfRowMajor(coords);
    return loadConstant(m_data + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements
  getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif
 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};



// -------------------- CwiseNullaryOp --------------------

template<typename NullaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
{
  typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;

  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = true,
    PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
    #ifdef EIGEN_USE_SYCL
    && (PacketType<CoeffReturnType, Device>::size > 1)
    #endif
    ,
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    done(true);
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {}

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_wrapper(m_functor, index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif

 private:
  const NullaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
  const internal::nullary_wrapper<CoeffReturnType, NullaryOp> m_wrapper;
};
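
// Usage sketch (hypothetical, for illustration): nullary expressions are the
// leaves that synthesize values instead of reading memory, e.g.
//
//   Eigen::Tensor<float, 2> t(3, 4);
//   auto expr = t.constant(1.0f);  // TensorCwiseNullaryOp over t's shape
//
// Evaluating coefficient 0 of `expr` through this evaluator calls
// m_wrapper(m_functor, 0), i.e. the constant-op functor, and never touches
// t's data.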


// -------------------- CwiseUnaryOp --------------------

template<typename UnaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
{
  typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;

  enum {
    IsAligned          = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess       = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
                         int(internal::functor_traits<UnaryOp>::PacketAccess),
    BlockAccess        = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess  = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout             = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess        = false,  // to be implemented
    RawAccess          = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_argImpl(op.nestedExpression(), device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static const int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_argImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_argImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_argImpl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements
  getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.getResourceRequirements().addCostPerCoeff(
        {0, 0, functor_cost / PacketSize});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif

 private:
  const Device EIGEN_DEVICE_REF m_device;
  const UnaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
};
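
// Usage sketch (hypothetical, for illustration): a unary expression such as
//
//   Eigen::Tensor<float, 1> a(16);
//   auto expr = a.sqrt();  // TensorCwiseUnaryOp<scalar_sqrt_op<float>, ...>
//
// is evaluated here by forwarding each coefficient (or packet) of `a` through
// the functor: coeff(i) computes sqrt(a(i)), while packet(i) applies
// m_functor.packetOp to a whole SIMD packet at once.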


// -------------------- CwiseBinaryOp --------------------

template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
{
  typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;

  enum {
    IsAligned         = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
                        int(TensorEvaluator<RightArgType, Device>::IsAligned),
    PacketAccess      = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
                        int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
                        int(internal::functor_traits<BinaryOp>::PacketAccess),
    BlockAccess       = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
                        int(TensorEvaluator<RightArgType, Device>::BlockAccess),
    PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
                        int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
    Layout            = TensorEvaluator<LeftArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_leftImpl(op.lhsExpression(), device),
      m_rightImpl(op.rhsExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static const int NumDims = internal::array_size<
      typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
      LeftTensorBlock;
  typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
      RightTensorBlock;

  typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
                                           RightTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use right impl instead if right impl dimensions are known at compile time.
    return m_leftImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    m_rightImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    // TODO(ezhulenev): Evaluate the two expressions in parallel?
    m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
                                            [done](bool) { done(true); });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_leftImpl.cleanup();
    m_rightImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index),
                              m_rightImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return m_leftImpl.costPerCoeff(vectorized) +
           m_rightImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements
  getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return internal::TensorBlockResourceRequirements::merge(
               m_leftImpl.getResourceRequirements(),
               m_rightImpl.getResourceRequirements())
        .addCostPerCoeff({0, 0, functor_cost / PacketSize});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    desc.DropDestinationBuffer();
    return TensorBlock(m_leftImpl.block(desc, scratch),
                       m_rightImpl.block(desc, scratch), m_functor);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_leftImpl.bind(cgh);
    m_rightImpl.bind(cgh);
  }
#endif
 private:
  const Device EIGEN_DEVICE_REF m_device;
  const BinaryOp m_functor;
  TensorEvaluator<LeftArgType, Device> m_leftImpl;
  TensorEvaluator<RightArgType, Device> m_rightImpl;
};
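
// Usage sketch (hypothetical, for illustration): an expression like
//
//   Eigen::Tensor<float, 2> a(3, 4), b(3, 4);
//   auto expr = a + b;  // TensorCwiseBinaryOp<scalar_sum_op<float>, ...>
//
// evaluates through this specialization: coeff(i) returns
// m_functor(a(i), b(i)) and packet(i) adds whole SIMD packets; the cost model
// above is what lets the executor size its work chunks.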

// -------------------- CwiseTernaryOp --------------------

template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
{
  typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;

  enum {
    IsAligned         = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
                        TensorEvaluator<Arg2Type, Device>::PacketAccess &&
                        TensorEvaluator<Arg3Type, Device>::PacketAccess &&
                        internal::functor_traits<TernaryOp>::PacketAccess,
    BlockAccess       = false,
    PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<Arg1Type, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_functor(op.functor()),
      m_arg1Impl(op.arg1Expression(), device),
      m_arg2Impl(op.arg2Expression(), device),
      m_arg3Impl(op.arg3Expression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);

    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg2Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg3Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg2Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg3Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)

    eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use arg2 or arg3 dimensions if they are known at compile time.
    return m_arg1Impl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_arg1Impl.evalSubExprsIfNeeded(NULL);
    m_arg2Impl.evalSubExprsIfNeeded(NULL);
    m_arg3Impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
    m_arg1Impl.cleanup();
    m_arg2Impl.cleanup();
    m_arg3Impl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
                              m_arg2Impl.template packet<LoadMode>(index),
                              m_arg3Impl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
    return m_arg1Impl.costPerCoeff(vectorized) +
           m_arg2Impl.costPerCoeff(vectorized) +
           m_arg3Impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_arg1Impl.bind(cgh);
    m_arg2Impl.bind(cgh);
    m_arg3Impl.bind(cgh);
  }
#endif

 private:
  const TernaryOp m_functor;
  TensorEvaluator<Arg1Type, Device> m_arg1Impl;
  TensorEvaluator<Arg2Type, Device> m_arg2Impl;
  TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};
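
// Usage sketch (hypothetical, for illustration): ternary coefficient-wise
// expressions are rare; a canonical one in the Tensor module is betainc:
//
//   Eigen::Tensor<float, 1> a(8), b(8), x(8);
//   auto expr = Eigen::betainc(a, b, x);  // TensorCwiseTernaryOp
//
// coeff(i) then evaluates m_functor(a(i), b(i), x(i)) with all three argument
// evaluators walked in lockstep.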


// -------------------- SelectOp --------------------

template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
{
  typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
  typedef typename XprType::Scalar Scalar;

  enum {
    IsAligned         = TensorEvaluator<ThenArgType, Device>::IsAligned &
                        TensorEvaluator<ElseArgType, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<ThenArgType, Device>::PacketAccess &
                        TensorEvaluator<ElseArgType, Device>::PacketAccess &
                        PacketType<Scalar, Device>::HasBlend,
    BlockAccess       = TensorEvaluator<IfArgType, Device>::BlockAccess &&
                        TensorEvaluator<ThenArgType, Device>::BlockAccess &&
                        TensorEvaluator<ElseArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<IfArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_condImpl(op.ifExpression(), device),
      m_thenImpl(op.thenExpression(), device),
      m_elseImpl(op.elseExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
    eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static const int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
      IfArgTensorBlock;
  typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
      ThenArgTensorBlock;
  typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
      ElseArgTensorBlock;

  struct TensorSelectOpBlockFactory {
    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    struct XprType {
      typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type;
    };

    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr(
        const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const {
      return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr);
    }
  };

  typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
                                           IfArgTensorBlock, ThenArgTensorBlock,
                                           ElseArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use then or else impl instead if they happen to be known at compile time.
    return m_condImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_condImpl.evalSubExprsIfNeeded(NULL);
    m_thenImpl.evalSubExprsIfNeeded(NULL);
    m_elseImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_condImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_thenImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
        m_elseImpl.evalSubExprsIfNeededAsync(nullptr,
                                             [done](bool) { done(true); });
      });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_condImpl.cleanup();
    m_thenImpl.cleanup();
    m_elseImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
  {
    internal::Selector<PacketSize> select;
    EIGEN_UNROLL_LOOP
    for (Index i = 0; i < PacketSize; ++i) {
      select.select[i] = m_condImpl.coeff(index+i);
    }
    return internal::pblend(select,
                            m_thenImpl.template packet<LoadMode>(index),
                            m_elseImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return m_condImpl.costPerCoeff(vectorized) +
           m_thenImpl.costPerCoeff(vectorized)
               .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements
  getResourceRequirements() const {
    auto then_req = m_thenImpl.getResourceRequirements();
    auto else_req = m_elseImpl.getResourceRequirements();

    auto merged_req =
        internal::TensorBlockResourceRequirements::merge(then_req, else_req);
    merged_req.cost_per_coeff =
        then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);

    return internal::TensorBlockResourceRequirements::merge(
        m_condImpl.getResourceRequirements(), merged_req);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // It's unsafe to pass a destination buffer to the underlying expressions,
    // because the output might be aliased with one of the inputs.
    desc.DropDestinationBuffer();

    return TensorBlock(
        m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
        m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_condImpl.bind(cgh);
    m_thenImpl.bind(cgh);
    m_elseImpl.bind(cgh);
  }
#endif
 private:
  TensorEvaluator<IfArgType, Device> m_condImpl;
  TensorEvaluator<ThenArgType, Device> m_thenImpl;
  TensorEvaluator<ElseArgType, Device> m_elseImpl;
};
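
// Usage sketch (hypothetical, for illustration): a select expression such as
//
//   Eigen::Tensor<bool, 1> cond(8);
//   Eigen::Tensor<float, 1> a(8), b(8);
//   auto expr = cond.select(a, b);  // TensorSelectOp
//
// evaluates per coefficient as cond(i) ? a(i) : b(i); the packet path builds a
// Selector from PacketSize condition coefficients and blends whole packets of
// `a` and `b` with internal::pblend.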


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H