AssignEvaluator.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
5 // Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
6 // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
7 //
8 // This Source Code Form is subject to the terms of the Mozilla
9 // Public License v. 2.0. If a copy of the MPL was not distributed
10 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 
12 #ifndef EIGEN_ASSIGN_EVALUATOR_H
13 #define EIGEN_ASSIGN_EVALUATOR_H
14 
15 namespace Eigen {
16 
17 // This implementation is based on Assign.h
18 
19 namespace internal {
20 
21 /***************************************************************************
22 * Part 1 : the logic deciding a strategy for traversal and unrolling *
23 ***************************************************************************/
24 
25 // copy_using_evaluator_traits is based on assign_traits
26 
27 template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
28 struct copy_using_evaluator_traits
29 {
30  typedef typename DstEvaluator::XprType Dst;
31  typedef typename Dst::Scalar DstScalar;
32 
33  enum {
34  DstFlags = DstEvaluator::Flags,
35  SrcFlags = SrcEvaluator::Flags
36  };
37 
38 public:
39  enum {
40  DstAlignment = DstEvaluator::Alignment,
41  SrcAlignment = SrcEvaluator::Alignment,
42  DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
43  JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
44  };
45 
46 private:
47  enum {
48  InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
49  : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
50  : int(Dst::RowsAtCompileTime),
51  InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
52  : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
53  : int(Dst::MaxRowsAtCompileTime),
54  RestrictedInnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(InnerSize,MaxPacketSize),
55  RestrictedLinearSize = EIGEN_SIZE_MIN_PREFER_FIXED(Dst::SizeAtCompileTime,MaxPacketSize),
56  OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
57  MaxSizeAtCompileTime = Dst::SizeAtCompileTime
58  };
59 
60  // TODO distinguish between linear traversal and inner-traversals
61  typedef typename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType;
62  typedef typename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType;
63 
64  enum {
65  LinearPacketSize = unpacket_traits<LinearPacketType>::size,
66  InnerPacketSize = unpacket_traits<InnerPacketType>::size
67  };
68 
69 public:
70  enum {
71  LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
72  InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
73  };
74 
75 private:
76  enum {
77  DstIsRowMajor = DstFlags&RowMajorBit,
78  SrcIsRowMajor = SrcFlags&RowMajorBit,
79  StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
80  MightVectorize = bool(StorageOrdersAgree)
81  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
82  && bool(functor_traits<AssignFunc>::PacketAccess),
83  MayInnerVectorize = MightVectorize
84  && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
85  && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
86  && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
87  MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
88  MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
89  && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
90  /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
91  so it's only good for large enough sizes. */
92  MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
93  && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
94  /* slice vectorization can be slow, so we only want it if the slices are big, which is
95  indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
96  in a fixed-size matrix
97  However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
98  };
99 
100 public:
101  enum {
102  Traversal = int(Dst::SizeAtCompileTime) == 0 ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time.
103  : (int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize)) ? int(LinearVectorizedTraversal)
104  : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
105  : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
106  : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
107  : int(MayLinearize) ? int(LinearTraversal)
108  : int(DefaultTraversal),
109  Vectorized = int(Traversal) == InnerVectorizedTraversal
110  || int(Traversal) == LinearVectorizedTraversal
111  || int(Traversal) == SliceVectorizedTraversal
112  };
113 
114  typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
115 
116 private:
117  enum {
118  ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
119  : Vectorized ? InnerPacketSize
120  : 1,
121  UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
122  MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
123  && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
124  MayUnrollInner = int(InnerSize) != Dynamic
125  && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
126  };
127 
128 public:
129  enum {
130  Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
131  ? (
132  int(MayUnrollCompletely) ? int(CompleteUnrolling)
133  : int(MayUnrollInner) ? int(InnerUnrolling)
134  : int(NoUnrolling)
135  )
136  : int(Traversal) == int(LinearVectorizedTraversal)
137  ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
138  ? int(CompleteUnrolling)
139  : int(NoUnrolling) )
140  : int(Traversal) == int(LinearTraversal)
141  ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
142  : int(NoUnrolling) )
143 #if EIGEN_UNALIGNED_VECTORIZE
144  : int(Traversal) == int(SliceVectorizedTraversal)
145  ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
146  : int(NoUnrolling) )
147 #endif
148  : int(NoUnrolling)
149  };
150 
151 #ifdef EIGEN_DEBUG_ASSIGN
152  static void debug()
153  {
154  std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
155  std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
156  std::cerr.setf(std::ios::hex, std::ios::basefield);
157  std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
158  std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
159  std::cerr.unsetf(std::ios::hex);
169  EIGEN_DEBUG_VAR(ActualPacketSize)
176  std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
177  EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
178  EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
179  EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
180  EIGEN_DEBUG_VAR(UnrollingLimit)
181  EIGEN_DEBUG_VAR(MayUnrollCompletely)
182  EIGEN_DEBUG_VAR(MayUnrollInner)
183  std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
184  std::cerr << std::endl;
185  }
186 #endif
187 };
188 
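Added annotation (not part of the original header): a minimal sketch of how the decision made by copy_using_evaluator_traits can be inspected for a concrete assignment, using the same evaluator and functor types that call_dense_assignment_loop (Part 5) instantiates; the alias names Eval/Op/Traits are made up for the example.

  #include <Eigen/Dense>
  #include <iostream>
  int main() {
    using namespace Eigen;
    typedef internal::evaluator<Matrix4f>                         Eval;   // evaluator of a small fixed-size matrix
    typedef internal::assign_op<float,float>                      Op;     // plain operator= functor
    typedef internal::copy_using_evaluator_traits<Eval, Eval, Op> Traits; // the traits class above
    // Traversal and Unrolling hold the selected strategy, e.g. compare them against
    // internal::LinearVectorizedTraversal or internal::CompleteUnrolling.
    std::cout << "Traversal = "  << int(Traits::Traversal)
              << ", Unrolling = " << int(Traits::Unrolling) << std::endl;
    return 0;
  }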
189 /***************************************************************************
190 * Part 2 : meta-unrollers
191 ***************************************************************************/
192 
193 /************************
194 *** Default traversal ***
195 ************************/
196 
197 template<typename Kernel, int Index, int Stop>
198 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
199 {
200  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
201  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
202  typedef typename DstEvaluatorType::XprType DstXprType;
203 
204  enum {
205  outer = Index / DstXprType::InnerSizeAtCompileTime,
206  inner = Index % DstXprType::InnerSizeAtCompileTime
207  };
208 
209  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
210  {
211  kernel.assignCoeffByOuterInner(outer, inner);
212  copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
213  }
214 };
215 
216 template<typename Kernel, int Stop>
217 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
218 {
219  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
220 };
221 
222 template<typename Kernel, int Index_, int Stop>
223 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
224 {
225  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
226  {
227  kernel.assignCoeffByOuterInner(outer, Index_);
228  copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
229  }
230 };
231 
232 template<typename Kernel, int Stop>
233 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
234 {
235  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
236 };
237 
238 /***********************
239 *** Linear traversal ***
240 ***********************/
241 
242 template<typename Kernel, int Index, int Stop>
243 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
244 {
245  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
246  {
247  kernel.assignCoeff(Index);
248  copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
249  }
250 };
251 
252 template<typename Kernel, int Stop>
253 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
254 {
255  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
256 };
257 
258 /**************************
259 *** Inner vectorization ***
260 **************************/
261 
262 template<typename Kernel, int Index, int Stop>
263 struct copy_using_evaluator_innervec_CompleteUnrolling
264 {
265  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
266  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
267  typedef typename DstEvaluatorType::XprType DstXprType;
268  typedef typename Kernel::PacketType PacketType;
269 
270  enum {
271  outer = Index / DstXprType::InnerSizeAtCompileTime,
272  inner = Index % DstXprType::InnerSizeAtCompileTime,
273  SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
274  DstAlignment = Kernel::AssignmentTraits::DstAlignment
275  };
276 
277  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
278  {
279  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
280  enum { NextIndex = Index + unpacket_traits<PacketType>::size };
281  copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
282  }
283 };
284 
285 template<typename Kernel, int Stop>
286 struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
287 {
288  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
289 };
290 
291 template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
292 struct copy_using_evaluator_innervec_InnerUnrolling
293 {
294  typedef typename Kernel::PacketType PacketType;
295  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
296  {
297  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
298  enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
299  copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
300  }
301 };
302 
303 template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
304 struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
305 {
306  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
307 };
308 
309 /***************************************************************************
310 * Part 3 : implementation of all cases
311 ***************************************************************************/
312 
313 // dense_assignment_loop is based on assign_impl
314 
315 template<typename Kernel,
316  int Traversal = Kernel::AssignmentTraits::Traversal,
317  int Unrolling = Kernel::AssignmentTraits::Unrolling>
318 struct dense_assignment_loop;
319 
320 /************************
321 ***** Special Cases *****
322 ************************/
323 
324 // Zero-sized assignment is a no-op.
325 template<typename Kernel, int Unrolling>
326 struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
327 {
328  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& /*kernel*/)
329  {
330  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
331  EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,
332  EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
333  }
334 };
335 
336 /************************
337 *** Default traversal ***
338 ************************/
339 
340 template<typename Kernel>
341 struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
342 {
343  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
344  {
345  for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
346  for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
347  kernel.assignCoeffByOuterInner(outer, inner);
348  }
349  }
350  }
351 };
352 
353 template<typename Kernel>
354 struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
355 {
356  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
357  {
358  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
359  copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
360  }
361 };
362 
363 template<typename Kernel>
364 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
365 {
366  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
367  {
368  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
369 
370  const Index outerSize = kernel.outerSize();
371  for(Index outer = 0; outer < outerSize; ++outer)
372  copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
373  }
374 };
375 
376 /***************************
377 *** Linear vectorization ***
378 ***************************/
379 
380 
381 // The goal of unaligned_dense_assignment_loop is simply to factorize the handling
382 // of the non-vectorizable beginning and ending parts
383 
384 template <bool IsAligned = false>
385 struct unaligned_dense_assignment_loop
386 {
387  // if IsAligned = true, then do nothing
388  template <typename Kernel>
389  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
390 };
391 
392 template <>
393 struct unaligned_dense_assignment_loop<false>
394 {
395  // MSVC must not inline this function. If it does, it fails to optimize the
396  // packet access path.
397  // FIXME check which version exhibits this issue
398 #if EIGEN_COMP_MSVC
399  template <typename Kernel>
400  static EIGEN_DONT_INLINE void run(Kernel &kernel,
401  Index start,
402  Index end)
403 #else
404  template <typename Kernel>
405  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
406  Index start,
407  Index end)
408 #endif
409  {
410  for (Index index = start; index < end; ++index)
411  kernel.assignCoeff(index);
412  }
413 };
414 
415 template<typename Kernel>
416 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
417 {
418  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
419  {
420  const Index size = kernel.size();
421  typedef typename Kernel::Scalar Scalar;
422  typedef typename Kernel::PacketType PacketType;
423  enum {
424  requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
425  packetSize = unpacket_traits<PacketType>::size,
426  dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
427  dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
428  : int(Kernel::AssignmentTraits::DstAlignment),
429  srcAlignment = Kernel::AssignmentTraits::JointAlignment
430  };
431  const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
432  const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
433 
434  unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
435 
436  for(Index index = alignedStart; index < alignedEnd; index += packetSize)
437  kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
438 
439  unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
440  }
441 };
442 
443 template<typename Kernel>
444 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
445 {
446  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
447  {
448  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
449  typedef typename Kernel::PacketType PacketType;
450 
451  enum { size = DstXprType::SizeAtCompileTime,
452  packetSize = unpacket_traits<PacketType>::size,
453  alignedSize = (int(size)/packetSize)*packetSize };
454 
455  copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
456  copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
457  }
458 };
459 
460 /**************************
461 *** Inner vectorization ***
462 **************************/
463 
464 template<typename Kernel>
465 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
466 {
467  typedef typename Kernel::PacketType PacketType;
468  enum {
469  SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
470  DstAlignment = Kernel::AssignmentTraits::DstAlignment
471  };
472  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
473  {
474  const Index innerSize = kernel.innerSize();
475  const Index outerSize = kernel.outerSize();
476  const Index packetSize = unpacket_traits<PacketType>::size;
477  for(Index outer = 0; outer < outerSize; ++outer)
478  for(Index inner = 0; inner < innerSize; inner+=packetSize)
479  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
480  }
481 };
482 
483 template<typename Kernel>
484 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
485 {
486  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
487  {
488  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
489  copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
490  }
491 };
492 
493 template<typename Kernel>
494 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
495 {
496  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
497  {
498  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
499  typedef typename Kernel::AssignmentTraits Traits;
500  const Index outerSize = kernel.outerSize();
501  for(Index outer = 0; outer < outerSize; ++outer)
502  copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
503  Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
504  }
505 };
506 
507 /***********************
508 *** Linear traversal ***
509 ***********************/
510 
511 template<typename Kernel>
512 struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
513 {
514  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
515  {
516  const Index size = kernel.size();
517  for(Index i = 0; i < size; ++i)
518  kernel.assignCoeff(i);
519  }
520 };
521 
522 template<typename Kernel>
523 struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
524 {
525  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
526  {
527  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
528  copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
529  }
530 };
531 
532 /**************************
533 *** Slice vectorization ***
534 ***************************/
535 
536 template<typename Kernel>
537 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
538 {
539  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
540  {
541  typedef typename Kernel::Scalar Scalar;
542  typedef typename Kernel::PacketType PacketType;
543  enum {
544  packetSize = unpacket_traits<PacketType>::size,
545  requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
546  alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
547  dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
548  dstAlignment = alignable ? int(requestedAlignment)
549  : int(Kernel::AssignmentTraits::DstAlignment)
550  };
551  const Scalar *dst_ptr = kernel.dstDataPtr();
552  if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
553  {
554  // the pointer is not aligned-on scalar, so alignment is not possible
555  return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
556  }
557  const Index packetAlignedMask = packetSize - 1;
558  const Index innerSize = kernel.innerSize();
559  const Index outerSize = kernel.outerSize();
560  const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
561  Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
562 
563  for(Index outer = 0; outer < outerSize; ++outer)
564  {
565  const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
566  // do the non-vectorizable part of the assignment
567  for(Index inner = 0; inner<alignedStart ; ++inner)
568  kernel.assignCoeffByOuterInner(outer, inner);
569 
570  // do the vectorizable part of the assignment
571  for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
572  kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
573 
574  // do the non-vectorizable part of the assignment
575  for(Index inner = alignedEnd; inner<innerSize ; ++inner)
576  kernel.assignCoeffByOuterInner(outer, inner);
577 
578  alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
579  }
580  }
581 };
582 
583 #if EIGEN_UNALIGNED_VECTORIZE
584 template<typename Kernel>
585 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
586 {
587  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
588  {
589  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
590  typedef typename Kernel::PacketType PacketType;
591 
592  enum { innerSize = DstXprType::InnerSizeAtCompileTime,
593  packetSize = unpacket_traits<PacketType>::size,
594  vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
595  size = DstXprType::SizeAtCompileTime };
596 
597  for(Index outer = 0; outer < kernel.outerSize(); ++outer)
598  {
599  copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
600  copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);
601  }
602  }
603 };
604 #endif
605 
606 
607 /***************************************************************************
608 * Part 4 : Generic dense assignment kernel
609 ***************************************************************************/
610 
611 // This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
612 // to another dense writable evaluator.
613 // It is parametrized by the two evaluators, and the actual assignment functor.
614 // This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible.
615 // One can customize the assignment using this generic dense_assignment_kernel with different
616 // functors, or by completely overloading it, by-passing a functor.
617 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
618 class generic_dense_assignment_kernel
619 {
620 protected:
621  typedef typename DstEvaluatorTypeT::XprType DstXprType;
622  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
623 public:
624 
625  typedef DstEvaluatorTypeT DstEvaluatorType;
626  typedef SrcEvaluatorTypeT SrcEvaluatorType;
627  typedef typename DstEvaluatorType::Scalar Scalar;
628  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
629  typedef typename AssignmentTraits::PacketType PacketType;
630 
631 
632  EIGEN_DEVICE_FUNC
633  generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
634  : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
635  {
636  #ifdef EIGEN_DEBUG_ASSIGN
637  AssignmentTraits::debug();
638  #endif
639  }
640 
641  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_dstExpr.size(); }
642  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const EIGEN_NOEXCEPT { return m_dstExpr.innerSize(); }
643  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const EIGEN_NOEXCEPT { return m_dstExpr.outerSize(); }
644  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dstExpr.rows(); }
645  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_dstExpr.cols(); }
646  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return m_dstExpr.outerStride(); }
647 
648  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() EIGEN_NOEXCEPT { return m_dst; }
649  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }
650 
651  /// Assign src(row,col) to dst(row,col) through the assignment functor.
652  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
653  {
654  m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
655  }
656 
657  /// \sa assignCoeff(Index,Index)
658  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
659  {
660  m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
661  }
662 
663  /// \sa assignCoeff(Index,Index)
664  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
665  {
666  Index row = rowIndexByOuterInner(outer, inner);
667  Index col = colIndexByOuterInner(outer, inner);
668  assignCoeff(row, col);
669  }
670 
671 
672  template<int StoreMode, int LoadMode, typename PacketType>
673  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
674  {
675  m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
676  }
677 
678  template<int StoreMode, int LoadMode, typename PacketType>
679  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
680  {
681  m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
682  }
683 
684  template<int StoreMode, int LoadMode, typename PacketType>
685  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
686  {
687  Index row = rowIndexByOuterInner(outer, inner);
688  Index col = colIndexByOuterInner(outer, inner);
689  assignPacket<StoreMode,LoadMode,PacketType>(row, col);
690  }
691 
692  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
693  {
694  typedef typename DstEvaluatorType::ExpressionTraits Traits;
695  return int(Traits::RowsAtCompileTime) == 1 ? 0
696  : int(Traits::ColsAtCompileTime) == 1 ? inner
697  : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
698  : inner;
699  }
700 
701  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
702  {
703  typedef typename DstEvaluatorType::ExpressionTraits Traits;
704  return int(Traits::ColsAtCompileTime) == 1 ? 0
705  : int(Traits::RowsAtCompileTime) == 1 ? inner
706  : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
707  : outer;
708  }
709 
710  EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
711  {
712  return m_dstExpr.data();
713  }
714 
715 protected:
716  DstEvaluatorType& m_dst;
717  const SrcEvaluatorType& m_src;
718  const Functor &m_functor;
719  // TODO find a way to avoid the need for the original expression
720  DstXprType& m_dstExpr;
721 };
722 
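Added annotation (not part of the original header): a minimal sketch of driving the kernel above by hand, which is essentially what call_dense_assignment_loop in Part 5 does for a plain 'dst = src'; copy_by_hand and the alias names are made up for the example.

  #include <Eigen/Dense>
  // Copies src into dst through the generic kernel (sizes are assumed to match already).
  void copy_by_hand(Eigen::MatrixXf& dst, const Eigen::MatrixXf& src) {
    using namespace Eigen;
    typedef internal::evaluator<MatrixXf>    Eval;
    typedef internal::assign_op<float,float> Op;
    typedef internal::generic_dense_assignment_kernel<Eval, Eval, Op> Kernel;
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    Eval dstEval(dst), srcEval(src);
    Op op;
    Kernel kernel(dstEval, srcEval, op, dst);              // bundle evaluators, functor and destination
    internal::dense_assignment_loop<Kernel>::run(kernel);  // pick traversal/unrolling and run the loops
  }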
723 // Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the
724 // PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
725 // when computing the product.
726 
727 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
728 class restricted_packet_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn>
729 {
730 protected:
731  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
732  public:
733  typedef typename Base::Scalar Scalar;
734  typedef typename Base::DstXprType DstXprType;
735  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
736  typedef typename AssignmentTraits::PacketType PacketType;
737 
738  EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
739  : Base(dst, src, func, dstExpr)
740  {
741  }
742  };
743 
744 /***************************************************************************
745 * Part 5 : Entry point for dense rectangular assignment
746 ***************************************************************************/
747 
748 template<typename DstXprType,typename SrcXprType, typename Functor>
749 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
750 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
751 {
752  EIGEN_ONLY_USED_FOR_DEBUG(dst);
753  EIGEN_ONLY_USED_FOR_DEBUG(src);
754  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
755 }
756 
757 template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
758 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
759 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
760 {
761  Index dstRows = src.rows();
762  Index dstCols = src.cols();
763  if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
764  dst.resize(dstRows, dstCols);
765  eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
766 }
767 
768 template<typename DstXprType, typename SrcXprType, typename Functor>
769 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
770 {
771  typedef evaluator<DstXprType> DstEvaluatorType;
772  typedef evaluator<SrcXprType> SrcEvaluatorType;
773 
774  SrcEvaluatorType srcEvaluator(src);
775 
776  // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
777  // we need to resize the destination after the source evaluator has been created.
778  resize_if_allowed(dst, src, func);
779 
780  DstEvaluatorType dstEvaluator(dst);
781 
782  typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
783  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
784 
785  dense_assignment_loop<Kernel>::run(kernel);
786 }
787 
788 // Specialization for filling the destination with a constant value.
789 #ifndef EIGEN_GPU_COMPILE_PHASE
790 template<typename DstXprType>
791 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src, const internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>& func)
792 {
793  resize_if_allowed(dst, src, func);
794  std::fill_n(dst.data(), dst.size(), src.functor()());
795 }
796 #endif
797 
798 template<typename DstXprType, typename SrcXprType>
799 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
800 {
801  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
802 }
803 
804 /***************************************************************************
805 * Part 6 : Generic assignment
806 ***************************************************************************/
807 
808 // Based on the respective shapes of the destination and source,
809 // the class AssignmentKind determines the kind of assignment mechanism.
810 // AssignmentKind must define a Kind typedef.
811 template<typename DstShape, typename SrcShape> struct AssignmentKind;
812 
813 // Assignment kind defined in this file:
814 struct Dense2Dense {};
815 struct EigenBase2EigenBase {};
816 
817 template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
818 template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
819 
820 // This is the main assignment class
821 template< typename DstXprType, typename SrcXprType, typename Functor,
822  typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape, typename evaluator_traits<SrcXprType>::Shape >::Kind,
823  typename EnableIf = void>
824 struct Assignment;
825 
826 
827 // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
828 // Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
829 // So this intermediate function removes everything related to "assume-aliasing" such that Assignment
830 // does not have to bother about these annoying details.
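Added annotation (not part of the original header): what this distinction means at the user level; aliasing_demo is a made-up example.

  #include <Eigen/Dense>
  void aliasing_demo(Eigen::MatrixXd& A, const Eigen::MatrixXd& B) {
    A = A * B;            // products "assume aliasing": the result is evaluated into a temporary, then copied into A
    A.noalias() = B * B;  // the NoAlias overload below skips that temporary and assigns directly
  }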
831 
832 template<typename Dst, typename Src>
833 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
834 void call_assignment(Dst& dst, const Src& src)
835 {
836  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
837 }
838 template<typename Dst, typename Src>
839 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
840 void call_assignment(const Dst& dst, const Src& src)
841 {
842  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
843 }
844 
845 // Deal with "assume-aliasing"
846 template<typename Dst, typename Src, typename Func>
847 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
848 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
849 {
850  typename plain_matrix_type<Src>::type tmp(src);
851  call_assignment_no_alias(dst, tmp, func);
852 }
853 
854 template<typename Dst, typename Src, typename Func>
855 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
856 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
857 {
858  call_assignment_no_alias(dst, src, func);
859 }
860 
861 // by-pass "assume-aliasing"
862 // When there is no aliasing, we require that 'dst' has been properly resized
863 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
864 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
865 void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
866 {
867  call_assignment_no_alias(dst.expression(), src, func);
868 }
869 
870 
871 template<typename Dst, typename Src, typename Func>
872 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
873 void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
874 {
875  enum {
876  NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
877  || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
878  ) && int(Dst::SizeAtCompileTime) != 1
879  };
880 
881  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
882  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
883  ActualDstType actualDst(dst);
884 
885  // TODO check whether this is the right place to perform these checks:
886  EIGEN_STATIC_ASSERT_LVALUE(Dst)
887  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
888  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
889 
890  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
891 }
892 
893 template<typename Dst, typename Src, typename Func>
894 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
895 void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
896 {
897  typedef evaluator<Dst> DstEvaluatorType;
898  typedef evaluator<Src> SrcEvaluatorType;
899  typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Func> Kernel;
900 
901  EIGEN_STATIC_ASSERT_LVALUE(Dst)
902  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
903 
904  SrcEvaluatorType srcEvaluator(src);
905  resize_if_allowed(dst, src, func);
906 
907  DstEvaluatorType dstEvaluator(dst);
908  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
909 
910  dense_assignment_loop<Kernel>::run(kernel);
911 }
912 
913 template<typename Dst, typename Src>
914 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
915 void call_assignment_no_alias(Dst& dst, const Src& src)
916 {
917  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
918 }
919 
920 template<typename Dst, typename Src, typename Func>
921 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
922 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
923 {
924  // TODO check whether this is the right place to perform these checks:
925  EIGEN_STATIC_ASSERT_LVALUE(Dst)
926  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
927  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
928 
929  Assignment<Dst,Src,Func>::run(dst, src, func);
930 }
931 template<typename Dst, typename Src>
932 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
933 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
934 {
935  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
936 }
937 
938 // forward declaration
939 template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
940 
941 // Generic Dense to Dense assignment
942 // Note that the last template argument "Weak" is needed to make it possible to perform
943 // both partial specialization+SFINAE without ambiguous specialization
944 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
945 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
946 {
947  EIGEN_DEVICE_FUNC
948  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
949  {
950 #ifndef EIGEN_NO_DEBUG
951  internal::check_for_aliasing(dst, src);
952 #endif
953 
954  call_dense_assignment_loop(dst, src, func);
955  }
956 };
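Added annotation (not part of the original header): a hedged sketch of how other modules plug into this mechanism by partially specializing Assignment for their own Kind, mirroring the Dense2Dense case above. MyShape2Dense is a hypothetical tag (a real module would also specialize AssignmentKind to produce it), and the body simply falls back to the dense loop.

  namespace Eigen { namespace internal {
    struct MyShape2Dense {};   // hypothetical Kind, for illustration only
    template<typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
    struct Assignment<DstXprType, SrcXprType, Functor, MyShape2Dense, Weak>
    {
      EIGEN_DEVICE_FUNC
      static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
      {
        // a custom evaluation path would go here; this sketch just reuses the dense loop
        call_dense_assignment_loop(dst, src, func);
      }
    };
  }}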
957 
958 // Generic assignment through evalTo.
959 // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
960 // Note that the last template argument "Weak" is needed to make it possible to perform
961 // both partial specialization+SFINAE without ambiguous specialization
962 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
963 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
964 {
965  EIGEN_DEVICE_FUNC
966  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
967  {
968  Index dstRows = src.rows();
969  Index dstCols = src.cols();
970  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
971  dst.resize(dstRows, dstCols);
972 
973  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
974  src.evalTo(dst);
975  }
976 
977  // NOTE The following two functions are templated to avoid their instantiation if not needed
978  // This is needed because some expressions support evalTo only and/or have 'void' as scalar type.
979  template<typename SrcScalarType>
980  EIGEN_DEVICE_FUNC
981  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
982  {
983  Index dstRows = src.rows();
984  Index dstCols = src.cols();
985  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
986  dst.resize(dstRows, dstCols);
987 
988  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
989  src.addTo(dst);
990  }
991 
992  template<typename SrcScalarType>
993  EIGEN_DEVICE_FUNC
994  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
995  {
996  Index dstRows = src.rows();
997  Index dstCols = src.cols();
998  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
999  dst.resize(dstRows, dstCols);
1000 
1001  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
1002  src.subTo(dst);
1003  }
1004 };
1005 
1006 } // namespace internal
1007 
1008 } // end namespace Eigen
1009 
1010 #endif // EIGEN_ASSIGN_EVALUATOR_H