AssignEvaluator.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
5 // Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
6 // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
7 //
8 // This Source Code Form is subject to the terms of the Mozilla
9 // Public License v. 2.0. If a copy of the MPL was not distributed
10 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 
12 #ifndef EIGEN_ASSIGN_EVALUATOR_H
13 #define EIGEN_ASSIGN_EVALUATOR_H
14 
15 namespace Eigen {
16 
17 // This implementation is based on Assign.h
18 
19 namespace internal {
20 
21 /***************************************************************************
22 * Part 1 : the logic deciding a strategy for traversal and unrolling *
23 ***************************************************************************/
24 
25 // copy_using_evaluator_traits is based on assign_traits
26 
27 template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
29 {
30  typedef typename DstEvaluator::XprType Dst;
31  typedef typename Dst::Scalar DstScalar;
32 
33  enum {
34  DstFlags = DstEvaluator::Flags,
35  SrcFlags = SrcEvaluator::Flags
36  };
37 
38 public:
39  enum {
40  DstAlignment = DstEvaluator::Alignment,
41  SrcAlignment = SrcEvaluator::Alignment,
44  };
45 
46 private:
47  enum {
48  InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
49  : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
50  : int(Dst::RowsAtCompileTime),
51  InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
52  : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
53  : int(Dst::MaxRowsAtCompileTime),
55  MaxSizeAtCompileTime = Dst::SizeAtCompileTime
56  };
57 
58  // TODO distinguish between linear traversal and inner-traversals
61 
62  enum {
65  };
66 
67 public:
68  enum {
71  };
72 
73 private:
74  enum {
79  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
82  && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
83  && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
88  /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
89  so it's only good for large enough sizes. */
92  /* slice vectorization can be slow, so we only want it if the slices are big, which is
93  indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
94  in a fixed-size matrix
95  However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
96  };
97 
98 public:
99  enum {
104  : int(MayLinearize) ? int(LinearTraversal)
105  : int(DefaultTraversal),
109  };
110 
112 
113 private:
114  enum {
117  : 1,
119  MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
120  && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
121  MayUnrollInner = int(InnerSize) != Dynamic
122  && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
123  };
124 
125 public:
126  enum {
128  ? (
129  int(MayUnrollCompletely) ? int(CompleteUnrolling)
130  : int(MayUnrollInner) ? int(InnerUnrolling)
131  : int(NoUnrolling)
132  )
133  : int(Traversal) == int(LinearVectorizedTraversal)
134  ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
135  ? int(CompleteUnrolling)
136  : int(NoUnrolling) )
137  : int(Traversal) == int(LinearTraversal)
138  ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
139  : int(NoUnrolling) )
141  : int(Traversal) == int(SliceVectorizedTraversal)
142  ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
143  : int(NoUnrolling) )
144 #endif
145  : int(NoUnrolling)
146  };
147 
148 #ifdef EIGEN_DEBUG_ASSIGN
149  static void debug()
150  {
151  std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
152  std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
153  std::cerr.setf(std::ios::hex, std::ios::basefield);
154  std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
155  std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
156  std::cerr.unsetf(std::ios::hex);
166  EIGEN_DEBUG_VAR(ActualPacketSize)
173  std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
174  EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
176  EIGEN_DEBUG_VAR(MayUnrollCompletely)
177  EIGEN_DEBUG_VAR(MayUnrollInner)
178  std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
179  std::cerr << std::endl;
180  }
181 #endif
182 };
183 
184 /***************************************************************************
185 * Part 2 : meta-unrollers
186 ***************************************************************************/
187 
188 /************************
189 *** Default traversal ***
190 ************************/
191 
192 template<typename Kernel, int Index, int Stop>
194 {
195  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
196  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
197  typedef typename DstEvaluatorType::XprType DstXprType;
198 
199  enum {
200  outer = Index / DstXprType::InnerSizeAtCompileTime,
201  inner = Index % DstXprType::InnerSizeAtCompileTime
202  };
203 
204  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
205  {
206  kernel.assignCoeffByOuterInner(outer, inner);
208  }
209 };
210 
211 template<typename Kernel, int Stop>
213 {
214  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
215 };
216 
217 template<typename Kernel, int Index_, int Stop>
219 {
220  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
221  {
222  kernel.assignCoeffByOuterInner(outer, Index_);
224  }
225 };
226 
227 template<typename Kernel, int Stop>
229 {
230  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
231 };
232 
233 /***********************
234 *** Linear traversal ***
235 ***********************/
236 
237 template<typename Kernel, int Index, int Stop>
239 {
240  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
241  {
242  kernel.assignCoeff(Index);
244  }
245 };
246 
247 template<typename Kernel, int Stop>
249 {
250  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
251 };
252 
253 /**************************
254 *** Inner vectorization ***
255 **************************/
256 
257 template<typename Kernel, int Index, int Stop>
259 {
260  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
261  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
262  typedef typename DstEvaluatorType::XprType DstXprType;
263  typedef typename Kernel::PacketType PacketType;
264 
265  enum {
266  outer = Index / DstXprType::InnerSizeAtCompileTime,
267  inner = Index % DstXprType::InnerSizeAtCompileTime,
268  SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
269  DstAlignment = Kernel::AssignmentTraits::DstAlignment
270  };
271 
272  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
273  {
274  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
275  enum { NextIndex = Index + unpacket_traits<PacketType>::size };
277  }
278 };
279 
280 template<typename Kernel, int Stop>
282 {
283  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
284 };
285 
286 template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
288 {
289  typedef typename Kernel::PacketType PacketType;
290  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
291  {
292  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
293  enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
295  }
296 };
297 
298 template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
300 {
301  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
302 };
303 
304 /***************************************************************************
305 * Part 3 : implementation of all cases
306 ***************************************************************************/
307 
308 // dense_assignment_loop is based on assign_impl
309 
310 template<typename Kernel,
311  int Traversal = Kernel::AssignmentTraits::Traversal,
312  int Unrolling = Kernel::AssignmentTraits::Unrolling>
314 
315 /************************
316 *** Default traversal ***
317 ************************/
318 
319 template<typename Kernel>
321 {
322  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
323  {
324  for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
325  for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
326  kernel.assignCoeffByOuterInner(outer, inner);
327  }
328  }
329  }
330 };
331 
332 template<typename Kernel>
334 {
335  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
336  {
337  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
339  }
340 };
341 
342 template<typename Kernel>
344 {
345  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
346  {
347  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
348 
349  const Index outerSize = kernel.outerSize();
350  for(Index outer = 0; outer < outerSize; ++outer)
352  }
353 };
354 
355 /***************************
356 *** Linear vectorization ***
357 ***************************/
358 
359 
360 // The goal of unaligned_dense_assignment_loop is simply to factor out the handling
361 // of the non-vectorizable beginning and ending parts.
362 
363 template <bool IsAligned = false>
365 {
366  // if IsAligned = true, then do nothing
367  template <typename Kernel>
368  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
369 };
370 
371 template <>
373 {
374  // MSVC must not inline this function. If it does, it fails to optimize the
375  // packet access path.
376  // FIXME check which version exhibits this issue
377 #if EIGEN_COMP_MSVC
378  template <typename Kernel>
379  static EIGEN_DONT_INLINE void run(Kernel &kernel,
380  Index start,
381  Index end)
382 #else
383  template <typename Kernel>
384  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
385  Index start,
386  Index end)
387 #endif
388  {
389  for (Index index = start; index < end; ++index)
390  kernel.assignCoeff(index);
391  }
392 };
393 
394 template<typename Kernel>
396 {
397  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
398  {
399  const Index size = kernel.size();
400  typedef typename Kernel::Scalar Scalar;
401  typedef typename Kernel::PacketType PacketType;
402  enum {
403  requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
405  dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
406  dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
407  : int(Kernel::AssignmentTraits::DstAlignment),
408  srcAlignment = Kernel::AssignmentTraits::JointAlignment
409  };
410  const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
411  const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
412 
414 
415  for(Index index = alignedStart; index < alignedEnd; index += packetSize)
416  kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
417 
418  unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
419  }
420 };
421 
422 template<typename Kernel>
424 {
425  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
426  {
427  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
428  typedef typename Kernel::PacketType PacketType;
429 
430  enum { size = DstXprType::SizeAtCompileTime,
432  alignedSize = (size/packetSize)*packetSize };
433 
436  }
437 };
438 
439 /**************************
440 *** Inner vectorization ***
441 **************************/
442 
443 template<typename Kernel>
445 {
446  typedef typename Kernel::PacketType PacketType;
447  enum {
448  SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
449  DstAlignment = Kernel::AssignmentTraits::DstAlignment
450  };
451  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
452  {
453  const Index innerSize = kernel.innerSize();
454  const Index outerSize = kernel.outerSize();
455  const Index packetSize = unpacket_traits<PacketType>::size;
456  for(Index outer = 0; outer < outerSize; ++outer)
457  for(Index inner = 0; inner < innerSize; inner+=packetSize)
458  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
459  }
460 };
461 
462 template<typename Kernel>
464 {
465  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
466  {
467  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
469  }
470 };
471 
472 template<typename Kernel>
474 {
475  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
476  {
477  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
478  typedef typename Kernel::AssignmentTraits Traits;
479  const Index outerSize = kernel.outerSize();
480  for(Index outer = 0; outer < outerSize; ++outer)
481  copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
482  Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
483  }
484 };
485 
486 /***********************
487 *** Linear traversal ***
488 ***********************/
489 
490 template<typename Kernel>
492 {
493  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
494  {
495  const Index size = kernel.size();
496  for(Index i = 0; i < size; ++i)
497  kernel.assignCoeff(i);
498  }
499 };
500 
501 template<typename Kernel>
503 {
504  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
505  {
506  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
508  }
509 };
510 
511 /**************************
512 *** Slice vectorization ***
513 ***************************/
514 
515 template<typename Kernel>
517 {
518  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
519  {
520  typedef typename Kernel::Scalar Scalar;
521  typedef typename Kernel::PacketType PacketType;
522  enum {
524  requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
525  alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
526  dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
527  dstAlignment = alignable ? int(requestedAlignment)
528  : int(Kernel::AssignmentTraits::DstAlignment)
529  };
530  const Scalar *dst_ptr = kernel.dstDataPtr();
531  if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
532  {
533  // the pointer is not aligned on scalar, so alignment is not possible
535  }
536  const Index packetAlignedMask = packetSize - 1;
537  const Index innerSize = kernel.innerSize();
538  const Index outerSize = kernel.outerSize();
539  const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
540  Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
541 
542  for(Index outer = 0; outer < outerSize; ++outer)
543  {
544  const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
545  // do the non-vectorizable part of the assignment
546  for(Index inner = 0; inner<alignedStart ; ++inner)
547  kernel.assignCoeffByOuterInner(outer, inner);
548 
549  // do the vectorizable part of the assignment
550  for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
551  kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
552 
553  // do the non-vectorizable part of the assignment
554  for(Index inner = alignedEnd; inner<innerSize ; ++inner)
555  kernel.assignCoeffByOuterInner(outer, inner);
556 
557  alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
558  }
559  }
560 };
561 
562 #if EIGEN_UNALIGNED_VECTORIZE
563 template<typename Kernel>
564 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
565 {
566  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
567  {
568  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
569  typedef typename Kernel::PacketType PacketType;
570 
571  enum { size = DstXprType::InnerSizeAtCompileTime,
573  vectorizableSize = (size/packetSize)*packetSize };
574 
575  for(Index outer = 0; outer < kernel.outerSize(); ++outer)
576  {
579  }
580  }
581 };
582 #endif
583 
584 
585 /***************************************************************************
586 * Part 4 : Generic dense assignment kernel
587 ***************************************************************************/
588 
589 // This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
590 // to another dense writable evaluator.
591 // It is parameterized by the two evaluators and the actual assignment functor.
592 // This level of abstraction keeps the evaluation loops as simple and as generic as possible.
593 // One can customize the assignment using this generic dense_assignment_kernel with different
594 // functors, or by completely overloading it, bypassing the functor.
595 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
597 {
598 protected:
599  typedef typename DstEvaluatorTypeT::XprType DstXprType;
600  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
601 public:
602 
603  typedef DstEvaluatorTypeT DstEvaluatorType;
604  typedef SrcEvaluatorTypeT SrcEvaluatorType;
605  typedef typename DstEvaluatorType::Scalar Scalar;
608 
609 
610  EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
611  : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
612  {
613  #ifdef EIGEN_DEBUG_ASSIGN
614  AssignmentTraits::debug();
615  #endif
616  }
617 
618  EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
619  EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
620  EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
621  EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
622  EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
623  EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
624 
625  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
626  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
627 
630  {
631  m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
632  }
633 
635  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
636  {
637  m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
638  }
639 
641  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
642  {
643  Index row = rowIndexByOuterInner(outer, inner);
644  Index col = colIndexByOuterInner(outer, inner);
645  assignCoeff(row, col);
646  }
647 
648 
649  template<int StoreMode, int LoadMode, typename PacketType>
651  {
652  m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
653  }
654 
655  template<int StoreMode, int LoadMode, typename PacketType>
656  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
657  {
658  m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
659  }
660 
661  template<int StoreMode, int LoadMode, typename PacketType>
662  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
663  {
664  Index row = rowIndexByOuterInner(outer, inner);
665  Index col = colIndexByOuterInner(outer, inner);
666  assignPacket<StoreMode,LoadMode,PacketType>(row, col);
667  }
668 
669  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
670  {
671  typedef typename DstEvaluatorType::ExpressionTraits Traits;
672  return int(Traits::RowsAtCompileTime) == 1 ? 0
673  : int(Traits::ColsAtCompileTime) == 1 ? inner
674  : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
675  : inner;
676  }
677 
678  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
679  {
680  typedef typename DstEvaluatorType::ExpressionTraits Traits;
681  return int(Traits::ColsAtCompileTime) == 1 ? 0
682  : int(Traits::RowsAtCompileTime) == 1 ? inner
683  : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
684  : outer;
685  }
686 
687  EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
688  {
689  return m_dstExpr.data();
690  }
691 
692 protected:
693  DstEvaluatorType& m_dst;
694  const SrcEvaluatorType& m_src;
696  // TODO find a way to avoid the need for the original expression
697  DstXprType& m_dstExpr;
698 };
699 
700 /***************************************************************************
701 * Part 5 : Entry point for dense rectangular assignment
702 ***************************************************************************/
703 
704 template<typename DstXprType,typename SrcXprType, typename Functor>
705 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
706 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
707 {
710  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
711 }
712 
// Overload of resize_if_allowed selected when the functor is internal::assign_op<T1,T2>.
// Resizes dst to the dimensions of src when they differ, then asserts that dst
// has exactly those dimensions. The functor argument only takes part in overload
// selection; its value is never read.
713 template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
714 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
715 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
716 {
717  Index dstRows = src.rows(); // the requested shape is taken from the source
718  Index dstCols = src.cols();
719  if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols))) // resize() is only called when the shape differs
720  dst.resize(dstRows, dstCols);
721  eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols); // checks that dst now has the requested shape
722 }
723 
724 template<typename DstXprType, typename SrcXprType, typename Functor>
725 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
726 {
727  typedef evaluator<DstXprType> DstEvaluatorType;
728  typedef evaluator<SrcXprType> SrcEvaluatorType;
729 
730  SrcEvaluatorType srcEvaluator(src);
731 
732  // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
733  // we need to resize the destination after the source evaluator has been created.
734  resize_if_allowed(dst, src, func);
735 
736  DstEvaluatorType dstEvaluator(dst);
737 
739  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
740 
742 }
743 
744 template<typename DstXprType, typename SrcXprType>
745 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
746 {
748 }
749 
750 /***************************************************************************
751 * Part 6 : Generic assignment
752 ***************************************************************************/
753 
754 // Based on the respective shapes of the destination and source,
755 // the class AssignmentKind determines the kind of assignment mechanism.
756 // AssignmentKind must define a Kind typedef.
// Forward declaration only; the primary definition and the dense specialization follow below.
757 template<typename DstShape, typename SrcShape> struct AssignmentKind;
758 
759 // Assignment kinds defined in this file:
760 struct Dense2Dense {}; // empty tag type, used as AssignmentKind<DenseShape,DenseShape>::Kind
762 
// Primary template: any pair of shapes without a dedicated specialization maps to EigenBase2EigenBase.
763 template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
// Specialization: a dense destination with a dense source maps to Dense2Dense.
764 template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
765 
766 // This is the main assignment class
767 template< typename DstXprType, typename SrcXprType, typename Functor,
769  typename EnableIf = void>
770 struct Assignment;
771 
772 
773 // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
774 // Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
775 // So this intermediate function removes everything related to "assume-aliasing" so that Assignment
776 // does not have to bother with these annoying details.
777 
778 template<typename Dst, typename Src>
779 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
780 void call_assignment(Dst& dst, const Src& src)
781 {
783 }
784 template<typename Dst, typename Src>
785 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
786 void call_assignment(const Dst& dst, const Src& src)
787 {
789 }
790 
791 // Deal with "assume-aliasing"
// This overload is enabled, through the defaulted enable_if parameter, when
// evaluator_assume_aliasing<Src>::value is true. src is first evaluated into a
// temporary of type plain_matrix_type<Src>::type, and that temporary is then
// assigned to dst via call_assignment_no_alias.
792 template<typename Dst, typename Src, typename Func>
793 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
794 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
795 {
796  typename plain_matrix_type<Src>::type tmp(src); // src is copied into tmp before dst is touched
797  call_assignment_no_alias(dst, tmp, func);
798 }
799 
// Counterpart overload, enabled through the defaulted enable_if parameter when
// evaluator_assume_aliasing<Src>::value is false: no temporary is created and
// the call is forwarded unchanged to call_assignment_no_alias.
800 template<typename Dst, typename Src, typename Func>
801 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
802 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
803 {
804  call_assignment_no_alias(dst, src, func);
805 }
806 
807 // by-pass "assume-aliasing"
808 // When there is no aliasing, we require that 'dst' has been properly resized
// Overload taking a NoAlias<Dst,StorageBase> wrapper as destination: the wrapped
// expression is extracted with dst.expression() and handed straight to
// call_assignment_no_alias, whatever evaluator_assume_aliasing<Src> says.
809 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
810 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
811 void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
812 {
813  call_assignment_no_alias(dst.expression(), src, func);
814 }
815 
816 
817 template<typename Dst, typename Src, typename Func>
818 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
819 void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
820 {
821  enum {
822  NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
823  || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
824  ) && int(Dst::SizeAtCompileTime) != 1
825  };
826 
827  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
828  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
829  ActualDstType actualDst(dst);
830 
831  // TODO check whether this is the right place to perform these checks:
833  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
834  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
835 
837 }
838 template<typename Dst, typename Src>
839 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
840 void call_assignment_no_alias(Dst& dst, const Src& src)
841 {
843 }
844 
845 template<typename Dst, typename Src, typename Func>
846 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
847 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
848 {
849  // TODO check whether this is the right place to perform these checks:
852  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
853 
854  Assignment<Dst,Src,Func>::run(dst, src, func);
855 }
856 template<typename Dst, typename Src>
857 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
858 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
859 {
861 }
862 
863 // forward declaration (this listing's cross-reference section places the definition in Transpose.h)
864 template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
865 
866 // Generic Dense to Dense assignment
867 // Note that the last template argument "Weak" is needed to make it possible to perform
868 // both partial specialization+SFINAE without ambiguous specialization
869 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
870 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
871 {
872  EIGEN_DEVICE_FUNC
873  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
874  {
875 #ifndef EIGEN_NO_DEBUG
877 #endif
878 
879  call_dense_assignment_loop(dst, src, func);
880  }
881 };
882 
883 // Generic assignment through evalTo.
884 // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
885 // Note that the last template argument "Weak" is needed to make it possible to perform
886 // both partial specialization+SFINAE without ambiguous specialization
// The three run() overloads below differ only in the functor type they accept and in
// the member of src they call: evalTo() for assign_op, addTo() for add_assign_op and
// subTo() for sub_assign_op. Each one first resizes dst to the dimensions of src when
// they differ and asserts that the dimensions match.
887 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
888 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
889 {
// Plain assignment: delegates to src.evalTo(dst). The functor value is not used.
890  EIGEN_DEVICE_FUNC
891  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
892  {
893  Index dstRows = src.rows();
894  Index dstCols = src.cols();
895  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) // resize() is only called when the shape differs
896  dst.resize(dstRows, dstCols);
897 
898  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
899  src.evalTo(dst);
900  }
901 
902  // NOTE The following two functions are templated to avoid their instantiation if not needed
903  // This is needed because some expressions support evalTo only and/or have 'void' as scalar type.
// Add-assignment: delegates to src.addTo(dst). The functor value is not used.
904  template<typename SrcScalarType>
905  EIGEN_DEVICE_FUNC
906  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
907  {
908  Index dstRows = src.rows();
909  Index dstCols = src.cols();
910  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) // resize() is only called when the shape differs
911  dst.resize(dstRows, dstCols);
912 
913  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
914  src.addTo(dst);
915  }
916 
// Subtract-assignment: delegates to src.subTo(dst). The functor value is not used.
917  template<typename SrcScalarType>
918  EIGEN_DEVICE_FUNC
919  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
920  {
921  Index dstRows = src.rows();
922  Index dstCols = src.cols();
923  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) // resize() is only called when the shape differs
924  dst.resize(dstRows, dstCols);
925 
926  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
927  src.subTo(dst);
928  }
929 };
930 
931 } // namespace internal
932 
933 } // end namespace Eigen
934 
935 #endif // EIGEN_ASSIGN_EVALUATOR_H
find_best_packet_helper< Size, typename packet_traits< T >::type >::type type
Definition: XprHelper.h:188
static EIGEN_DEVICE_FUNC void EIGEN_STRONG_INLINE run(Kernel &kernel)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias(Dst &dst, const Src &src, const Func &func)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
const unsigned int ActualPacketAccessBit
Definition: Constants.h:102
#define EIGEN_STRONG_INLINE
Definition: Macros.h:493
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
EIGEN_DEVICE_FUNC Index outerStride() const
EIGEN_DEVICE_FUNC const SrcEvaluatorType & srcEvaluator() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType &dst, const SrcXprType &src, const Functor &)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType &dst, const SrcXprType &src, const Functor &func)
#define EIGEN_DEBUG_VAR(x)
Definition: Macros.h:475
EIGEN_DEVICE_FUNC Index outerSize() const
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
Pseudo expression providing an operator = assuming no aliasing.
Definition: NoAlias.h:31
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
const unsigned int DirectAccessBit
Definition: Constants.h:150
void check_for_aliasing(const Dst &dst, const Src &src)
Definition: Transpose.h:392
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
Definition: LDLT.h:16
static constexpr size_t size(Tuple< Args... > &)
Provides access to the number of elements in a tuple as a compile-time constant expression.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &, Index, Index)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
const unsigned int RowMajorBit
Definition: Constants.h:61
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
std::size_t UIntPtr
Definition: Meta.h:51
#define EIGEN_STATIC_ASSERT_LVALUE(Derived)
Definition: StaticAssert.h:197
#define EIGEN_DONT_INLINE
Definition: Macros.h:515
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
EIGEN_DEVICE_FUNC ColXpr col(Index i)
This is the const version of col().
Definition: BlockMethods.h:838
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &)
EIGEN_DEVICE_FUNC DstEvaluatorType & dstEvaluator()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:33
#define eigen_assert(x)
Definition: Macros.h:577
EIGEN_DEVICE_FUNC RowXpr row(Index i)
This is the const version of row().
Definition: BlockMethods.h:859
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &, Index)
find_best_packet< DstScalar, InnerSize >::type InnerPacketType
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op< typename DstXprType::Scalar, typename SrcXprType::Scalar > &)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op< typename DstXprType::Scalar, SrcScalarType > &)
copy_using_evaluator_traits< DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor > AssignmentTraits
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
Assign src(row,col) to dst(row,col) through the assignment functor.
EIGEN_DEVICE_FUNC Index innerSize() const
EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType &dstExpr)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias_no_transpose(Dst &dst, const Src &src, const Func &func)
#define EIGEN_PLAIN_ENUM_MIN(a, b)
Definition: Macros.h:872
storage_kind_to_shape< typename traits< T >::StorageKind >::Shape Shape
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel, Index start, Index end)
#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP, LHS, RHS)
Definition: XprHelper.h:815
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
EIGEN_DEVICE_FUNC ExpressionType & expression() const
Definition: NoAlias.h:63
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
conditional< int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType >::type PacketType
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op< typename DstXprType::Scalar, SrcScalarType > &)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
const int Dynamic
Definition: Constants.h:21
Convenience specialization of Stride to specify only an outer stride. See class Map for some examples...
Definition: Stride.h:101
void run(Expr &expr, Dev &dev)
Definition: TensorSyclRun.h:33
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
EIGEN_DEVICE_FUNC const Scalar * dstDataPtr() const
const unsigned int LinearAccessBit
Definition: Constants.h:125
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(Dst &dst, const Src &src)
#define EIGEN_UNROLLING_LIMIT
Definition: Settings.h:24
#define EIGEN_UNALIGNED_VECTORIZE
Definition: Macros.h:784
find_best_packet< DstScalar, Dst::SizeAtCompileTime >::type LinearPacketType
#define EIGEN_ONLY_USED_FOR_DEBUG(x)
Definition: Macros.h:589
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel &kernel)
#define EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(TYPE0, TYPE1)
Definition: StaticAssert.h:187


hebiros
Author(s): Xavier Artache , Matthew Tesch
autogenerated on Thu Sep 3 2020 04:08:00