SelfadjointMatrixMatrix.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
11 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
12 
13 namespace Eigen {
14 
15 namespace internal {
16 
17 // pack a selfadjoint block diagonal for use with the gebp_kernel
18 template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
20 {
21  template<int BlockRows> inline
22  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
23  {
24  // normal copy
25  for(Index k=0; k<i; k++)
26  for(Index w=0; w<BlockRows; w++)
27  blockA[count++] = lhs(i+w,k); // normal
28  // symmetric copy
29  Index h = 0;
30  for(Index k=i; k<i+BlockRows; k++)
31  {
32  for(Index w=0; w<h; w++)
33  blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
34 
35  blockA[count++] = numext::real(lhs(k,k)); // real (diagonal)
36 
37  for(Index w=h+1; w<BlockRows; w++)
38  blockA[count++] = lhs(i+w, k); // normal
39  ++h;
40  }
41  // transposed copy
42  for(Index k=i+BlockRows; k<cols; k++)
43  for(Index w=0; w<BlockRows; w++)
44  blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
45  }
46  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
47  {
48  enum { PacketSize = packet_traits<Scalar>::size };
50  Index count = 0;
51  //Index peeled_mc3 = (rows/Pack1)*Pack1;
52 
53  const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
54  const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
55  const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
56 
57  if(Pack1>=3*PacketSize)
58  for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
59  pack<3*PacketSize>(blockA, lhs, cols, i, count);
60 
61  if(Pack1>=2*PacketSize)
62  for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
63  pack<2*PacketSize>(blockA, lhs, cols, i, count);
64 
65  if(Pack1>=1*PacketSize)
66  for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
67  pack<1*PacketSize>(blockA, lhs, cols, i, count);
68 
69  // do the same with mr==1
70  for(Index i=peeled_mc1; i<rows; i++)
71  {
72  for(Index k=0; k<i; k++)
73  blockA[count++] = lhs(i, k); // normal
74 
75  blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
76 
77  for(Index k=i+1; k<cols; k++)
78  blockA[count++] = numext::conj(lhs(k, i)); // transposed
79  }
80  }
81 };
82 
83 template<typename Scalar, typename Index, int nr, int StorageOrder>
85 {
86  enum { PacketSize = packet_traits<Scalar>::size };
87  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
88  {
89  Index end_k = k2 + rows;
90  Index count = 0;
92  Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
93  Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
94 
95  // first part: normal case
96  for(Index j2=0; j2<k2; j2+=nr)
97  {
98  for(Index k=k2; k<end_k; k++)
99  {
100  blockB[count+0] = rhs(k,j2+0);
101  blockB[count+1] = rhs(k,j2+1);
102  if (nr>=4)
103  {
104  blockB[count+2] = rhs(k,j2+2);
105  blockB[count+3] = rhs(k,j2+3);
106  }
107  if (nr>=8)
108  {
109  blockB[count+4] = rhs(k,j2+4);
110  blockB[count+5] = rhs(k,j2+5);
111  blockB[count+6] = rhs(k,j2+6);
112  blockB[count+7] = rhs(k,j2+7);
113  }
114  count += nr;
115  }
116  }
117 
118  // second part: diagonal block
119  Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
120  if(nr>=8)
121  {
122  for(Index j2=k2; j2<end8; j2+=8)
123  {
124  // again we can split vertically in three different parts (transpose, symmetric, normal)
125  // transpose
126  for(Index k=k2; k<j2; k++)
127  {
128  blockB[count+0] = numext::conj(rhs(j2+0,k));
129  blockB[count+1] = numext::conj(rhs(j2+1,k));
130  blockB[count+2] = numext::conj(rhs(j2+2,k));
131  blockB[count+3] = numext::conj(rhs(j2+3,k));
132  blockB[count+4] = numext::conj(rhs(j2+4,k));
133  blockB[count+5] = numext::conj(rhs(j2+5,k));
134  blockB[count+6] = numext::conj(rhs(j2+6,k));
135  blockB[count+7] = numext::conj(rhs(j2+7,k));
136  count += 8;
137  }
138  // symmetric
139  Index h = 0;
140  for(Index k=j2; k<j2+8; k++)
141  {
142  // normal
143  for (Index w=0 ; w<h; ++w)
144  blockB[count+w] = rhs(k,j2+w);
145 
146  blockB[count+h] = numext::real(rhs(k,k));
147 
148  // transpose
149  for (Index w=h+1 ; w<8; ++w)
150  blockB[count+w] = numext::conj(rhs(j2+w,k));
151  count += 8;
152  ++h;
153  }
154  // normal
155  for(Index k=j2+8; k<end_k; k++)
156  {
157  blockB[count+0] = rhs(k,j2+0);
158  blockB[count+1] = rhs(k,j2+1);
159  blockB[count+2] = rhs(k,j2+2);
160  blockB[count+3] = rhs(k,j2+3);
161  blockB[count+4] = rhs(k,j2+4);
162  blockB[count+5] = rhs(k,j2+5);
163  blockB[count+6] = rhs(k,j2+6);
164  blockB[count+7] = rhs(k,j2+7);
165  count += 8;
166  }
167  }
168  }
169  if(nr>=4)
170  {
171  for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
172  {
173  // again we can split vertically in three different parts (transpose, symmetric, normal)
174  // transpose
175  for(Index k=k2; k<j2; k++)
176  {
177  blockB[count+0] = numext::conj(rhs(j2+0,k));
178  blockB[count+1] = numext::conj(rhs(j2+1,k));
179  blockB[count+2] = numext::conj(rhs(j2+2,k));
180  blockB[count+3] = numext::conj(rhs(j2+3,k));
181  count += 4;
182  }
183  // symmetric
184  Index h = 0;
185  for(Index k=j2; k<j2+4; k++)
186  {
187  // normal
188  for (Index w=0 ; w<h; ++w)
189  blockB[count+w] = rhs(k,j2+w);
190 
191  blockB[count+h] = numext::real(rhs(k,k));
192 
193  // transpose
194  for (Index w=h+1 ; w<4; ++w)
195  blockB[count+w] = numext::conj(rhs(j2+w,k));
196  count += 4;
197  ++h;
198  }
199  // normal
200  for(Index k=j2+4; k<end_k; k++)
201  {
202  blockB[count+0] = rhs(k,j2+0);
203  blockB[count+1] = rhs(k,j2+1);
204  blockB[count+2] = rhs(k,j2+2);
205  blockB[count+3] = rhs(k,j2+3);
206  count += 4;
207  }
208  }
209  }
210 
211  // third part: transposed
212  if(nr>=8)
213  {
214  for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
215  {
216  for(Index k=k2; k<end_k; k++)
217  {
218  blockB[count+0] = numext::conj(rhs(j2+0,k));
219  blockB[count+1] = numext::conj(rhs(j2+1,k));
220  blockB[count+2] = numext::conj(rhs(j2+2,k));
221  blockB[count+3] = numext::conj(rhs(j2+3,k));
222  blockB[count+4] = numext::conj(rhs(j2+4,k));
223  blockB[count+5] = numext::conj(rhs(j2+5,k));
224  blockB[count+6] = numext::conj(rhs(j2+6,k));
225  blockB[count+7] = numext::conj(rhs(j2+7,k));
226  count += 8;
227  }
228  }
229  }
230  if(nr>=4)
231  {
232  for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
233  {
234  for(Index k=k2; k<end_k; k++)
235  {
236  blockB[count+0] = numext::conj(rhs(j2+0,k));
237  blockB[count+1] = numext::conj(rhs(j2+1,k));
238  blockB[count+2] = numext::conj(rhs(j2+2,k));
239  blockB[count+3] = numext::conj(rhs(j2+3,k));
240  count += 4;
241  }
242  }
243  }
244 
245  // copy the remaining columns one at a time (=> the same with nr==1)
246  for(Index j2=packet_cols4; j2<cols; ++j2)
247  {
248  // transpose
249  Index half = (std::min)(end_k,j2);
250  for(Index k=k2; k<half; k++)
251  {
252  blockB[count] = numext::conj(rhs(j2,k));
253  count += 1;
254  }
255 
256  if(half==j2 && half<k2+rows)
257  {
258  blockB[count] = numext::real(rhs(j2,j2));
259  count += 1;
260  }
261  else
262  half--;
263 
264  // normal
265  for(Index k=half+1; k<k2+rows; k++)
266  {
267  blockB[count] = rhs(k,j2);
268  count += 1;
269  }
270  }
271  }
272 };
273 
274 /* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
275  * the general matrix matrix product.
276  */
277 template <typename Scalar, typename Index,
278  int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
279  int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
280  int ResStorageOrder>
282 
283 template <typename Scalar, typename Index,
284  int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
285  int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
286 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
287 {
288 
290  Index rows, Index cols,
291  const Scalar* lhs, Index lhsStride,
292  const Scalar* rhs, Index rhsStride,
293  Scalar* res, Index resStride,
294  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
295  {
297  EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
298  RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
299  EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
300  LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
301  ColMajor>
302  ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
303  }
304 };
305 
306 template <typename Scalar, typename Index,
307  int LhsStorageOrder, bool ConjugateLhs,
308  int RhsStorageOrder, bool ConjugateRhs>
309 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
310 {
311 
312  static EIGEN_DONT_INLINE void run(
313  Index rows, Index cols,
314  const Scalar* _lhs, Index lhsStride,
315  const Scalar* _rhs, Index rhsStride,
316  Scalar* res, Index resStride,
317  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
318 };
319 
320 template <typename Scalar, typename Index,
321  int LhsStorageOrder, bool ConjugateLhs,
322  int RhsStorageOrder, bool ConjugateRhs>
324  Index rows, Index cols,
325  const Scalar* _lhs, Index lhsStride,
326  const Scalar* _rhs, Index rhsStride,
327  Scalar* _res, Index resStride,
328  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
329  {
330  Index size = rows;
331 
332  typedef gebp_traits<Scalar,Scalar> Traits;
333 
338  LhsMapper lhs(_lhs,lhsStride);
339  LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
340  RhsMapper rhs(_rhs,rhsStride);
341  ResMapper res(_res, resStride);
342 
343  Index kc = blocking.kc(); // cache block size along the K direction
344  Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
345  // kc must be smaller than mc
346  kc = (std::min)(kc,mc);
347  std::size_t sizeA = kc*mc;
348  std::size_t sizeB = kc*cols;
349  ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
350  ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
351 
356 
357  for(Index k2=0; k2<size; k2+=kc)
358  {
359  const Index actual_kc = (std::min)(k2+kc,size)-k2;
360 
361  // we have selected one row panel of rhs and one column panel of lhs
362  // pack rhs's panel into a sequential chunk of memory
363  // and expand each coeff to a constant packet for further reuse
364  pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);
365 
366  // the select lhs's panel has to be split in three different parts:
367  // 1 - the transposed panel above the diagonal block => transposed packed copy
368  // 2 - the diagonal block => special packed copy
369  // 3 - the panel below the diagonal block => generic packed copy
370  for(Index i2=0; i2<k2; i2+=mc)
371  {
372  const Index actual_mc = (std::min)(i2+mc,k2)-i2;
373  // transposed packed copy
374  pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
375 
376  gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
377  }
378  // the block diagonal
379  {
380  const Index actual_mc = (std::min)(k2+kc,size)-k2;
381  // symmetric packed copy
382  pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
383 
384  gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
385  }
386 
387  for(Index i2=k2+kc; i2<size; i2+=mc)
388  {
389  const Index actual_mc = (std::min)(i2+mc,size)-i2;
391  (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
392 
393  gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
394  }
395  }
396  }
397 
398 // matrix * selfadjoint product
399 template <typename Scalar, typename Index,
400  int LhsStorageOrder, bool ConjugateLhs,
401  int RhsStorageOrder, bool ConjugateRhs>
402 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
403 {
404 
405  static EIGEN_DONT_INLINE void run(
406  Index rows, Index cols,
407  const Scalar* _lhs, Index lhsStride,
408  const Scalar* _rhs, Index rhsStride,
409  Scalar* res, Index resStride,
410  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
411 };
412 
413 template <typename Scalar, typename Index,
414  int LhsStorageOrder, bool ConjugateLhs,
415  int RhsStorageOrder, bool ConjugateRhs>
417  Index rows, Index cols,
418  const Scalar* _lhs, Index lhsStride,
419  const Scalar* _rhs, Index rhsStride,
420  Scalar* _res, Index resStride,
421  const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
422  {
423  Index size = cols;
424 
425  typedef gebp_traits<Scalar,Scalar> Traits;
426 
429  LhsMapper lhs(_lhs,lhsStride);
430  ResMapper res(_res,resStride);
431 
432  Index kc = blocking.kc(); // cache block size along the K direction
433  Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
434  std::size_t sizeA = kc*mc;
435  std::size_t sizeB = kc*cols;
436  ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
437  ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
438 
442 
443  for(Index k2=0; k2<size; k2+=kc)
444  {
445  const Index actual_kc = (std::min)(k2+kc,size)-k2;
446 
447  pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
448 
449  // => GEPP
450  for(Index i2=0; i2<rows; i2+=mc)
451  {
452  const Index actual_mc = (std::min)(i2+mc,rows)-i2;
453  pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
454 
455  gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
456  }
457  }
458  }
459 
460 } // end namespace internal
461 
462 /***************************************************************************
463 * Wrapper to product_selfadjoint_matrix
464 ***************************************************************************/
465 
466 namespace internal {
467 
468 template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
469 struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
470 {
472 
477 
478  enum {
479  LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
480  LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
481  RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
482  RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
483  };
484 
485  template<typename Dest>
486  static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
487  {
488  eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
489 
490  typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
491  typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
492 
493  Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
494  * RhsBlasTraits::extractScalarFactor(a_rhs);
495 
496  typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
497  Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
498 
499  BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
500 
503  NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
504  EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
505  NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
506  internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
507  ::run(
508  lhs.rows(), rhs.cols(), // sizes
509  &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
510  &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
511  &dst.coeffRef(0,0), dst.outerStride(), // result info
512  actualAlpha, blocking // alpha
513  );
514  }
515 };
516 
517 } // end namespace internal
518 
519 } // end namespace Eigen
520 
521 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
void operator()(Scalar *blockB, const Scalar *_rhs, Index rhsStride, Index rows, Index cols, Index k2)
#define EIGEN_STRONG_INLINE
Definition: Macros.h:493
const AutoDiffScalar< DerType > & conj(const AutoDiffScalar< DerType > &x)
EIGEN_DEVICE_FUNC RealReturnType real() const
Expression of the product of two arbitrary matrices or vectors.
Definition: Product.h:71
Definition: LDLT.h:16
static constexpr size_t size(Tuple< Args... > &)
Provides access to the number of elements in a tuple as a compile-time constant expression.
Holds information about the various numeric (i.e. scalar) types allowed by Eigen. ...
Definition: NumTraits.h:150
#define EIGEN_LOGICAL_XOR(a, b)
Definition: Macros.h:897
const unsigned int RowMajorBit
Definition: Constants.h:61
#define EIGEN_DONT_INLINE
Definition: Macros.h:515
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half() max(const half &a, const half &b)
Definition: Half.h:438
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:33
#define eigen_assert(x)
Definition: Macros.h:577
void pack(Scalar *blockA, const const_blas_data_mapper< Scalar, Index, StorageOrder > &lhs, Index cols, Index i, Index &count)
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition: Memory.h:644
void operator()(Scalar *blockA, const Scalar *_lhs, Index lhsStride, Index cols, Index rows)
TFSIMD_FORCE_INLINE const tfScalar & w() const
void run(Expr &expr, Dev &dev)
Definition: TensorSyclRun.h:33
static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar &alpha)
static EIGEN_STRONG_INLINE void run(Index rows, Index cols, const Scalar *lhs, Index lhsStride, const Scalar *rhs, Index rhsStride, Scalar *res, Index resStride, const Scalar &alpha, level3_blocking< Scalar, Scalar > &blocking)


hebiros
Author(s): Xavier Artache , Matthew Tesch
autogenerated on Thu Sep 3 2020 04:08:45