SelfadjointMatrixMatrix.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
11 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
12 
13 namespace Eigen {
14 
15 namespace internal {
16 
17 // pack a selfadjoint block diagonal for use with the gebp_kernel
18 template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
20 {
21  template<int BlockRows> inline
22  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
23  {
24  // normal copy
25  for(Index k=0; k<i; k++)
26  for(Index w=0; w<BlockRows; w++)
27  blockA[count++] = lhs(i+w,k); // normal
28  // symmetric copy
29  Index h = 0;
30  for(Index k=i; k<i+BlockRows; k++)
31  {
32  for(Index w=0; w<h; w++)
33  blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
34 
35  blockA[count++] = numext::real(lhs(k,k)); // real (diagonal)
36 
37  for(Index w=h+1; w<BlockRows; w++)
38  blockA[count++] = lhs(i+w, k); // normal
39  ++h;
40  }
41  // transposed copy
42  for(Index k=i+BlockRows; k<cols; k++)
43  for(Index w=0; w<BlockRows; w++)
44  blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
45  }
46  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
47  {
49  Index count = 0;
50  Index peeled_mc = (rows/Pack1)*Pack1;
51  for(Index i=0; i<peeled_mc; i+=Pack1)
52  {
53  pack<Pack1>(blockA, lhs, cols, i, count);
54  }
55 
56  if(rows-peeled_mc>=Pack2)
57  {
58  pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
59  peeled_mc += Pack2;
60  }
61 
62  // do the same with mr==1
63  for(Index i=peeled_mc; i<rows; i++)
64  {
65  for(Index k=0; k<i; k++)
66  blockA[count++] = lhs(i, k); // normal
67 
68  blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
69 
70  for(Index k=i+1; k<cols; k++)
71  blockA[count++] = numext::conj(lhs(k, i)); // transposed
72  }
73  }
74 };
75 
76 template<typename Scalar, typename Index, int nr, int StorageOrder>
78 {
79  enum { PacketSize = packet_traits<Scalar>::size };
80  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
81  {
82  Index end_k = k2 + rows;
83  Index count = 0;
85  Index packet_cols = (cols/nr)*nr;
86 
87  // first part: normal case
88  for(Index j2=0; j2<k2; j2+=nr)
89  {
90  for(Index k=k2; k<end_k; k++)
91  {
92  blockB[count+0] = rhs(k,j2+0);
93  blockB[count+1] = rhs(k,j2+1);
94  if (nr==4)
95  {
96  blockB[count+2] = rhs(k,j2+2);
97  blockB[count+3] = rhs(k,j2+3);
98  }
99  count += nr;
100  }
101  }
102 
103  // second part: diagonal block
104  for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
105  {
106  // again we can split vertically in three different parts (transpose, symmetric, normal)
107  // transpose
108  for(Index k=k2; k<j2; k++)
109  {
110  blockB[count+0] = numext::conj(rhs(j2+0,k));
111  blockB[count+1] = numext::conj(rhs(j2+1,k));
112  if (nr==4)
113  {
114  blockB[count+2] = numext::conj(rhs(j2+2,k));
115  blockB[count+3] = numext::conj(rhs(j2+3,k));
116  }
117  count += nr;
118  }
119  // symmetric
120  Index h = 0;
121  for(Index k=j2; k<j2+nr; k++)
122  {
123  // normal
124  for (Index w=0 ; w<h; ++w)
125  blockB[count+w] = rhs(k,j2+w);
126 
127  blockB[count+h] = numext::real(rhs(k,k));
128 
129  // transpose
130  for (Index w=h+1 ; w<nr; ++w)
131  blockB[count+w] = numext::conj(rhs(j2+w,k));
132  count += nr;
133  ++h;
134  }
135  // normal
136  for(Index k=j2+nr; k<end_k; k++)
137  {
138  blockB[count+0] = rhs(k,j2+0);
139  blockB[count+1] = rhs(k,j2+1);
140  if (nr==4)
141  {
142  blockB[count+2] = rhs(k,j2+2);
143  blockB[count+3] = rhs(k,j2+3);
144  }
145  count += nr;
146  }
147  }
148 
149  // third part: transposed
150  for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
151  {
152  for(Index k=k2; k<end_k; k++)
153  {
154  blockB[count+0] = numext::conj(rhs(j2+0,k));
155  blockB[count+1] = numext::conj(rhs(j2+1,k));
156  if (nr==4)
157  {
158  blockB[count+2] = numext::conj(rhs(j2+2,k));
159  blockB[count+3] = numext::conj(rhs(j2+3,k));
160  }
161  count += nr;
162  }
163  }
164 
165  // copy the remaining columns one at a time (=> the same with nr==1)
166  for(Index j2=packet_cols; j2<cols; ++j2)
167  {
168  // transpose
169  Index half = (std::min)(end_k,j2);
170  for(Index k=k2; k<half; k++)
171  {
172  blockB[count] = numext::conj(rhs(j2,k));
173  count += 1;
174  }
175 
176  if(half==j2 && half<k2+rows)
177  {
178  blockB[count] = numext::real(rhs(j2,j2));
179  count += 1;
180  }
181  else
182  half--;
183 
184  // normal
185  for(Index k=half+1; k<k2+rows; k++)
186  {
187  blockB[count] = rhs(k,j2);
188  count += 1;
189  }
190  }
191  }
192 };
193 
194 /* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
195  * the general matrix matrix product.
196  */
197 template <typename Scalar, typename Index,
198  int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
199  int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
200  int ResStorageOrder>
202 
203 template <typename Scalar, typename Index,
204  int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
205  int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
206 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
207 {
208 
210  Index rows, Index cols,
211  const Scalar* lhs, Index lhsStride,
212  const Scalar* rhs, Index rhsStride,
213  Scalar* res, Index resStride,
214  const Scalar& alpha)
215  {
216  product_selfadjoint_matrix<Scalar, Index,
217  EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
218  RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
219  EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
220  LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
221  ColMajor>
222  ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
223  }
224 };
225 
226 template <typename Scalar, typename Index,
227  int LhsStorageOrder, bool ConjugateLhs,
228  int RhsStorageOrder, bool ConjugateRhs>
229 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
230 {
231 
232  static EIGEN_DONT_INLINE void run(
233  Index rows, Index cols,
234  const Scalar* _lhs, Index lhsStride,
235  const Scalar* _rhs, Index rhsStride,
236  Scalar* res, Index resStride,
237  const Scalar& alpha);
238 };
239 
240 template <typename Scalar, typename Index,
241  int LhsStorageOrder, bool ConjugateLhs,
242  int RhsStorageOrder, bool ConjugateRhs>
244  Index rows, Index cols,
245  const Scalar* _lhs, Index lhsStride,
246  const Scalar* _rhs, Index rhsStride,
247  Scalar* res, Index resStride,
248  const Scalar& alpha)
249  {
250  Index size = rows;
251 
254 
255  typedef gebp_traits<Scalar,Scalar> Traits;
256 
257  Index kc = size; // cache block size along the K direction
258  Index mc = rows; // cache block size along the M direction
259  Index nc = cols; // cache block size along the N direction
260  computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
261  // kc must smaller than mc
262  kc = (std::min)(kc,mc);
263 
264  std::size_t sizeW = kc*Traits::WorkSpaceFactor;
265  std::size_t sizeB = sizeW + kc*cols;
266  ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
267  ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
268  Scalar* blockB = allocatedBlockB + sizeW;
269 
274 
275  for(Index k2=0; k2<size; k2+=kc)
276  {
277  const Index actual_kc = (std::min)(k2+kc,size)-k2;
278 
279  // we have selected one row panel of rhs and one column panel of lhs
280  // pack rhs's panel into a sequential chunk of memory
281  // and expand each coeff to a constant packet for further reuse
282  pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
283 
284  // the select lhs's panel has to be split in three different parts:
285  // 1 - the transposed panel above the diagonal block => transposed packed copy
286  // 2 - the diagonal block => special packed copy
287  // 3 - the panel below the diagonal block => generic packed copy
288  for(Index i2=0; i2<k2; i2+=mc)
289  {
290  const Index actual_mc = (std::min)(i2+mc,k2)-i2;
291  // transposed packed copy
292  pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
293 
294  gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
295  }
296  // the block diagonal
297  {
298  const Index actual_mc = (std::min)(k2+kc,size)-k2;
299  // symmetric packed copy
300  pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
301 
302  gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
303  }
304 
305  for(Index i2=k2+kc; i2<size; i2+=mc)
306  {
307  const Index actual_mc = (std::min)(i2+mc,size)-i2;
309  (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
310 
311  gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
312  }
313  }
314  }
315 
316 // matrix * selfadjoint product
317 template <typename Scalar, typename Index,
318  int LhsStorageOrder, bool ConjugateLhs,
319  int RhsStorageOrder, bool ConjugateRhs>
320 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
321 {
322 
323  static EIGEN_DONT_INLINE void run(
324  Index rows, Index cols,
325  const Scalar* _lhs, Index lhsStride,
326  const Scalar* _rhs, Index rhsStride,
327  Scalar* res, Index resStride,
328  const Scalar& alpha);
329 };
330 
331 template <typename Scalar, typename Index,
332  int LhsStorageOrder, bool ConjugateLhs,
333  int RhsStorageOrder, bool ConjugateRhs>
335  Index rows, Index cols,
336  const Scalar* _lhs, Index lhsStride,
337  const Scalar* _rhs, Index rhsStride,
338  Scalar* res, Index resStride,
339  const Scalar& alpha)
340  {
341  Index size = cols;
342 
344 
345  typedef gebp_traits<Scalar,Scalar> Traits;
346 
347  Index kc = size; // cache block size along the K direction
348  Index mc = rows; // cache block size along the M direction
349  Index nc = cols; // cache block size along the N direction
350  computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
351  std::size_t sizeW = kc*Traits::WorkSpaceFactor;
352  std::size_t sizeB = sizeW + kc*cols;
353  ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
354  ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
355  Scalar* blockB = allocatedBlockB + sizeW;
356 
360 
361  for(Index k2=0; k2<size; k2+=kc)
362  {
363  const Index actual_kc = (std::min)(k2+kc,size)-k2;
364 
365  pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
366 
367  // => GEPP
368  for(Index i2=0; i2<rows; i2+=mc)
369  {
370  const Index actual_mc = (std::min)(i2+mc,rows)-i2;
371  pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
372 
373  gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
374  }
375  }
376  }
377 
378 } // end namespace internal
379 
380 /***************************************************************************
381 * Wrapper to product_selfadjoint_matrix
382 ***************************************************************************/
383 
384 namespace internal {
385 template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
386 struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
387  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
388 {};
389 }
390 
391 template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
392 struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
393  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
394 {
396 
397  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
398 
399  enum {
400  LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
401  LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
402  RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
403  RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
404  };
405 
406  template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
407  {
408  eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
409 
410  typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
411  typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
412 
413  Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
414  * RhsBlasTraits::extractScalarFactor(m_rhs);
415 
417  EIGEN_LOGICAL_XOR(LhsIsUpper,
419  NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
420  EIGEN_LOGICAL_XOR(RhsIsUpper,
421  internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
422  NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
424  ::run(
425  lhs.rows(), rhs.cols(), // sizes
426  &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
427  &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
428  &dst.coeffRef(0,0), dst.outerStride(), // result info
429  actualAlpha // alpha
430  );
431  }
432 };
433 
434 } // end namespace Eigen
435 
436 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived)
Definition: ProductBase.h:46
void operator()(Scalar *blockB, const Scalar *_rhs, Index rhsStride, Index rows, Index cols, Index k2)
#define EIGEN_STRONG_INLINE
internal::traits< Derived >::Scalar Scalar
Definition: DenseBase.h:63
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
internal::traits< Derived >::Index Index
The type of indices.
Definition: DenseBase.h:61
Definition: LDLT.h:16
Holds information about the various numeric (i.e. scalar) types allowed by Eigen. ...
Definition: NumTraits.h:88
const unsigned int RowMajorBit
Definition: Constants.h:53
RealReturnType real() const
#define EIGEN_LOGICAL_XOR(a, b)
static EIGEN_STRONG_INLINE void run(Index rows, Index cols, const Scalar *lhs, Index lhsStride, const Scalar *rhs, Index rhsStride, Scalar *res, Index resStride, const Scalar &alpha)
void pack(Scalar *blockA, const const_blas_data_mapper< Scalar, Index, StorageOrder > &lhs, Index cols, Index i, Index &count)
void operator()(Scalar *blockA, const Scalar *_lhs, Index lhsStride, Index cols, Index rows)
TFSIMD_FORCE_INLINE const tfScalar & w() const
#define EIGEN_DONT_INLINE
#define eigen_assert(x)
void run(ClassLoader *loader)


tuw_aruco
Author(s): Lukas Pfeifhofer
autogenerated on Mon Jun 10 2019 15:40:57