TriangularSolverMatrix.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H
11 #define EIGEN_TRIANGULAR_SOLVER_MATRIX_H
12 
13 namespace Eigen {
14 
15 namespace internal {
16 
17 // if the rhs is row major, let's transpose the product
18 template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
19 struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor,OtherInnerStride>
20 {
21  static void run(
23  const Scalar* tri, Index triStride,
24  Scalar* _other, Index otherIncr, Index otherStride,
26  {
29  (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
31  TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride>
32  ::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
33  }
34 };
35 
36 /* Optimized triangular solver with multiple right hand side and the triangular matrix on the left
37  */
38 template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
39 struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
40 {
41  static EIGEN_DONT_INLINE void run(
42  Index size, Index otherSize,
43  const Scalar* _tri, Index triStride,
44  Scalar* _other, Index otherIncr, Index otherStride,
46 };
47 template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
49  Index size, Index otherSize,
50  const Scalar* _tri, Index triStride,
51  Scalar* _other, Index otherIncr, Index otherStride,
53  {
54  Index cols = otherSize;
55 
58  TriMapper tri(_tri, triStride);
59  OtherMapper other(_other, otherStride, otherIncr);
60 
61  typedef gebp_traits<Scalar,Scalar> Traits;
62 
63  enum {
64  SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
65  IsLower = (Mode&Lower) == Lower
66  };
67 
68  Index kc = blocking.kc(); // cache block size along the K direction
69  Index mc = (std::min)(size,blocking.mc()); // cache block size along the M direction
70 
71  std::size_t sizeA = kc*mc;
72  std::size_t sizeB = kc*cols;
73 
76 
81 
82  // the goal here is to subdivise the Rhs panels such that we keep some cache
83  // coherence when accessing the rhs elements
84  std::ptrdiff_t l1, l2, l3;
85  manage_caching_sizes(GetAction, &l1, &l2, &l3);
86  Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * std::max<Index>(otherStride,size)) : 0;
87  subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
88 
89  for(Index k2=IsLower ? 0 : size;
90  IsLower ? k2<size : k2>0;
91  IsLower ? k2+=kc : k2-=kc)
92  {
93  const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc);
94 
95  // We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,
96  // and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into
97  // A11 (the triangular part) and A21 the remaining rectangular part.
98  // Then the high level algorithm is:
99  // - B = R1 => general block copy (done during the next step)
100  // - R1 = A11^-1 B => tricky part
101  // - update B from the new R1 => actually this has to be performed continuously during the above step
102  // - R2 -= A21 * B => GEPP
103 
104  // The tricky part: compute R1 = A11^-1 B while updating B from R1
105  // The idea is to split A11 into multiple small vertical panels.
106  // Each panel can be split into a small triangular part T1k which is processed without optimization,
107  // and the remaining small part T2k which is processed using gebp with appropriate block strides
108  for(Index j2=0; j2<cols; j2+=subcols)
109  {
110  Index actual_cols = (std::min)(cols-j2,subcols);
111  // for each small vertical panels [T1k^T, T2k^T]^T of lhs
112  for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
113  {
114  Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
115  // tr solve
116  for (Index k=0; k<actualPanelWidth; ++k)
117  {
118  // TODO write a small kernel handling this (can be shared with trsv)
119  Index i = IsLower ? k2+k1+k : k2-k1-k-1;
120  Index rs = actualPanelWidth - k - 1; // remaining size
121  Index s = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)
122  : IsLower ? i+1 : i-rs;
123 
124  Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
125  for (Index j=j2; j<j2+actual_cols; ++j)
126  {
127  if (TriStorageOrder==RowMajor)
128  {
129  Scalar b(0);
130  const Scalar* l = &tri(i,s);
131  typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
132  for (Index i3=0; i3<k; ++i3)
133  b += conj(l[i3]) * r(i3);
134 
135  other(i,j) = (other(i,j) - b)*a;
136  }
137  else
138  {
139  Scalar& otherij = other(i,j);
140  otherij *= a;
141  Scalar b = otherij;
142  typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
143  typename TriMapper::LinearMapper l = tri.getLinearMapper(s,i);
144  for (Index i3=0;i3<rs;++i3)
145  r(i3) -= b * conj(l(i3));
146  }
147  }
148  }
149 
150  Index lengthTarget = actual_kc-k1-actualPanelWidth;
151  Index startBlock = IsLower ? k2+k1 : k2-k1-actualPanelWidth;
152  Index blockBOffset = IsLower ? k1 : lengthTarget;
153 
154  // update the respective rows of B from other
155  pack_rhs(blockB+actual_kc*j2, other.getSubMapper(startBlock,j2), actualPanelWidth, actual_cols, actual_kc, blockBOffset);
156 
157  // GEBP
158  if (lengthTarget>0)
159  {
160  Index startTarget = IsLower ? k2+k1+actualPanelWidth : k2-actual_kc;
161 
162  pack_lhs(blockA, tri.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
163 
164  gebp_kernel(other.getSubMapper(startTarget,j2), blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
165  actualPanelWidth, actual_kc, 0, blockBOffset);
166  }
167  }
168  }
169 
170  // R2 -= A21 * B => GEPP
171  {
172  Index start = IsLower ? k2+kc : 0;
173  Index end = IsLower ? size : k2-kc;
174  for(Index i2=start; i2<end; i2+=mc)
175  {
176  const Index actual_mc = (std::min)(mc,end-i2);
177  if (actual_mc>0)
178  {
179  pack_lhs(blockA, tri.getSubMapper(i2, IsLower ? k2 : k2-kc), actual_kc, actual_mc);
180 
181  gebp_kernel(other.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0);
182  }
183  }
184  }
185  }
186  }
187 
188 /* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
189  */
190 template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
191 struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
192 {
193  static EIGEN_DONT_INLINE void run(
194  Index size, Index otherSize,
195  const Scalar* _tri, Index triStride,
196  Scalar* _other, Index otherIncr, Index otherStride,
198 };
199 template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
201  Index size, Index otherSize,
202  const Scalar* _tri, Index triStride,
203  Scalar* _other, Index otherIncr, Index otherStride,
205  {
206  Index rows = otherSize;
207  typedef typename NumTraits<Scalar>::Real RealScalar;
208 
211  LhsMapper lhs(_other, otherStride, otherIncr);
212  RhsMapper rhs(_tri, triStride);
213 
214  typedef gebp_traits<Scalar,Scalar> Traits;
215  enum {
216  RhsStorageOrder = TriStorageOrder,
217  SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
218  IsLower = (Mode&Lower) == Lower
219  };
220 
221  Index kc = blocking.kc(); // cache block size along the K direction
222  Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
223 
224  std::size_t sizeA = kc*mc;
225  std::size_t sizeB = kc*size;
226 
227  ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
228  ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
229 
235 
236  for(Index k2=IsLower ? size : 0;
237  IsLower ? k2>0 : k2<size;
238  IsLower ? k2-=kc : k2+=kc)
239  {
240  const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc);
241  Index actual_k2 = IsLower ? k2-actual_kc : k2 ;
242 
243  Index startPanel = IsLower ? 0 : k2+actual_kc;
244  Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
245  Scalar* geb = blockB+actual_kc*actual_kc;
246 
247  if (rs>0) pack_rhs(geb, rhs.getSubMapper(actual_k2,startPanel), actual_kc, rs);
248 
249  // triangular packing (we only pack the panels off the diagonal,
250  // neglecting the blocks overlapping the diagonal
251  {
252  for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
253  {
254  Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
255  Index actual_j2 = actual_k2 + j2;
256  Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
257  Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
258 
259  if (panelLength>0)
260  pack_rhs_panel(blockB+j2*actual_kc,
261  rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
262  panelLength, actualPanelWidth,
263  actual_kc, panelOffset);
264  }
265  }
266 
267  for(Index i2=0; i2<rows; i2+=mc)
268  {
269  const Index actual_mc = (std::min)(mc,rows-i2);
270 
271  // triangular solver kernel
272  {
273  // for each small block of the diagonal (=> vertical panels of rhs)
274  for (Index j2 = IsLower
275  ? (actual_kc - ((actual_kc%SmallPanelWidth) ? Index(actual_kc%SmallPanelWidth)
276  : Index(SmallPanelWidth)))
277  : 0;
278  IsLower ? j2>=0 : j2<actual_kc;
279  IsLower ? j2-=SmallPanelWidth : j2+=SmallPanelWidth)
280  {
281  Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
282  Index absolute_j2 = actual_k2 + j2;
283  Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
284  Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
285 
286  // GEBP
287  if(panelLength>0)
288  {
289  gebp_kernel(lhs.getSubMapper(i2,absolute_j2),
290  blockA, blockB+j2*actual_kc,
291  actual_mc, panelLength, actualPanelWidth,
292  Scalar(-1),
293  actual_kc, actual_kc, // strides
294  panelOffset, panelOffset); // offsets
295  }
296 
297  // unblocked triangular solve
298  for (Index k=0; k<actualPanelWidth; ++k)
299  {
300  Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k;
301 
302  typename LhsMapper::LinearMapper r = lhs.getLinearMapper(i2,j);
303  for (Index k3=0; k3<k; ++k3)
304  {
305  Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
306  typename LhsMapper::LinearMapper a = lhs.getLinearMapper(i2,IsLower ? j+1+k3 : absolute_j2+k3);
307  for (Index i=0; i<actual_mc; ++i)
308  r(i) -= a(i) * b;
309  }
310  if((Mode & UnitDiag)==0)
311  {
312  Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
313  for (Index i=0; i<actual_mc; ++i)
314  r(i) *= inv_rjj;
315  }
316  }
317 
318  // pack the just computed part of lhs to A
319  pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2),
320  actualPanelWidth, actual_mc,
321  actual_kc, j2);
322  }
323  }
324 
325  if (rs>0)
326  gebp_kernel(lhs.getSubMapper(i2, startPanel), blockA, geb,
327  actual_mc, actual_kc, rs, Scalar(-1),
328  -1, -1, 0, 0);
329  }
330  }
331  }
332 
333 } // end namespace internal
334 
335 } // end namespace Eigen
336 
337 #endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H
SCALAR Scalar
Definition: bench_gemm.cpp:46
const AutoDiffScalar< DerType > & conj(const AutoDiffScalar< DerType > &x)
Scalar * b
Definition: benchVecAdd.cpp:17
static void run(Index size, Index cols, const Scalar *tri, Index triStride, Scalar *_other, Index otherIncr, Index otherStride, level3_blocking< Scalar, Scalar > &blocking)
#define min(a, b)
Definition: datatypes.h:19
gtsam::Key l2
#define EIGEN_PLAIN_ENUM_MAX(a, b)
Definition: Macros.h:1289
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
Holds information about the various numeric (i.e. scalar) types allowed by Eigen. ...
Definition: NumTraits.h:232
void manage_caching_sizes(Action action, std::ptrdiff_t *l1, std::ptrdiff_t *l2, std::ptrdiff_t *l3)
#define EIGEN_DONT_INLINE
Definition: Macros.h:940
Point3 l3(2, 2, 0)
static const Line3 l(Rot3(), 1, 1)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
RealScalar s
NumTraits< Scalar >::Real RealScalar
Definition: bench_gemm.cpp:47
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition: Memory.h:768
gtsam::Key l1
EIGEN_CONSTEXPR Index size(const T &x)
Definition: Meta.h:479
Tridiagonalization< MatrixXf > tri
static EIGEN_DEPRECATED const end_t end
std::ptrdiff_t j


gtsam
Author(s):
autogenerated on Tue Jul 4 2023 02:40:33