SparseLU_panel_bmod.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
5 // Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 /*
12 
13  * NOTE: This file is the modified version of [s,d,c,z]panel_bmod.c file in SuperLU
14 
15  * -- SuperLU routine (version 3.0) --
16  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
17  * and Lawrence Berkeley National Lab.
18  * October 15, 2003
19  *
20  * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
21  *
22  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
23  * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
24  *
25  * Permission is hereby granted to use or copy this program for any
26  * purpose, provided the above notices are retained on all copies.
27  * Permission to modify the code and to distribute modified code is
28  * granted, provided the above notices are retained, and a notice that
29  * the code was modified is included with the above copyright notice.
30  */
31 #ifndef SPARSELU_PANEL_BMOD_H
32 #define SPARSELU_PANEL_BMOD_H
33 
34 namespace Eigen {
35 namespace internal {
36 
55 template <typename Scalar, typename StorageIndex>
57  const Index nseg, ScalarVector& dense, ScalarVector& tempv,
58  IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)
59 {
60 
61  Index ksub,jj,nextl_col;
62  Index fsupc, nsupc, nsupr, nrow;
63  Index krep, kfnz;
64  Index lptr; // points to the row subscripts of a supernode
65  Index luptr; // ...
66  Index segsize,no_zeros ;
67  // For each nonz supernode segment of U[*,j] in topological order
68  Index k = nseg - 1;
70 
71  for (ksub = 0; ksub < nseg; ksub++)
72  { // For each updating supernode
73  /* krep = representative of current k-th supernode
74  * fsupc = first supernodal column
75  * nsupc = number of columns in a supernode
76  * nsupr = number of rows in a supernode
77  */
78  krep = segrep(k); k--;
79  fsupc = glu.xsup(glu.supno(krep));
80  nsupc = krep - fsupc + 1;
81  nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
82  nrow = nsupr - nsupc;
83  lptr = glu.xlsub(fsupc);
84 
85  // loop over the panel columns to detect the actual number of columns and rows
86  Index u_rows = 0;
87  Index u_cols = 0;
88  for (jj = jcol; jj < jcol + w; jj++)
89  {
90  nextl_col = (jj-jcol) * m;
91  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
92 
93  kfnz = repfnz_col(krep);
94  if ( kfnz == emptyIdxLU )
95  continue; // skip any zero segment
96 
97  segsize = krep - kfnz + 1;
98  u_cols++;
99  u_rows = (std::max)(segsize,u_rows);
100  }
101 
102  if(nsupc >= 2)
103  {
104  Index ldu = internal::first_multiple<Index>(u_rows, PacketSize);
105  Map<ScalarMatrix, Aligned, OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));
106 
107  // gather U
108  Index u_col = 0;
109  for (jj = jcol; jj < jcol + w; jj++)
110  {
111  nextl_col = (jj-jcol) * m;
112  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
113  VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
114 
115  kfnz = repfnz_col(krep);
116  if ( kfnz == emptyIdxLU )
117  continue; // skip any zero segment
118 
119  segsize = krep - kfnz + 1;
120  luptr = glu.xlusup(fsupc);
121  no_zeros = kfnz - fsupc;
122 
123  Index isub = lptr + no_zeros;
124  Index off = u_rows-segsize;
125  for (Index i = 0; i < off; i++) U(i,u_col) = 0;
126  for (Index i = 0; i < segsize; i++)
127  {
128  Index irow = glu.lsub(isub);
129  U(i+off,u_col) = dense_col(irow);
130  ++isub;
131  }
132  u_col++;
133  }
134  // solve U = A^-1 U
135  luptr = glu.xlusup(fsupc);
136  Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
137  no_zeros = (krep - u_rows + 1) - fsupc;
138  luptr += lda * no_zeros + no_zeros;
139  MappedMatrixBlock A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );
140  U = A.template triangularView<UnitLower>().solve(U);
141 
142  // update
143  luptr += u_rows;
144  MappedMatrixBlock B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );
145  eigen_assert(tempv.size()>w*ldu + nrow*w + 1);
146 
147  Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
148  Index offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize)) % PacketSize;
149  MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
150 
151  L.setZero();
152  internal::sparselu_gemm<Scalar>(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride());
153 
154  // scatter U and L
155  u_col = 0;
156  for (jj = jcol; jj < jcol + w; jj++)
157  {
158  nextl_col = (jj-jcol) * m;
159  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
160  VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
161 
162  kfnz = repfnz_col(krep);
163  if ( kfnz == emptyIdxLU )
164  continue; // skip any zero segment
165 
166  segsize = krep - kfnz + 1;
167  no_zeros = kfnz - fsupc;
168  Index isub = lptr + no_zeros;
169 
170  Index off = u_rows-segsize;
171  for (Index i = 0; i < segsize; i++)
172  {
173  Index irow = glu.lsub(isub++);
174  dense_col(irow) = U.coeff(i+off,u_col);
175  U.coeffRef(i+off,u_col) = 0;
176  }
177 
178  // Scatter l into SPA dense[]
179  for (Index i = 0; i < nrow; i++)
180  {
181  Index irow = glu.lsub(isub++);
182  dense_col(irow) -= L.coeff(i,u_col);
183  L.coeffRef(i,u_col) = 0;
184  }
185  u_col++;
186  }
187  }
188  else // level 2 only
189  {
190  // Sequence through each column in the panel
191  for (jj = jcol; jj < jcol + w; jj++)
192  {
193  nextl_col = (jj-jcol) * m;
194  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
195  VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
196 
197  kfnz = repfnz_col(krep);
198  if ( kfnz == emptyIdxLU )
199  continue; // skip any zero segment
200 
201  segsize = krep - kfnz + 1;
202  luptr = glu.xlusup(fsupc);
203 
204  Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr
205 
206  // Perform a trianglar solve and block update,
207  // then scatter the result of sup-col update to dense[]
208  no_zeros = kfnz - fsupc;
209  if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
210  else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
211  else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
212  else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
213  } // End for each column in the panel
214  }
215 
216  } // End for each updating supernode
217 } // end panel bmod
218 
219 } // end namespace internal
220 
221 } // end namespace Eigen
222 
223 #endif // SPARSELU_PANEL_BMOD_H
A matrix or vector expression mapping an existing array of data.
Definition: Map.h:88
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar * data() const
Definition: LDLT.h:16
void panel_bmod(const Index m, const Index w, const Index jcol, const Index nseg, ScalarVector &dense, ScalarVector &tempv, IndexVector &segrep, IndexVector &repfnz, GlobalLU_t &glu)
Performs numeric block updates (sup-panel) in topological order.
static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector &dense, ScalarVector &tempv, ScalarVector &lusup, Index &luptr, const Index lda, const Index nrow, IndexVector &lsub, const Index lptr, const Index no_zeros)
static Index first_default_aligned(const DenseBase< Derived > &m)
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half() max(const half &a, const half &b)
Definition: Half.h:438
Expression of a fixed-size or dynamic-size sub-vector.
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:33
#define eigen_assert(x)
Definition: Macros.h:577
EIGEN_DEVICE_FUNC Index outerStride() const
Definition: Map.h:108
Convenience specialization of Stride to specify only an outer stride See class Map for some examples...
Definition: Stride.h:101


hebiros
Author(s): Xavier Artache , Matthew Tesch
autogenerated on Thu Sep 3 2020 04:08:54