sgetrf.c
Go to the documentation of this file.
00001 /* sgetrf.f -- translated by f2c (version 20061008).
00002    You must link the resulting object file with libf2c:
00003         on Microsoft Windows system, link with libf2c.lib;
00004         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
00005         or, if you install libf2c.a in a standard place, with -lf2c -lm
00006         -- in that order, at the end of the command line, as in
00007                 cc *.o -lf2c -lm
00008         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
00009 
00010                 http://www.netlib.org/f2c/libf2c.zip
00011 */
00012 
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015 
00016 /* Table of constant values */
00017 
00018 static integer c__1 = 1;
00019 static real c_b12 = 1.f;
00020 static real c_b15 = -1.f;
00021 
00022 /* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda, 
00023         integer *ipiv, integer *info)
00024 {
00025     /* System generated locals */
00026     integer a_dim1, a_offset, i__1, i__2, i__3;
00027     real r__1;
00028 
00029     /* Local variables */
00030     integer i__, j, ipivstart, jpivstart, jp;
00031     real tmp;
00032     extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
00033             sgemm_(char *, char *, integer *, integer *, integer *, real *, 
00034             real *, integer *, real *, integer *, real *, real *, integer *);
00035     integer kcols;
00036     real sfmin;
00037     integer nstep;
00038     extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
00039             integer *, integer *, real *, real *, integer *, real *, integer *
00040 );
00041     integer kahead;
00042     extern doublereal slamch_(char *);
00043     extern /* Subroutine */ int xerbla_(char *, integer *);
00044     extern integer isamax_(integer *, real *, integer *);
00045     integer npived;
00046     extern logical sisnan_(real *);
00047     integer kstart;
00048     extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer 
00049             *, integer *, integer *, integer *);
00050     integer ntopiv;
00051 
00052 
00053 /*  -- LAPACK routine (version 3.X) -- */
00054 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00055 /*     May 2008 */
00056 
00057 /*     .. Scalar Arguments .. */
00058 /*     .. */
00059 /*     .. Array Arguments .. */
00060 /*     .. */
00061 
00062 /*  Purpose */
00063 /*  ======= */
00064 
00065 /*  SGETRF computes an LU factorization of a general M-by-N matrix A */
00066 /*  using partial pivoting with row interchanges. */
00067 
00068 /*  The factorization has the form */
00069 /*     A = P * L * U */
00070 /*  where P is a permutation matrix, L is lower triangular with unit */
00071 /*  diagonal elements (lower trapezoidal if m > n), and U is upper */
00072 /*  triangular (upper trapezoidal if m < n). */
00073 
00074 /*  This code implements an iterative version of Sivan Toledo's recursive */
00075 /*  LU algorithm[1].  For square matrices, this iterative versions should */
00076 /*  be within a factor of two of the optimum number of memory transfers. */
00077 
00078 /*  The pattern is as follows, with the large blocks of U being updated */
00079 /*  in one call to STRSM, and the dotted lines denoting sections that */
00080 /*  have had all pending permutations applied: */
00081 
00082 /*   1 2 3 4 5 6 7 8 */
00083 /*  +-+-+---+-------+------ */
00084 /*  | |1|   |       | */
00085 /*  |.+-+ 2 |       | */
00086 /*  | | |   |       | */
00087 /*  |.|.+-+-+   4   | */
00088 /*  | | | |1|       | */
00089 /*  | | |.+-+       | */
00090 /*  | | | | |       | */
00091 /*  |.|.|.|.+-+-+---+  8 */
00092 /*  | | | | | |1|   | */
00093 /*  | | | | |.+-+ 2 | */
00094 /*  | | | | | | |   | */
00095 /*  | | | | |.|.+-+-+ */
00096 /*  | | | | | | | |1| */
00097 /*  | | | | | | |.+-+ */
00098 /*  | | | | | | | | | */
00099 /*  |.|.|.|.|.|.|.|.+----- */
00100 /*  | | | | | | | | | */
00101 
00102 /*  The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in */
00103 /*  the binary expansion of the current column.  Each Schur update is */
00104 /*  applied as soon as the necessary portion of U is available. */
00105 
00106 /*  [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with */
00107 /*  Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), */
00108 /*  1065-1081. http://dx.doi.org/10.1137/S0895479896297744 */
00109 
00110 /*  Arguments */
00111 /*  ========= */
00112 
00113 /*  M       (input) INTEGER */
00114 /*          The number of rows of the matrix A.  M >= 0. */
00115 
00116 /*  N       (input) INTEGER */
00117 /*          The number of columns of the matrix A.  N >= 0. */
00118 
00119 /*  A       (input/output) REAL array, dimension (LDA,N) */
00120 /*          On entry, the M-by-N matrix to be factored. */
00121 /*          On exit, the factors L and U from the factorization */
00122 /*          A = P*L*U; the unit diagonal elements of L are not stored. */
00123 
00124 /*  LDA     (input) INTEGER */
00125 /*          The leading dimension of the array A.  LDA >= max(1,M). */
00126 
00127 /*  IPIV    (output) INTEGER array, dimension (min(M,N)) */
00128 /*          The pivot indices; for 1 <= i <= min(M,N), row i of the */
00129 /*          matrix was interchanged with row IPIV(i). */
00130 
00131 /*  INFO    (output) INTEGER */
00132 /*          = 0:  successful exit */
00133 /*          < 0:  if INFO = -i, the i-th argument had an illegal value */
00134 /*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
00135 /*                has been completed, but the factor U is exactly */
00136 /*                singular, and division by zero will occur if it is used */
00137 /*                to solve a system of equations. */
00138 
00139 /*  ===================================================================== */
00140 
00141 /*     .. Parameters .. */
00142 /*     .. */
00143 /*     .. Local Scalars .. */
00144 /*     .. */
00145 /*     .. External Functions .. */
00146 /*     .. */
00147 /*     .. External Subroutines .. */
00148 /*     .. */
00149 /*     .. Intrinsic Functions .. */
00150 /*     .. */
00151 /*     .. Executable Statements .. */
00152 
00153 /*     Test the input parameters. */
00154 
00155     /* Parameter adjustments */
00156     a_dim1 = *lda;
00157     a_offset = 1 + a_dim1;
00158     a -= a_offset;
00159     --ipiv;
00160 
00161     /* Function Body */
00162     *info = 0;
00163     if (*m < 0) {
00164         *info = -1;
00165     } else if (*n < 0) {
00166         *info = -2;
00167     } else if (*lda < max(1,*m)) {
00168         *info = -4;
00169     }
00170     if (*info != 0) {
00171         i__1 = -(*info);
00172         xerbla_("SGETRF", &i__1);
00173         return 0;
00174     }
00175 
00176 /*     Quick return if possible */
00177 
00178     if (*m == 0 || *n == 0) {
00179         return 0;
00180     }
00181 
00182 /*     Compute machine safe minimum */
00183 
00184     sfmin = slamch_("S");
00185 
00186     nstep = min(*m,*n);
00187     i__1 = nstep;
00188     for (j = 1; j <= i__1; ++j) {
00189         kahead = j & -j;
00190         kstart = j + 1 - kahead;
00191 /* Computing MIN */
00192         i__2 = kahead, i__3 = *m - j;
00193         kcols = min(i__2,i__3);
00194 
00195 /*        Find pivot. */
00196 
00197         i__2 = *m - j + 1;
00198         jp = j - 1 + isamax_(&i__2, &a[j + j * a_dim1], &c__1);
00199         ipiv[j] = jp;
00200 /*        Permute just this column. */
00201         if (jp != j) {
00202             tmp = a[j + j * a_dim1];
00203             a[j + j * a_dim1] = a[jp + j * a_dim1];
00204             a[jp + j * a_dim1] = tmp;
00205         }
00206 /*        Apply pending permutations to L */
00207         ntopiv = 1;
00208         ipivstart = j;
00209         jpivstart = j - ntopiv;
00210         while(ntopiv < kahead) {
00211             slaswp_(&ntopiv, &a[jpivstart * a_dim1 + 1], lda, &ipivstart, &j, 
00212                     &ipiv[1], &c__1);
00213             ipivstart -= ntopiv;
00214             ntopiv <<= 1;
00215             jpivstart -= ntopiv;
00216         }
00217 /*        Permute U block to match L */
00218         slaswp_(&kcols, &a[(j + 1) * a_dim1 + 1], lda, &kstart, &j, &ipiv[1], 
00219                 &c__1);
00220 /*        Factor the current column */
00221         if (a[j + j * a_dim1] != 0.f && ! sisnan_(&a[j + j * a_dim1])) {
00222             if ((r__1 = a[j + j * a_dim1], dabs(r__1)) >= sfmin) {
00223                 i__2 = *m - j;
00224                 r__1 = 1.f / a[j + j * a_dim1];
00225                 sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1);
00226             } else {
00227                 i__2 = *m - j;
00228                 for (i__ = 1; i__ <= i__2; ++i__) {
00229                     a[j + i__ + j * a_dim1] /= a[j + j * a_dim1];
00230                 }
00231             }
00232         } else if (a[j + j * a_dim1] == 0.f && *info == 0) {
00233             *info = j;
00234         }
00235 /*        Solve for U block. */
00236         strsm_("Left", "Lower", "No transpose", "Unit", &kahead, &kcols, &
00237                 c_b12, &a[kstart + kstart * a_dim1], lda, &a[kstart + (j + 1) 
00238                 * a_dim1], lda);
00239 /*        Schur complement. */
00240         i__2 = *m - j;
00241         sgemm_("No transpose", "No transpose", &i__2, &kcols, &kahead, &c_b15, 
00242                  &a[j + 1 + kstart * a_dim1], lda, &a[kstart + (j + 1) * 
00243                 a_dim1], lda, &c_b12, &a[j + 1 + (j + 1) * a_dim1], lda);
00244     }
00245 /*     Handle pivot permutations on the way out of the recursion */
00246     npived = nstep & -nstep;
00247     j = nstep - npived;
00248     while(j > 0) {
00249         ntopiv = j & -j;
00250         i__1 = j + 1;
00251         slaswp_(&ntopiv, &a[(j - ntopiv + 1) * a_dim1 + 1], lda, &i__1, &
00252                 nstep, &ipiv[1], &c__1);
00253         j -= ntopiv;
00254     }
00255 /*     If short and wide, handle the rest of the columns. */
00256     if (*m < *n) {
00257         i__1 = *n - *m;
00258         slaswp_(&i__1, &a[(*m + kcols + 1) * a_dim1 + 1], lda, &c__1, m, &
00259                 ipiv[1], &c__1);
00260         i__1 = *n - *m;
00261         strsm_("Left", "Lower", "No transpose", "Unit", m, &i__1, &c_b12, &a[
00262                 a_offset], lda, &a[(*m + kcols + 1) * a_dim1 + 1], lda);
00263     }
00264     return 0;
00265 
00266 /*     End of SGETRF */
00267 
00268 } /* sgetrf_ */


swiftnav
Author(s):
autogenerated on Sat Jun 8 2019 18:56:08