swiftnav: cgetrf.c Source File

Go to the documentation of this file.
00001 /* cgetrf.f -- translated by f2c (version 20061008).
00002    You must link the resulting object file with libf2c:
00003         on Microsoft Windows system, link with libf2c.lib;
00004         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
00005         or, if you install libf2c.a in a standard place, with -lf2c -lm
00006         -- in that order, at the end of the command line, as in
00007                 cc *.o -lf2c -lm
00008         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
00009 
00010                 http://www.netlib.org/f2c/libf2c.zip
00011 */
00012 
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015 
00016 /* Table of constant values */
00017 
00018 static complex c_b1 = {1.f,0.f};
00019 static complex c_b2 = {-1.f,0.f};
00020 static integer c__1 = 1;
00021 
00022 /* Subroutine */ int cgetrf_(integer *m, integer *n, complex *a, integer *lda, 
00023          integer *ipiv, integer *info)
00024 {
00025     /* System generated locals */
00026     integer a_dim1, a_offset, i__1, i__2, i__3;
00027     complex q__1;
00028 
00029     /* Builtin functions */
00030     double c_abs(complex *);
00031     void c_div(complex *, complex *, complex *);
00032 
00033     /* Local variables */
00034     integer i__, j, ipivstart, jpivstart, jp;
00035     complex tmp;
00036     extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
00037             integer *), cgemm_(char *, char *, integer *, integer *, integer *
00038 , complex *, complex *, integer *, complex *, integer *, complex *
00039 , complex *, integer *);
00040     integer kcols;
00041     real sfmin;
00042     extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, 
00043             integer *, integer *, complex *, complex *, integer *, complex *, 
00044             integer *);
00045     integer nstep, kahead;
00046     extern integer icamax_(integer *, complex *, integer *);
00047     extern doublereal slamch_(char *);
00048     extern /* Subroutine */ int xerbla_(char *, integer *);
00049     real pivmag;
00050     integer npived;
00051     extern /* Subroutine */ int claswp_(integer *, complex *, integer *, 
00052             integer *, integer *, integer *, integer *);
00053     extern logical sisnan_(real *);
00054     integer kstart, ntopiv;
00055 
00056 
00057 /*  -- LAPACK routine (version 3.X) -- */
00058 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00059 /*     May 2008 */
00060 
00061 /*     .. Scalar Arguments .. */
00062 /*     .. */
00063 /*     .. Array Arguments .. */
00064 /*     .. */
00065 
00066 /*  Purpose */
00067 /*  ======= */
00068 
00069 /*  CGETRF computes an LU factorization of a general M-by-N matrix A */
00070 /*  using partial pivoting with row interchanges. */
00071 
00072 /*  The factorization has the form */
00073 /*     A = P * L * U */
00074 /*  where P is a permutation matrix, L is lower triangular with unit */
00075 /*  diagonal elements (lower trapezoidal if m > n), and U is upper */
00076 /*  triangular (upper trapezoidal if m < n). */
00077 
00078 /*  This code implements an iterative version of Sivan Toledo's recursive */
00079 /*  LU algorithm[1].  For square matrices, this iterative versions should */
00080 /*  be within a factor of two of the optimum number of memory transfers. */
00081 
00082 /*  The pattern is as follows, with the large blocks of U being updated */
00083 /*  in one call to DTRSM, and the dotted lines denoting sections that */
00084 /*  have had all pending permutations applied: */
00085 
00086 /*   1 2 3 4 5 6 7 8 */
00087 /*  +-+-+---+-------+------ */
00088 /*  | |1|   |       | */
00089 /*  |.+-+ 2 |       | */
00090 /*  | | |   |       | */
00091 /*  |.|.+-+-+   4   | */
00092 /*  | | | |1|       | */
00093 /*  | | |.+-+       | */
00094 /*  | | | | |       | */
00095 /*  |.|.|.|.+-+-+---+  8 */
00096 /*  | | | | | |1|   | */
00097 /*  | | | | |.+-+ 2 | */
00098 /*  | | | | | | |   | */
00099 /*  | | | | |.|.+-+-+ */
00100 /*  | | | | | | | |1| */
00101 /*  | | | | | | |.+-+ */
00102 /*  | | | | | | | | | */
00103 /*  |.|.|.|.|.|.|.|.+----- */
00104 /*  | | | | | | | | | */
00105 
00106 /*  The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in */
00107 /*  the binary expansion of the current column.  Each Schur update is */
00108 /*  applied as soon as the necessary portion of U is available. */
00109 
00110 /*  [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with */
00111 /*  Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), */
00112 /*  1065-1081. http://dx.doi.org/10.1137/S0895479896297744 */
00113 
00114 /*  Arguments */
00115 /*  ========= */
00116 
00117 /*  M       (input) INTEGER */
00118 /*          The number of rows of the matrix A.  M >= 0. */
00119 
00120 /*  N       (input) INTEGER */
00121 /*          The number of columns of the matrix A.  N >= 0. */
00122 
00123 /*  A       (input/output) COMPLEX array, dimension (LDA,N) */
00124 /*          On entry, the M-by-N matrix to be factored. */
00125 /*          On exit, the factors L and U from the factorization */
00126 /*          A = P*L*U; the unit diagonal elements of L are not stored. */
00127 
00128 /*  LDA     (input) INTEGER */
00129 /*          The leading dimension of the array A.  LDA >= max(1,M). */
00130 
00131 /*  IPIV    (output) INTEGER array, dimension (min(M,N)) */
00132 /*          The pivot indices; for 1 <= i <= min(M,N), row i of the */
00133 /*          matrix was interchanged with row IPIV(i). */
00134 
00135 /*  INFO    (output) INTEGER */
00136 /*          = 0:  successful exit */
00137 /*          < 0:  if INFO = -i, the i-th argument had an illegal value */
00138 /*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
00139 /*                has been completed, but the factor U is exactly */
00140 /*                singular, and division by zero will occur if it is used */
00141 /*                to solve a system of equations. */
00142 
00143 /*  ===================================================================== */
00144 
00145 /*     .. Parameters .. */
00146 /*     .. */
00147 /*     .. Local Scalars .. */
00148 /*     .. */
00149 /*     .. External Functions .. */
00150 /*     .. */
00151 /*     .. External Subroutines .. */
00152 /*     .. */
00153 /*     .. Intrinsic Functions .. */
00154 /*     .. */
00155 /*     .. Executable Statements .. */
00156 
00157 /*     Test the input parameters. */
00158 
00159     /* Parameter adjustments */
00160     a_dim1 = *lda;
00161     a_offset = 1 + a_dim1;
00162     a -= a_offset;
00163     --ipiv;
00164 
00165     /* Function Body */
00166     *info = 0;
00167     if (*m < 0) {
00168         *info = -1;
00169     } else if (*n < 0) {
00170         *info = -2;
00171     } else if (*lda < max(1,*m)) {
00172         *info = -4;
00173     }
00174     if (*info != 0) {
00175         i__1 = -(*info);
00176         xerbla_("CGETRF", &i__1);
00177         return 0;
00178     }
00179 
00180 /*     Quick return if possible */
00181 
00182     if (*m == 0 || *n == 0) {
00183         return 0;
00184     }
00185 
00186 /*     Compute machine safe minimum */
00187 
00188     sfmin = slamch_("S");
00189 
00190     nstep = min(*m,*n);
00191     i__1 = nstep;
00192     for (j = 1; j <= i__1; ++j) {
00193         kahead = j & -j;
00194         kstart = j + 1 - kahead;
00195 /* Computing MIN */
00196         i__2 = kahead, i__3 = *m - j;
00197         kcols = min(i__2,i__3);
00198 
00199 /*        Find pivot. */
00200 
00201         i__2 = *m - j + 1;
00202         jp = j - 1 + icamax_(&i__2, &a[j + j * a_dim1], &c__1);
00203         ipiv[j] = jp;
00204 /*        Permute just this column. */
00205         if (jp != j) {
00206             i__2 = j + j * a_dim1;
00207             tmp.r = a[i__2].r, tmp.i = a[i__2].i;
00208             i__2 = j + j * a_dim1;
00209             i__3 = jp + j * a_dim1;
00210             a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
00211             i__2 = jp + j * a_dim1;
00212             a[i__2].r = tmp.r, a[i__2].i = tmp.i;
00213         }
00214 /*        Apply pending permutations to L */
00215         ntopiv = 1;
00216         ipivstart = j;
00217         jpivstart = j - ntopiv;
00218         while(ntopiv < kahead) {
00219             claswp_(&ntopiv, &a[jpivstart * a_dim1 + 1], lda, &ipivstart, &j, 
00220                     &ipiv[1], &c__1);
00221             ipivstart -= ntopiv;
00222             ntopiv <<= 1;
00223             jpivstart -= ntopiv;
00224         }
00225 /*        Permute U block to match L */
00226         claswp_(&kcols, &a[(j + 1) * a_dim1 + 1], lda, &kstart, &j, &ipiv[1], 
00227                 &c__1);
00228 /*        Factor the current column */
00229         pivmag = c_abs(&a[j + j * a_dim1]);
00230         if (pivmag != 0.f && ! sisnan_(&pivmag)) {
00231             if (pivmag >= sfmin) {
00232                 i__2 = *m - j;
00233                 c_div(&q__1, &c_b1, &a[j + j * a_dim1]);
00234                 cscal_(&i__2, &q__1, &a[j + 1 + j * a_dim1], &c__1);
00235             } else {
00236                 i__2 = *m - j;
00237                 for (i__ = 1; i__ <= i__2; ++i__) {
00238                     i__3 = j + i__ + j * a_dim1;
00239                     c_div(&q__1, &a[j + i__ + j * a_dim1], &a[j + j * a_dim1])
00240                             ;
00241                     a[i__3].r = q__1.r, a[i__3].i = q__1.i;
00242                 }
00243             }
00244         } else if (pivmag == 0.f && *info == 0) {
00245             *info = j;
00246         }
00247 /*        Solve for U block. */
00248         ctrsm_("Left", "Lower", "No transpose", "Unit", &kahead, &kcols, &
00249                 c_b1, &a[kstart + kstart * a_dim1], lda, &a[kstart + (j + 1) *
00250                  a_dim1], lda);
00251 /*        Schur complement. */
00252         i__2 = *m - j;
00253         cgemm_("No transpose", "No transpose", &i__2, &kcols, &kahead, &c_b2, 
00254                 &a[j + 1 + kstart * a_dim1], lda, &a[kstart + (j + 1) * 
00255                 a_dim1], lda, &c_b1, &a[j + 1 + (j + 1) * a_dim1], lda);
00256     }
00257 /*     Handle pivot permutations on the way out of the recursion */
00258     npived = nstep & -nstep;
00259     j = nstep - npived;
00260     while(j > 0) {
00261         ntopiv = j & -j;
00262         i__1 = j + 1;
00263         claswp_(&ntopiv, &a[(j - ntopiv + 1) * a_dim1 + 1], lda, &i__1, &
00264                 nstep, &ipiv[1], &c__1);
00265         j -= ntopiv;
00266     }
00267 /*     If short and wide, handle the rest of the columns. */
00268     if (*m < *n) {
00269         i__1 = *n - *m;
00270         claswp_(&i__1, &a[(*m + kcols + 1) * a_dim1 + 1], lda, &c__1, m, &
00271                 ipiv[1], &c__1);
00272         i__1 = *n - *m;
00273         ctrsm_("Left", "Lower", "No transpose", "Unit", m, &i__1, &c_b1, &a[
00274                 a_offset], lda, &a[(*m + kcols + 1) * a_dim1 + 1], lda);
00275     }
00276     return 0;
00277 
00278 /*     End of CGETRF */
00279 
00280 } /* cgetrf_ */