swiftnav: zgetrf.c Source File

Go to the documentation of this file.
00001 /* zgetrf.f -- translated by f2c (version 20061008).
00002    You must link the resulting object file with libf2c:
00003         on Microsoft Windows system, link with libf2c.lib;
00004         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
00005         or, if you install libf2c.a in a standard place, with -lf2c -lm
00006         -- in that order, at the end of the command line, as in
00007                 cc *.o -lf2c -lm
00008         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
00009 
00010                 http://www.netlib.org/f2c/libf2c.zip
00011 */
00012 
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015 
00016 /* Table of constant values */
00017 
00018 static doublecomplex c_b1 = {1.,0.};
00019 static doublecomplex c_b2 = {-1.,0.};
00020 static integer c__1 = 1;
00021 
00022 /* Subroutine */ int zgetrf_(integer *m, integer *n, doublecomplex *a, 
00023         integer *lda, integer *ipiv, integer *info)
00024 {
00025     /* System generated locals */
00026     integer a_dim1, a_offset, i__1, i__2, i__3;
00027     doublecomplex z__1;
00028 
00029     /* Builtin functions */
00030     double z_abs(doublecomplex *);
00031     void z_div(doublecomplex *, doublecomplex *, doublecomplex *);
00032 
00033     /* Local variables */
00034     integer i__, j, ipivstart, jpivstart, jp;
00035     doublecomplex tmp;
00036     integer kcols;
00037     doublereal sfmin;
00038     extern /* Subroutine */ int zscal_(integer *, doublecomplex *, 
00039             doublecomplex *, integer *), zgemm_(char *, char *, integer *, 
00040             integer *, integer *, doublecomplex *, doublecomplex *, integer *, 
00041              doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
00042             integer *);
00043     integer nstep;
00044     extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, 
00045             integer *, integer *, doublecomplex *, doublecomplex *, integer *, 
00046              doublecomplex *, integer *);
00047     integer kahead;
00048     extern doublereal dlamch_(char *);
00049     extern logical disnan_(doublereal *);
00050     extern /* Subroutine */ int xerbla_(char *, integer *);
00051     doublereal pivmag;
00052     integer npived;
00053     extern integer izamax_(integer *, doublecomplex *, integer *);
00054     integer kstart, ntopiv;
00055     extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, 
00056              integer *, integer *, integer *, integer *);
00057 
00058 
00059 /*  -- LAPACK routine (version 3.X) -- */
00060 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00061 /*     May 2008 */
00062 
00063 /*     .. Scalar Arguments .. */
00064 /*     .. */
00065 /*     .. Array Arguments .. */
00066 /*     .. */
00067 
00068 /*  Purpose */
00069 /*  ======= */
00070 
00071 /*  ZGETRF computes an LU factorization of a general M-by-N matrix A */
00072 /*  using partial pivoting with row interchanges. */
00073 
00074 /*  The factorization has the form */
00075 /*     A = P * L * U */
00076 /*  where P is a permutation matrix, L is lower triangular with unit */
00077 /*  diagonal elements (lower trapezoidal if m > n), and U is upper */
00078 /*  triangular (upper trapezoidal if m < n). */
00079 
00080 /*  This code implements an iterative version of Sivan Toledo's recursive */
00081 /*  LU algorithm[1].  For square matrices, this iterative versions should */
00082 /*  be within a factor of two of the optimum number of memory transfers. */
00083 
00084 /*  The pattern is as follows, with the large blocks of U being updated */
00085 /*  in one call to DTRSM, and the dotted lines denoting sections that */
00086 /*  have had all pending permutations applied: */
00087 
00088 /*   1 2 3 4 5 6 7 8 */
00089 /*  +-+-+---+-------+------ */
00090 /*  | |1|   |       | */
00091 /*  |.+-+ 2 |       | */
00092 /*  | | |   |       | */
00093 /*  |.|.+-+-+   4   | */
00094 /*  | | | |1|       | */
00095 /*  | | |.+-+       | */
00096 /*  | | | | |       | */
00097 /*  |.|.|.|.+-+-+---+  8 */
00098 /*  | | | | | |1|   | */
00099 /*  | | | | |.+-+ 2 | */
00100 /*  | | | | | | |   | */
00101 /*  | | | | |.|.+-+-+ */
00102 /*  | | | | | | | |1| */
00103 /*  | | | | | | |.+-+ */
00104 /*  | | | | | | | | | */
00105 /*  |.|.|.|.|.|.|.|.+----- */
00106 /*  | | | | | | | | | */
00107 
00108 /*  The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in */
00109 /*  the binary expansion of the current column.  Each Schur update is */
00110 /*  applied as soon as the necessary portion of U is available. */
00111 
00112 /*  [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with */
00113 /*  Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), */
00114 /*  1065-1081. http://dx.doi.org/10.1137/S0895479896297744 */
00115 
00116 /*  Arguments */
00117 /*  ========= */
00118 
00119 /*  M       (input) INTEGER */
00120 /*          The number of rows of the matrix A.  M >= 0. */
00121 
00122 /*  N       (input) INTEGER */
00123 /*          The number of columns of the matrix A.  N >= 0. */
00124 
00125 /*  A       (input/output) COMPLEX*16 array, dimension (LDA,N) */
00126 /*          On entry, the M-by-N matrix to be factored. */
00127 /*          On exit, the factors L and U from the factorization */
00128 /*          A = P*L*U; the unit diagonal elements of L are not stored. */
00129 
00130 /*  LDA     (input) INTEGER */
00131 /*          The leading dimension of the array A.  LDA >= max(1,M). */
00132 
00133 /*  IPIV    (output) INTEGER array, dimension (min(M,N)) */
00134 /*          The pivot indices; for 1 <= i <= min(M,N), row i of the */
00135 /*          matrix was interchanged with row IPIV(i). */
00136 
00137 /*  INFO    (output) INTEGER */
00138 /*          = 0:  successful exit */
00139 /*          < 0:  if INFO = -i, the i-th argument had an illegal value */
00140 /*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
00141 /*                has been completed, but the factor U is exactly */
00142 /*                singular, and division by zero will occur if it is used */
00143 /*                to solve a system of equations. */
00144 
00145 /*  ===================================================================== */
00146 
00147 /*     .. Parameters .. */
00148 /*     .. */
00149 /*     .. Local Scalars .. */
00150 /*     .. */
00151 /*     .. External Functions .. */
00152 /*     .. */
00153 /*     .. External Subroutines .. */
00154 /*     .. */
00155 /*     .. Intrinsic Functions .. */
00156 /*     .. */
00157 /*     .. Executable Statements .. */
00158 
00159 /*     Test the input parameters. */
00160 
00161     /* Parameter adjustments */
00162     a_dim1 = *lda;
00163     a_offset = 1 + a_dim1;
00164     a -= a_offset;
00165     --ipiv;
00166 
00167     /* Function Body */
00168     *info = 0;
00169     if (*m < 0) {
00170         *info = -1;
00171     } else if (*n < 0) {
00172         *info = -2;
00173     } else if (*lda < max(1,*m)) {
00174         *info = -4;
00175     }
00176     if (*info != 0) {
00177         i__1 = -(*info);
00178         xerbla_("ZGETRF", &i__1);
00179         return 0;
00180     }
00181 
00182 /*     Quick return if possible */
00183 
00184     if (*m == 0 || *n == 0) {
00185         return 0;
00186     }
00187 
00188 /*     Compute machine safe minimum */
00189 
00190     sfmin = dlamch_("S");
00191 
00192     nstep = min(*m,*n);
00193     i__1 = nstep;
00194     for (j = 1; j <= i__1; ++j) {
00195         kahead = j & -j;
00196         kstart = j + 1 - kahead;
00197 /* Computing MIN */
00198         i__2 = kahead, i__3 = *m - j;
00199         kcols = min(i__2,i__3);
00200 
00201 /*        Find pivot. */
00202 
00203         i__2 = *m - j + 1;
00204         jp = j - 1 + izamax_(&i__2, &a[j + j * a_dim1], &c__1);
00205         ipiv[j] = jp;
00206 /*        Permute just this column. */
00207         if (jp != j) {
00208             i__2 = j + j * a_dim1;
00209             tmp.r = a[i__2].r, tmp.i = a[i__2].i;
00210             i__2 = j + j * a_dim1;
00211             i__3 = jp + j * a_dim1;
00212             a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
00213             i__2 = jp + j * a_dim1;
00214             a[i__2].r = tmp.r, a[i__2].i = tmp.i;
00215         }
00216 /*        Apply pending permutations to L */
00217         ntopiv = 1;
00218         ipivstart = j;
00219         jpivstart = j - ntopiv;
00220         while(ntopiv < kahead) {
00221             zlaswp_(&ntopiv, &a[jpivstart * a_dim1 + 1], lda, &ipivstart, &j, 
00222                     &ipiv[1], &c__1);
00223             ipivstart -= ntopiv;
00224             ntopiv <<= 1;
00225             jpivstart -= ntopiv;
00226         }
00227 /*        Permute U block to match L */
00228         zlaswp_(&kcols, &a[(j + 1) * a_dim1 + 1], lda, &kstart, &j, &ipiv[1], 
00229                 &c__1);
00230 /*        Factor the current column */
00231         pivmag = z_abs(&a[j + j * a_dim1]);
00232         if (pivmag != 0. && ! disnan_(&pivmag)) {
00233             if (pivmag >= sfmin) {
00234                 i__2 = *m - j;
00235                 z_div(&z__1, &c_b1, &a[j + j * a_dim1]);
00236                 zscal_(&i__2, &z__1, &a[j + 1 + j * a_dim1], &c__1);
00237             } else {
00238                 i__2 = *m - j;
00239                 for (i__ = 1; i__ <= i__2; ++i__) {
00240                     i__3 = j + i__ + j * a_dim1;
00241                     z_div(&z__1, &a[j + i__ + j * a_dim1], &a[j + j * a_dim1])
00242                             ;
00243                     a[i__3].r = z__1.r, a[i__3].i = z__1.i;
00244                 }
00245             }
00246         } else if (pivmag == 0. && *info == 0) {
00247             *info = j;
00248         }
00249 /*        Solve for U block. */
00250         ztrsm_("Left", "Lower", "No transpose", "Unit", &kahead, &kcols, &
00251                 c_b1, &a[kstart + kstart * a_dim1], lda, &a[kstart + (j + 1) *
00252                  a_dim1], lda);
00253 /*        Schur complement. */
00254         i__2 = *m - j;
00255         zgemm_("No transpose", "No transpose", &i__2, &kcols, &kahead, &c_b2, 
00256                 &a[j + 1 + kstart * a_dim1], lda, &a[kstart + (j + 1) * 
00257                 a_dim1], lda, &c_b1, &a[j + 1 + (j + 1) * a_dim1], lda);
00258     }
00259 /*     Handle pivot permutations on the way out of the recursion */
00260     npived = nstep & -nstep;
00261     j = nstep - npived;
00262     while(j > 0) {
00263         ntopiv = j & -j;
00264         i__1 = j + 1;
00265         zlaswp_(&ntopiv, &a[(j - ntopiv + 1) * a_dim1 + 1], lda, &i__1, &
00266                 nstep, &ipiv[1], &c__1);
00267         j -= ntopiv;
00268     }
00269 /*     If short and wide, handle the rest of the columns. */
00270     if (*m < *n) {
00271         i__1 = *n - *m;
00272         zlaswp_(&i__1, &a[(*m + kcols + 1) * a_dim1 + 1], lda, &c__1, m, &
00273                 ipiv[1], &c__1);
00274         i__1 = *n - *m;
00275         ztrsm_("Left", "Lower", "No transpose", "Unit", m, &i__1, &c_b1, &a[
00276                 a_offset], lda, &a[(*m + kcols + 1) * a_dim1 + 1], lda);
00277     }
00278     return 0;
00279 
00280 /*     End of ZGETRF */
00281 
00282 } /* zgetrf_ */