swiftnav: dgbtrf.c Source File

Go to the documentation of this file.
00001 /* dgbtrf.f -- translated by f2c (version 20061008).
00002    You must link the resulting object file with libf2c:
00003         on Microsoft Windows system, link with libf2c.lib;
00004         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
00005         or, if you install libf2c.a in a standard place, with -lf2c -lm
00006         -- in that order, at the end of the command line, as in
00007                 cc *.o -lf2c -lm
00008         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
00009 
00010                 http://www.netlib.org/f2c/libf2c.zip
00011 */
00012 
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015 
00016 /* Table of constant values */
00017 
00018 static integer c__1 = 1;
00019 static integer c__65 = 65;
00020 static doublereal c_b18 = -1.;
00021 static doublereal c_b31 = 1.;
00022 
00023 /* Subroutine */ int dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, 
00024          doublereal *ab, integer *ldab, integer *ipiv, integer *info)
00025 {
00026     /* System generated locals */
00027     integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6;
00028     doublereal d__1;
00029 
00030     /* Local variables */
00031     integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km, ju, 
00032             kv, nw;
00033     extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
00034             doublereal *, integer *, doublereal *, integer *, doublereal *, 
00035             integer *);
00036     doublereal temp;
00037     extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
00038             integer *), dgemm_(char *, char *, integer *, integer *, integer *
00039 , doublereal *, doublereal *, integer *, doublereal *, integer *, 
00040             doublereal *, doublereal *, integer *), dcopy_(
00041             integer *, doublereal *, integer *, doublereal *, integer *), 
00042             dswap_(integer *, doublereal *, integer *, doublereal *, integer *
00043 );
00044     doublereal work13[4160]     /* was [65][64] */, work31[4160]        /* 
00045             was [65][64] */;
00046     extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
00047             integer *, integer *, doublereal *, doublereal *, integer *, 
00048             doublereal *, integer *), dgbtf2_(
00049             integer *, integer *, integer *, integer *, doublereal *, integer 
00050             *, integer *, integer *);
00051     extern integer idamax_(integer *, doublereal *, integer *);
00052     extern /* Subroutine */ int xerbla_(char *, integer *);
00053     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
00054             integer *, integer *);
00055     extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, 
00056             integer *, integer *, integer *, integer *);
00057 
00058 
00059 /*  -- LAPACK routine (version 3.2) -- */
00060 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00061 /*     November 2006 */
00062 
00063 /*     .. Scalar Arguments .. */
00064 /*     .. */
00065 /*     .. Array Arguments .. */
00066 /*     .. */
00067 
00068 /*  Purpose */
00069 /*  ======= */
00070 
00071 /*  DGBTRF computes an LU factorization of a real m-by-n band matrix A */
00072 /*  using partial pivoting with row interchanges. */
00073 
00074 /*  This is the blocked version of the algorithm, calling Level 3 BLAS. */
00075 
00076 /*  Arguments */
00077 /*  ========= */
00078 
00079 /*  M       (input) INTEGER */
00080 /*          The number of rows of the matrix A.  M >= 0. */
00081 
00082 /*  N       (input) INTEGER */
00083 /*          The number of columns of the matrix A.  N >= 0. */
00084 
00085 /*  KL      (input) INTEGER */
00086 /*          The number of subdiagonals within the band of A.  KL >= 0. */
00087 
00088 /*  KU      (input) INTEGER */
00089 /*          The number of superdiagonals within the band of A.  KU >= 0. */
00090 
00091 /*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */
00092 /*          On entry, the matrix A in band storage, in rows KL+1 to */
00093 /*          2*KL+KU+1; rows 1 to KL of the array need not be set. */
00094 /*          The j-th column of A is stored in the j-th column of the */
00095 /*          array AB as follows: */
00096 /*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */
00097 
00098 /*          On exit, details of the factorization: U is stored as an */
00099 /*          upper triangular band matrix with KL+KU superdiagonals in */
00100 /*          rows 1 to KL+KU+1, and the multipliers used during the */
00101 /*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */
00102 /*          See below for further details. */
00103 
00104 /*  LDAB    (input) INTEGER */
00105 /*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
00106 
00107 /*  IPIV    (output) INTEGER array, dimension (min(M,N)) */
00108 /*          The pivot indices; for 1 <= i <= min(M,N), row i of the */
00109 /*          matrix was interchanged with row IPIV(i). */
00110 
00111 /*  INFO    (output) INTEGER */
00112 /*          = 0: successful exit */
00113 /*          < 0: if INFO = -i, the i-th argument had an illegal value */
00114 /*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */
00115 /*               has been completed, but the factor U is exactly */
00116 /*               singular, and division by zero will occur if it is used */
00117 /*               to solve a system of equations. */
00118 
00119 /*  Further Details */
00120 /*  =============== */
00121 
00122 /*  The band storage scheme is illustrated by the following example, when */
00123 /*  M = N = 6, KL = 2, KU = 1: */
00124 
00125 /*  On entry:                       On exit: */
00126 
00127 /*      *    *    *    +    +    +       *    *    *   u14  u25  u36 */
00128 /*      *    *    +    +    +    +       *    *   u13  u24  u35  u46 */
00129 /*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56 */
00130 /*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66 */
00131 /*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   * */
00132 /*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    * */
00133 
00134 /*  Array elements marked * are not used by the routine; elements marked */
00135 /*  + need not be set on entry, but are required by the routine to store */
00136 /*  elements of U because of fill-in resulting from the row interchanges. */
00137 
00138 /*  ===================================================================== */
00139 
00140 /*     .. Parameters .. */
00141 /*     .. */
00142 /*     .. Local Scalars .. */
00143 /*     .. */
00144 /*     .. Local Arrays .. */
00145 /*     .. */
00146 /*     .. External Functions .. */
00147 /*     .. */
00148 /*     .. External Subroutines .. */
00149 /*     .. */
00150 /*     .. Intrinsic Functions .. */
00151 /*     .. */
00152 /*     .. Executable Statements .. */
00153 
00154 /*     KV is the number of superdiagonals in the factor U, allowing for */
00155 /*     fill-in */
00156 
00157     /* Parameter adjustments */
00158     ab_dim1 = *ldab;
00159     ab_offset = 1 + ab_dim1;
00160     ab -= ab_offset;
00161     --ipiv;
00162 
00163     /* Function Body */
00164     kv = *ku + *kl;
00165 
00166 /*     Test the input parameters. */
00167 
00168     *info = 0;
00169     if (*m < 0) {
00170         *info = -1;
00171     } else if (*n < 0) {
00172         *info = -2;
00173     } else if (*kl < 0) {
00174         *info = -3;
00175     } else if (*ku < 0) {
00176         *info = -4;
00177     } else if (*ldab < *kl + kv + 1) {
00178         *info = -6;
00179     }
00180     if (*info != 0) {
00181         i__1 = -(*info);
00182         xerbla_("DGBTRF", &i__1);
00183         return 0;
00184     }
00185 
00186 /*     Quick return if possible */
00187 
00188     if (*m == 0 || *n == 0) {
00189         return 0;
00190     }
00191 
00192 /*     Determine the block size for this environment */
00193 
00194     nb = ilaenv_(&c__1, "DGBTRF", " ", m, n, kl, ku);
00195 
00196 /*     The block size must not exceed the limit set by the size of the */
00197 /*     local arrays WORK13 and WORK31. */
00198 
00199     nb = min(nb,64);
00200 
00201     if (nb <= 1 || nb > *kl) {
00202 
00203 /*        Use unblocked code */
00204 
00205         dgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info);
00206     } else {
00207 
00208 /*        Use blocked code */
00209 
00210 /*        Zero the superdiagonal elements of the work array WORK13 */
00211 
00212         i__1 = nb;
00213         for (j = 1; j <= i__1; ++j) {
00214             i__2 = j - 1;
00215             for (i__ = 1; i__ <= i__2; ++i__) {
00216                 work13[i__ + j * 65 - 66] = 0.;
00217 /* L10: */
00218             }
00219 /* L20: */
00220         }
00221 
00222 /*        Zero the subdiagonal elements of the work array WORK31 */
00223 
00224         i__1 = nb;
00225         for (j = 1; j <= i__1; ++j) {
00226             i__2 = nb;
00227             for (i__ = j + 1; i__ <= i__2; ++i__) {
00228                 work31[i__ + j * 65 - 66] = 0.;
00229 /* L30: */
00230             }
00231 /* L40: */
00232         }
00233 
00234 /*        Gaussian elimination with partial pivoting */
00235 
00236 /*        Set fill-in elements in columns KU+2 to KV to zero */
00237 
00238         i__1 = min(kv,*n);
00239         for (j = *ku + 2; j <= i__1; ++j) {
00240             i__2 = *kl;
00241             for (i__ = kv - j + 2; i__ <= i__2; ++i__) {
00242                 ab[i__ + j * ab_dim1] = 0.;
00243 /* L50: */
00244             }
00245 /* L60: */
00246         }
00247 
00248 /*        JU is the index of the last column affected by the current */
00249 /*        stage of the factorization */
00250 
00251         ju = 1;
00252 
00253         i__1 = min(*m,*n);
00254         i__2 = nb;
00255         for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
00256 /* Computing MIN */
00257             i__3 = nb, i__4 = min(*m,*n) - j + 1;
00258             jb = min(i__3,i__4);
00259 
00260 /*           The active part of the matrix is partitioned */
00261 
00262 /*              A11   A12   A13 */
00263 /*              A21   A22   A23 */
00264 /*              A31   A32   A33 */
00265 
00266 /*           Here A11, A21 and A31 denote the current block of JB columns */
00267 /*           which is about to be factorized. The number of rows in the */
00268 /*           partitioning are JB, I2, I3 respectively, and the numbers */
00269 /*           of columns are JB, J2, J3. The superdiagonal elements of A13 */
00270 /*           and the subdiagonal elements of A31 lie outside the band. */
00271 
00272 /* Computing MIN */
00273             i__3 = *kl - jb, i__4 = *m - j - jb + 1;
00274             i2 = min(i__3,i__4);
00275 /* Computing MIN */
00276             i__3 = jb, i__4 = *m - j - *kl + 1;
00277             i3 = min(i__3,i__4);
00278 
00279 /*           J2 and J3 are computed after JU has been updated. */
00280 
00281 /*           Factorize the current block of JB columns */
00282 
00283             i__3 = j + jb - 1;
00284             for (jj = j; jj <= i__3; ++jj) {
00285 
00286 /*              Set fill-in elements in column JJ+KV to zero */
00287 
00288                 if (jj + kv <= *n) {
00289                     i__4 = *kl;
00290                     for (i__ = 1; i__ <= i__4; ++i__) {
00291                         ab[i__ + (jj + kv) * ab_dim1] = 0.;
00292 /* L70: */
00293                     }
00294                 }
00295 
00296 /*              Find pivot and test for singularity. KM is the number of */
00297 /*              subdiagonal elements in the current column. */
00298 
00299 /* Computing MIN */
00300                 i__4 = *kl, i__5 = *m - jj;
00301                 km = min(i__4,i__5);
00302                 i__4 = km + 1;
00303                 jp = idamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1);
00304                 ipiv[jj] = jp + jj - j;
00305                 if (ab[kv + jp + jj * ab_dim1] != 0.) {
00306 /* Computing MAX */
00307 /* Computing MIN */
00308                     i__6 = jj + *ku + jp - 1;
00309                     i__4 = ju, i__5 = min(i__6,*n);
00310                     ju = max(i__4,i__5);
00311                     if (jp != 1) {
00312 
00313 /*                    Apply interchange to columns J to J+JB-1 */
00314 
00315                         if (jp + jj - 1 < j + *kl) {
00316 
00317                             i__4 = *ldab - 1;
00318                             i__5 = *ldab - 1;
00319                             dswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], &
00320                                     i__4, &ab[kv + jp + jj - j + j * ab_dim1], 
00321                                      &i__5);
00322                         } else {
00323 
00324 /*                       The interchange affects columns J to JJ-1 of A31 */
00325 /*                       which are stored in the work array WORK31 */
00326 
00327                             i__4 = jj - j;
00328                             i__5 = *ldab - 1;
00329                             dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], 
00330                                     &i__5, &work31[jp + jj - j - *kl - 1], &
00331                                     c__65);
00332                             i__4 = j + jb - jj;
00333                             i__5 = *ldab - 1;
00334                             i__6 = *ldab - 1;
00335                             dswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, &
00336                                     ab[kv + jp + jj * ab_dim1], &i__6);
00337                         }
00338                     }
00339 
00340 /*                 Compute multipliers */
00341 
00342                     d__1 = 1. / ab[kv + 1 + jj * ab_dim1];
00343                     dscal_(&km, &d__1, &ab[kv + 2 + jj * ab_dim1], &c__1);
00344 
00345 /*                 Update trailing submatrix within the band and within */
00346 /*                 the current block. JM is the index of the last column */
00347 /*                 which needs to be updated. */
00348 
00349 /* Computing MIN */
00350                     i__4 = ju, i__5 = j + jb - 1;
00351                     jm = min(i__4,i__5);
00352                     if (jm > jj) {
00353                         i__4 = jm - jj;
00354                         i__5 = *ldab - 1;
00355                         i__6 = *ldab - 1;
00356                         dger_(&km, &i__4, &c_b18, &ab[kv + 2 + jj * ab_dim1], 
00357                                 &c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, &
00358                                 ab[kv + 1 + (jj + 1) * ab_dim1], &i__6);
00359                     }
00360                 } else {
00361 
00362 /*                 If pivot is zero, set INFO to the index of the pivot */
00363 /*                 unless a zero pivot has already been found. */
00364 
00365                     if (*info == 0) {
00366                         *info = jj;
00367                     }
00368                 }
00369 
00370 /*              Copy current column of A31 into the work array WORK31 */
00371 
00372 /* Computing MIN */
00373                 i__4 = jj - j + 1;
00374                 nw = min(i__4,i3);
00375                 if (nw > 0) {
00376                     dcopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], &
00377                             c__1, &work31[(jj - j + 1) * 65 - 65], &c__1);
00378                 }
00379 /* L80: */
00380             }
00381             if (j + jb <= *n) {
00382 
00383 /*              Apply the row interchanges to the other blocks. */
00384 
00385 /* Computing MIN */
00386                 i__3 = ju - j + 1;
00387                 j2 = min(i__3,kv) - jb;
00388 /* Computing MAX */
00389                 i__3 = 0, i__4 = ju - j - kv + 1;
00390                 j3 = max(i__3,i__4);
00391 
00392 /*              Use DLASWP to apply the row interchanges to A12, A22, and */
00393 /*              A32. */
00394 
00395                 i__3 = *ldab - 1;
00396                 dlaswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, &
00397                         c__1, &jb, &ipiv[j], &c__1);
00398 
00399 /*              Adjust the pivot indices. */
00400 
00401                 i__3 = j + jb - 1;
00402                 for (i__ = j; i__ <= i__3; ++i__) {
00403                     ipiv[i__] = ipiv[i__] + j - 1;
00404 /* L90: */
00405                 }
00406 
00407 /*              Apply the row interchanges to A13, A23, and A33 */
00408 /*              columnwise. */
00409 
00410                 k2 = j - 1 + jb + j2;
00411                 i__3 = j3;
00412                 for (i__ = 1; i__ <= i__3; ++i__) {
00413                     jj = k2 + i__;
00414                     i__4 = j + jb - 1;
00415                     for (ii = j + i__ - 1; ii <= i__4; ++ii) {
00416                         ip = ipiv[ii];
00417                         if (ip != ii) {
00418                             temp = ab[kv + 1 + ii - jj + jj * ab_dim1];
00419                             ab[kv + 1 + ii - jj + jj * ab_dim1] = ab[kv + 1 + 
00420                                     ip - jj + jj * ab_dim1];
00421                             ab[kv + 1 + ip - jj + jj * ab_dim1] = temp;
00422                         }
00423 /* L100: */
00424                     }
00425 /* L110: */
00426                 }
00427 
00428 /*              Update the relevant part of the trailing submatrix */
00429 
00430                 if (j2 > 0) {
00431 
00432 /*                 Update A12 */
00433 
00434                     i__3 = *ldab - 1;
00435                     i__4 = *ldab - 1;
00436                     dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, 
00437                             &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv 
00438                             + 1 - jb + (j + jb) * ab_dim1], &i__4);
00439 
00440                     if (i2 > 0) {
00441 
00442 /*                    Update A22 */
00443 
00444                         i__3 = *ldab - 1;
00445                         i__4 = *ldab - 1;
00446                         i__5 = *ldab - 1;
00447                         dgemm_("No transpose", "No transpose", &i2, &j2, &jb, 
00448                                 &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, 
00449                                  &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4, 
00450                                  &c_b31, &ab[kv + 1 + (j + jb) * ab_dim1], &
00451                                 i__5);
00452                     }
00453 
00454                     if (i3 > 0) {
00455 
00456 /*                    Update A32 */
00457 
00458                         i__3 = *ldab - 1;
00459                         i__4 = *ldab - 1;
00460                         dgemm_("No transpose", "No transpose", &i3, &j2, &jb, 
00461                                 &c_b18, work31, &c__65, &ab[kv + 1 - jb + (j 
00462                                 + jb) * ab_dim1], &i__3, &c_b31, &ab[kv + *kl 
00463                                 + 1 - jb + (j + jb) * ab_dim1], &i__4);
00464                     }
00465                 }
00466 
00467                 if (j3 > 0) {
00468 
00469 /*                 Copy the lower triangle of A13 into the work array */
00470 /*                 WORK13 */
00471 
00472                     i__3 = j3;
00473                     for (jj = 1; jj <= i__3; ++jj) {
00474                         i__4 = jb;
00475                         for (ii = jj; ii <= i__4; ++ii) {
00476                             work13[ii + jj * 65 - 66] = ab[ii - jj + 1 + (jj 
00477                                     + j + kv - 1) * ab_dim1];
00478 /* L120: */
00479                         }
00480 /* L130: */
00481                     }
00482 
00483 /*                 Update A13 in the work array */
00484 
00485                     i__3 = *ldab - 1;
00486                     dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, 
00487                             &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, work13, 
00488                             &c__65);
00489 
00490                     if (i2 > 0) {
00491 
00492 /*                    Update A23 */
00493 
00494                         i__3 = *ldab - 1;
00495                         i__4 = *ldab - 1;
00496                         dgemm_("No transpose", "No transpose", &i2, &j3, &jb, 
00497                                 &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, 
00498                                  work13, &c__65, &c_b31, &ab[jb + 1 + (j + kv)
00499                                  * ab_dim1], &i__4);
00500                     }
00501 
00502                     if (i3 > 0) {
00503 
00504 /*                    Update A33 */
00505 
00506                         i__3 = *ldab - 1;
00507                         dgemm_("No transpose", "No transpose", &i3, &j3, &jb, 
00508                                 &c_b18, work31, &c__65, work13, &c__65, &
00509                                 c_b31, &ab[*kl + 1 + (j + kv) * ab_dim1], &
00510                                 i__3);
00511                     }
00512 
00513 /*                 Copy the lower triangle of A13 back into place */
00514 
00515                     i__3 = j3;
00516                     for (jj = 1; jj <= i__3; ++jj) {
00517                         i__4 = jb;
00518                         for (ii = jj; ii <= i__4; ++ii) {
00519                             ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1] = 
00520                                     work13[ii + jj * 65 - 66];
00521 /* L140: */
00522                         }
00523 /* L150: */
00524                     }
00525                 }
00526             } else {
00527 
00528 /*              Adjust the pivot indices. */
00529 
00530                 i__3 = j + jb - 1;
00531                 for (i__ = j; i__ <= i__3; ++i__) {
00532                     ipiv[i__] = ipiv[i__] + j - 1;
00533 /* L160: */
00534                 }
00535             }
00536 
00537 /*           Partially undo the interchanges in the current block to */
00538 /*           restore the upper triangular form of A31 and copy the upper */
00539 /*           triangle of A31 back into place */
00540 
00541             i__3 = j;
00542             for (jj = j + jb - 1; jj >= i__3; --jj) {
00543                 jp = ipiv[jj] - jj + 1;
00544                 if (jp != 1) {
00545 
00546 /*                 Apply interchange to columns J to JJ-1 */
00547 
00548                     if (jp + jj - 1 < j + *kl) {
00549 
00550 /*                    The interchange does not affect A31 */
00551 
00552                         i__4 = jj - j;
00553                         i__5 = *ldab - 1;
00554                         i__6 = *ldab - 1;
00555                         dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
00556                                 i__5, &ab[kv + jp + jj - j + j * ab_dim1], &
00557                                 i__6);
00558                     } else {
00559 
00560 /*                    The interchange does affect A31 */
00561 
00562                         i__4 = jj - j;
00563                         i__5 = *ldab - 1;
00564                         dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
00565                                 i__5, &work31[jp + jj - j - *kl - 1], &c__65);
00566                     }
00567                 }
00568 
00569 /*              Copy the current column of A31 back into place */
00570 
00571 /* Computing MIN */
00572                 i__4 = i3, i__5 = jj - j + 1;
00573                 nw = min(i__4,i__5);
00574                 if (nw > 0) {
00575                     dcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[
00576                             kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1);
00577                 }
00578 /* L170: */
00579             }
00580 /* L180: */
00581         }
00582     }
00583 
00584     return 0;
00585 
00586 /*     End of DGBTRF */
00587 
00588 } /* dgbtrf_ */