swiftnav: zpstrf.c Source File

Go to the documentation of this file.
00001 /* zpstrf.f -- translated by f2c (version 20061008).
00002    You must link the resulting object file with libf2c:
00003         on Microsoft Windows system, link with libf2c.lib;
00004         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
00005         or, if you install libf2c.a in a standard place, with -lf2c -lm
00006         -- in that order, at the end of the command line, as in
00007                 cc *.o -lf2c -lm
00008         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
00009 
00010                 http://www.netlib.org/f2c/libf2c.zip
00011 */
00012 
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015 
00016 /* Table of constant values */
00017 
00018 static doublecomplex c_b1 = {1.,0.};
00019 static integer c__1 = 1;
00020 static integer c_n1 = -1;
00021 static doublereal c_b29 = -1.;
00022 static doublereal c_b30 = 1.;
00023 
00024 /* Subroutine */ int zpstrf_(char *uplo, integer *n, doublecomplex *a, 
00025         integer *lda, integer *piv, integer *rank, doublereal *tol, 
00026         doublereal *work, integer *info)
00027 {
00028     /* System generated locals */
00029     integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
00030     doublereal d__1;
00031     doublecomplex z__1, z__2;
00032 
00033     /* Builtin functions */
00034     void d_cnjg(doublecomplex *, doublecomplex *);
00035     double sqrt(doublereal);
00036 
00037     /* Local variables */
00038     integer i__, j, k, maxlocval, jb, nb;
00039     doublereal ajj;
00040     integer pvt;
00041     extern logical lsame_(char *, char *);
00042     doublereal dtemp;
00043     integer itemp;
00044     extern /* Subroutine */ int zherk_(char *, char *, integer *, integer *, 
00045             doublereal *, doublecomplex *, integer *, doublereal *, 
00046             doublecomplex *, integer *), zgemv_(char *, 
00047             integer *, integer *, doublecomplex *, doublecomplex *, integer *, 
00048              doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
00049             integer *);
00050     doublereal dstop;
00051     logical upper;
00052     doublecomplex ztemp;
00053     extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, 
00054             doublecomplex *, integer *);
00055     extern doublereal dlamch_(char *);
00056     extern /* Subroutine */ int zpstf2_(char *, integer *, doublecomplex *, 
00057             integer *, integer *, integer *, doublereal *, doublereal *, 
00058             integer *);
00059     extern logical disnan_(doublereal *);
00060     extern /* Subroutine */ int xerbla_(char *, integer *);
00061     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
00062             integer *, integer *);
00063     extern /* Subroutine */ int zdscal_(integer *, doublereal *, 
00064             doublecomplex *, integer *), zlacgv_(integer *, doublecomplex *, 
00065             integer *);
00066     extern integer dmaxloc_(doublereal *, integer *);
00067 
00068 
00069 /*  -- LAPACK routine (version 3.2.1)                                  -- */
00070 
00071 /*  -- Contributed by Craig Lucas, University of Manchester / NAG Ltd. -- */
00072 /*  -- April 2009                                                      -- */
00073 
00074 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
00075 
00076 /*     .. Scalar Arguments .. */
00077 /*     .. */
00078 /*     .. Array Arguments .. */
00079 /*     .. */
00080 
00081 /*  Purpose */
00082 /*  ======= */
00083 
00084 /*  ZPSTRF computes the Cholesky factorization with complete */
00085 /*  pivoting of a complex Hermitian positive semidefinite matrix A. */
00086 
00087 /*  The factorization has the form */
00088 /*     P' * A * P = U' * U ,  if UPLO = 'U', */
00089 /*     P' * A * P = L  * L',  if UPLO = 'L', */
00090 /*  where U is an upper triangular matrix and L is lower triangular, and */
00091 /*  P is stored as vector PIV. */
00092 
00093 /*  This algorithm does not attempt to check that A is positive */
00094 /*  semidefinite. This version of the algorithm calls level 3 BLAS. */
00095 
00096 /*  Arguments */
00097 /*  ========= */
00098 
00099 /*  UPLO    (input) CHARACTER*1 */
00100 /*          Specifies whether the upper or lower triangular part of the */
00101 /*          symmetric matrix A is stored. */
00102 /*          = 'U':  Upper triangular */
00103 /*          = 'L':  Lower triangular */
00104 
00105 /*  N       (input) INTEGER */
00106 /*          The order of the matrix A.  N >= 0. */
00107 
00108 /*  A       (input/output) COMPLEX*16 array, dimension (LDA,N) */
00109 /*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading */
00110 /*          n by n upper triangular part of A contains the upper */
00111 /*          triangular part of the matrix A, and the strictly lower */
00112 /*          triangular part of A is not referenced.  If UPLO = 'L', the */
00113 /*          leading n by n lower triangular part of A contains the lower */
00114 /*          triangular part of the matrix A, and the strictly upper */
00115 /*          triangular part of A is not referenced. */
00116 
00117 /*          On exit, if INFO = 0, the factor U or L from the Cholesky */
00118 /*          factorization as above. */
00119 
00120 /*  LDA     (input) INTEGER */
00121 /*          The leading dimension of the array A.  LDA >= max(1,N). */
00122 
00123 /*  PIV     (output) INTEGER array, dimension (N) */
00124 /*          PIV is such that the nonzero entries are P( PIV(K), K ) = 1. */
00125 
00126 /*  RANK    (output) INTEGER */
00127 /*          The rank of A given by the number of steps the algorithm */
00128 /*          completed. */
00129 
00130 /*  TOL     (input) DOUBLE PRECISION */
00131 /*          User defined tolerance. If TOL < 0, then N*U*MAX( A(K,K) ) */
00132 /*          will be used. The algorithm terminates at the (K-1)st step */
00133 /*          if the pivot <= TOL. */
00134 
00135 /*  WORK    DOUBLE PRECISION array, dimension (2*N) */
00136 /*          Work space. */
00137 
00138 /*  INFO    (output) INTEGER */
00139 /*          < 0: If INFO = -K, the K-th argument had an illegal value, */
00140 /*          = 0: algorithm completed successfully, and */
00141 /*          > 0: the matrix A is either rank deficient with computed rank */
00142 /*               as returned in RANK, or is indefinite.  See Section 7 of */
00143 /*               LAPACK Working Note #161 for further information. */
00144 
00145 /*  ===================================================================== */
00146 
00147 /*     .. Parameters .. */
00148 /*     .. */
00149 /*     .. Local Scalars .. */
00150 /*     .. */
00151 /*     .. External Functions .. */
00152 /*     .. */
00153 /*     .. External Subroutines .. */
00154 /*     .. */
00155 /*     .. Intrinsic Functions .. */
00156 /*     .. */
00157 /*     .. Executable Statements .. */
00158 
00159 /*     Test the input parameters. */
00160 
00161     /* Parameter adjustments */
00162     --work;
00163     --piv;
00164     a_dim1 = *lda;
00165     a_offset = 1 + a_dim1;
00166     a -= a_offset;
00167 
00168     /* Function Body */
00169     *info = 0;
00170     upper = lsame_(uplo, "U");
00171     if (! upper && ! lsame_(uplo, "L")) {
00172         *info = -1;
00173     } else if (*n < 0) {
00174         *info = -2;
00175     } else if (*lda < max(1,*n)) {
00176         *info = -4;
00177     }
00178     if (*info != 0) {
00179         i__1 = -(*info);
00180         xerbla_("ZPSTRF", &i__1);
00181         return 0;
00182     }
00183 
00184 /*     Quick return if possible */
00185 
00186     if (*n == 0) {
00187         return 0;
00188     }
00189 
00190 /*     Get block size */
00191 
00192     nb = ilaenv_(&c__1, "ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1);
00193     if (nb <= 1 || nb >= *n) {
00194 
00195 /*        Use unblocked code */
00196 
00197         zpstf2_(uplo, n, &a[a_dim1 + 1], lda, &piv[1], rank, tol, &work[1], 
00198                 info);
00199         goto L230;
00200 
00201     } else {
00202 
00203 /*     Initialize PIV */
00204 
00205         i__1 = *n;
00206         for (i__ = 1; i__ <= i__1; ++i__) {
00207             piv[i__] = i__;
00208 /* L100: */
00209         }
00210 
00211 /*     Compute stopping value */
00212 
00213         i__1 = *n;
00214         for (i__ = 1; i__ <= i__1; ++i__) {
00215             i__2 = i__ + i__ * a_dim1;
00216             work[i__] = a[i__2].r;
00217 /* L110: */
00218         }
00219         pvt = dmaxloc_(&work[1], n);
00220         i__1 = pvt + pvt * a_dim1;
00221         ajj = a[i__1].r;
00222         if (ajj == 0. || disnan_(&ajj)) {
00223             *rank = 0;
00224             *info = 1;
00225             goto L230;
00226         }
00227 
00228 /*     Compute stopping value if not supplied */
00229 
00230         if (*tol < 0.) {
00231             dstop = *n * dlamch_("Epsilon") * ajj;
00232         } else {
00233             dstop = *tol;
00234         }
00235 
00236 
00237         if (upper) {
00238 
00239 /*           Compute the Cholesky factorization P' * A * P = U' * U */
00240 
00241             i__1 = *n;
00242             i__2 = nb;
00243             for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
00244 
00245 /*              Account for last block not being NB wide */
00246 
00247 /* Computing MIN */
00248                 i__3 = nb, i__4 = *n - k + 1;
00249                 jb = min(i__3,i__4);
00250 
00251 /*              Set relevant part of first half of WORK to zero, */
00252 /*              holds dot products */
00253 
00254                 i__3 = *n;
00255                 for (i__ = k; i__ <= i__3; ++i__) {
00256                     work[i__] = 0.;
00257 /* L120: */
00258                 }
00259 
00260                 i__3 = k + jb - 1;
00261                 for (j = k; j <= i__3; ++j) {
00262 
00263 /*              Find pivot, test for exit, else swap rows and columns */
00264 /*              Update dot products, compute possible pivots which are */
00265 /*              stored in the second half of WORK */
00266 
00267                     i__4 = *n;
00268                     for (i__ = j; i__ <= i__4; ++i__) {
00269 
00270                         if (j > k) {
00271                             d_cnjg(&z__2, &a[j - 1 + i__ * a_dim1]);
00272                             i__5 = j - 1 + i__ * a_dim1;
00273                             z__1.r = z__2.r * a[i__5].r - z__2.i * a[i__5].i, 
00274                                     z__1.i = z__2.r * a[i__5].i + z__2.i * a[
00275                                     i__5].r;
00276                             work[i__] += z__1.r;
00277                         }
00278                         i__5 = i__ + i__ * a_dim1;
00279                         work[*n + i__] = a[i__5].r - work[i__];
00280 
00281 /* L130: */
00282                     }
00283 
00284                     if (j > 1) {
00285                         maxlocval = (*n << 1) - (*n + j) + 1;
00286                         itemp = dmaxloc_(&work[*n + j], &maxlocval);
00287                         pvt = itemp + j - 1;
00288                         ajj = work[*n + pvt];
00289                         if (ajj <= dstop || disnan_(&ajj)) {
00290                             i__4 = j + j * a_dim1;
00291                             a[i__4].r = ajj, a[i__4].i = 0.;
00292                             goto L220;
00293                         }
00294                     }
00295 
00296                     if (j != pvt) {
00297 
00298 /*                    Pivot OK, so can now swap pivot rows and columns */
00299 
00300                         i__4 = pvt + pvt * a_dim1;
00301                         i__5 = j + j * a_dim1;
00302                         a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i;
00303                         i__4 = j - 1;
00304                         zswap_(&i__4, &a[j * a_dim1 + 1], &c__1, &a[pvt * 
00305                                 a_dim1 + 1], &c__1);
00306                         if (pvt < *n) {
00307                             i__4 = *n - pvt;
00308                             zswap_(&i__4, &a[j + (pvt + 1) * a_dim1], lda, &a[
00309                                     pvt + (pvt + 1) * a_dim1], lda);
00310                         }
00311                         i__4 = pvt - 1;
00312                         for (i__ = j + 1; i__ <= i__4; ++i__) {
00313                             d_cnjg(&z__1, &a[j + i__ * a_dim1]);
00314                             ztemp.r = z__1.r, ztemp.i = z__1.i;
00315                             i__5 = j + i__ * a_dim1;
00316                             d_cnjg(&z__1, &a[i__ + pvt * a_dim1]);
00317                             a[i__5].r = z__1.r, a[i__5].i = z__1.i;
00318                             i__5 = i__ + pvt * a_dim1;
00319                             a[i__5].r = ztemp.r, a[i__5].i = ztemp.i;
00320 /* L140: */
00321                         }
00322                         i__4 = j + pvt * a_dim1;
00323                         d_cnjg(&z__1, &a[j + pvt * a_dim1]);
00324                         a[i__4].r = z__1.r, a[i__4].i = z__1.i;
00325 
00326 /*                    Swap dot products and PIV */
00327 
00328                         dtemp = work[j];
00329                         work[j] = work[pvt];
00330                         work[pvt] = dtemp;
00331                         itemp = piv[pvt];
00332                         piv[pvt] = piv[j];
00333                         piv[j] = itemp;
00334                     }
00335 
00336                     ajj = sqrt(ajj);
00337                     i__4 = j + j * a_dim1;
00338                     a[i__4].r = ajj, a[i__4].i = 0.;
00339 
00340 /*                 Compute elements J+1:N of row J. */
00341 
00342                     if (j < *n) {
00343                         i__4 = j - 1;
00344                         zlacgv_(&i__4, &a[j * a_dim1 + 1], &c__1);
00345                         i__4 = j - k;
00346                         i__5 = *n - j;
00347                         z__1.r = -1., z__1.i = -0.;
00348                         zgemv_("Trans", &i__4, &i__5, &z__1, &a[k + (j + 1) * 
00349                                 a_dim1], lda, &a[k + j * a_dim1], &c__1, &
00350                                 c_b1, &a[j + (j + 1) * a_dim1], lda);
00351                         i__4 = j - 1;
00352                         zlacgv_(&i__4, &a[j * a_dim1 + 1], &c__1);
00353                         i__4 = *n - j;
00354                         d__1 = 1. / ajj;
00355                         zdscal_(&i__4, &d__1, &a[j + (j + 1) * a_dim1], lda);
00356                     }
00357 
00358 /* L150: */
00359                 }
00360 
00361 /*              Update trailing matrix, J already incremented */
00362 
00363                 if (k + jb <= *n) {
00364                     i__3 = *n - j + 1;
00365                     zherk_("Upper", "Conj Trans", &i__3, &jb, &c_b29, &a[k + 
00366                             j * a_dim1], lda, &c_b30, &a[j + j * a_dim1], lda);
00367                 }
00368 
00369 /* L160: */
00370             }
00371 
00372         } else {
00373 
00374 /*        Compute the Cholesky factorization P' * A * P = L * L' */
00375 
00376             i__2 = *n;
00377             i__1 = nb;
00378             for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
00379 
00380 /*              Account for last block not being NB wide */
00381 
00382 /* Computing MIN */
00383                 i__3 = nb, i__4 = *n - k + 1;
00384                 jb = min(i__3,i__4);
00385 
00386 /*              Set relevant part of first half of WORK to zero, */
00387 /*              holds dot products */
00388 
00389                 i__3 = *n;
00390                 for (i__ = k; i__ <= i__3; ++i__) {
00391                     work[i__] = 0.;
00392 /* L170: */
00393                 }
00394 
00395                 i__3 = k + jb - 1;
00396                 for (j = k; j <= i__3; ++j) {
00397 
00398 /*              Find pivot, test for exit, else swap rows and columns */
00399 /*              Update dot products, compute possible pivots which are */
00400 /*              stored in the second half of WORK */
00401 
00402                     i__4 = *n;
00403                     for (i__ = j; i__ <= i__4; ++i__) {
00404 
00405                         if (j > k) {
00406                             d_cnjg(&z__2, &a[i__ + (j - 1) * a_dim1]);
00407                             i__5 = i__ + (j - 1) * a_dim1;
00408                             z__1.r = z__2.r * a[i__5].r - z__2.i * a[i__5].i, 
00409                                     z__1.i = z__2.r * a[i__5].i + z__2.i * a[
00410                                     i__5].r;
00411                             work[i__] += z__1.r;
00412                         }
00413                         i__5 = i__ + i__ * a_dim1;
00414                         work[*n + i__] = a[i__5].r - work[i__];
00415 
00416 /* L180: */
00417                     }
00418 
00419                     if (j > 1) {
00420                         maxlocval = (*n << 1) - (*n + j) + 1;
00421                         itemp = dmaxloc_(&work[*n + j], &maxlocval);
00422                         pvt = itemp + j - 1;
00423                         ajj = work[*n + pvt];
00424                         if (ajj <= dstop || disnan_(&ajj)) {
00425                             i__4 = j + j * a_dim1;
00426                             a[i__4].r = ajj, a[i__4].i = 0.;
00427                             goto L220;
00428                         }
00429                     }
00430 
00431                     if (j != pvt) {
00432 
00433 /*                    Pivot OK, so can now swap pivot rows and columns */
00434 
00435                         i__4 = pvt + pvt * a_dim1;
00436                         i__5 = j + j * a_dim1;
00437                         a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i;
00438                         i__4 = j - 1;
00439                         zswap_(&i__4, &a[j + a_dim1], lda, &a[pvt + a_dim1], 
00440                                 lda);
00441                         if (pvt < *n) {
00442                             i__4 = *n - pvt;
00443                             zswap_(&i__4, &a[pvt + 1 + j * a_dim1], &c__1, &a[
00444                                     pvt + 1 + pvt * a_dim1], &c__1);
00445                         }
00446                         i__4 = pvt - 1;
00447                         for (i__ = j + 1; i__ <= i__4; ++i__) {
00448                             d_cnjg(&z__1, &a[i__ + j * a_dim1]);
00449                             ztemp.r = z__1.r, ztemp.i = z__1.i;
00450                             i__5 = i__ + j * a_dim1;
00451                             d_cnjg(&z__1, &a[pvt + i__ * a_dim1]);
00452                             a[i__5].r = z__1.r, a[i__5].i = z__1.i;
00453                             i__5 = pvt + i__ * a_dim1;
00454                             a[i__5].r = ztemp.r, a[i__5].i = ztemp.i;
00455 /* L190: */
00456                         }
00457                         i__4 = pvt + j * a_dim1;
00458                         d_cnjg(&z__1, &a[pvt + j * a_dim1]);
00459                         a[i__4].r = z__1.r, a[i__4].i = z__1.i;
00460 
00461 
00462 /*                    Swap dot products and PIV */
00463 
00464                         dtemp = work[j];
00465                         work[j] = work[pvt];
00466                         work[pvt] = dtemp;
00467                         itemp = piv[pvt];
00468                         piv[pvt] = piv[j];
00469                         piv[j] = itemp;
00470                     }
00471 
00472                     ajj = sqrt(ajj);
00473                     i__4 = j + j * a_dim1;
00474                     a[i__4].r = ajj, a[i__4].i = 0.;
00475 
00476 /*                 Compute elements J+1:N of column J. */
00477 
00478                     if (j < *n) {
00479                         i__4 = j - 1;
00480                         zlacgv_(&i__4, &a[j + a_dim1], lda);
00481                         i__4 = *n - j;
00482                         i__5 = j - k;
00483                         z__1.r = -1., z__1.i = -0.;
00484                         zgemv_("No Trans", &i__4, &i__5, &z__1, &a[j + 1 + k *
00485                                  a_dim1], lda, &a[j + k * a_dim1], lda, &c_b1, 
00486                                  &a[j + 1 + j * a_dim1], &c__1);
00487                         i__4 = j - 1;
00488                         zlacgv_(&i__4, &a[j + a_dim1], lda);
00489                         i__4 = *n - j;
00490                         d__1 = 1. / ajj;
00491                         zdscal_(&i__4, &d__1, &a[j + 1 + j * a_dim1], &c__1);
00492                     }
00493 
00494 /* L200: */
00495                 }
00496 
00497 /*              Update trailing matrix, J already incremented */
00498 
00499                 if (k + jb <= *n) {
00500                     i__3 = *n - j + 1;
00501                     zherk_("Lower", "No Trans", &i__3, &jb, &c_b29, &a[j + k *
00502                              a_dim1], lda, &c_b30, &a[j + j * a_dim1], lda);
00503                 }
00504 
00505 /* L210: */
00506             }
00507 
00508         }
00509     }
00510 
00511 /*     Ran to completion, A has full rank */
00512 
00513     *rank = *n;
00514 
00515     goto L230;
00516 L220:
00517 
00518 /*     Rank is the number of steps completed.  Set INFO = 1 to signal */
00519 /*     that the factorization cannot be used to solve a system. */
00520 
00521     *rank = j - 1;
00522     *info = 1;
00523 
00524 L230:
00525     return 0;
00526 
00527 /*     End of ZPSTRF */
00528 
00529 } /* zpstrf_ */