swiftnav: dlaed2.c Source File

Go to the documentation of this file.
00001 /* dlaed2.f -- translated by f2c (version 20061008).
00002    You must link the resulting object file with libf2c:
00003         on Microsoft Windows system, link with libf2c.lib;
00004         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
00005         or, if you install libf2c.a in a standard place, with -lf2c -lm
00006         -- in that order, at the end of the command line, as in
00007                 cc *.o -lf2c -lm
00008         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
00009 
00010                 http://www.netlib.org/f2c/libf2c.zip
00011 */
00012 
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015 
00016 /* Table of constant values */
00017 
00018 static doublereal c_b3 = -1.;
00019 static integer c__1 = 1;
00020 
00021 /* Subroutine */ int dlaed2_(integer *k, integer *n, integer *n1, doublereal *
00022         d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, 
00023         doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, 
00024         integer *indx, integer *indxc, integer *indxp, integer *coltyp, 
00025         integer *info)
00026 {
00027     /* System generated locals */
00028     integer q_dim1, q_offset, i__1, i__2;
00029     doublereal d__1, d__2, d__3, d__4;
00030 
00031     /* Builtin functions */
00032     double sqrt(doublereal);
00033 
00034     /* Local variables */
00035     doublereal c__;
00036     integer i__, j;
00037     doublereal s, t;
00038     integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
00039     doublereal eps, tau, tol;
00040     integer psm[4], imax, jmax;
00041     extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, 
00042             doublereal *, integer *, doublereal *, doublereal *);
00043     integer ctot[4];
00044     extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
00045             integer *), dcopy_(integer *, doublereal *, integer *, doublereal 
00046             *, integer *);
00047     extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *);
00048     extern integer idamax_(integer *, doublereal *, integer *);
00049     extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, 
00050             integer *, integer *, integer *), dlacpy_(char *, integer *, 
00051             integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *);
00052 
00053 
00054 /*  -- LAPACK routine (version 3.2) -- */
00055 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00056 /*     November 2006 */
00057 
00058 /*     .. Scalar Arguments .. */
00059 /*     .. */
00060 /*     .. Array Arguments .. */
00061 /*     .. */
00062 
00063 /*  Purpose */
00064 /*  ======= */
00065 
00066 /*  DLAED2 merges the two sets of eigenvalues together into a single */
00067 /*  sorted set.  Then it tries to deflate the size of the problem. */
00068 /*  There are two ways in which deflation can occur:  when two or more */
00069 /*  eigenvalues are close together or if there is a tiny entry in the */
00070 /*  Z vector.  For each such occurrence the order of the related secular */
00071 /*  equation problem is reduced by one. */
00072 
00073 /*  Arguments */
00074 /*  ========= */
00075 
00076 /*  K      (output) INTEGER */
00077 /*         The number of non-deflated eigenvalues, and the order of the */
00078 /*         related secular equation. 0 <= K <=N. */
00079 
00080 /*  N      (input) INTEGER */
00081 /*         The dimension of the symmetric tridiagonal matrix.  N >= 0. */
00082 
00083 /*  N1     (input) INTEGER */
00084 /*         The location of the last eigenvalue in the leading sub-matrix. */
00085 /*         min(1,N) <= N1 <= N/2. */
00086 
00087 /*  D      (input/output) DOUBLE PRECISION array, dimension (N) */
00088 /*         On entry, D contains the eigenvalues of the two submatrices to */
00089 /*         be combined. */
00090 /*         On exit, D contains the trailing (N-K) updated eigenvalues */
00091 /*         (those which were deflated) sorted into increasing order. */
00092 
00093 /*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */
00094 /*         On entry, Q contains the eigenvectors of two submatrices in */
00095 /*         the two square blocks with corners at (1,1), (N1,N1) */
00096 /*         and (N1+1, N1+1), (N,N). */
00097 /*         On exit, Q contains the trailing (N-K) updated eigenvectors */
00098 /*         (those which were deflated) in its last N-K columns. */
00099 
00100 /*  LDQ    (input) INTEGER */
00101 /*         The leading dimension of the array Q.  LDQ >= max(1,N). */
00102 
00103 /*  INDXQ  (input/output) INTEGER array, dimension (N) */
00104 /*         The permutation which separately sorts the two sub-problems */
00105 /*         in D into ascending order.  Note that elements in the second */
00106 /*         half of this permutation must first have N1 added to their */
00107 /*         values. Destroyed on exit. */
00108 
00109 /*  RHO    (input/output) DOUBLE PRECISION */
00110 /*         On entry, the off-diagonal element associated with the rank-1 */
00111 /*         cut which originally split the two submatrices which are now */
00112 /*         being recombined. */
00113 /*         On exit, RHO has been modified to the value required by */
00114 /*         DLAED3. */
00115 
00116 /*  Z      (input) DOUBLE PRECISION array, dimension (N) */
00117 /*         On entry, Z contains the updating vector (the last */
00118 /*         row of the first sub-eigenvector matrix and the first row of */
00119 /*         the second sub-eigenvector matrix). */
00120 /*         On exit, the contents of Z have been destroyed by the updating */
00121 /*         process. */
00122 
00123 /*  DLAMDA (output) DOUBLE PRECISION array, dimension (N) */
00124 /*         A copy of the first K eigenvalues which will be used by */
00125 /*         DLAED3 to form the secular equation. */
00126 
00127 /*  W      (output) DOUBLE PRECISION array, dimension (N) */
00128 /*         The first k values of the final deflation-altered z-vector */
00129 /*         which will be passed to DLAED3. */
00130 
00131 /*  Q2     (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) */
00132 /*         A copy of the first K eigenvectors which will be used by */
00133 /*         DLAED3 in a matrix multiply (DGEMM) to solve for the new */
00134 /*         eigenvectors. */
00135 
00136 /*  INDX   (workspace) INTEGER array, dimension (N) */
00137 /*         The permutation used to sort the contents of DLAMDA into */
00138 /*         ascending order. */
00139 
00140 /*  INDXC  (output) INTEGER array, dimension (N) */
00141 /*         The permutation used to arrange the columns of the deflated */
00142 /*         Q matrix into three groups:  the first group contains non-zero */
00143 /*         elements only at and above N1, the second contains */
00144 /*         non-zero elements only below N1, and the third is dense. */
00145 
00146 /*  INDXP  (workspace) INTEGER array, dimension (N) */
00147 /*         The permutation used to place deflated values of D at the end */
00148 /*         of the array.  INDXP(1:K) points to the nondeflated D-values */
00149 /*         and INDXP(K+1:N) points to the deflated eigenvalues. */
00150 
00151 /*  COLTYP (workspace/output) INTEGER array, dimension (N) */
00152 /*         During execution, a label which will indicate which of the */
00153 /*         following types a column in the Q2 matrix is: */
00154 /*         1 : non-zero in the upper half only; */
00155 /*         2 : dense; */
00156 /*         3 : non-zero in the lower half only; */
00157 /*         4 : deflated. */
00158 /*         On exit, COLTYP(i) is the number of columns of type i, */
00159 /*         for i=1 to 4 only. */
00160 
00161 /*  INFO   (output) INTEGER */
00162 /*          = 0:  successful exit. */
00163 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
00164 
00165 /*  Further Details */
00166 /*  =============== */
00167 
00168 /*  Based on contributions by */
00169 /*     Jeff Rutter, Computer Science Division, University of California */
00170 /*     at Berkeley, USA */
00171 /*  Modified by Francoise Tisseur, University of Tennessee. */
00172 
00173 /*  ===================================================================== */
00174 
00175 /*     .. Parameters .. */
00176 /*     .. */
00177 /*     .. Local Arrays .. */
00178 /*     .. */
00179 /*     .. Local Scalars .. */
00180 /*     .. */
00181 /*     .. External Functions .. */
00182 /*     .. */
00183 /*     .. External Subroutines .. */
00184 /*     .. */
00185 /*     .. Intrinsic Functions .. */
00186 /*     .. */
00187 /*     .. Executable Statements .. */
00188 
00189 /*     Test the input parameters. */
00190 
00191     /* Parameter adjustments */
00192     --d__;
00193     q_dim1 = *ldq;
00194     q_offset = 1 + q_dim1;
00195     q -= q_offset;
00196     --indxq;
00197     --z__;
00198     --dlamda;
00199     --w;
00200     --q2;
00201     --indx;
00202     --indxc;
00203     --indxp;
00204     --coltyp;
00205 
00206     /* Function Body */
00207     *info = 0;
00208 
00209     if (*n < 0) {
00210         *info = -2;
00211     } else if (*ldq < max(1,*n)) {
00212         *info = -6;
00213     } else /* if(complicated condition) */ {
00214 /* Computing MIN */
00215         i__1 = 1, i__2 = *n / 2;
00216         if (min(i__1,i__2) > *n1 || *n / 2 < *n1) {
00217             *info = -3;
00218         }
00219     }
00220     if (*info != 0) {
00221         i__1 = -(*info);
00222         xerbla_("DLAED2", &i__1);
00223         return 0;
00224     }
00225 
00226 /*     Quick return if possible */
00227 
00228     if (*n == 0) {
00229         return 0;
00230     }
00231 
00232     n2 = *n - *n1;
00233     n1p1 = *n1 + 1;
00234 
00235     if (*rho < 0.) {
00236         dscal_(&n2, &c_b3, &z__[n1p1], &c__1);
00237     }
00238 
00239 /*     Normalize z so that norm(z) = 1.  Since z is the concatenation of */
00240 /*     two normalized vectors, norm2(z) = sqrt(2). */
00241 
00242     t = 1. / sqrt(2.);
00243     dscal_(n, &t, &z__[1], &c__1);
00244 
00245 /*     RHO = ABS( norm(z)**2 * RHO ) */
00246 
00247     *rho = (d__1 = *rho * 2., abs(d__1));
00248 
00249 /*     Sort the eigenvalues into increasing order */
00250 
00251     i__1 = *n;
00252     for (i__ = n1p1; i__ <= i__1; ++i__) {
00253         indxq[i__] += *n1;
00254 /* L10: */
00255     }
00256 
00257 /*     re-integrate the deflated parts from the last pass */
00258 
00259     i__1 = *n;
00260     for (i__ = 1; i__ <= i__1; ++i__) {
00261         dlamda[i__] = d__[indxq[i__]];
00262 /* L20: */
00263     }
00264     dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
00265     i__1 = *n;
00266     for (i__ = 1; i__ <= i__1; ++i__) {
00267         indx[i__] = indxq[indxc[i__]];
00268 /* L30: */
00269     }
00270 
00271 /*     Calculate the allowable deflation tolerance */
00272 
00273     imax = idamax_(n, &z__[1], &c__1);
00274     jmax = idamax_(n, &d__[1], &c__1);
00275     eps = dlamch_("Epsilon");
00276 /* Computing MAX */
00277     d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2))
00278             ;
00279     tol = eps * 8. * max(d__3,d__4);
00280 
00281 /*     If the rank-1 modifier is small enough, no more needs to be done */
00282 /*     except to reorganize Q so that its columns correspond with the */
00283 /*     elements in D. */
00284 
00285     if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) {
00286         *k = 0;
00287         iq2 = 1;
00288         i__1 = *n;
00289         for (j = 1; j <= i__1; ++j) {
00290             i__ = indx[j];
00291             dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
00292             dlamda[j] = d__[i__];
00293             iq2 += *n;
00294 /* L40: */
00295         }
00296         dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq);
00297         dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
00298         goto L190;
00299     }
00300 
00301 /*     If there are multiple eigenvalues then the problem deflates.  Here */
00302 /*     the number of equal eigenvalues are found.  As each equal */
00303 /*     eigenvalue is found, an elementary reflector is computed to rotate */
00304 /*     the corresponding eigensubspace so that the corresponding */
00305 /*     components of Z are zero in this new basis. */
00306 
00307     i__1 = *n1;
00308     for (i__ = 1; i__ <= i__1; ++i__) {
00309         coltyp[i__] = 1;
00310 /* L50: */
00311     }
00312     i__1 = *n;
00313     for (i__ = n1p1; i__ <= i__1; ++i__) {
00314         coltyp[i__] = 3;
00315 /* L60: */
00316     }
00317 
00318 
00319     *k = 0;
00320     k2 = *n + 1;
00321     i__1 = *n;
00322     for (j = 1; j <= i__1; ++j) {
00323         nj = indx[j];
00324         if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
00325 
00326 /*           Deflate due to small z component. */
00327 
00328             --k2;
00329             coltyp[nj] = 4;
00330             indxp[k2] = nj;
00331             if (j == *n) {
00332                 goto L100;
00333             }
00334         } else {
00335             pj = nj;
00336             goto L80;
00337         }
00338 /* L70: */
00339     }
00340 L80:
00341     ++j;
00342     nj = indx[j];
00343     if (j > *n) {
00344         goto L100;
00345     }
00346     if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
00347 
00348 /*        Deflate due to small z component. */
00349 
00350         --k2;
00351         coltyp[nj] = 4;
00352         indxp[k2] = nj;
00353     } else {
00354 
00355 /*        Check if eigenvalues are close enough to allow deflation. */
00356 
00357         s = z__[pj];
00358         c__ = z__[nj];
00359 
00360 /*        Find sqrt(a**2+b**2) without overflow or */
00361 /*        destructive underflow. */
00362 
00363         tau = dlapy2_(&c__, &s);
00364         t = d__[nj] - d__[pj];
00365         c__ /= tau;
00366         s = -s / tau;
00367         if ((d__1 = t * c__ * s, abs(d__1)) <= tol) {
00368 
00369 /*           Deflation is possible. */
00370 
00371             z__[nj] = tau;
00372             z__[pj] = 0.;
00373             if (coltyp[nj] != coltyp[pj]) {
00374                 coltyp[nj] = 2;
00375             }
00376             coltyp[pj] = 4;
00377             drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &
00378                     c__, &s);
00379 /* Computing 2nd power */
00380             d__1 = c__;
00381 /* Computing 2nd power */
00382             d__2 = s;
00383             t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
00384 /* Computing 2nd power */
00385             d__1 = s;
00386 /* Computing 2nd power */
00387             d__2 = c__;
00388             d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
00389             d__[pj] = t;
00390             --k2;
00391             i__ = 1;
00392 L90:
00393             if (k2 + i__ <= *n) {
00394                 if (d__[pj] < d__[indxp[k2 + i__]]) {
00395                     indxp[k2 + i__ - 1] = indxp[k2 + i__];
00396                     indxp[k2 + i__] = pj;
00397                     ++i__;
00398                     goto L90;
00399                 } else {
00400                     indxp[k2 + i__ - 1] = pj;
00401                 }
00402             } else {
00403                 indxp[k2 + i__ - 1] = pj;
00404             }
00405             pj = nj;
00406         } else {
00407             ++(*k);
00408             dlamda[*k] = d__[pj];
00409             w[*k] = z__[pj];
00410             indxp[*k] = pj;
00411             pj = nj;
00412         }
00413     }
00414     goto L80;
00415 L100:
00416 
00417 /*     Record the last eigenvalue. */
00418 
00419     ++(*k);
00420     dlamda[*k] = d__[pj];
00421     w[*k] = z__[pj];
00422     indxp[*k] = pj;
00423 
00424 /*     Count up the total number of the various types of columns, then */
00425 /*     form a permutation which positions the four column types into */
00426 /*     four uniform groups (although one or more of these groups may be */
00427 /*     empty). */
00428 
00429     for (j = 1; j <= 4; ++j) {
00430         ctot[j - 1] = 0;
00431 /* L110: */
00432     }
00433     i__1 = *n;
00434     for (j = 1; j <= i__1; ++j) {
00435         ct = coltyp[j];
00436         ++ctot[ct - 1];
00437 /* L120: */
00438     }
00439 
00440 /*     PSM(*) = Position in SubMatrix (of types 1 through 4) */
00441 
00442     psm[0] = 1;
00443     psm[1] = ctot[0] + 1;
00444     psm[2] = psm[1] + ctot[1];
00445     psm[3] = psm[2] + ctot[2];
00446     *k = *n - ctot[3];
00447 
00448 /*     Fill out the INDXC array so that the permutation which it induces */
00449 /*     will place all type-1 columns first, all type-2 columns next, */
00450 /*     then all type-3's, and finally all type-4's. */
00451 
00452     i__1 = *n;
00453     for (j = 1; j <= i__1; ++j) {
00454         js = indxp[j];
00455         ct = coltyp[js];
00456         indx[psm[ct - 1]] = js;
00457         indxc[psm[ct - 1]] = j;
00458         ++psm[ct - 1];
00459 /* L130: */
00460     }
00461 
00462 /*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
00463 /*     and Q2 respectively.  The eigenvalues/vectors which were not */
00464 /*     deflated go into the first K slots of DLAMDA and Q2 respectively, */
00465 /*     while those which were deflated go into the last N - K slots. */
00466 
00467     i__ = 1;
00468     iq1 = 1;
00469     iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
00470     i__1 = ctot[0];
00471     for (j = 1; j <= i__1; ++j) {
00472         js = indx[i__];
00473         dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
00474         z__[i__] = d__[js];
00475         ++i__;
00476         iq1 += *n1;
00477 /* L140: */
00478     }
00479 
00480     i__1 = ctot[1];
00481     for (j = 1; j <= i__1; ++j) {
00482         js = indx[i__];
00483         dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
00484         dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
00485         z__[i__] = d__[js];
00486         ++i__;
00487         iq1 += *n1;
00488         iq2 += n2;
00489 /* L150: */
00490     }
00491 
00492     i__1 = ctot[2];
00493     for (j = 1; j <= i__1; ++j) {
00494         js = indx[i__];
00495         dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
00496         z__[i__] = d__[js];
00497         ++i__;
00498         iq2 += n2;
00499 /* L160: */
00500     }
00501 
00502     iq1 = iq2;
00503     i__1 = ctot[3];
00504     for (j = 1; j <= i__1; ++j) {
00505         js = indx[i__];
00506         dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
00507         iq2 += *n;
00508         z__[i__] = d__[js];
00509         ++i__;
00510 /* L170: */
00511     }
00512 
00513 /*     The deflated eigenvalues and their corresponding vectors go back */
00514 /*     into the last N - K slots of D and Q respectively. */
00515 
00516     dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq);
00517     i__1 = *n - *k;
00518     dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
00519 
00520 /*     Copy CTOT into COLTYP for referencing in DLAED3. */
00521 
00522     for (j = 1; j <= 4; ++j) {
00523         coltyp[j] = ctot[j - 1];
00524 /* L180: */
00525     }
00526 
00527 L190:
00528     return 0;
00529 
00530 /*     End of DLAED2 */
00531 
00532 } /* dlaed2_ */