swiftnav: slaed2.c Source File

Go to the documentation of this file.
00001 /* slaed2.f -- translated by f2c (version 20061008).
00002    You must link the resulting object file with libf2c:
00003         on Microsoft Windows system, link with libf2c.lib;
00004         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
00005         or, if you install libf2c.a in a standard place, with -lf2c -lm
00006         -- in that order, at the end of the command line, as in
00007                 cc *.o -lf2c -lm
00008         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
00009 
00010                 http://www.netlib.org/f2c/libf2c.zip
00011 */
00012 
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015 
00016 /* Table of constant values */
00017 
00018 static real c_b3 = -1.f;
00019 static integer c__1 = 1;
00020 
00021 /* Subroutine */ int slaed2_(integer *k, integer *n, integer *n1, real *d__, 
00022         real *q, integer *ldq, integer *indxq, real *rho, real *z__, real *
00023         dlamda, real *w, real *q2, integer *indx, integer *indxc, integer *
00024         indxp, integer *coltyp, integer *info)
00025 {
00026     /* System generated locals */
00027     integer q_dim1, q_offset, i__1, i__2;
00028     real r__1, r__2, r__3, r__4;
00029 
00030     /* Builtin functions */
00031     double sqrt(doublereal);
00032 
00033     /* Local variables */
00034     real c__;
00035     integer i__, j;
00036     real s, t;
00037     integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
00038     real eps, tau, tol;
00039     integer psm[4], imax, jmax, ctot[4];
00040     extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
00041             integer *, real *, real *), sscal_(integer *, real *, real *, 
00042             integer *), scopy_(integer *, real *, integer *, real *, integer *
00043 );
00044     extern doublereal slapy2_(real *, real *), slamch_(char *);
00045     extern /* Subroutine */ int xerbla_(char *, integer *);
00046     extern integer isamax_(integer *, real *, integer *);
00047     extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer 
00048             *, integer *, integer *), slacpy_(char *, integer *, integer *, 
00049             real *, integer *, real *, integer *);
00050 
00051 
00052 /*  -- LAPACK routine (version 3.2) -- */
00053 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00054 /*     November 2006 */
00055 
00056 /*     .. Scalar Arguments .. */
00057 /*     .. */
00058 /*     .. Array Arguments .. */
00059 /*     .. */
00060 
00061 /*  Purpose */
00062 /*  ======= */
00063 
00064 /*  SLAED2 merges the two sets of eigenvalues together into a single */
00065 /*  sorted set.  Then it tries to deflate the size of the problem. */
00066 /*  There are two ways in which deflation can occur:  when two or more */
00067 /*  eigenvalues are close together or if there is a tiny entry in the */
00068 /*  Z vector.  For each such occurrence the order of the related secular */
00069 /*  equation problem is reduced by one. */
00070 
00071 /*  Arguments */
00072 /*  ========= */
00073 
00074 /*  K      (output) INTEGER */
00075 /*         The number of non-deflated eigenvalues, and the order of the */
00076 /*         related secular equation. 0 <= K <=N. */
00077 
00078 /*  N      (input) INTEGER */
00079 /*         The dimension of the symmetric tridiagonal matrix.  N >= 0. */
00080 
00081 /*  N1     (input) INTEGER */
00082 /*         The location of the last eigenvalue in the leading sub-matrix. */
00083 /*         min(1,N) <= N1 <= N/2. */
00084 
00085 /*  D      (input/output) REAL array, dimension (N) */
00086 /*         On entry, D contains the eigenvalues of the two submatrices to */
00087 /*         be combined. */
00088 /*         On exit, D contains the trailing (N-K) updated eigenvalues */
00089 /*         (those which were deflated) sorted into increasing order. */
00090 
00091 /*  Q      (input/output) REAL array, dimension (LDQ, N) */
00092 /*         On entry, Q contains the eigenvectors of two submatrices in */
00093 /*         the two square blocks with corners at (1,1), (N1,N1) */
00094 /*         and (N1+1, N1+1), (N,N). */
00095 /*         On exit, Q contains the trailing (N-K) updated eigenvectors */
00096 /*         (those which were deflated) in its last N-K columns. */
00097 
00098 /*  LDQ    (input) INTEGER */
00099 /*         The leading dimension of the array Q.  LDQ >= max(1,N). */
00100 
00101 /*  INDXQ  (input/output) INTEGER array, dimension (N) */
00102 /*         The permutation which separately sorts the two sub-problems */
00103 /*         in D into ascending order.  Note that elements in the second */
00104 /*         half of this permutation must first have N1 added to their */
00105 /*         values. Destroyed on exit. */
00106 
00107 /*  RHO    (input/output) REAL */
00108 /*         On entry, the off-diagonal element associated with the rank-1 */
00109 /*         cut which originally split the two submatrices which are now */
00110 /*         being recombined. */
00111 /*         On exit, RHO has been modified to the value required by */
00112 /*         SLAED3. */
00113 
00114 /*  Z      (input) REAL array, dimension (N) */
00115 /*         On entry, Z contains the updating vector (the last */
00116 /*         row of the first sub-eigenvector matrix and the first row of */
00117 /*         the second sub-eigenvector matrix). */
00118 /*         On exit, the contents of Z have been destroyed by the updating */
00119 /*         process. */
00120 
00121 /*  DLAMDA (output) REAL array, dimension (N) */
00122 /*         A copy of the first K eigenvalues which will be used by */
00123 /*         SLAED3 to form the secular equation. */
00124 
00125 /*  W      (output) REAL array, dimension (N) */
00126 /*         The first k values of the final deflation-altered z-vector */
00127 /*         which will be passed to SLAED3. */
00128 
00129 /*  Q2     (output) REAL array, dimension (N1**2+(N-N1)**2) */
00130 /*         A copy of the first K eigenvectors which will be used by */
00131 /*         SLAED3 in a matrix multiply (SGEMM) to solve for the new */
00132 /*         eigenvectors. */
00133 
00134 /*  INDX   (workspace) INTEGER array, dimension (N) */
00135 /*         The permutation used to sort the contents of DLAMDA into */
00136 /*         ascending order. */
00137 
00138 /*  INDXC  (output) INTEGER array, dimension (N) */
00139 /*         The permutation used to arrange the columns of the deflated */
00140 /*         Q matrix into three groups:  the first group contains non-zero */
00141 /*         elements only at and above N1, the second contains */
00142 /*         non-zero elements only below N1, and the third is dense. */
00143 
00144 /*  INDXP  (workspace) INTEGER array, dimension (N) */
00145 /*         The permutation used to place deflated values of D at the end */
00146 /*         of the array.  INDXP(1:K) points to the nondeflated D-values */
00147 /*         and INDXP(K+1:N) points to the deflated eigenvalues. */
00148 
00149 /*  COLTYP (workspace/output) INTEGER array, dimension (N) */
00150 /*         During execution, a label which will indicate which of the */
00151 /*         following types a column in the Q2 matrix is: */
00152 /*         1 : non-zero in the upper half only; */
00153 /*         2 : dense; */
00154 /*         3 : non-zero in the lower half only; */
00155 /*         4 : deflated. */
00156 /*         On exit, COLTYP(i) is the number of columns of type i, */
00157 /*         for i=1 to 4 only. */
00158 
00159 /*  INFO   (output) INTEGER */
00160 /*          = 0:  successful exit. */
00161 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
00162 
00163 /*  Further Details */
00164 /*  =============== */
00165 
00166 /*  Based on contributions by */
00167 /*     Jeff Rutter, Computer Science Division, University of California */
00168 /*     at Berkeley, USA */
00169 /*  Modified by Francoise Tisseur, University of Tennessee. */
00170 
00171 /*  ===================================================================== */
00172 
00173 /*     .. Parameters .. */
00174 /*     .. */
00175 /*     .. Local Arrays .. */
00176 /*     .. */
00177 /*     .. Local Scalars .. */
00178 /*     .. */
00179 /*     .. External Functions .. */
00180 /*     .. */
00181 /*     .. External Subroutines .. */
00182 /*     .. */
00183 /*     .. Intrinsic Functions .. */
00184 /*     .. */
00185 /*     .. Executable Statements .. */
00186 
00187 /*     Test the input parameters. */
00188 
00189     /* Parameter adjustments */
00190     --d__;
00191     q_dim1 = *ldq;
00192     q_offset = 1 + q_dim1;
00193     q -= q_offset;
00194     --indxq;
00195     --z__;
00196     --dlamda;
00197     --w;
00198     --q2;
00199     --indx;
00200     --indxc;
00201     --indxp;
00202     --coltyp;
00203 
00204     /* Function Body */
00205     *info = 0;
00206 
00207     if (*n < 0) {
00208         *info = -2;
00209     } else if (*ldq < max(1,*n)) {
00210         *info = -6;
00211     } else /* if(complicated condition) */ {
00212 /* Computing MIN */
00213         i__1 = 1, i__2 = *n / 2;
00214         if (min(i__1,i__2) > *n1 || *n / 2 < *n1) {
00215             *info = -3;
00216         }
00217     }
00218     if (*info != 0) {
00219         i__1 = -(*info);
00220         xerbla_("SLAED2", &i__1);
00221         return 0;
00222     }
00223 
00224 /*     Quick return if possible */
00225 
00226     if (*n == 0) {
00227         return 0;
00228     }
00229 
00230     n2 = *n - *n1;
00231     n1p1 = *n1 + 1;
00232 
00233     if (*rho < 0.f) {
00234         sscal_(&n2, &c_b3, &z__[n1p1], &c__1);
00235     }
00236 
00237 /*     Normalize z so that norm(z) = 1.  Since z is the concatenation of */
00238 /*     two normalized vectors, norm2(z) = sqrt(2). */
00239 
00240     t = 1.f / sqrt(2.f);
00241     sscal_(n, &t, &z__[1], &c__1);
00242 
00243 /*     RHO = ABS( norm(z)**2 * RHO ) */
00244 
00245     *rho = (r__1 = *rho * 2.f, dabs(r__1));
00246 
00247 /*     Sort the eigenvalues into increasing order */
00248 
00249     i__1 = *n;
00250     for (i__ = n1p1; i__ <= i__1; ++i__) {
00251         indxq[i__] += *n1;
00252 /* L10: */
00253     }
00254 
00255 /*     re-integrate the deflated parts from the last pass */
00256 
00257     i__1 = *n;
00258     for (i__ = 1; i__ <= i__1; ++i__) {
00259         dlamda[i__] = d__[indxq[i__]];
00260 /* L20: */
00261     }
00262     slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
00263     i__1 = *n;
00264     for (i__ = 1; i__ <= i__1; ++i__) {
00265         indx[i__] = indxq[indxc[i__]];
00266 /* L30: */
00267     }
00268 
00269 /*     Calculate the allowable deflation tolerance */
00270 
00271     imax = isamax_(n, &z__[1], &c__1);
00272     jmax = isamax_(n, &d__[1], &c__1);
00273     eps = slamch_("Epsilon");
00274 /* Computing MAX */
00275     r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs(
00276             r__2));
00277     tol = eps * 8.f * dmax(r__3,r__4);
00278 
00279 /*     If the rank-1 modifier is small enough, no more needs to be done */
00280 /*     except to reorganize Q so that its columns correspond with the */
00281 /*     elements in D. */
00282 
00283     if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) {
00284         *k = 0;
00285         iq2 = 1;
00286         i__1 = *n;
00287         for (j = 1; j <= i__1; ++j) {
00288             i__ = indx[j];
00289             scopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
00290             dlamda[j] = d__[i__];
00291             iq2 += *n;
00292 /* L40: */
00293         }
00294         slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq);
00295         scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
00296         goto L190;
00297     }
00298 
00299 /*     If there are multiple eigenvalues then the problem deflates.  Here */
00300 /*     the number of equal eigenvalues are found.  As each equal */
00301 /*     eigenvalue is found, an elementary reflector is computed to rotate */
00302 /*     the corresponding eigensubspace so that the corresponding */
00303 /*     components of Z are zero in this new basis. */
00304 
00305     i__1 = *n1;
00306     for (i__ = 1; i__ <= i__1; ++i__) {
00307         coltyp[i__] = 1;
00308 /* L50: */
00309     }
00310     i__1 = *n;
00311     for (i__ = n1p1; i__ <= i__1; ++i__) {
00312         coltyp[i__] = 3;
00313 /* L60: */
00314     }
00315 
00316 
00317     *k = 0;
00318     k2 = *n + 1;
00319     i__1 = *n;
00320     for (j = 1; j <= i__1; ++j) {
00321         nj = indx[j];
00322         if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {
00323 
00324 /*           Deflate due to small z component. */
00325 
00326             --k2;
00327             coltyp[nj] = 4;
00328             indxp[k2] = nj;
00329             if (j == *n) {
00330                 goto L100;
00331             }
00332         } else {
00333             pj = nj;
00334             goto L80;
00335         }
00336 /* L70: */
00337     }
00338 L80:
00339     ++j;
00340     nj = indx[j];
00341     if (j > *n) {
00342         goto L100;
00343     }
00344     if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {
00345 
00346 /*        Deflate due to small z component. */
00347 
00348         --k2;
00349         coltyp[nj] = 4;
00350         indxp[k2] = nj;
00351     } else {
00352 
00353 /*        Check if eigenvalues are close enough to allow deflation. */
00354 
00355         s = z__[pj];
00356         c__ = z__[nj];
00357 
00358 /*        Find sqrt(a**2+b**2) without overflow or */
00359 /*        destructive underflow. */
00360 
00361         tau = slapy2_(&c__, &s);
00362         t = d__[nj] - d__[pj];
00363         c__ /= tau;
00364         s = -s / tau;
00365         if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {
00366 
00367 /*           Deflation is possible. */
00368 
00369             z__[nj] = tau;
00370             z__[pj] = 0.f;
00371             if (coltyp[nj] != coltyp[pj]) {
00372                 coltyp[nj] = 2;
00373             }
00374             coltyp[pj] = 4;
00375             srot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &
00376                     c__, &s);
00377 /* Computing 2nd power */
00378             r__1 = c__;
00379 /* Computing 2nd power */
00380             r__2 = s;
00381             t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
00382 /* Computing 2nd power */
00383             r__1 = s;
00384 /* Computing 2nd power */
00385             r__2 = c__;
00386             d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
00387             d__[pj] = t;
00388             --k2;
00389             i__ = 1;
00390 L90:
00391             if (k2 + i__ <= *n) {
00392                 if (d__[pj] < d__[indxp[k2 + i__]]) {
00393                     indxp[k2 + i__ - 1] = indxp[k2 + i__];
00394                     indxp[k2 + i__] = pj;
00395                     ++i__;
00396                     goto L90;
00397                 } else {
00398                     indxp[k2 + i__ - 1] = pj;
00399                 }
00400             } else {
00401                 indxp[k2 + i__ - 1] = pj;
00402             }
00403             pj = nj;
00404         } else {
00405             ++(*k);
00406             dlamda[*k] = d__[pj];
00407             w[*k] = z__[pj];
00408             indxp[*k] = pj;
00409             pj = nj;
00410         }
00411     }
00412     goto L80;
00413 L100:
00414 
00415 /*     Record the last eigenvalue. */
00416 
00417     ++(*k);
00418     dlamda[*k] = d__[pj];
00419     w[*k] = z__[pj];
00420     indxp[*k] = pj;
00421 
00422 /*     Count up the total number of the various types of columns, then */
00423 /*     form a permutation which positions the four column types into */
00424 /*     four uniform groups (although one or more of these groups may be */
00425 /*     empty). */
00426 
00427     for (j = 1; j <= 4; ++j) {
00428         ctot[j - 1] = 0;
00429 /* L110: */
00430     }
00431     i__1 = *n;
00432     for (j = 1; j <= i__1; ++j) {
00433         ct = coltyp[j];
00434         ++ctot[ct - 1];
00435 /* L120: */
00436     }
00437 
00438 /*     PSM(*) = Position in SubMatrix (of types 1 through 4) */
00439 
00440     psm[0] = 1;
00441     psm[1] = ctot[0] + 1;
00442     psm[2] = psm[1] + ctot[1];
00443     psm[3] = psm[2] + ctot[2];
00444     *k = *n - ctot[3];
00445 
00446 /*     Fill out the INDXC array so that the permutation which it induces */
00447 /*     will place all type-1 columns first, all type-2 columns next, */
00448 /*     then all type-3's, and finally all type-4's. */
00449 
00450     i__1 = *n;
00451     for (j = 1; j <= i__1; ++j) {
00452         js = indxp[j];
00453         ct = coltyp[js];
00454         indx[psm[ct - 1]] = js;
00455         indxc[psm[ct - 1]] = j;
00456         ++psm[ct - 1];
00457 /* L130: */
00458     }
00459 
00460 /*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
00461 /*     and Q2 respectively.  The eigenvalues/vectors which were not */
00462 /*     deflated go into the first K slots of DLAMDA and Q2 respectively, */
00463 /*     while those which were deflated go into the last N - K slots. */
00464 
00465     i__ = 1;
00466     iq1 = 1;
00467     iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
00468     i__1 = ctot[0];
00469     for (j = 1; j <= i__1; ++j) {
00470         js = indx[i__];
00471         scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
00472         z__[i__] = d__[js];
00473         ++i__;
00474         iq1 += *n1;
00475 /* L140: */
00476     }
00477 
00478     i__1 = ctot[1];
00479     for (j = 1; j <= i__1; ++j) {
00480         js = indx[i__];
00481         scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
00482         scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
00483         z__[i__] = d__[js];
00484         ++i__;
00485         iq1 += *n1;
00486         iq2 += n2;
00487 /* L150: */
00488     }
00489 
00490     i__1 = ctot[2];
00491     for (j = 1; j <= i__1; ++j) {
00492         js = indx[i__];
00493         scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
00494         z__[i__] = d__[js];
00495         ++i__;
00496         iq2 += n2;
00497 /* L160: */
00498     }
00499 
00500     iq1 = iq2;
00501     i__1 = ctot[3];
00502     for (j = 1; j <= i__1; ++j) {
00503         js = indx[i__];
00504         scopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
00505         iq2 += *n;
00506         z__[i__] = d__[js];
00507         ++i__;
00508 /* L170: */
00509     }
00510 
00511 /*     The deflated eigenvalues and their corresponding vectors go back */
00512 /*     into the last N - K slots of D and Q respectively. */
00513 
00514     slacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq);
00515     i__1 = *n - *k;
00516     scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
00517 
00518 /*     Copy CTOT into COLTYP for referencing in SLAED3. */
00519 
00520     for (j = 1; j <= 4; ++j) {
00521         coltyp[j] = ctot[j - 1];
00522 /* L180: */
00523     }
00524 
00525 L190:
00526     return 0;
00527 
00528 /*     End of SLAED2 */
00529 
00530 } /* slaed2_ */