00001 /* slasd2.f -- translated by f2c (version 20061008). 00002 You must link the resulting object file with libf2c: 00003 on Microsoft Windows system, link with libf2c.lib; 00004 on Linux or Unix systems, link with .../path/to/libf2c.a -lm 00005 or, if you install libf2c.a in a standard place, with -lf2c -lm 00006 -- in that order, at the end of the command line, as in 00007 cc *.o -lf2c -lm 00008 Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., 00009 00010 http://www.netlib.org/f2c/libf2c.zip 00011 */ 00012 00013 #include "f2c.h" 00014 #include "blaswrap.h" 00015 00016 /* Table of constant values */ 00017 00018 static integer c__1 = 1; 00019 static real c_b30 = 0.f; 00020 00021 /* Subroutine */ int slasd2_(integer *nl, integer *nr, integer *sqre, integer 00022 *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * 00023 ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, 00024 real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, 00025 integer *idxq, integer *coltyp, integer *info) 00026 { 00027 /* System generated locals */ 00028 integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, 00029 vt2_dim1, vt2_offset, i__1; 00030 real r__1, r__2; 00031 00032 /* Local variables */ 00033 real c__; 00034 integer i__, j, m, n; 00035 real s; 00036 integer k2; 00037 real z1; 00038 integer ct, jp; 00039 real eps, tau, tol; 00040 integer psm[4], nlp1, nlp2, idxi, idxj, ctot[4]; 00041 extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 00042 integer *, real *, real *); 00043 integer idxjp, jprev; 00044 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 00045 integer *); 00046 extern doublereal slapy2_(real *, real *), slamch_(char *); 00047 extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( 00048 integer *, integer *, real *, integer *, integer *, integer *); 00049 real hlftol; 00050 extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 00051 integer *, real *, integer *), slaset_(char *, integer *, 00052 integer *, real *, real *, real *, integer *); 00053 00054 00055 /* -- LAPACK auxiliary routine (version 3.2) -- */ 00056 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00057 /* November 2006 */ 00058 00059 /* .. Scalar Arguments .. */ 00060 /* .. */ 00061 /* .. Array Arguments .. */ 00062 /* .. */ 00063 00064 /* Purpose */ 00065 /* ======= */ 00066 00067 /* SLASD2 merges the two sets of singular values together into a single */ 00068 /* sorted set. Then it tries to deflate the size of the problem. */ 00069 /* There are two ways in which deflation can occur: when two or more */ 00070 /* singular values are close together or if there is a tiny entry in the */ 00071 /* Z vector. For each such occurrence the order of the related secular */ 00072 /* equation problem is reduced by one. */ 00073 00074 /* SLASD2 is called from SLASD1. */ 00075 00076 /* Arguments */ 00077 /* ========= */ 00078 00079 /* NL (input) INTEGER */ 00080 /* The row dimension of the upper block. NL >= 1. */ 00081 00082 /* NR (input) INTEGER */ 00083 /* The row dimension of the lower block. NR >= 1. */ 00084 00085 /* SQRE (input) INTEGER */ 00086 /* = 0: the lower block is an NR-by-NR square matrix. */ 00087 /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ 00088 00089 /* The bidiagonal matrix has N = NL + NR + 1 rows and */ 00090 /* M = N + SQRE >= N columns. */ 00091 00092 /* K (output) INTEGER */ 00093 /* Contains the dimension of the non-deflated matrix, */ 00094 /* This is the order of the related secular equation. 1 <= K <=N. */ 00095 00096 /* D (input/output) REAL array, dimension (N) */ 00097 /* On entry D contains the singular values of the two submatrices */ 00098 /* to be combined. On exit D contains the trailing (N-K) updated */ 00099 /* singular values (those which were deflated) sorted into */ 00100 /* increasing order. */ 00101 00102 /* Z (output) REAL array, dimension (N) */ 00103 /* On exit Z contains the updating row vector in the secular */ 00104 /* equation. */ 00105 00106 /* ALPHA (input) REAL */ 00107 /* Contains the diagonal element associated with the added row. */ 00108 00109 /* BETA (input) REAL */ 00110 /* Contains the off-diagonal element associated with the added */ 00111 /* row. */ 00112 00113 /* U (input/output) REAL array, dimension (LDU,N) */ 00114 /* On entry U contains the left singular vectors of two */ 00115 /* submatrices in the two square blocks with corners at (1,1), */ 00116 /* (NL, NL), and (NL+2, NL+2), (N,N). */ 00117 /* On exit U contains the trailing (N-K) updated left singular */ 00118 /* vectors (those which were deflated) in its last N-K columns. */ 00119 00120 /* LDU (input) INTEGER */ 00121 /* The leading dimension of the array U. LDU >= N. */ 00122 00123 /* VT (input/output) REAL array, dimension (LDVT,M) */ 00124 /* On entry VT' contains the right singular vectors of two */ 00125 /* submatrices in the two square blocks with corners at (1,1), */ 00126 /* (NL+1, NL+1), and (NL+2, NL+2), (M,M). */ 00127 /* On exit VT' contains the trailing (N-K) updated right singular */ 00128 /* vectors (those which were deflated) in its last N-K columns. */ 00129 /* In case SQRE =1, the last row of VT spans the right null */ 00130 /* space. */ 00131 00132 /* LDVT (input) INTEGER */ 00133 /* The leading dimension of the array VT. LDVT >= M. */ 00134 00135 /* DSIGMA (output) REAL array, dimension (N) */ 00136 /* Contains a copy of the diagonal elements (K-1 singular values */ 00137 /* and one zero) in the secular equation. */ 00138 00139 /* U2 (output) REAL array, dimension (LDU2,N) */ 00140 /* Contains a copy of the first K-1 left singular vectors which */ 00141 /* will be used by SLASD3 in a matrix multiply (SGEMM) to solve */ 00142 /* for the new left singular vectors. U2 is arranged into four */ 00143 /* blocks. The first block contains a column with 1 at NL+1 and */ 00144 /* zero everywhere else; the second block contains non-zero */ 00145 /* entries only at and above NL; the third contains non-zero */ 00146 /* entries only below NL+1; and the fourth is dense. */ 00147 00148 /* LDU2 (input) INTEGER */ 00149 /* The leading dimension of the array U2. LDU2 >= N. */ 00150 00151 /* VT2 (output) REAL array, dimension (LDVT2,N) */ 00152 /* VT2' contains a copy of the first K right singular vectors */ 00153 /* which will be used by SLASD3 in a matrix multiply (SGEMM) to */ 00154 /* solve for the new right singular vectors. VT2 is arranged into */ 00155 /* three blocks. The first block contains a row that corresponds */ 00156 /* to the special 0 diagonal element in SIGMA; the second block */ 00157 /* contains non-zeros only at and before NL +1; the third block */ 00158 /* contains non-zeros only at and after NL +2. */ 00159 00160 /* LDVT2 (input) INTEGER */ 00161 /* The leading dimension of the array VT2. LDVT2 >= M. */ 00162 00163 /* IDXP (workspace) INTEGER array, dimension (N) */ 00164 /* This will contain the permutation used to place deflated */ 00165 /* values of D at the end of the array. On output IDXP(2:K) */ 00166 /* points to the nondeflated D-values and IDXP(K+1:N) */ 00167 /* points to the deflated singular values. */ 00168 00169 /* IDX (workspace) INTEGER array, dimension (N) */ 00170 /* This will contain the permutation used to sort the contents of */ 00171 /* D into ascending order. */ 00172 00173 /* IDXC (output) INTEGER array, dimension (N) */ 00174 /* This will contain the permutation used to arrange the columns */ 00175 /* of the deflated U matrix into three groups: the first group */ 00176 /* contains non-zero entries only at and above NL, the second */ 00177 /* contains non-zero entries only below NL+2, and the third is */ 00178 /* dense. */ 00179 00180 /* IDXQ (input/output) INTEGER array, dimension (N) */ 00181 /* This contains the permutation which separately sorts the two */ 00182 /* sub-problems in D into ascending order. Note that entries in */ 00183 /* the first hlaf of this permutation must first be moved one */ 00184 /* position backward; and entries in the second half */ 00185 /* must first have NL+1 added to their values. */ 00186 00187 /* COLTYP (workspace/output) INTEGER array, dimension (N) */ 00188 /* As workspace, this will contain a label which will indicate */ 00189 /* which of the following types a column in the U2 matrix or a */ 00190 /* row in the VT2 matrix is: */ 00191 /* 1 : non-zero in the upper half only */ 00192 /* 2 : non-zero in the lower half only */ 00193 /* 3 : dense */ 00194 /* 4 : deflated */ 00195 00196 /* On exit, it is an array of dimension 4, with COLTYP(I) being */ 00197 /* the dimension of the I-th type columns. */ 00198 00199 /* INFO (output) INTEGER */ 00200 /* = 0: successful exit. */ 00201 /* < 0: if INFO = -i, the i-th argument had an illegal value. */ 00202 00203 /* Further Details */ 00204 /* =============== */ 00205 00206 /* Based on contributions by */ 00207 /* Ming Gu and Huan Ren, Computer Science Division, University of */ 00208 /* California at Berkeley, USA */ 00209 00210 /* ===================================================================== */ 00211 00212 /* .. Parameters .. */ 00213 /* .. */ 00214 /* .. Local Arrays .. */ 00215 /* .. */ 00216 /* .. Local Scalars .. */ 00217 /* .. */ 00218 /* .. External Functions .. */ 00219 /* .. */ 00220 /* .. External Subroutines .. */ 00221 /* .. */ 00222 /* .. Intrinsic Functions .. */ 00223 /* .. */ 00224 /* .. Executable Statements .. */ 00225 00226 /* Test the input parameters. */ 00227 00228 /* Parameter adjustments */ 00229 --d__; 00230 --z__; 00231 u_dim1 = *ldu; 00232 u_offset = 1 + u_dim1; 00233 u -= u_offset; 00234 vt_dim1 = *ldvt; 00235 vt_offset = 1 + vt_dim1; 00236 vt -= vt_offset; 00237 --dsigma; 00238 u2_dim1 = *ldu2; 00239 u2_offset = 1 + u2_dim1; 00240 u2 -= u2_offset; 00241 vt2_dim1 = *ldvt2; 00242 vt2_offset = 1 + vt2_dim1; 00243 vt2 -= vt2_offset; 00244 --idxp; 00245 --idx; 00246 --idxc; 00247 --idxq; 00248 --coltyp; 00249 00250 /* Function Body */ 00251 *info = 0; 00252 00253 if (*nl < 1) { 00254 *info = -1; 00255 } else if (*nr < 1) { 00256 *info = -2; 00257 } else if (*sqre != 1 && *sqre != 0) { 00258 *info = -3; 00259 } 00260 00261 n = *nl + *nr + 1; 00262 m = n + *sqre; 00263 00264 if (*ldu < n) { 00265 *info = -10; 00266 } else if (*ldvt < m) { 00267 *info = -12; 00268 } else if (*ldu2 < n) { 00269 *info = -15; 00270 } else if (*ldvt2 < m) { 00271 *info = -17; 00272 } 00273 if (*info != 0) { 00274 i__1 = -(*info); 00275 xerbla_("SLASD2", &i__1); 00276 return 0; 00277 } 00278 00279 nlp1 = *nl + 1; 00280 nlp2 = *nl + 2; 00281 00282 /* Generate the first part of the vector Z; and move the singular */ 00283 /* values in the first part of D one position backward. */ 00284 00285 z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; 00286 z__[1] = z1; 00287 for (i__ = *nl; i__ >= 1; --i__) { 00288 z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; 00289 d__[i__ + 1] = d__[i__]; 00290 idxq[i__ + 1] = idxq[i__] + 1; 00291 /* L10: */ 00292 } 00293 00294 /* Generate the second part of the vector Z. */ 00295 00296 i__1 = m; 00297 for (i__ = nlp2; i__ <= i__1; ++i__) { 00298 z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; 00299 /* L20: */ 00300 } 00301 00302 /* Initialize some reference arrays. */ 00303 00304 i__1 = nlp1; 00305 for (i__ = 2; i__ <= i__1; ++i__) { 00306 coltyp[i__] = 1; 00307 /* L30: */ 00308 } 00309 i__1 = n; 00310 for (i__ = nlp2; i__ <= i__1; ++i__) { 00311 coltyp[i__] = 2; 00312 /* L40: */ 00313 } 00314 00315 /* Sort the singular values into increasing order */ 00316 00317 i__1 = n; 00318 for (i__ = nlp2; i__ <= i__1; ++i__) { 00319 idxq[i__] += nlp1; 00320 /* L50: */ 00321 } 00322 00323 /* DSIGMA, IDXC, IDXC, and the first column of U2 */ 00324 /* are used as storage space. */ 00325 00326 i__1 = n; 00327 for (i__ = 2; i__ <= i__1; ++i__) { 00328 dsigma[i__] = d__[idxq[i__]]; 00329 u2[i__ + u2_dim1] = z__[idxq[i__]]; 00330 idxc[i__] = coltyp[idxq[i__]]; 00331 /* L60: */ 00332 } 00333 00334 slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); 00335 00336 i__1 = n; 00337 for (i__ = 2; i__ <= i__1; ++i__) { 00338 idxi = idx[i__] + 1; 00339 d__[i__] = dsigma[idxi]; 00340 z__[i__] = u2[idxi + u2_dim1]; 00341 coltyp[i__] = idxc[idxi]; 00342 /* L70: */ 00343 } 00344 00345 /* Calculate the allowable deflation tolerance */ 00346 00347 eps = slamch_("Epsilon"); 00348 /* Computing MAX */ 00349 r__1 = dabs(*alpha), r__2 = dabs(*beta); 00350 tol = dmax(r__1,r__2); 00351 /* Computing MAX */ 00352 r__2 = (r__1 = d__[n], dabs(r__1)); 00353 tol = eps * 8.f * dmax(r__2,tol); 00354 00355 /* There are 2 kinds of deflation -- first a value in the z-vector */ 00356 /* is small, second two (or more) singular values are very close */ 00357 /* together (their difference is small). */ 00358 00359 /* If the value in the z-vector is small, we simply permute the */ 00360 /* array so that the corresponding singular value is moved to the */ 00361 /* end. */ 00362 00363 /* If two values in the D-vector are close, we perform a two-sided */ 00364 /* rotation designed to make one of the corresponding z-vector */ 00365 /* entries zero, and then permute the array so that the deflated */ 00366 /* singular value is moved to the end. */ 00367 00368 /* If there are multiple singular values then the problem deflates. */ 00369 /* Here the number of equal singular values are found. As each equal */ 00370 /* singular value is found, an elementary reflector is computed to */ 00371 /* rotate the corresponding singular subspace so that the */ 00372 /* corresponding components of Z are zero in this new basis. */ 00373 00374 *k = 1; 00375 k2 = n + 1; 00376 i__1 = n; 00377 for (j = 2; j <= i__1; ++j) { 00378 if ((r__1 = z__[j], dabs(r__1)) <= tol) { 00379 00380 /* Deflate due to small z component. */ 00381 00382 --k2; 00383 idxp[k2] = j; 00384 coltyp[j] = 4; 00385 if (j == n) { 00386 goto L120; 00387 } 00388 } else { 00389 jprev = j; 00390 goto L90; 00391 } 00392 /* L80: */ 00393 } 00394 L90: 00395 j = jprev; 00396 L100: 00397 ++j; 00398 if (j > n) { 00399 goto L110; 00400 } 00401 if ((r__1 = z__[j], dabs(r__1)) <= tol) { 00402 00403 /* Deflate due to small z component. */ 00404 00405 --k2; 00406 idxp[k2] = j; 00407 coltyp[j] = 4; 00408 } else { 00409 00410 /* Check if singular values are close enough to allow deflation. */ 00411 00412 if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) { 00413 00414 /* Deflation is possible. */ 00415 00416 s = z__[jprev]; 00417 c__ = z__[j]; 00418 00419 /* Find sqrt(a**2+b**2) without overflow or */ 00420 /* destructive underflow. */ 00421 00422 tau = slapy2_(&c__, &s); 00423 c__ /= tau; 00424 s = -s / tau; 00425 z__[j] = tau; 00426 z__[jprev] = 0.f; 00427 00428 /* Apply back the Givens rotation to the left and right */ 00429 /* singular vector matrices. */ 00430 00431 idxjp = idxq[idx[jprev] + 1]; 00432 idxj = idxq[idx[j] + 1]; 00433 if (idxjp <= nlp1) { 00434 --idxjp; 00435 } 00436 if (idxj <= nlp1) { 00437 --idxj; 00438 } 00439 srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & 00440 c__1, &c__, &s); 00441 srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & 00442 c__, &s); 00443 if (coltyp[j] != coltyp[jprev]) { 00444 coltyp[j] = 3; 00445 } 00446 coltyp[jprev] = 4; 00447 --k2; 00448 idxp[k2] = jprev; 00449 jprev = j; 00450 } else { 00451 ++(*k); 00452 u2[*k + u2_dim1] = z__[jprev]; 00453 dsigma[*k] = d__[jprev]; 00454 idxp[*k] = jprev; 00455 jprev = j; 00456 } 00457 } 00458 goto L100; 00459 L110: 00460 00461 /* Record the last singular value. */ 00462 00463 ++(*k); 00464 u2[*k + u2_dim1] = z__[jprev]; 00465 dsigma[*k] = d__[jprev]; 00466 idxp[*k] = jprev; 00467 00468 L120: 00469 00470 /* Count up the total number of the various types of columns, then */ 00471 /* form a permutation which positions the four column types into */ 00472 /* four groups of uniform structure (although one or more of these */ 00473 /* groups may be empty). */ 00474 00475 for (j = 1; j <= 4; ++j) { 00476 ctot[j - 1] = 0; 00477 /* L130: */ 00478 } 00479 i__1 = n; 00480 for (j = 2; j <= i__1; ++j) { 00481 ct = coltyp[j]; 00482 ++ctot[ct - 1]; 00483 /* L140: */ 00484 } 00485 00486 /* PSM(*) = Position in SubMatrix (of types 1 through 4) */ 00487 00488 psm[0] = 2; 00489 psm[1] = ctot[0] + 2; 00490 psm[2] = psm[1] + ctot[1]; 00491 psm[3] = psm[2] + ctot[2]; 00492 00493 /* Fill out the IDXC array so that the permutation which it induces */ 00494 /* will place all type-1 columns first, all type-2 columns next, */ 00495 /* then all type-3's, and finally all type-4's, starting from the */ 00496 /* second column. This applies similarly to the rows of VT. */ 00497 00498 i__1 = n; 00499 for (j = 2; j <= i__1; ++j) { 00500 jp = idxp[j]; 00501 ct = coltyp[jp]; 00502 idxc[psm[ct - 1]] = j; 00503 ++psm[ct - 1]; 00504 /* L150: */ 00505 } 00506 00507 /* Sort the singular values and corresponding singular vectors into */ 00508 /* DSIGMA, U2, and VT2 respectively. The singular values/vectors */ 00509 /* which were not deflated go into the first K slots of DSIGMA, U2, */ 00510 /* and VT2 respectively, while those which were deflated go into the */ 00511 /* last N - K slots, except that the first column/row will be treated */ 00512 /* separately. */ 00513 00514 i__1 = n; 00515 for (j = 2; j <= i__1; ++j) { 00516 jp = idxp[j]; 00517 dsigma[j] = d__[jp]; 00518 idxj = idxq[idx[idxp[idxc[j]]] + 1]; 00519 if (idxj <= nlp1) { 00520 --idxj; 00521 } 00522 scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); 00523 scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); 00524 /* L160: */ 00525 } 00526 00527 /* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ 00528 00529 dsigma[1] = 0.f; 00530 hlftol = tol / 2.f; 00531 if (dabs(dsigma[2]) <= hlftol) { 00532 dsigma[2] = hlftol; 00533 } 00534 if (m > n) { 00535 z__[1] = slapy2_(&z1, &z__[m]); 00536 if (z__[1] <= tol) { 00537 c__ = 1.f; 00538 s = 0.f; 00539 z__[1] = tol; 00540 } else { 00541 c__ = z1 / z__[1]; 00542 s = z__[m] / z__[1]; 00543 } 00544 } else { 00545 if (dabs(z1) <= tol) { 00546 z__[1] = tol; 00547 } else { 00548 z__[1] = z1; 00549 } 00550 } 00551 00552 /* Move the rest of the updating row to Z. */ 00553 00554 i__1 = *k - 1; 00555 scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); 00556 00557 /* Determine the first column of U2, the first row of VT2 and the */ 00558 /* last row of VT. */ 00559 00560 slaset_("A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2); 00561 u2[nlp1 + u2_dim1] = 1.f; 00562 if (m > n) { 00563 i__1 = nlp1; 00564 for (i__ = 1; i__ <= i__1; ++i__) { 00565 vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; 00566 vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; 00567 /* L170: */ 00568 } 00569 i__1 = m; 00570 for (i__ = nlp2; i__ <= i__1; ++i__) { 00571 vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; 00572 vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; 00573 /* L180: */ 00574 } 00575 } else { 00576 scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); 00577 } 00578 if (m > n) { 00579 scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); 00580 } 00581 00582 /* The deflated singular values and their corresponding vectors go */ 00583 /* into the back of D, U, and V respectively. */ 00584 00585 if (n > *k) { 00586 i__1 = n - *k; 00587 scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); 00588 i__1 = n - *k; 00589 slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) 00590 * u_dim1 + 1], ldu); 00591 i__1 = n - *k; 00592 slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + 00593 vt_dim1], ldvt); 00594 } 00595 00596 /* Copy CTOT into COLTYP for referencing in SLASD3. */ 00597 00598 for (j = 1; j <= 4; ++j) { 00599 coltyp[j] = ctot[j - 1]; 00600 /* L190: */ 00601 } 00602 00603 return 0; 00604 00605 /* End of SLASD2 */ 00606 00607 } /* slasd2_ */