sgelss.c
Go to the documentation of this file.
00001 /* sgelss.f -- translated by f2c (version 20061008).
00002    You must link the resulting object file with libf2c:
00003         on Microsoft Windows system, link with libf2c.lib;
00004         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
00005         or, if you install libf2c.a in a standard place, with -lf2c -lm
00006         -- in that order, at the end of the command line, as in
00007                 cc *.o -lf2c -lm
00008         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
00009 
00010                 http://www.netlib.org/f2c/libf2c.zip
00011 */
00012 
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015 
00016 /* Table of constant values */
00017 
00018 static integer c__6 = 6;
00019 static integer c_n1 = -1;
00020 static integer c__1 = 1;
00021 static integer c__0 = 0;
00022 static real c_b74 = 0.f;
00023 static real c_b108 = 1.f;
00024 
00025 /* Subroutine */ int sgelss_(integer *m, integer *n, integer *nrhs, real *a, 
00026         integer *lda, real *b, integer *ldb, real *s, real *rcond, integer *
00027         rank, real *work, integer *lwork, integer *info)
00028 {
00029     /* System generated locals */
00030     integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
00031     real r__1;
00032 
00033     /* Local variables */
00034     integer i__, bl, ie, il, mm;
00035     real eps, thr, anrm, bnrm;
00036     integer itau;
00037     real vdum[1];
00038     integer iascl, ibscl, chunk;
00039     extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
00040             integer *, real *, real *, integer *, real *, integer *, real *, 
00041             real *, integer *);
00042     real sfmin;
00043     integer minmn, maxmn;
00044     extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
00045             real *, integer *, real *, integer *, real *, real *, integer *);
00046     integer itaup, itauq;
00047     extern /* Subroutine */ int srscl_(integer *, real *, real *, integer *);
00048     integer mnthr, iwork;
00049     extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
00050             integer *), slabad_(real *, real *);
00051     integer bdspac;
00052     extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer 
00053             *, real *, real *, real *, real *, real *, integer *, integer *);
00054     extern doublereal slamch_(char *), slange_(char *, integer *, 
00055             integer *, real *, integer *, real *);
00056     extern /* Subroutine */ int xerbla_(char *, integer *);
00057     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
00058             integer *, integer *);
00059     real bignum;
00060     extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 
00061             *, real *, real *, integer *, integer *), slascl_(char *, integer 
00062             *, integer *, real *, real *, integer *, integer *, real *, 
00063             integer *, integer *), sgeqrf_(integer *, integer *, real 
00064             *, integer *, real *, real *, integer *, integer *), slacpy_(char 
00065             *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
00066             real *, integer *), sbdsqr_(char *, integer *, integer *, 
00067             integer *, integer *, real *, real *, real *, integer *, real *, 
00068             integer *, real *, integer *, real *, integer *), sorgbr_(
00069             char *, integer *, integer *, integer *, real *, integer *, real *
00070 , real *, integer *, integer *);
00071     integer ldwork;
00072     extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, 
00073             integer *, integer *, real *, integer *, real *, real *, integer *
00074 , real *, integer *, integer *);
00075     integer minwrk, maxwrk;
00076     real smlnum;
00077     extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, 
00078             integer *, real *, integer *, real *, real *, integer *, real *, 
00079             integer *, integer *);
00080     logical lquery;
00081     extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
00082             integer *, real *, integer *, real *, real *, integer *, real *, 
00083             integer *, integer *);
00084 
00085 
00086 /*  -- LAPACK driver routine (version 3.2) -- */
00087 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00088 /*     November 2006 */
00089 
00090 /*     .. Scalar Arguments .. */
00091 /*     .. */
00092 /*     .. Array Arguments .. */
00093 /*     .. */
00094 
00095 /*  Purpose */
00096 /*  ======= */
00097 
00098 /*  SGELSS computes the minimum norm solution to a real linear least */
00099 /*  squares problem: */
00100 
00101 /*  Minimize 2-norm(| b - A*x |). */
00102 
00103 /*  using the singular value decomposition (SVD) of A. A is an M-by-N */
00104 /*  matrix which may be rank-deficient. */
00105 
00106 /*  Several right hand side vectors b and solution vectors x can be */
00107 /*  handled in a single call; they are stored as the columns of the */
00108 /*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix */
00109 /*  X. */
00110 
00111 /*  The effective rank of A is determined by treating as zero those */
00112 /*  singular values which are less than RCOND times the largest singular */
00113 /*  value. */
00114 
00115 /*  Arguments */
00116 /*  ========= */
00117 
00118 /*  M       (input) INTEGER */
00119 /*          The number of rows of the matrix A. M >= 0. */
00120 
00121 /*  N       (input) INTEGER */
00122 /*          The number of columns of the matrix A. N >= 0. */
00123 
00124 /*  NRHS    (input) INTEGER */
00125 /*          The number of right hand sides, i.e., the number of columns */
00126 /*          of the matrices B and X. NRHS >= 0. */
00127 
00128 /*  A       (input/output) REAL array, dimension (LDA,N) */
00129 /*          On entry, the M-by-N matrix A. */
00130 /*          On exit, the first min(m,n) rows of A are overwritten with */
00131 /*          its right singular vectors, stored rowwise. */
00132 
00133 /*  LDA     (input) INTEGER */
00134 /*          The leading dimension of the array A.  LDA >= max(1,M). */
00135 
00136 /*  B       (input/output) REAL array, dimension (LDB,NRHS) */
00137 /*          On entry, the M-by-NRHS right hand side matrix B. */
00138 /*          On exit, B is overwritten by the N-by-NRHS solution */
00139 /*          matrix X.  If m >= n and RANK = n, the residual */
00140 /*          sum-of-squares for the solution in the i-th column is given */
00141 /*          by the sum of squares of elements n+1:m in that column. */
00142 
00143 /*  LDB     (input) INTEGER */
00144 /*          The leading dimension of the array B. LDB >= max(1,max(M,N)). */
00145 
00146 /*  S       (output) REAL array, dimension (min(M,N)) */
00147 /*          The singular values of A in decreasing order. */
00148 /*          The condition number of A in the 2-norm = S(1)/S(min(m,n)). */
00149 
00150 /*  RCOND   (input) REAL */
00151 /*          RCOND is used to determine the effective rank of A. */
00152 /*          Singular values S(i) <= RCOND*S(1) are treated as zero. */
00153 /*          If RCOND < 0, machine precision is used instead. */
00154 
00155 /*  RANK    (output) INTEGER */
00156 /*          The effective rank of A, i.e., the number of singular values */
00157 /*          which are greater than RCOND*S(1). */
00158 
00159 /*  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK)) */
00160 /*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
00161 
00162 /*  LWORK   (input) INTEGER */
00163 /*          The dimension of the array WORK. LWORK >= 1, and also: */
00164 /*          LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) */
00165 /*          For good performance, LWORK should generally be larger. */
00166 
00167 /*          If LWORK = -1, then a workspace query is assumed; the routine */
00168 /*          only calculates the optimal size of the WORK array, returns */
00169 /*          this value as the first entry of the WORK array, and no error */
00170 /*          message related to LWORK is issued by XERBLA. */
00171 
00172 /*  INFO    (output) INTEGER */
00173 /*          = 0:  successful exit */
00174 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
00175 /*          > 0:  the algorithm for computing the SVD failed to converge; */
00176 /*                if INFO = i, i off-diagonal elements of an intermediate */
00177 /*                bidiagonal form did not converge to zero. */
00178 
00179 /*  ===================================================================== */
00180 
00181 /*     .. Parameters .. */
00182 /*     .. */
00183 /*     .. Local Scalars .. */
00184 /*     .. */
00185 /*     .. Local Arrays .. */
00186 /*     .. */
00187 /*     .. External Subroutines .. */
00188 /*     .. */
00189 /*     .. External Functions .. */
00190 /*     .. */
00191 /*     .. Intrinsic Functions .. */
00192 /*     .. */
00193 /*     .. Executable Statements .. */
00194 
00195 /*     Test the input arguments */
00196 
00197     /* Parameter adjustments */
00198     a_dim1 = *lda;
00199     a_offset = 1 + a_dim1;
00200     a -= a_offset;
00201     b_dim1 = *ldb;
00202     b_offset = 1 + b_dim1;
00203     b -= b_offset;
00204     --s;
00205     --work;
00206 
00207     /* Function Body */
00208     *info = 0;
00209     minmn = min(*m,*n);
00210     maxmn = max(*m,*n);
00211     lquery = *lwork == -1;
00212     if (*m < 0) {
00213         *info = -1;
00214     } else if (*n < 0) {
00215         *info = -2;
00216     } else if (*nrhs < 0) {
00217         *info = -3;
00218     } else if (*lda < max(1,*m)) {
00219         *info = -5;
00220     } else if (*ldb < max(1,maxmn)) {
00221         *info = -7;
00222     }
00223 
00224 /*     Compute workspace */
00225 /*      (Note: Comments in the code beginning "Workspace:" describe the */
00226 /*       minimal amount of workspace needed at that point in the code, */
00227 /*       as well as the preferred amount for good performance. */
00228 /*       NB refers to the optimal block size for the immediately */
00229 /*       following subroutine, as returned by ILAENV.) */
00230 
00231     if (*info == 0) {
00232         minwrk = 1;
00233         maxwrk = 1;
00234         if (minmn > 0) {
00235             mm = *m;
00236             mnthr = ilaenv_(&c__6, "SGELSS", " ", m, n, nrhs, &c_n1);
00237             if (*m >= *n && *m >= mnthr) {
00238 
00239 /*              Path 1a - overdetermined, with many more rows than */
00240 /*                        columns */
00241 
00242                 mm = *n;
00243 /* Computing MAX */
00244                 i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SGEQRF", 
00245                         " ", m, n, &c_n1, &c_n1);
00246                 maxwrk = max(i__1,i__2);
00247 /* Computing MAX */
00248                 i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "SORMQR", 
00249                         "LT", m, nrhs, n, &c_n1);
00250                 maxwrk = max(i__1,i__2);
00251             }
00252             if (*m >= *n) {
00253 
00254 /*              Path 1 - overdetermined or exactly determined */
00255 
00256 /*              Compute workspace needed for SBDSQR */
00257 
00258 /* Computing MAX */
00259                 i__1 = 1, i__2 = *n * 5;
00260                 bdspac = max(i__1,i__2);
00261 /* Computing MAX */
00262                 i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, 
00263                         "SGEBRD", " ", &mm, n, &c_n1, &c_n1);
00264                 maxwrk = max(i__1,i__2);
00265 /* Computing MAX */
00266                 i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "SORMBR"
00267 , "QLT", &mm, nrhs, n, &c_n1);
00268                 maxwrk = max(i__1,i__2);
00269 /* Computing MAX */
00270                 i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, 
00271                         "SORGBR", "P", n, n, n, &c_n1);
00272                 maxwrk = max(i__1,i__2);
00273                 maxwrk = max(maxwrk,bdspac);
00274 /* Computing MAX */
00275                 i__1 = maxwrk, i__2 = *n * *nrhs;
00276                 maxwrk = max(i__1,i__2);
00277 /* Computing MAX */
00278                 i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,
00279                         i__2);
00280                 minwrk = max(i__1,bdspac);
00281                 maxwrk = max(minwrk,maxwrk);
00282             }
00283             if (*n > *m) {
00284 
00285 /*              Compute workspace needed for SBDSQR */
00286 
00287 /* Computing MAX */
00288                 i__1 = 1, i__2 = *m * 5;
00289                 bdspac = max(i__1,i__2);
00290 /* Computing MAX */
00291                 i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = max(i__1,
00292                         i__2);
00293                 minwrk = max(i__1,bdspac);
00294                 if (*n >= mnthr) {
00295 
00296 /*                 Path 2a - underdetermined, with many more columns */
00297 /*                 than rows */
00298 
00299                     maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
00300                             c_n1, &c_n1);
00301 /* Computing MAX */
00302                     i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * 
00303                             ilaenv_(&c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1);
00304                     maxwrk = max(i__1,i__2);
00305 /* Computing MAX */
00306                     i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * 
00307                             ilaenv_(&c__1, "SORMBR", "QLT", m, nrhs, m, &c_n1);
00308                     maxwrk = max(i__1,i__2);
00309 /* Computing MAX */
00310                     i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * 
00311                             ilaenv_(&c__1, "SORGBR", "P", m, m, m, &c_n1);
00312                     maxwrk = max(i__1,i__2);
00313 /* Computing MAX */
00314                     i__1 = maxwrk, i__2 = *m * *m + *m + bdspac;
00315                     maxwrk = max(i__1,i__2);
00316                     if (*nrhs > 1) {
00317 /* Computing MAX */
00318                         i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
00319                         maxwrk = max(i__1,i__2);
00320                     } else {
00321 /* Computing MAX */
00322                         i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
00323                         maxwrk = max(i__1,i__2);
00324                     }
00325 /* Computing MAX */
00326                     i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "SORMLQ"
00327 , "LT", n, nrhs, m, &c_n1);
00328                     maxwrk = max(i__1,i__2);
00329                 } else {
00330 
00331 /*                 Path 2 - underdetermined */
00332 
00333                     maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "SGEBRD", 
00334                             " ", m, n, &c_n1, &c_n1);
00335 /* Computing MAX */
00336                     i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, 
00337                             "SORMBR", "QLT", m, nrhs, m, &c_n1);
00338                     maxwrk = max(i__1,i__2);
00339 /* Computing MAX */
00340                     i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORG"
00341                             "BR", "P", m, n, m, &c_n1);
00342                     maxwrk = max(i__1,i__2);
00343                     maxwrk = max(maxwrk,bdspac);
00344 /* Computing MAX */
00345                     i__1 = maxwrk, i__2 = *n * *nrhs;
00346                     maxwrk = max(i__1,i__2);
00347                 }
00348             }
00349             maxwrk = max(minwrk,maxwrk);
00350         }
00351         work[1] = (real) maxwrk;
00352 
00353         if (*lwork < minwrk && ! lquery) {
00354             *info = -12;
00355         }
00356     }
00357 
00358     if (*info != 0) {
00359         i__1 = -(*info);
00360         xerbla_("SGELSS", &i__1);
00361         return 0;
00362     } else if (lquery) {
00363         return 0;
00364     }
00365 
00366 /*     Quick return if possible */
00367 
00368     if (*m == 0 || *n == 0) {
00369         *rank = 0;
00370         return 0;
00371     }
00372 
00373 /*     Get machine parameters */
00374 
00375     eps = slamch_("P");
00376     sfmin = slamch_("S");
00377     smlnum = sfmin / eps;
00378     bignum = 1.f / smlnum;
00379     slabad_(&smlnum, &bignum);
00380 
00381 /*     Scale A if max element outside range [SMLNUM,BIGNUM] */
00382 
00383     anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]);
00384     iascl = 0;
00385     if (anrm > 0.f && anrm < smlnum) {
00386 
00387 /*        Scale matrix norm up to SMLNUM */
00388 
00389         slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
00390                 info);
00391         iascl = 1;
00392     } else if (anrm > bignum) {
00393 
00394 /*        Scale matrix norm down to BIGNUM */
00395 
00396         slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
00397                 info);
00398         iascl = 2;
00399     } else if (anrm == 0.f) {
00400 
00401 /*        Matrix all zero. Return zero solution. */
00402 
00403         i__1 = max(*m,*n);
00404         slaset_("F", &i__1, nrhs, &c_b74, &c_b74, &b[b_offset], ldb);
00405         slaset_("F", &minmn, &c__1, &c_b74, &c_b74, &s[1], &c__1);
00406         *rank = 0;
00407         goto L70;
00408     }
00409 
00410 /*     Scale B if max element outside range [SMLNUM,BIGNUM] */
00411 
00412     bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
00413     ibscl = 0;
00414     if (bnrm > 0.f && bnrm < smlnum) {
00415 
00416 /*        Scale matrix norm up to SMLNUM */
00417 
00418         slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, 
00419                  info);
00420         ibscl = 1;
00421     } else if (bnrm > bignum) {
00422 
00423 /*        Scale matrix norm down to BIGNUM */
00424 
00425         slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, 
00426                  info);
00427         ibscl = 2;
00428     }
00429 
00430 /*     Overdetermined case */
00431 
00432     if (*m >= *n) {
00433 
00434 /*        Path 1 - overdetermined or exactly determined */
00435 
00436         mm = *m;
00437         if (*m >= mnthr) {
00438 
00439 /*           Path 1a - overdetermined, with many more rows than columns */
00440 
00441             mm = *n;
00442             itau = 1;
00443             iwork = itau + *n;
00444 
00445 /*           Compute A=Q*R */
00446 /*           (Workspace: need 2*N, prefer N+N*NB) */
00447 
00448             i__1 = *lwork - iwork + 1;
00449             sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1, 
00450                      info);
00451 
00452 /*           Multiply B by transpose(Q) */
00453 /*           (Workspace: need N+NRHS, prefer N+NRHS*NB) */
00454 
00455             i__1 = *lwork - iwork + 1;
00456             sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
00457                     b_offset], ldb, &work[iwork], &i__1, info);
00458 
00459 /*           Zero out below R */
00460 
00461             if (*n > 1) {
00462                 i__1 = *n - 1;
00463                 i__2 = *n - 1;
00464                 slaset_("L", &i__1, &i__2, &c_b74, &c_b74, &a[a_dim1 + 2], 
00465                         lda);
00466             }
00467         }
00468 
00469         ie = 1;
00470         itauq = ie + *n;
00471         itaup = itauq + *n;
00472         iwork = itaup + *n;
00473 
00474 /*        Bidiagonalize R in A */
00475 /*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */
00476 
00477         i__1 = *lwork - iwork + 1;
00478         sgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
00479                 work[itaup], &work[iwork], &i__1, info);
00480 
00481 /*        Multiply B by transpose of left bidiagonalizing vectors of R */
00482 /*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */
00483 
00484         i__1 = *lwork - iwork + 1;
00485         sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], 
00486                 &b[b_offset], ldb, &work[iwork], &i__1, info);
00487 
00488 /*        Generate right bidiagonalizing vectors of R in A */
00489 /*        (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
00490 
00491         i__1 = *lwork - iwork + 1;
00492         sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], &
00493                 i__1, info);
00494         iwork = ie + *n;
00495 
00496 /*        Perform bidiagonal QR iteration */
00497 /*          multiply B by transpose of left singular vectors */
00498 /*          compute right singular vectors in A */
00499 /*        (Workspace: need BDSPAC) */
00500 
00501         sbdsqr_("U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, 
00502                 vdum, &c__1, &b[b_offset], ldb, &work[iwork], info)
00503                 ;
00504         if (*info != 0) {
00505             goto L70;
00506         }
00507 
00508 /*        Multiply B by reciprocals of singular values */
00509 
00510 /* Computing MAX */
00511         r__1 = *rcond * s[1];
00512         thr = dmax(r__1,sfmin);
00513         if (*rcond < 0.f) {
00514 /* Computing MAX */
00515             r__1 = eps * s[1];
00516             thr = dmax(r__1,sfmin);
00517         }
00518         *rank = 0;
00519         i__1 = *n;
00520         for (i__ = 1; i__ <= i__1; ++i__) {
00521             if (s[i__] > thr) {
00522                 srscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
00523                 ++(*rank);
00524             } else {
00525                 slaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b[i__ + b_dim1], 
00526                         ldb);
00527             }
00528 /* L10: */
00529         }
00530 
00531 /*        Multiply B by right singular vectors */
00532 /*        (Workspace: need N, prefer N*NRHS) */
00533 
00534         if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
00535             sgemm_("T", "N", n, nrhs, n, &c_b108, &a[a_offset], lda, &b[
00536                     b_offset], ldb, &c_b74, &work[1], ldb);
00537             slacpy_("G", n, nrhs, &work[1], ldb, &b[b_offset], ldb)
00538                     ;
00539         } else if (*nrhs > 1) {
00540             chunk = *lwork / *n;
00541             i__1 = *nrhs;
00542             i__2 = chunk;
00543             for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
00544 /* Computing MIN */
00545                 i__3 = *nrhs - i__ + 1;
00546                 bl = min(i__3,chunk);
00547                 sgemm_("T", "N", n, &bl, n, &c_b108, &a[a_offset], lda, &b[
00548                         i__ * b_dim1 + 1], ldb, &c_b74, &work[1], n);
00549                 slacpy_("G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb);
00550 /* L20: */
00551             }
00552         } else {
00553             sgemv_("T", n, n, &c_b108, &a[a_offset], lda, &b[b_offset], &c__1, 
00554                      &c_b74, &work[1], &c__1);
00555             scopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
00556         }
00557 
00558     } else /* if(complicated condition) */ {
00559 /* Computing MAX */
00560         i__2 = *m, i__1 = (*m << 1) - 4, i__2 = max(i__2,i__1), i__2 = max(
00561                 i__2,*nrhs), i__1 = *n - *m * 3;
00562         if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__2,i__1)) {
00563 
00564 /*        Path 2a - underdetermined, with many more columns than rows */
00565 /*        and sufficient workspace for an efficient algorithm */
00566 
00567             ldwork = *m;
00568 /* Computing MAX */
00569 /* Computing MAX */
00570             i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = 
00571                     max(i__3,*nrhs), i__4 = *n - *m * 3;
00572             i__2 = (*m << 2) + *m * *lda + max(i__3,i__4), i__1 = *m * *lda + 
00573                     *m + *m * *nrhs;
00574             if (*lwork >= max(i__2,i__1)) {
00575                 ldwork = *lda;
00576             }
00577             itau = 1;
00578             iwork = *m + 1;
00579 
00580 /*        Compute A=L*Q */
00581 /*        (Workspace: need 2*M, prefer M+M*NB) */
00582 
00583             i__2 = *lwork - iwork + 1;
00584             sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2, 
00585                      info);
00586             il = iwork;
00587 
00588 /*        Copy L to WORK(IL), zeroing out above it */
00589 
00590             slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
00591             i__2 = *m - 1;
00592             i__1 = *m - 1;
00593             slaset_("U", &i__2, &i__1, &c_b74, &c_b74, &work[il + ldwork], &
00594                     ldwork);
00595             ie = il + ldwork * *m;
00596             itauq = ie + *m;
00597             itaup = itauq + *m;
00598             iwork = itaup + *m;
00599 
00600 /*        Bidiagonalize L in WORK(IL) */
00601 /*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */
00602 
00603             i__2 = *lwork - iwork + 1;
00604             sgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], 
00605                     &work[itaup], &work[iwork], &i__2, info);
00606 
00607 /*        Multiply B by transpose of left bidiagonalizing vectors of L */
00608 /*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */
00609 
00610             i__2 = *lwork - iwork + 1;
00611             sormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[
00612                     itauq], &b[b_offset], ldb, &work[iwork], &i__2, info);
00613 
00614 /*        Generate right bidiagonalizing vectors of R in WORK(IL) */
00615 /*        (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) */
00616 
00617             i__2 = *lwork - iwork + 1;
00618             sorgbr_("P", m, m, m, &work[il], &ldwork, &work[itaup], &work[
00619                     iwork], &i__2, info);
00620             iwork = ie + *m;
00621 
00622 /*        Perform bidiagonal QR iteration, */
00623 /*           computing right singular vectors of L in WORK(IL) and */
00624 /*           multiplying B by transpose of left singular vectors */
00625 /*        (Workspace: need M*M+M+BDSPAC) */
00626 
00627             sbdsqr_("U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], &
00628                     ldwork, &a[a_offset], lda, &b[b_offset], ldb, &work[iwork]
00629 , info);
00630             if (*info != 0) {
00631                 goto L70;
00632             }
00633 
00634 /*        Multiply B by reciprocals of singular values */
00635 
00636 /* Computing MAX */
00637             r__1 = *rcond * s[1];
00638             thr = dmax(r__1,sfmin);
00639             if (*rcond < 0.f) {
00640 /* Computing MAX */
00641                 r__1 = eps * s[1];
00642                 thr = dmax(r__1,sfmin);
00643             }
00644             *rank = 0;
00645             i__2 = *m;
00646             for (i__ = 1; i__ <= i__2; ++i__) {
00647                 if (s[i__] > thr) {
00648                     srscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
00649                     ++(*rank);
00650                 } else {
00651                     slaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b[i__ + b_dim1]
00652 , ldb);
00653                 }
00654 /* L30: */
00655             }
00656             iwork = ie;
00657 
00658 /*        Multiply B by right singular vectors of L in WORK(IL) */
00659 /*        (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) */
00660 
00661             if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) {
00662                 sgemm_("T", "N", m, nrhs, m, &c_b108, &work[il], &ldwork, &b[
00663                         b_offset], ldb, &c_b74, &work[iwork], ldb);
00664                 slacpy_("G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb);
00665             } else if (*nrhs > 1) {
00666                 chunk = (*lwork - iwork + 1) / *m;
00667                 i__2 = *nrhs;
00668                 i__1 = chunk;
00669                 for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += 
00670                         i__1) {
00671 /* Computing MIN */
00672                     i__3 = *nrhs - i__ + 1;
00673                     bl = min(i__3,chunk);
00674                     sgemm_("T", "N", m, &bl, m, &c_b108, &work[il], &ldwork, &
00675                             b[i__ * b_dim1 + 1], ldb, &c_b74, &work[iwork], m);
00676                     slacpy_("G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1]
00677 , ldb);
00678 /* L40: */
00679                 }
00680             } else {
00681                 sgemv_("T", m, m, &c_b108, &work[il], &ldwork, &b[b_dim1 + 1], 
00682                          &c__1, &c_b74, &work[iwork], &c__1);
00683                 scopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1);
00684             }
00685 
00686 /*        Zero out below first M rows of B */
00687 
00688             i__1 = *n - *m;
00689             slaset_("F", &i__1, nrhs, &c_b74, &c_b74, &b[*m + 1 + b_dim1], 
00690                     ldb);
00691             iwork = itau + *m;
00692 
00693 /*        Multiply transpose(Q) by B */
00694 /*        (Workspace: need M+NRHS, prefer M+NRHS*NB) */
00695 
00696             i__1 = *lwork - iwork + 1;
00697             sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
00698                     b_offset], ldb, &work[iwork], &i__1, info);
00699 
00700         } else {
00701 
00702 /*        Path 2 - remaining underdetermined cases */
00703 
00704             ie = 1;
00705             itauq = ie + *m;
00706             itaup = itauq + *m;
00707             iwork = itaup + *m;
00708 
00709 /*        Bidiagonalize A */
00710 /*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
00711 
00712             i__1 = *lwork - iwork + 1;
00713             sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
00714                     work[itaup], &work[iwork], &i__1, info);
00715 
00716 /*        Multiply B by transpose of left bidiagonalizing vectors */
00717 /*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */
00718 
00719             i__1 = *lwork - iwork + 1;
00720             sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq]
00721 , &b[b_offset], ldb, &work[iwork], &i__1, info);
00722 
00723 /*        Generate right bidiagonalizing vectors in A */
00724 /*        (Workspace: need 4*M, prefer 3*M+M*NB) */
00725 
00726             i__1 = *lwork - iwork + 1;
00727             sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
00728                     iwork], &i__1, info);
00729             iwork = ie + *m;
00730 
00731 /*        Perform bidiagonal QR iteration, */
00732 /*           computing right singular vectors of A in A and */
00733 /*           multiplying B by transpose of left singular vectors */
00734 /*        (Workspace: need BDSPAC) */
00735 
00736             sbdsqr_("L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], 
00737                     lda, vdum, &c__1, &b[b_offset], ldb, &work[iwork], info);
00738             if (*info != 0) {
00739                 goto L70;
00740             }
00741 
00742 /*        Multiply B by reciprocals of singular values */
00743 
00744 /* Computing MAX */
00745             r__1 = *rcond * s[1];
00746             thr = dmax(r__1,sfmin);
00747             if (*rcond < 0.f) {
00748 /* Computing MAX */
00749                 r__1 = eps * s[1];
00750                 thr = dmax(r__1,sfmin);
00751             }
00752             *rank = 0;
00753             i__1 = *m;
00754             for (i__ = 1; i__ <= i__1; ++i__) {
00755                 if (s[i__] > thr) {
00756                     srscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
00757                     ++(*rank);
00758                 } else {
00759                     slaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b[i__ + b_dim1]
00760 , ldb);
00761                 }
00762 /* L50: */
00763             }
00764 
00765 /*        Multiply B by right singular vectors of A */
00766 /*        (Workspace: need N, prefer N*NRHS) */
00767 
00768             if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
00769                 sgemm_("T", "N", n, nrhs, m, &c_b108, &a[a_offset], lda, &b[
00770                         b_offset], ldb, &c_b74, &work[1], ldb);
00771                 slacpy_("F", n, nrhs, &work[1], ldb, &b[b_offset], ldb);
00772             } else if (*nrhs > 1) {
00773                 chunk = *lwork / *n;
00774                 i__1 = *nrhs;
00775                 i__2 = chunk;
00776                 for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
00777                         i__2) {
00778 /* Computing MIN */
00779                     i__3 = *nrhs - i__ + 1;
00780                     bl = min(i__3,chunk);
00781                     sgemm_("T", "N", n, &bl, m, &c_b108, &a[a_offset], lda, &
00782                             b[i__ * b_dim1 + 1], ldb, &c_b74, &work[1], n);
00783                     slacpy_("F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], 
00784                             ldb);
00785 /* L60: */
00786                 }
00787             } else {
00788                 sgemv_("T", m, n, &c_b108, &a[a_offset], lda, &b[b_offset], &
00789                         c__1, &c_b74, &work[1], &c__1);
00790                 scopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
00791             }
00792         }
00793     }
00794 
00795 /*     Undo scaling */
00796 
00797     if (iascl == 1) {
00798         slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, 
00799                  info);
00800         slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
00801                 minmn, info);
00802     } else if (iascl == 2) {
00803         slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, 
00804                  info);
00805         slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
00806                 minmn, info);
00807     }
00808     if (ibscl == 1) {
00809         slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, 
00810                  info);
00811     } else if (ibscl == 2) {
00812         slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, 
00813                  info);
00814     }
00815 
00816 L70:
00817     work[1] = (real) maxwrk;
00818     return 0;
00819 
00820 /*     End of SGELSS */
00821 
00822 } /* sgelss_ */


swiftnav
Author(s):
autogenerated on Sat Jun 8 2019 18:56:06