00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015
00016
00017
00018 static integer c__1 = 1;
00019 static integer c__0 = 0;
00020 static real c_b35 = 1.f;
00021
00022 int sgsvj1_(char *jobv, integer *m, integer *n, integer *n1,
00023 real *a, integer *lda, real *d__, real *sva, integer *mv, real *v,
00024 integer *ldv, real *eps, real *sfmin, real *tol, integer *nsweep,
00025 real *work, integer *lwork, integer *info)
00026 {
00027
00028 integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5,
00029 i__6;
00030 real r__1, r__2;
00031
00032
00033 double sqrt(doublereal), r_sign(real *, real *);
00034
00035
00036 real bigtheta;
00037 integer pskipped, i__, p, q;
00038 real t, rootsfmin, cs, sn;
00039 integer jbc;
00040 real big;
00041 integer kbl, igl, ibr, jgl, mvl, nblc;
00042 real aapp, aapq, aaqq;
00043 integer nblr, ierr;
00044 extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
00045 real aapp0, temp1;
00046 extern doublereal snrm2_(integer *, real *, integer *);
00047 real large, apoaq, aqoap;
00048 extern logical lsame_(char *, char *);
00049 real theta, small, fastr[5];
00050 logical applv, rsvec;
00051 extern int scopy_(integer *, real *, integer *, real *,
00052 integer *);
00053 logical rotok;
00054 extern int sswap_(integer *, real *, integer *, real *,
00055 integer *), saxpy_(integer *, real *, real *, integer *, real *,
00056 integer *), srotm_(integer *, real *, integer *, real *, integer *
00057 , real *), xerbla_(char *, integer *);
00058 integer ijblsk, swband;
00059 extern int slascl_(char *, integer *, integer *, real *,
00060 real *, integer *, integer *, real *, integer *, integer *);
00061 extern integer isamax_(integer *, real *, integer *);
00062 integer blskip;
00063 real mxaapq, thsign;
00064 extern int slassq_(integer *, real *, integer *, real *,
00065 real *);
00066 real mxsinj;
00067 integer emptsw, notrot, iswrot;
00068 real rootbig, rooteps;
00069 integer rowskip;
00070 real roottol;
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238 --sva;
00239 --d__;
00240 a_dim1 = *lda;
00241 a_offset = 1 + a_dim1;
00242 a -= a_offset;
00243 v_dim1 = *ldv;
00244 v_offset = 1 + v_dim1;
00245 v -= v_offset;
00246 --work;
00247
00248
00249 applv = lsame_(jobv, "A");
00250 rsvec = lsame_(jobv, "V");
00251 if (! (rsvec || applv || lsame_(jobv, "N"))) {
00252 *info = -1;
00253 } else if (*m < 0) {
00254 *info = -2;
00255 } else if (*n < 0 || *n > *m) {
00256 *info = -3;
00257 } else if (*n1 < 0) {
00258 *info = -4;
00259 } else if (*lda < *m) {
00260 *info = -6;
00261 } else if (*mv < 0) {
00262 *info = -9;
00263 } else if (*ldv < *m) {
00264 *info = -11;
00265 } else if (*tol <= *eps) {
00266 *info = -14;
00267 } else if (*nsweep < 0) {
00268 *info = -15;
00269 } else if (*lwork < *m) {
00270 *info = -17;
00271 } else {
00272 *info = 0;
00273 }
00274
00275
00276 if (*info != 0) {
00277 i__1 = -(*info);
00278 xerbla_("SGSVJ1", &i__1);
00279 return 0;
00280 }
00281
00282 if (rsvec) {
00283 mvl = *n;
00284 } else if (applv) {
00285 mvl = *mv;
00286 }
00287 rsvec = rsvec || applv;
00288 rooteps = sqrt(*eps);
00289 rootsfmin = sqrt(*sfmin);
00290 small = *sfmin / *eps;
00291 big = 1.f / *sfmin;
00292 rootbig = 1.f / rootsfmin;
00293 large = big / sqrt((real) (*m * *n));
00294 bigtheta = 1.f / rooteps;
00295 roottol = sqrt(*tol);
00296
00297
00298
00299
00300
00301 emptsw = *n1 * (*n - *n1);
00302 notrot = 0;
00303 fastr[0] = 0.f;
00304
00305
00306
00307 kbl = min(8,*n);
00308 nblr = *n1 / kbl;
00309 if (nblr * kbl != *n1) {
00310 ++nblr;
00311 }
00312
00313 nblc = (*n - *n1) / kbl;
00314 if (nblc * kbl != *n - *n1) {
00315 ++nblc;
00316 }
00317
00318 i__1 = kbl;
00319 blskip = i__1 * i__1 + 1;
00320
00321 rowskip = min(5,kbl);
00322
00323 swband = 0;
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337 i__1 = *nsweep;
00338 for (i__ = 1; i__ <= i__1; ++i__) {
00339
00340
00341 mxaapq = 0.f;
00342 mxsinj = 0.f;
00343 iswrot = 0;
00344
00345 notrot = 0;
00346 pskipped = 0;
00347
00348 i__2 = nblr;
00349 for (ibr = 1; ibr <= i__2; ++ibr) {
00350 igl = (ibr - 1) * kbl + 1;
00351
00352
00353
00354
00355 igl = (ibr - 1) * kbl + 1;
00356 i__3 = nblc;
00357 for (jbc = 1; jbc <= i__3; ++jbc) {
00358 jgl = *n1 + (jbc - 1) * kbl + 1;
00359
00360 ijblsk = 0;
00361
00362 i__5 = igl + kbl - 1;
00363 i__4 = min(i__5,*n1);
00364 for (p = igl; p <= i__4; ++p) {
00365 aapp = sva[p];
00366 if (aapp > 0.f) {
00367 pskipped = 0;
00368
00369 i__6 = jgl + kbl - 1;
00370 i__5 = min(i__6,*n);
00371 for (q = jgl; q <= i__5; ++q) {
00372
00373 aaqq = sva[q];
00374 if (aaqq > 0.f) {
00375 aapp0 = aapp;
00376
00377
00378
00379
00380
00381 if (aaqq >= 1.f) {
00382 if (aapp >= aaqq) {
00383 rotok = small * aapp <= aaqq;
00384 } else {
00385 rotok = small * aaqq <= aapp;
00386 }
00387 if (aapp < big / aaqq) {
00388 aapq = sdot_(m, &a[p * a_dim1 + 1], &
00389 c__1, &a[q * a_dim1 + 1], &
00390 c__1) * d__[p] * d__[q] /
00391 aaqq / aapp;
00392 } else {
00393 scopy_(m, &a[p * a_dim1 + 1], &c__1, &
00394 work[1], &c__1);
00395 slascl_("G", &c__0, &c__0, &aapp, &
00396 d__[p], m, &c__1, &work[1],
00397 lda, &ierr);
00398 aapq = sdot_(m, &work[1], &c__1, &a[q
00399 * a_dim1 + 1], &c__1) * d__[q]
00400 / aaqq;
00401 }
00402 } else {
00403 if (aapp >= aaqq) {
00404 rotok = aapp <= aaqq / small;
00405 } else {
00406 rotok = aaqq <= aapp / small;
00407 }
00408 if (aapp > small / aaqq) {
00409 aapq = sdot_(m, &a[p * a_dim1 + 1], &
00410 c__1, &a[q * a_dim1 + 1], &
00411 c__1) * d__[p] * d__[q] /
00412 aaqq / aapp;
00413 } else {
00414 scopy_(m, &a[q * a_dim1 + 1], &c__1, &
00415 work[1], &c__1);
00416 slascl_("G", &c__0, &c__0, &aaqq, &
00417 d__[q], m, &c__1, &work[1],
00418 lda, &ierr);
00419 aapq = sdot_(m, &work[1], &c__1, &a[p
00420 * a_dim1 + 1], &c__1) * d__[p]
00421 / aapp;
00422 }
00423 }
00424
00425 r__1 = mxaapq, r__2 = dabs(aapq);
00426 mxaapq = dmax(r__1,r__2);
00427
00428
00429 if (dabs(aapq) > *tol) {
00430 notrot = 0;
00431
00432 pskipped = 0;
00433 ++iswrot;
00434
00435 if (rotok) {
00436
00437 aqoap = aaqq / aapp;
00438 apoaq = aapp / aaqq;
00439 theta = (r__1 = aqoap - apoaq, dabs(
00440 r__1)) * -.5f / aapq;
00441 if (aaqq > aapp0) {
00442 theta = -theta;
00443 }
00444 if (dabs(theta) > bigtheta) {
00445 t = .5f / theta;
00446 fastr[2] = t * d__[p] / d__[q];
00447 fastr[3] = -t * d__[q] / d__[p];
00448 srotm_(m, &a[p * a_dim1 + 1], &
00449 c__1, &a[q * a_dim1 + 1],
00450 &c__1, fastr);
00451 if (rsvec) {
00452 srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q *
00453 v_dim1 + 1], &c__1, fastr);
00454 }
00455
00456 r__1 = 0.f, r__2 = t * apoaq *
00457 aapq + 1.f;
00458 sva[q] = aaqq * sqrt((dmax(r__1,
00459 r__2)));
00460
00461 r__1 = 0.f, r__2 = 1.f - t *
00462 aqoap * aapq;
00463 aapp *= sqrt((dmax(r__1,r__2)));
00464
00465 r__1 = mxsinj, r__2 = dabs(t);
00466 mxsinj = dmax(r__1,r__2);
00467 } else {
00468
00469
00470
00471 thsign = -r_sign(&c_b35, &aapq);
00472 if (aaqq > aapp0) {
00473 thsign = -thsign;
00474 }
00475 t = 1.f / (theta + thsign * sqrt(
00476 theta * theta + 1.f));
00477 cs = sqrt(1.f / (t * t + 1.f));
00478 sn = t * cs;
00479
00480 r__1 = mxsinj, r__2 = dabs(sn);
00481 mxsinj = dmax(r__1,r__2);
00482
00483 r__1 = 0.f, r__2 = t * apoaq *
00484 aapq + 1.f;
00485 sva[q] = aaqq * sqrt((dmax(r__1,
00486 r__2)));
00487 aapp *= sqrt(1.f - t * aqoap *
00488 aapq);
00489 apoaq = d__[p] / d__[q];
00490 aqoap = d__[q] / d__[p];
00491 if (d__[p] >= 1.f) {
00492
00493 if (d__[q] >= 1.f) {
00494 fastr[2] = t * apoaq;
00495 fastr[3] = -t * aqoap;
00496 d__[p] *= cs;
00497 d__[q] *= cs;
00498 srotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q *
00499 a_dim1 + 1], &c__1, fastr);
00500 if (rsvec) {
00501 srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[
00502 q * v_dim1 + 1], &c__1, fastr);
00503 }
00504 } else {
00505 r__1 = -t * aqoap;
00506 saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
00507 p * a_dim1 + 1], &c__1);
00508 r__1 = cs * sn * apoaq;
00509 saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
00510 q * a_dim1 + 1], &c__1);
00511 if (rsvec) {
00512 r__1 = -t * aqoap;
00513 saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
00514 c__1, &v[p * v_dim1 + 1], &c__1);
00515 r__1 = cs * sn * apoaq;
00516 saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
00517 c__1, &v[q * v_dim1 + 1], &c__1);
00518 }
00519 d__[p] *= cs;
00520 d__[q] /= cs;
00521 }
00522 } else {
00523 if (d__[q] >= 1.f) {
00524 r__1 = t * apoaq;
00525 saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
00526 q * a_dim1 + 1], &c__1);
00527 r__1 = -cs * sn * aqoap;
00528 saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
00529 p * a_dim1 + 1], &c__1);
00530 if (rsvec) {
00531 r__1 = t * apoaq;
00532 saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
00533 c__1, &v[q * v_dim1 + 1], &c__1);
00534 r__1 = -cs * sn * aqoap;
00535 saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
00536 c__1, &v[p * v_dim1 + 1], &c__1);
00537 }
00538 d__[p] /= cs;
00539 d__[q] *= cs;
00540 } else {
00541 if (d__[p] >= d__[q]) {
00542 r__1 = -t * aqoap;
00543 saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1,
00544 &a[p * a_dim1 + 1], &c__1);
00545 r__1 = cs * sn * apoaq;
00546 saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1,
00547 &a[q * a_dim1 + 1], &c__1);
00548 d__[p] *= cs;
00549 d__[q] /= cs;
00550 if (rsvec) {
00551 r__1 = -t * aqoap;
00552 saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1],
00553 &c__1, &v[p * v_dim1 + 1], &
00554 c__1);
00555 r__1 = cs * sn * apoaq;
00556 saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1],
00557 &c__1, &v[q * v_dim1 + 1], &
00558 c__1);
00559 }
00560 } else {
00561 r__1 = t * apoaq;
00562 saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1,
00563 &a[q * a_dim1 + 1], &c__1);
00564 r__1 = -cs * sn * aqoap;
00565 saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1,
00566 &a[p * a_dim1 + 1], &c__1);
00567 d__[p] /= cs;
00568 d__[q] *= cs;
00569 if (rsvec) {
00570 r__1 = t * apoaq;
00571 saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1],
00572 &c__1, &v[q * v_dim1 + 1], &
00573 c__1);
00574 r__1 = -cs * sn * aqoap;
00575 saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1],
00576 &c__1, &v[p * v_dim1 + 1], &
00577 c__1);
00578 }
00579 }
00580 }
00581 }
00582 }
00583 } else {
00584 if (aapp > aaqq) {
00585 scopy_(m, &a[p * a_dim1 + 1], &
00586 c__1, &work[1], &c__1);
00587 slascl_("G", &c__0, &c__0, &aapp,
00588 &c_b35, m, &c__1, &work[1]
00589 , lda, &ierr);
00590 slascl_("G", &c__0, &c__0, &aaqq,
00591 &c_b35, m, &c__1, &a[q *
00592 a_dim1 + 1], lda, &ierr);
00593 temp1 = -aapq * d__[p] / d__[q];
00594 saxpy_(m, &temp1, &work[1], &c__1,
00595 &a[q * a_dim1 + 1], &
00596 c__1);
00597 slascl_("G", &c__0, &c__0, &c_b35,
00598 &aaqq, m, &c__1, &a[q *
00599 a_dim1 + 1], lda, &ierr);
00600
00601 r__1 = 0.f, r__2 = 1.f - aapq *
00602 aapq;
00603 sva[q] = aaqq * sqrt((dmax(r__1,
00604 r__2)));
00605 mxsinj = dmax(mxsinj,*sfmin);
00606 } else {
00607 scopy_(m, &a[q * a_dim1 + 1], &
00608 c__1, &work[1], &c__1);
00609 slascl_("G", &c__0, &c__0, &aaqq,
00610 &c_b35, m, &c__1, &work[1]
00611 , lda, &ierr);
00612 slascl_("G", &c__0, &c__0, &aapp,
00613 &c_b35, m, &c__1, &a[p *
00614 a_dim1 + 1], lda, &ierr);
00615 temp1 = -aapq * d__[q] / d__[p];
00616 saxpy_(m, &temp1, &work[1], &c__1,
00617 &a[p * a_dim1 + 1], &
00618 c__1);
00619 slascl_("G", &c__0, &c__0, &c_b35,
00620 &aapp, m, &c__1, &a[p *
00621 a_dim1 + 1], lda, &ierr);
00622
00623 r__1 = 0.f, r__2 = 1.f - aapq *
00624 aapq;
00625 sva[p] = aapp * sqrt((dmax(r__1,
00626 r__2)));
00627 mxsinj = dmax(mxsinj,*sfmin);
00628 }
00629 }
00630
00631
00632
00633
00634
00635 r__1 = sva[q] / aaqq;
00636 if (r__1 * r__1 <= rooteps) {
00637 if (aaqq < rootbig && aaqq >
00638 rootsfmin) {
00639 sva[q] = snrm2_(m, &a[q * a_dim1
00640 + 1], &c__1) * d__[q];
00641 } else {
00642 t = 0.f;
00643 aaqq = 0.f;
00644 slassq_(m, &a[q * a_dim1 + 1], &
00645 c__1, &t, &aaqq);
00646 sva[q] = t * sqrt(aaqq) * d__[q];
00647 }
00648 }
00649
00650 r__1 = aapp / aapp0;
00651 if (r__1 * r__1 <= rooteps) {
00652 if (aapp < rootbig && aapp >
00653 rootsfmin) {
00654 aapp = snrm2_(m, &a[p * a_dim1 +
00655 1], &c__1) * d__[p];
00656 } else {
00657 t = 0.f;
00658 aapp = 0.f;
00659 slassq_(m, &a[p * a_dim1 + 1], &
00660 c__1, &t, &aapp);
00661 aapp = t * sqrt(aapp) * d__[p];
00662 }
00663 sva[p] = aapp;
00664 }
00665
00666 } else {
00667 ++notrot;
00668
00669 ++pskipped;
00670 ++ijblsk;
00671 }
00672 } else {
00673 ++notrot;
00674 ++pskipped;
00675 ++ijblsk;
00676 }
00677
00678 if (i__ <= swband && ijblsk >= blskip) {
00679 sva[p] = aapp;
00680 notrot = 0;
00681 goto L2011;
00682 }
00683 if (i__ <= swband && pskipped > rowskip) {
00684 aapp = -aapp;
00685 notrot = 0;
00686 goto L2203;
00687 }
00688
00689
00690 }
00691
00692 L2203:
00693 sva[p] = aapp;
00694
00695 } else {
00696 if (aapp == 0.f) {
00697
00698 i__5 = jgl + kbl - 1;
00699 notrot = notrot + min(i__5,*n) - jgl + 1;
00700 }
00701 if (aapp < 0.f) {
00702 notrot = 0;
00703 }
00704
00705 }
00706
00707 }
00708
00709
00710 }
00711
00712 L2011:
00713
00714
00715 i__4 = igl + kbl - 1;
00716 i__3 = min(i__4,*n);
00717 for (p = igl; p <= i__3; ++p) {
00718 sva[p] = (r__1 = sva[p], dabs(r__1));
00719
00720 }
00721
00722
00723 }
00724
00725
00726
00727 if (sva[*n] < rootbig && sva[*n] > rootsfmin) {
00728 sva[*n] = snrm2_(m, &a[*n * a_dim1 + 1], &c__1) * d__[*n];
00729 } else {
00730 t = 0.f;
00731 aapp = 0.f;
00732 slassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp);
00733 sva[*n] = t * sqrt(aapp) * d__[*n];
00734 }
00735
00736
00737
00738 if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) {
00739 swband = i__;
00740 }
00741 if (i__ > swband + 1 && mxaapq < (real) (*n) * *tol && (real) (*n) *
00742 mxaapq * mxsinj < *tol) {
00743 goto L1994;
00744 }
00745
00746 if (notrot >= emptsw) {
00747 goto L1994;
00748 }
00749
00750 }
00751
00752
00753
00754 *info = *nsweep - 1;
00755 goto L1995;
00756 L1994:
00757
00758
00759 *info = 0;
00760
00761 L1995:
00762
00763
00764
00765 i__1 = *n - 1;
00766 for (p = 1; p <= i__1; ++p) {
00767 i__2 = *n - p + 1;
00768 q = isamax_(&i__2, &sva[p], &c__1) + p - 1;
00769 if (p != q) {
00770 temp1 = sva[p];
00771 sva[p] = sva[q];
00772 sva[q] = temp1;
00773 temp1 = d__[p];
00774 d__[p] = d__[q];
00775 d__[q] = temp1;
00776 sswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1);
00777 if (rsvec) {
00778 sswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &
00779 c__1);
00780 }
00781 }
00782
00783 }
00784
00785 return 0;
00786
00787
00788
00789 }