00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015
00016
00017
00018 static integer c__1 = 1;
00019 static real c_b8 = -1.f;
00020 static real c_b9 = 1.f;
00021
00022 int slasyf_(char *uplo, integer *n, integer *nb, integer *kb,
00023 real *a, integer *lda, integer *ipiv, real *w, integer *ldw, integer
00024 *info)
00025 {
00026
00027 integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5;
00028 real r__1, r__2, r__3;
00029
00030
00031 double sqrt(doublereal);
00032
00033
00034 integer j, k;
00035 real t, r1, d11, d21, d22;
00036 integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax;
00037 real alpha;
00038 extern logical lsame_(char *, char *);
00039 extern int sscal_(integer *, real *, real *, integer *),
00040 sgemm_(char *, char *, integer *, integer *, integer *, real *,
00041 real *, integer *, real *, integer *, real *, real *, integer *), sgemv_(char *, integer *, integer *, real *,
00042 real *, integer *, real *, integer *, real *, real *, integer *);
00043 integer kstep;
00044 extern int scopy_(integer *, real *, integer *, real *,
00045 integer *), sswap_(integer *, real *, integer *, real *, integer *
00046 );
00047 real absakk;
00048 extern integer isamax_(integer *, real *, integer *);
00049 real colmax, rowmax;
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154 a_dim1 = *lda;
00155 a_offset = 1 + a_dim1;
00156 a -= a_offset;
00157 --ipiv;
00158 w_dim1 = *ldw;
00159 w_offset = 1 + w_dim1;
00160 w -= w_offset;
00161
00162
00163 *info = 0;
00164
00165
00166
00167 alpha = (sqrt(17.f) + 1.f) / 8.f;
00168
00169 if (lsame_(uplo, "U")) {
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179 k = *n;
00180 L10:
00181 kw = *nb + k - *n;
00182
00183
00184
00185 if (k <= *n - *nb + 1 && *nb < *n || k < 1) {
00186 goto L30;
00187 }
00188
00189
00190
00191 scopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
00192 if (k < *n) {
00193 i__1 = *n - k;
00194 sgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1],
00195 lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b9, &w[kw *
00196 w_dim1 + 1], &c__1);
00197 }
00198
00199 kstep = 1;
00200
00201
00202
00203
00204 absakk = (r__1 = w[k + kw * w_dim1], dabs(r__1));
00205
00206
00207
00208
00209 if (k > 1) {
00210 i__1 = k - 1;
00211 imax = isamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
00212 colmax = (r__1 = w[imax + kw * w_dim1], dabs(r__1));
00213 } else {
00214 colmax = 0.f;
00215 }
00216
00217 if (dmax(absakk,colmax) == 0.f) {
00218
00219
00220
00221 if (*info == 0) {
00222 *info = k;
00223 }
00224 kp = k;
00225 } else {
00226 if (absakk >= alpha * colmax) {
00227
00228
00229
00230 kp = k;
00231 } else {
00232
00233
00234
00235 scopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
00236 w_dim1 + 1], &c__1);
00237 i__1 = k - imax;
00238 scopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
00239 1 + (kw - 1) * w_dim1], &c__1);
00240 if (k < *n) {
00241 i__1 = *n - k;
00242 sgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) *
00243 a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
00244 ldw, &c_b9, &w[(kw - 1) * w_dim1 + 1], &c__1);
00245 }
00246
00247
00248
00249
00250 i__1 = k - imax;
00251 jmax = imax + isamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1],
00252 &c__1);
00253 rowmax = (r__1 = w[jmax + (kw - 1) * w_dim1], dabs(r__1));
00254 if (imax > 1) {
00255 i__1 = imax - 1;
00256 jmax = isamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
00257
00258 r__2 = rowmax, r__3 = (r__1 = w[jmax + (kw - 1) * w_dim1],
00259 dabs(r__1));
00260 rowmax = dmax(r__2,r__3);
00261 }
00262
00263 if (absakk >= alpha * colmax * (colmax / rowmax)) {
00264
00265
00266
00267 kp = k;
00268 } else if ((r__1 = w[imax + (kw - 1) * w_dim1], dabs(r__1)) >=
00269 alpha * rowmax) {
00270
00271
00272
00273
00274 kp = imax;
00275
00276
00277
00278 scopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
00279 w_dim1 + 1], &c__1);
00280 } else {
00281
00282
00283
00284
00285 kp = imax;
00286 kstep = 2;
00287 }
00288 }
00289
00290 kk = k - kstep + 1;
00291 kkw = *nb + kk - *n;
00292
00293
00294
00295 if (kp != kk) {
00296
00297
00298
00299 a[kp + k * a_dim1] = a[kk + k * a_dim1];
00300 i__1 = k - 1 - kp;
00301 scopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
00302 1) * a_dim1], lda);
00303 scopy_(&kp, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &
00304 c__1);
00305
00306
00307
00308 i__1 = *n - kk + 1;
00309 sswap_(&i__1, &a[kk + kk * a_dim1], lda, &a[kp + kk * a_dim1],
00310 lda);
00311 i__1 = *n - kk + 1;
00312 sswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
00313 w_dim1], ldw);
00314 }
00315
00316 if (kstep == 1) {
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326 scopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
00327 c__1);
00328 r1 = 1.f / a[k + k * a_dim1];
00329 i__1 = k - 1;
00330 sscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
00331 } else {
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341 if (k > 2) {
00342
00343
00344
00345 d21 = w[k - 1 + kw * w_dim1];
00346 d11 = w[k + kw * w_dim1] / d21;
00347 d22 = w[k - 1 + (kw - 1) * w_dim1] / d21;
00348 t = 1.f / (d11 * d22 - 1.f);
00349 d21 = t / d21;
00350 i__1 = k - 2;
00351 for (j = 1; j <= i__1; ++j) {
00352 a[j + (k - 1) * a_dim1] = d21 * (d11 * w[j + (kw - 1)
00353 * w_dim1] - w[j + kw * w_dim1]);
00354 a[j + k * a_dim1] = d21 * (d22 * w[j + kw * w_dim1] -
00355 w[j + (kw - 1) * w_dim1]);
00356
00357 }
00358 }
00359
00360
00361
00362 a[k - 1 + (k - 1) * a_dim1] = w[k - 1 + (kw - 1) * w_dim1];
00363 a[k - 1 + k * a_dim1] = w[k - 1 + kw * w_dim1];
00364 a[k + k * a_dim1] = w[k + kw * w_dim1];
00365 }
00366 }
00367
00368
00369
00370 if (kstep == 1) {
00371 ipiv[k] = kp;
00372 } else {
00373 ipiv[k] = -kp;
00374 ipiv[k - 1] = -kp;
00375 }
00376
00377
00378
00379 k -= kstep;
00380 goto L10;
00381
00382 L30:
00383
00384
00385
00386
00387
00388
00389
00390 i__1 = -(*nb);
00391 for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j +=
00392 i__1) {
00393
00394 i__2 = *nb, i__3 = k - j + 1;
00395 jb = min(i__2,i__3);
00396
00397
00398
00399 i__2 = j + jb - 1;
00400 for (jj = j; jj <= i__2; ++jj) {
00401 i__3 = jj - j + 1;
00402 i__4 = *n - k;
00403 sgemv_("No transpose", &i__3, &i__4, &c_b8, &a[j + (k + 1) *
00404 a_dim1], lda, &w[jj + (kw + 1) * w_dim1], ldw, &c_b9,
00405 &a[j + jj * a_dim1], &c__1);
00406
00407 }
00408
00409
00410
00411 i__2 = j - 1;
00412 i__3 = *n - k;
00413 sgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &c_b8, &a[(
00414 k + 1) * a_dim1 + 1], lda, &w[j + (kw + 1) * w_dim1], ldw,
00415 &c_b9, &a[j * a_dim1 + 1], lda);
00416
00417 }
00418
00419
00420
00421
00422 j = k + 1;
00423 L60:
00424 jj = j;
00425 jp = ipiv[j];
00426 if (jp < 0) {
00427 jp = -jp;
00428 ++j;
00429 }
00430 ++j;
00431 if (jp != jj && j <= *n) {
00432 i__1 = *n - j + 1;
00433 sswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda);
00434 }
00435 if (j <= *n) {
00436 goto L60;
00437 }
00438
00439
00440
00441 *kb = *n - k;
00442
00443 } else {
00444
00445
00446
00447
00448
00449
00450
00451 k = 1;
00452 L70:
00453
00454
00455
00456 if (k >= *nb && *nb < *n || k > *n) {
00457 goto L90;
00458 }
00459
00460
00461
00462 i__1 = *n - k + 1;
00463 scopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
00464 i__1 = *n - k + 1;
00465 i__2 = k - 1;
00466 sgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[k
00467 + w_dim1], ldw, &c_b9, &w[k + k * w_dim1], &c__1);
00468
00469 kstep = 1;
00470
00471
00472
00473
00474 absakk = (r__1 = w[k + k * w_dim1], dabs(r__1));
00475
00476
00477
00478
00479 if (k < *n) {
00480 i__1 = *n - k;
00481 imax = k + isamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
00482 colmax = (r__1 = w[imax + k * w_dim1], dabs(r__1));
00483 } else {
00484 colmax = 0.f;
00485 }
00486
00487 if (dmax(absakk,colmax) == 0.f) {
00488
00489
00490
00491 if (*info == 0) {
00492 *info = k;
00493 }
00494 kp = k;
00495 } else {
00496 if (absakk >= alpha * colmax) {
00497
00498
00499
00500 kp = k;
00501 } else {
00502
00503
00504
00505 i__1 = imax - k;
00506 scopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
00507 w_dim1], &c__1);
00508 i__1 = *n - imax + 1;
00509 scopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k +
00510 1) * w_dim1], &c__1);
00511 i__1 = *n - k + 1;
00512 i__2 = k - 1;
00513 sgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1],
00514 lda, &w[imax + w_dim1], ldw, &c_b9, &w[k + (k + 1) *
00515 w_dim1], &c__1);
00516
00517
00518
00519
00520 i__1 = imax - k;
00521 jmax = k - 1 + isamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1)
00522 ;
00523 rowmax = (r__1 = w[jmax + (k + 1) * w_dim1], dabs(r__1));
00524 if (imax < *n) {
00525 i__1 = *n - imax;
00526 jmax = imax + isamax_(&i__1, &w[imax + 1 + (k + 1) *
00527 w_dim1], &c__1);
00528
00529 r__2 = rowmax, r__3 = (r__1 = w[jmax + (k + 1) * w_dim1],
00530 dabs(r__1));
00531 rowmax = dmax(r__2,r__3);
00532 }
00533
00534 if (absakk >= alpha * colmax * (colmax / rowmax)) {
00535
00536
00537
00538 kp = k;
00539 } else if ((r__1 = w[imax + (k + 1) * w_dim1], dabs(r__1)) >=
00540 alpha * rowmax) {
00541
00542
00543
00544
00545 kp = imax;
00546
00547
00548
00549 i__1 = *n - k + 1;
00550 scopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
00551 w_dim1], &c__1);
00552 } else {
00553
00554
00555
00556
00557 kp = imax;
00558 kstep = 2;
00559 }
00560 }
00561
00562 kk = k + kstep - 1;
00563
00564
00565
00566 if (kp != kk) {
00567
00568
00569
00570 a[kp + k * a_dim1] = a[kk + k * a_dim1];
00571 i__1 = kp - k - 1;
00572 scopy_(&i__1, &a[k + 1 + kk * a_dim1], &c__1, &a[kp + (k + 1)
00573 * a_dim1], lda);
00574 i__1 = *n - kp + 1;
00575 scopy_(&i__1, &a[kp + kk * a_dim1], &c__1, &a[kp + kp *
00576 a_dim1], &c__1);
00577
00578
00579
00580 sswap_(&kk, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
00581 sswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
00582 }
00583
00584 if (kstep == 1) {
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594 i__1 = *n - k + 1;
00595 scopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
00596 c__1);
00597 if (k < *n) {
00598 r1 = 1.f / a[k + k * a_dim1];
00599 i__1 = *n - k;
00600 sscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
00601 }
00602 } else {
00603
00604
00605
00606
00607
00608
00609
00610
00611 if (k < *n - 1) {
00612
00613
00614
00615 d21 = w[k + 1 + k * w_dim1];
00616 d11 = w[k + 1 + (k + 1) * w_dim1] / d21;
00617 d22 = w[k + k * w_dim1] / d21;
00618 t = 1.f / (d11 * d22 - 1.f);
00619 d21 = t / d21;
00620 i__1 = *n;
00621 for (j = k + 2; j <= i__1; ++j) {
00622 a[j + k * a_dim1] = d21 * (d11 * w[j + k * w_dim1] -
00623 w[j + (k + 1) * w_dim1]);
00624 a[j + (k + 1) * a_dim1] = d21 * (d22 * w[j + (k + 1) *
00625 w_dim1] - w[j + k * w_dim1]);
00626
00627 }
00628 }
00629
00630
00631
00632 a[k + k * a_dim1] = w[k + k * w_dim1];
00633 a[k + 1 + k * a_dim1] = w[k + 1 + k * w_dim1];
00634 a[k + 1 + (k + 1) * a_dim1] = w[k + 1 + (k + 1) * w_dim1];
00635 }
00636 }
00637
00638
00639
00640 if (kstep == 1) {
00641 ipiv[k] = kp;
00642 } else {
00643 ipiv[k] = -kp;
00644 ipiv[k + 1] = -kp;
00645 }
00646
00647
00648
00649 k += kstep;
00650 goto L70;
00651
00652 L90:
00653
00654
00655
00656
00657
00658
00659
00660 i__1 = *n;
00661 i__2 = *nb;
00662 for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
00663
00664 i__3 = *nb, i__4 = *n - j + 1;
00665 jb = min(i__3,i__4);
00666
00667
00668
00669 i__3 = j + jb - 1;
00670 for (jj = j; jj <= i__3; ++jj) {
00671 i__4 = j + jb - jj;
00672 i__5 = k - 1;
00673 sgemv_("No transpose", &i__4, &i__5, &c_b8, &a[jj + a_dim1],
00674 lda, &w[jj + w_dim1], ldw, &c_b9, &a[jj + jj * a_dim1]
00675 , &c__1);
00676
00677 }
00678
00679
00680
00681 if (j + jb <= *n) {
00682 i__3 = *n - j - jb + 1;
00683 i__4 = k - 1;
00684 sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &c_b8,
00685 &a[j + jb + a_dim1], lda, &w[j + w_dim1], ldw, &c_b9,
00686 &a[j + jb + j * a_dim1], lda);
00687 }
00688
00689 }
00690
00691
00692
00693
00694 j = k - 1;
00695 L120:
00696 jj = j;
00697 jp = ipiv[j];
00698 if (jp < 0) {
00699 jp = -jp;
00700 --j;
00701 }
00702 --j;
00703 if (jp != jj && j >= 1) {
00704 sswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda);
00705 }
00706 if (j >= 1) {
00707 goto L120;
00708 }
00709
00710
00711
00712 *kb = k - 1;
00713
00714 }
00715 return 0;
00716
00717
00718
00719 }