00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015
00016
00017
00018 static integer c__1 = 1;
00019 static integer c__65 = 65;
00020 static real c_b18 = -1.f;
00021 static real c_b31 = 1.f;
00022
00023 int sgbtrf_(integer *m, integer *n, integer *kl, integer *ku,
00024 real *ab, integer *ldab, integer *ipiv, integer *info)
00025 {
00026
00027 integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6;
00028 real r__1;
00029
00030
00031 integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km, ju,
00032 kv, nw;
00033 extern int sger_(integer *, integer *, real *, real *,
00034 integer *, real *, integer *, real *, integer *);
00035 real temp;
00036 extern int sscal_(integer *, real *, real *, integer *),
00037 sgemm_(char *, char *, integer *, integer *, integer *, real *,
00038 real *, integer *, real *, integer *, real *, real *, integer *);
00039 real work13[4160] , work31[4160]
00040 ;
00041 extern int scopy_(integer *, real *, integer *, real *,
00042 integer *), sswap_(integer *, real *, integer *, real *, integer *
00043 ), strsm_(char *, char *, char *, char *, integer *, integer *,
00044 real *, real *, integer *, real *, integer *), sgbtf2_(integer *, integer *, integer *, integer
00045 *, real *, integer *, integer *, integer *), xerbla_(char *,
00046 integer *);
00047 extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
00048 integer *, integer *), isamax_(integer *, real *,
00049 integer *);
00050 extern int slaswp_(integer *, real *, integer *, integer
00051 *, integer *, integer *, integer *);
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153 ab_dim1 = *ldab;
00154 ab_offset = 1 + ab_dim1;
00155 ab -= ab_offset;
00156 --ipiv;
00157
00158
00159 kv = *ku + *kl;
00160
00161
00162
00163 *info = 0;
00164 if (*m < 0) {
00165 *info = -1;
00166 } else if (*n < 0) {
00167 *info = -2;
00168 } else if (*kl < 0) {
00169 *info = -3;
00170 } else if (*ku < 0) {
00171 *info = -4;
00172 } else if (*ldab < *kl + kv + 1) {
00173 *info = -6;
00174 }
00175 if (*info != 0) {
00176 i__1 = -(*info);
00177 xerbla_("SGBTRF", &i__1);
00178 return 0;
00179 }
00180
00181
00182
00183 if (*m == 0 || *n == 0) {
00184 return 0;
00185 }
00186
00187
00188
00189 nb = ilaenv_(&c__1, "SGBTRF", " ", m, n, kl, ku);
00190
00191
00192
00193
00194 nb = min(nb,64);
00195
00196 if (nb <= 1 || nb > *kl) {
00197
00198
00199
00200 sgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info);
00201 } else {
00202
00203
00204
00205
00206
00207 i__1 = nb;
00208 for (j = 1; j <= i__1; ++j) {
00209 i__2 = j - 1;
00210 for (i__ = 1; i__ <= i__2; ++i__) {
00211 work13[i__ + j * 65 - 66] = 0.f;
00212
00213 }
00214
00215 }
00216
00217
00218
00219 i__1 = nb;
00220 for (j = 1; j <= i__1; ++j) {
00221 i__2 = nb;
00222 for (i__ = j + 1; i__ <= i__2; ++i__) {
00223 work31[i__ + j * 65 - 66] = 0.f;
00224
00225 }
00226
00227 }
00228
00229
00230
00231
00232
00233 i__1 = min(kv,*n);
00234 for (j = *ku + 2; j <= i__1; ++j) {
00235 i__2 = *kl;
00236 for (i__ = kv - j + 2; i__ <= i__2; ++i__) {
00237 ab[i__ + j * ab_dim1] = 0.f;
00238
00239 }
00240
00241 }
00242
00243
00244
00245
00246 ju = 1;
00247
00248 i__1 = min(*m,*n);
00249 i__2 = nb;
00250 for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
00251
00252 i__3 = nb, i__4 = min(*m,*n) - j + 1;
00253 jb = min(i__3,i__4);
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268 i__3 = *kl - jb, i__4 = *m - j - jb + 1;
00269 i2 = min(i__3,i__4);
00270
00271 i__3 = jb, i__4 = *m - j - *kl + 1;
00272 i3 = min(i__3,i__4);
00273
00274
00275
00276
00277
00278 i__3 = j + jb - 1;
00279 for (jj = j; jj <= i__3; ++jj) {
00280
00281
00282
00283 if (jj + kv <= *n) {
00284 i__4 = *kl;
00285 for (i__ = 1; i__ <= i__4; ++i__) {
00286 ab[i__ + (jj + kv) * ab_dim1] = 0.f;
00287
00288 }
00289 }
00290
00291
00292
00293
00294
00295 i__4 = *kl, i__5 = *m - jj;
00296 km = min(i__4,i__5);
00297 i__4 = km + 1;
00298 jp = isamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1);
00299 ipiv[jj] = jp + jj - j;
00300 if (ab[kv + jp + jj * ab_dim1] != 0.f) {
00301
00302
00303 i__6 = jj + *ku + jp - 1;
00304 i__4 = ju, i__5 = min(i__6,*n);
00305 ju = max(i__4,i__5);
00306 if (jp != 1) {
00307
00308
00309
00310 if (jp + jj - 1 < j + *kl) {
00311
00312 i__4 = *ldab - 1;
00313 i__5 = *ldab - 1;
00314 sswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], &
00315 i__4, &ab[kv + jp + jj - j + j * ab_dim1],
00316 &i__5);
00317 } else {
00318
00319
00320
00321
00322 i__4 = jj - j;
00323 i__5 = *ldab - 1;
00324 sswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1],
00325 &i__5, &work31[jp + jj - j - *kl - 1], &
00326 c__65);
00327 i__4 = j + jb - jj;
00328 i__5 = *ldab - 1;
00329 i__6 = *ldab - 1;
00330 sswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, &
00331 ab[kv + jp + jj * ab_dim1], &i__6);
00332 }
00333 }
00334
00335
00336
00337 r__1 = 1.f / ab[kv + 1 + jj * ab_dim1];
00338 sscal_(&km, &r__1, &ab[kv + 2 + jj * ab_dim1], &c__1);
00339
00340
00341
00342
00343
00344
00345 i__4 = ju, i__5 = j + jb - 1;
00346 jm = min(i__4,i__5);
00347 if (jm > jj) {
00348 i__4 = jm - jj;
00349 i__5 = *ldab - 1;
00350 i__6 = *ldab - 1;
00351 sger_(&km, &i__4, &c_b18, &ab[kv + 2 + jj * ab_dim1],
00352 &c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, &
00353 ab[kv + 1 + (jj + 1) * ab_dim1], &i__6);
00354 }
00355 } else {
00356
00357
00358
00359
00360 if (*info == 0) {
00361 *info = jj;
00362 }
00363 }
00364
00365
00366
00367
00368 i__4 = jj - j + 1;
00369 nw = min(i__4,i3);
00370 if (nw > 0) {
00371 scopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], &
00372 c__1, &work31[(jj - j + 1) * 65 - 65], &c__1);
00373 }
00374
00375 }
00376 if (j + jb <= *n) {
00377
00378
00379
00380
00381 i__3 = ju - j + 1;
00382 j2 = min(i__3,kv) - jb;
00383
00384 i__3 = 0, i__4 = ju - j - kv + 1;
00385 j3 = max(i__3,i__4);
00386
00387
00388
00389
00390 i__3 = *ldab - 1;
00391 slaswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, &
00392 c__1, &jb, &ipiv[j], &c__1);
00393
00394
00395
00396 i__3 = j + jb - 1;
00397 for (i__ = j; i__ <= i__3; ++i__) {
00398 ipiv[i__] = ipiv[i__] + j - 1;
00399
00400 }
00401
00402
00403
00404
00405 k2 = j - 1 + jb + j2;
00406 i__3 = j3;
00407 for (i__ = 1; i__ <= i__3; ++i__) {
00408 jj = k2 + i__;
00409 i__4 = j + jb - 1;
00410 for (ii = j + i__ - 1; ii <= i__4; ++ii) {
00411 ip = ipiv[ii];
00412 if (ip != ii) {
00413 temp = ab[kv + 1 + ii - jj + jj * ab_dim1];
00414 ab[kv + 1 + ii - jj + jj * ab_dim1] = ab[kv + 1 +
00415 ip - jj + jj * ab_dim1];
00416 ab[kv + 1 + ip - jj + jj * ab_dim1] = temp;
00417 }
00418
00419 }
00420
00421 }
00422
00423
00424
00425 if (j2 > 0) {
00426
00427
00428
00429 i__3 = *ldab - 1;
00430 i__4 = *ldab - 1;
00431 strsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2,
00432 &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv
00433 + 1 - jb + (j + jb) * ab_dim1], &i__4);
00434
00435 if (i2 > 0) {
00436
00437
00438
00439 i__3 = *ldab - 1;
00440 i__4 = *ldab - 1;
00441 i__5 = *ldab - 1;
00442 sgemm_("No transpose", "No transpose", &i2, &j2, &jb,
00443 &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3,
00444 &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4,
00445 &c_b31, &ab[kv + 1 + (j + jb) * ab_dim1], &
00446 i__5);
00447 }
00448
00449 if (i3 > 0) {
00450
00451
00452
00453 i__3 = *ldab - 1;
00454 i__4 = *ldab - 1;
00455 sgemm_("No transpose", "No transpose", &i3, &j2, &jb,
00456 &c_b18, work31, &c__65, &ab[kv + 1 - jb + (j
00457 + jb) * ab_dim1], &i__3, &c_b31, &ab[kv + *kl
00458 + 1 - jb + (j + jb) * ab_dim1], &i__4);
00459 }
00460 }
00461
00462 if (j3 > 0) {
00463
00464
00465
00466
00467 i__3 = j3;
00468 for (jj = 1; jj <= i__3; ++jj) {
00469 i__4 = jb;
00470 for (ii = jj; ii <= i__4; ++ii) {
00471 work13[ii + jj * 65 - 66] = ab[ii - jj + 1 + (jj
00472 + j + kv - 1) * ab_dim1];
00473
00474 }
00475
00476 }
00477
00478
00479
00480 i__3 = *ldab - 1;
00481 strsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3,
00482 &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, work13,
00483 &c__65);
00484
00485 if (i2 > 0) {
00486
00487
00488
00489 i__3 = *ldab - 1;
00490 i__4 = *ldab - 1;
00491 sgemm_("No transpose", "No transpose", &i2, &j3, &jb,
00492 &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3,
00493 work13, &c__65, &c_b31, &ab[jb + 1 + (j + kv)
00494 * ab_dim1], &i__4);
00495 }
00496
00497 if (i3 > 0) {
00498
00499
00500
00501 i__3 = *ldab - 1;
00502 sgemm_("No transpose", "No transpose", &i3, &j3, &jb,
00503 &c_b18, work31, &c__65, work13, &c__65, &
00504 c_b31, &ab[*kl + 1 + (j + kv) * ab_dim1], &
00505 i__3);
00506 }
00507
00508
00509
00510 i__3 = j3;
00511 for (jj = 1; jj <= i__3; ++jj) {
00512 i__4 = jb;
00513 for (ii = jj; ii <= i__4; ++ii) {
00514 ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1] =
00515 work13[ii + jj * 65 - 66];
00516
00517 }
00518
00519 }
00520 }
00521 } else {
00522
00523
00524
00525 i__3 = j + jb - 1;
00526 for (i__ = j; i__ <= i__3; ++i__) {
00527 ipiv[i__] = ipiv[i__] + j - 1;
00528
00529 }
00530 }
00531
00532
00533
00534
00535
00536 i__3 = j;
00537 for (jj = j + jb - 1; jj >= i__3; --jj) {
00538 jp = ipiv[jj] - jj + 1;
00539 if (jp != 1) {
00540
00541
00542
00543 if (jp + jj - 1 < j + *kl) {
00544
00545
00546
00547 i__4 = jj - j;
00548 i__5 = *ldab - 1;
00549 i__6 = *ldab - 1;
00550 sswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
00551 i__5, &ab[kv + jp + jj - j + j * ab_dim1], &
00552 i__6);
00553 } else {
00554
00555
00556
00557 i__4 = jj - j;
00558 i__5 = *ldab - 1;
00559 sswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
00560 i__5, &work31[jp + jj - j - *kl - 1], &c__65);
00561 }
00562 }
00563
00564
00565
00566
00567 i__4 = i3, i__5 = jj - j + 1;
00568 nw = min(i__4,i__5);
00569 if (nw > 0) {
00570 scopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[
00571 kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1);
00572 }
00573
00574 }
00575
00576 }
00577 }
00578
00579 return 0;
00580
00581
00582
00583 }