00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015
00016
00017
00018 static complex c_b1 = {1.f,0.f};
00019 static integer c__1 = 1;
00020 static integer c__65 = 65;
00021
00022 int cgbtrf_(integer *m, integer *n, integer *kl, integer *ku,
00023 complex *ab, integer *ldab, integer *ipiv, integer *info)
00024 {
00025
00026 integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6;
00027 complex q__1;
00028
00029
00030 void c_div(complex *, complex *, complex *);
00031
00032
00033 integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km, ju,
00034 kv, nw;
00035 complex temp;
00036 extern int cscal_(integer *, complex *, complex *,
00037 integer *), cgemm_(char *, char *, integer *, integer *, integer *
00038 , complex *, complex *, integer *, complex *, integer *, complex *
00039 , complex *, integer *), cgeru_(integer *,
00040 integer *, complex *, complex *, integer *, complex *, integer *,
00041 complex *, integer *), ccopy_(integer *, complex *, integer *,
00042 complex *, integer *), cswap_(integer *, complex *, integer *,
00043 complex *, integer *);
00044 complex work13[4160] , work31[4160]
00045 ;
00046 extern int ctrsm_(char *, char *, char *, char *,
00047 integer *, integer *, complex *, complex *, integer *, complex *,
00048 integer *), cgbtf2_(integer *,
00049 integer *, integer *, integer *, complex *, integer *, integer *,
00050 integer *);
00051 extern integer icamax_(integer *, complex *, integer *);
00052 extern int xerbla_(char *, integer *);
00053 extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
00054 integer *, integer *);
00055 extern int claswp_(integer *, complex *, integer *,
00056 integer *, integer *, integer *, integer *);
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158 ab_dim1 = *ldab;
00159 ab_offset = 1 + ab_dim1;
00160 ab -= ab_offset;
00161 --ipiv;
00162
00163
00164 kv = *ku + *kl;
00165
00166
00167
00168 *info = 0;
00169 if (*m < 0) {
00170 *info = -1;
00171 } else if (*n < 0) {
00172 *info = -2;
00173 } else if (*kl < 0) {
00174 *info = -3;
00175 } else if (*ku < 0) {
00176 *info = -4;
00177 } else if (*ldab < *kl + kv + 1) {
00178 *info = -6;
00179 }
00180 if (*info != 0) {
00181 i__1 = -(*info);
00182 xerbla_("CGBTRF", &i__1);
00183 return 0;
00184 }
00185
00186
00187
00188 if (*m == 0 || *n == 0) {
00189 return 0;
00190 }
00191
00192
00193
00194 nb = ilaenv_(&c__1, "CGBTRF", " ", m, n, kl, ku);
00195
00196
00197
00198
00199 nb = min(nb,64);
00200
00201 if (nb <= 1 || nb > *kl) {
00202
00203
00204
00205 cgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info);
00206 } else {
00207
00208
00209
00210
00211
00212 i__1 = nb;
00213 for (j = 1; j <= i__1; ++j) {
00214 i__2 = j - 1;
00215 for (i__ = 1; i__ <= i__2; ++i__) {
00216 i__3 = i__ + j * 65 - 66;
00217 work13[i__3].r = 0.f, work13[i__3].i = 0.f;
00218
00219 }
00220
00221 }
00222
00223
00224
00225 i__1 = nb;
00226 for (j = 1; j <= i__1; ++j) {
00227 i__2 = nb;
00228 for (i__ = j + 1; i__ <= i__2; ++i__) {
00229 i__3 = i__ + j * 65 - 66;
00230 work31[i__3].r = 0.f, work31[i__3].i = 0.f;
00231
00232 }
00233
00234 }
00235
00236
00237
00238
00239
00240 i__1 = min(kv,*n);
00241 for (j = *ku + 2; j <= i__1; ++j) {
00242 i__2 = *kl;
00243 for (i__ = kv - j + 2; i__ <= i__2; ++i__) {
00244 i__3 = i__ + j * ab_dim1;
00245 ab[i__3].r = 0.f, ab[i__3].i = 0.f;
00246
00247 }
00248
00249 }
00250
00251
00252
00253
00254 ju = 1;
00255
00256 i__1 = min(*m,*n);
00257 i__2 = nb;
00258 for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
00259
00260 i__3 = nb, i__4 = min(*m,*n) - j + 1;
00261 jb = min(i__3,i__4);
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276 i__3 = *kl - jb, i__4 = *m - j - jb + 1;
00277 i2 = min(i__3,i__4);
00278
00279 i__3 = jb, i__4 = *m - j - *kl + 1;
00280 i3 = min(i__3,i__4);
00281
00282
00283
00284
00285
00286 i__3 = j + jb - 1;
00287 for (jj = j; jj <= i__3; ++jj) {
00288
00289
00290
00291 if (jj + kv <= *n) {
00292 i__4 = *kl;
00293 for (i__ = 1; i__ <= i__4; ++i__) {
00294 i__5 = i__ + (jj + kv) * ab_dim1;
00295 ab[i__5].r = 0.f, ab[i__5].i = 0.f;
00296
00297 }
00298 }
00299
00300
00301
00302
00303
00304 i__4 = *kl, i__5 = *m - jj;
00305 km = min(i__4,i__5);
00306 i__4 = km + 1;
00307 jp = icamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1);
00308 ipiv[jj] = jp + jj - j;
00309 i__4 = kv + jp + jj * ab_dim1;
00310 if (ab[i__4].r != 0.f || ab[i__4].i != 0.f) {
00311
00312
00313 i__6 = jj + *ku + jp - 1;
00314 i__4 = ju, i__5 = min(i__6,*n);
00315 ju = max(i__4,i__5);
00316 if (jp != 1) {
00317
00318
00319
00320 if (jp + jj - 1 < j + *kl) {
00321
00322 i__4 = *ldab - 1;
00323 i__5 = *ldab - 1;
00324 cswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], &
00325 i__4, &ab[kv + jp + jj - j + j * ab_dim1],
00326 &i__5);
00327 } else {
00328
00329
00330
00331
00332 i__4 = jj - j;
00333 i__5 = *ldab - 1;
00334 cswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1],
00335 &i__5, &work31[jp + jj - j - *kl - 1], &
00336 c__65);
00337 i__4 = j + jb - jj;
00338 i__5 = *ldab - 1;
00339 i__6 = *ldab - 1;
00340 cswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, &
00341 ab[kv + jp + jj * ab_dim1], &i__6);
00342 }
00343 }
00344
00345
00346
00347 c_div(&q__1, &c_b1, &ab[kv + 1 + jj * ab_dim1]);
00348 cscal_(&km, &q__1, &ab[kv + 2 + jj * ab_dim1], &c__1);
00349
00350
00351
00352
00353
00354
00355 i__4 = ju, i__5 = j + jb - 1;
00356 jm = min(i__4,i__5);
00357 if (jm > jj) {
00358 i__4 = jm - jj;
00359 q__1.r = -1.f, q__1.i = -0.f;
00360 i__5 = *ldab - 1;
00361 i__6 = *ldab - 1;
00362 cgeru_(&km, &i__4, &q__1, &ab[kv + 2 + jj * ab_dim1],
00363 &c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, &
00364 ab[kv + 1 + (jj + 1) * ab_dim1], &i__6);
00365 }
00366 } else {
00367
00368
00369
00370
00371 if (*info == 0) {
00372 *info = jj;
00373 }
00374 }
00375
00376
00377
00378
00379 i__4 = jj - j + 1;
00380 nw = min(i__4,i3);
00381 if (nw > 0) {
00382 ccopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], &
00383 c__1, &work31[(jj - j + 1) * 65 - 65], &c__1);
00384 }
00385
00386 }
00387 if (j + jb <= *n) {
00388
00389
00390
00391
00392 i__3 = ju - j + 1;
00393 j2 = min(i__3,kv) - jb;
00394
00395 i__3 = 0, i__4 = ju - j - kv + 1;
00396 j3 = max(i__3,i__4);
00397
00398
00399
00400
00401 i__3 = *ldab - 1;
00402 claswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, &
00403 c__1, &jb, &ipiv[j], &c__1);
00404
00405
00406
00407 i__3 = j + jb - 1;
00408 for (i__ = j; i__ <= i__3; ++i__) {
00409 ipiv[i__] = ipiv[i__] + j - 1;
00410
00411 }
00412
00413
00414
00415
00416 k2 = j - 1 + jb + j2;
00417 i__3 = j3;
00418 for (i__ = 1; i__ <= i__3; ++i__) {
00419 jj = k2 + i__;
00420 i__4 = j + jb - 1;
00421 for (ii = j + i__ - 1; ii <= i__4; ++ii) {
00422 ip = ipiv[ii];
00423 if (ip != ii) {
00424 i__5 = kv + 1 + ii - jj + jj * ab_dim1;
00425 temp.r = ab[i__5].r, temp.i = ab[i__5].i;
00426 i__5 = kv + 1 + ii - jj + jj * ab_dim1;
00427 i__6 = kv + 1 + ip - jj + jj * ab_dim1;
00428 ab[i__5].r = ab[i__6].r, ab[i__5].i = ab[i__6].i;
00429 i__5 = kv + 1 + ip - jj + jj * ab_dim1;
00430 ab[i__5].r = temp.r, ab[i__5].i = temp.i;
00431 }
00432
00433 }
00434
00435 }
00436
00437
00438
00439 if (j2 > 0) {
00440
00441
00442
00443 i__3 = *ldab - 1;
00444 i__4 = *ldab - 1;
00445 ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2,
00446 &c_b1, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv +
00447 1 - jb + (j + jb) * ab_dim1], &i__4);
00448
00449 if (i2 > 0) {
00450
00451
00452
00453 q__1.r = -1.f, q__1.i = -0.f;
00454 i__3 = *ldab - 1;
00455 i__4 = *ldab - 1;
00456 i__5 = *ldab - 1;
00457 cgemm_("No transpose", "No transpose", &i2, &j2, &jb,
00458 &q__1, &ab[kv + 1 + jb + j * ab_dim1], &i__3,
00459 &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4,
00460 &c_b1, &ab[kv + 1 + (j + jb) * ab_dim1], &
00461 i__5);
00462 }
00463
00464 if (i3 > 0) {
00465
00466
00467
00468 q__1.r = -1.f, q__1.i = -0.f;
00469 i__3 = *ldab - 1;
00470 i__4 = *ldab - 1;
00471 cgemm_("No transpose", "No transpose", &i3, &j2, &jb,
00472 &q__1, work31, &c__65, &ab[kv + 1 - jb + (j +
00473 jb) * ab_dim1], &i__3, &c_b1, &ab[kv + *kl +
00474 1 - jb + (j + jb) * ab_dim1], &i__4);
00475 }
00476 }
00477
00478 if (j3 > 0) {
00479
00480
00481
00482
00483 i__3 = j3;
00484 for (jj = 1; jj <= i__3; ++jj) {
00485 i__4 = jb;
00486 for (ii = jj; ii <= i__4; ++ii) {
00487 i__5 = ii + jj * 65 - 66;
00488 i__6 = ii - jj + 1 + (jj + j + kv - 1) * ab_dim1;
00489 work13[i__5].r = ab[i__6].r, work13[i__5].i = ab[
00490 i__6].i;
00491
00492 }
00493
00494 }
00495
00496
00497
00498 i__3 = *ldab - 1;
00499 ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3,
00500 &c_b1, &ab[kv + 1 + j * ab_dim1], &i__3, work13, &
00501 c__65);
00502
00503 if (i2 > 0) {
00504
00505
00506
00507 q__1.r = -1.f, q__1.i = -0.f;
00508 i__3 = *ldab - 1;
00509 i__4 = *ldab - 1;
00510 cgemm_("No transpose", "No transpose", &i2, &j3, &jb,
00511 &q__1, &ab[kv + 1 + jb + j * ab_dim1], &i__3,
00512 work13, &c__65, &c_b1, &ab[jb + 1 + (j + kv) *
00513 ab_dim1], &i__4);
00514 }
00515
00516 if (i3 > 0) {
00517
00518
00519
00520 q__1.r = -1.f, q__1.i = -0.f;
00521 i__3 = *ldab - 1;
00522 cgemm_("No transpose", "No transpose", &i3, &j3, &jb,
00523 &q__1, work31, &c__65, work13, &c__65, &c_b1,
00524 &ab[*kl + 1 + (j + kv) * ab_dim1], &i__3);
00525 }
00526
00527
00528
00529 i__3 = j3;
00530 for (jj = 1; jj <= i__3; ++jj) {
00531 i__4 = jb;
00532 for (ii = jj; ii <= i__4; ++ii) {
00533 i__5 = ii - jj + 1 + (jj + j + kv - 1) * ab_dim1;
00534 i__6 = ii + jj * 65 - 66;
00535 ab[i__5].r = work13[i__6].r, ab[i__5].i = work13[
00536 i__6].i;
00537
00538 }
00539
00540 }
00541 }
00542 } else {
00543
00544
00545
00546 i__3 = j + jb - 1;
00547 for (i__ = j; i__ <= i__3; ++i__) {
00548 ipiv[i__] = ipiv[i__] + j - 1;
00549
00550 }
00551 }
00552
00553
00554
00555
00556
00557 i__3 = j;
00558 for (jj = j + jb - 1; jj >= i__3; --jj) {
00559 jp = ipiv[jj] - jj + 1;
00560 if (jp != 1) {
00561
00562
00563
00564 if (jp + jj - 1 < j + *kl) {
00565
00566
00567
00568 i__4 = jj - j;
00569 i__5 = *ldab - 1;
00570 i__6 = *ldab - 1;
00571 cswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
00572 i__5, &ab[kv + jp + jj - j + j * ab_dim1], &
00573 i__6);
00574 } else {
00575
00576
00577
00578 i__4 = jj - j;
00579 i__5 = *ldab - 1;
00580 cswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
00581 i__5, &work31[jp + jj - j - *kl - 1], &c__65);
00582 }
00583 }
00584
00585
00586
00587
00588 i__4 = i3, i__5 = jj - j + 1;
00589 nw = min(i__4,i__5);
00590 if (nw > 0) {
00591 ccopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[
00592 kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1);
00593 }
00594
00595 }
00596
00597 }
00598 }
00599
00600 return 0;
00601
00602
00603
00604 }