00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015
00016
00017
00018 static integer c__2 = 2; /* first argument of the ilaenv_ call that sets mb */
00019 static integer c_n1 = -1; /* last two arguments of both ilaenv_ calls */
00020 static integer c__5 = 5; /* first argument of the ilaenv_ call that sets nb */
00021 static real c_b14 = 0.f; /* both fill values handed to slaset_ when C and F are zeroed */
00022 static integer c__1 = 1; /* last argument of every sscal_ call */
00023 static real c_b51 = -1.f; /* sixth argument (scalar multiplier) of the subtracting sgemm_ updates */
00024 static real c_b52 = 1.f; /* scalar multiplier / accumulate factor in the sgemm_ calls */
00025
00026 int stgsyl_(char *trans, integer *ijob, integer *m, integer *
00027 n, real *a, integer *lda, real *b, integer *ldb, real *c__, integer *
00028 ldc, real *d__, integer *ldd, real *e, integer *lde, real *f, integer
00029 *ldf, real *scale, real *dif, real *work, integer *lwork, integer *
00030 iwork, integer *info)
00031 {
00032 /* Leading dimensions and index offsets of the six matrices, plus loop-bound temporaries. */
00033 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, d_dim1,
00034 d_offset, e_dim1, e_offset, f_dim1, f_offset, i__1, i__2, i__3,
00035 i__4;
00036
00037 /* Block-scope declaration of sqrt, used for the dif formulas below. */
00038 double sqrt(doublereal);
00039
00040 /* Local variables and the external routines called by this function. */
00041 integer i__, j, k, p, q, ie, je, mb, nb, is, js, pq;
00042 real dsum;
00043 integer ppqq;
00044 extern logical lsame_(char *, char *);
00045 integer ifunc;
00046 extern int sscal_(integer *, real *, real *, integer *);
00047 integer linfo;
00048 extern int sgemm_(char *, char *, integer *, integer *,
00049 integer *, real *, real *, integer *, real *, integer *, real *,
00050 real *, integer *);
00051 integer lwmin;
00052 real scale2, dscale;
00053 extern int stgsy2_(char *, integer *, integer *, integer
00054 *, real *, integer *, real *, integer *, real *, integer *, real *
00055 , integer *, real *, integer *, real *, integer *, real *, real *,
00056 real *, integer *, integer *, integer *);
00057 real scaloc;
00058 extern int xerbla_(char *, integer *);
00059 extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
00060 integer *, integer *);
00061 extern int slacpy_(char *, integer *, integer *, real *,
00062 integer *, real *, integer *), slaset_(char *, integer *,
00063 integer *, real *, real *, real *, integer *);
00064 integer iround;
00065 logical notran;
00066 integer isolve;
00067 logical lquery;
00068
00069 /* STGSYL: blocked driver around stgsy2_ (f2c-style C, every argument is a pointer). */
00070
00071 /* Purpose (as visible in this listing) */
00072 /* ==================================== */
00073 /* Validates the arguments, then updates C and F in place using the */
00074 /* coefficient pairs (A, D) and (B, E). Either the whole m-by-n problem is */
00075 /* handed to stgsy2_, or A/D and B/E are cut into diagonal blocks, stgsy2_ */
00076 /* is called per block pair, and the rest of C and F is updated with sgemm_. */
00077 /* NOTE(review): the LAPACK reference describes STGSYL as the generalized */
00078 /* Sylvester solver (A*R - L*B = scale*C, D*R - L*E = scale*F); the exact */
00079 /* equations are not visible here - confirm against the reference. */
00080
00081 /* Arguments */
00082 /* ========= */
00083 /* trans    "N" or "T" (tested with lsame_); anything else gives info = -1. */
00084 /* ijob     Checked only when trans = "N"; must be in 0..4, else info = -2. */
00085 /*          0    : one round with ifunc = 0. */
00086 /*          1, 2 : two rounds. Round 1 uses ifunc = 0 and its C, F and scale */
00087 /*                 are saved (C and F in work); round 2 reruns with */
00088 /*                 ifunc = ijob on zeroed C and F, then the saved results */
00089 /*                 are copied back. */
00090 /*          3, 4 : C and F are zeroed and one round runs with ifunc = ijob - 2. */
00091 /* m, n     Problem dimensions; m <= 0 gives info = -3, n <= 0 gives info = -4. */
00092 /* a, lda   Matrix A and its leading dimension; lda < max(1,m) gives info = -6. */
00093 /* b, ldb   Matrix B and its leading dimension; ldb < max(1,n) gives info = -8. */
00094 /* c__, ldc Matrix C, updated in place; ldc < max(1,m) gives info = -10. */
00095 /* d__, ldd Matrix D and its leading dimension; ldd < max(1,m) gives info = -12. */
00096 /* e, lde   Matrix E and its leading dimension; lde < max(1,n) gives info = -14. */
00097 /* f, ldf   Matrix F, updated in place; ldf < max(1,m) gives info = -16. */
00098 /* scale    Output. Unblocked path: passed straight to stgsy2_. Blocked path: */
00099 /*          starts at 1 and is multiplied by each scaloc != 1 from stgsy2_. */
00100 /* dif      Output, written only when dscale != 0 after a round: */
00101 /*          sqrt(2*m*n) / (dscale * sqrt(dsum)) for ijob = 1 or 3, */
00102 /*          sqrt(pq) / (dscale * sqrt(dsum)) otherwise. */
00103 /* work     Real workspace. Its first element receives lwmin. For ijob = 1 */
00104 /*          or 2 it holds the saved copies of C (first m*n entries) and F. */
00105 /* lwork    Length of work. lwmin = max(1, 2*m*n) when trans = "N" and ijob */
00106 /*          is 1 or 2, otherwise 1. lwork < lwmin gives info = -20, except */
00107 /*          lwork = -1, which is a workspace query: lwmin is stored and the */
00108 /*          routine returns without solving. */
00109 /* iwork    Integer workspace. The blocked path stores the row-block */
00110 /*          boundaries, then the column-block boundaries, and passes the */
00111 /*          remainder (from iwork[q + 2]) to stgsy2_. The required length is */
00112 /*          not stated in this listing. */
00113 /* info     0 on success; -k when argument k is invalid (xerbla_ is called */
00114 /*          with k); a positive value is whatever stgsy2_ reported. */
00115
00116 /* Return value: always 0; status is reported through info. */
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268 /* Parameter adjustments: shift each pointer so the arrays are indexed from 1, */
00269 /* i.e. x[i + j * x_dim1] addresses row i, column j, and work[1] / iwork[1] are the first elements. */
00270 a_dim1 = *lda;
00271 a_offset = 1 + a_dim1;
00272 a -= a_offset;
00273 b_dim1 = *ldb;
00274 b_offset = 1 + b_dim1;
00275 b -= b_offset;
00276 c_dim1 = *ldc;
00277 c_offset = 1 + c_dim1;
00278 c__ -= c_offset;
00279 d_dim1 = *ldd;
00280 d_offset = 1 + d_dim1;
00281 d__ -= d_offset;
00282 e_dim1 = *lde;
00283 e_offset = 1 + e_dim1;
00284 e -= e_offset;
00285 f_dim1 = *ldf;
00286 f_offset = 1 + f_dim1;
00287 f -= f_offset;
00288 --work;
00289 --iwork;
00290
00291 /* Decode and test the input parameters; the first failing check decides info. */
00292 *info = 0;
00293 notran = lsame_(trans, "N");
00294 lquery = *lwork == -1;
00295
00296 if (! notran && ! lsame_(trans, "T")) {
00297 *info = -1;
00298 } else if (notran) {
00299 if (*ijob < 0 || *ijob > 4) {
00300 *info = -2;
00301 }
00302 }
00303 if (*info == 0) {
00304 if (*m <= 0) {
00305 *info = -3;
00306 } else if (*n <= 0) {
00307 *info = -4;
00308 } else if (*lda < max(1,*m)) {
00309 *info = -6;
00310 } else if (*ldb < max(1,*n)) {
00311 *info = -8;
00312 } else if (*ldc < max(1,*m)) {
00313 *info = -10;
00314 } else if (*ldd < max(1,*m)) {
00315 *info = -12;
00316 } else if (*lde < max(1,*n)) {
00317 *info = -14;
00318 } else if (*ldf < max(1,*m)) {
00319 *info = -16;
00320 }
00321 }
00322 /* Work out the minimum workspace and report it in work[1], also for a query. */
00323 if (*info == 0) {
00324 if (notran) {
00325 if (*ijob == 1 || *ijob == 2) {
00326 /* lwmin = max(1, 2*m*n): room for the saved copies of C and F used between the two rounds. */
00327 i__1 = 1, i__2 = (*m << 1) * *n;
00328 lwmin = max(i__1,i__2);
00329 } else {
00330 lwmin = 1;
00331 }
00332 } else {
00333 lwmin = 1;
00334 }
00335 work[1] = (real) lwmin;
00336
00337 if (*lwork < lwmin && ! lquery) {
00338 *info = -20;
00339 }
00340 }
00341 /* Report an invalid argument through xerbla_, or stop here for a workspace query. */
00342 if (*info != 0) {
00343 i__1 = -(*info);
00344 xerbla_("STGSYL", &i__1);
00345 return 0;
00346 } else if (lquery) {
00347 return 0;
00348 }
00349
00350 /* Quick return for an empty problem. */
00351 /* NOTE(review): unreachable as written - m <= 0 and n <= 0 were already rejected above with info = -3 / -4. */
00352 if (*m == 0 || *n == 0) {
00353 *scale = 1.f;
00354 if (notran) {
00355 if (*ijob != 0) {
00356 *dif = 0.f;
00357 }
00358 }
00359 return 0;
00360 }
00361
00362 /* Ask ilaenv_ for the block sizes: mb steps through the rows of A, nb through the columns of B. */
00363
00364 mb = ilaenv_(&c__2, "STGSYL", trans, m, n, &c_n1, &c_n1);
00365 nb = ilaenv_(&c__5, "STGSYL", trans, m, n, &c_n1, &c_n1);
00366 /* Choose the number of rounds (isolve) and the ifunc value handed to stgsy2_ from ijob. */
00367 isolve = 1;
00368 ifunc = 0;
00369 if (notran) {
00370 if (*ijob >= 3) {
00371 ifunc = *ijob - 2;
00372 slaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc)
00373 ;
00374 slaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf);
00375 } else if (*ijob >= 1 && notran) {
00376 isolve = 2;
00377 }
00378 }
00379 /* Block sizes that are both <= 1, or that cover the whole problem, select the unblocked path. */
00380 if (mb <= 1 && nb <= 1 || mb >= *m && nb >= *n) {
00381
00382 i__1 = isolve;
00383 for (iround = 1; iround <= i__1; ++iround) {
00384
00385 /* One stgsy2_ call on the full m-by-n problem; iwork is passed whole and info is set directly. */
00386
00387 dscale = 0.f;
00388 dsum = 1.f;
00389 pq = 0;
00390 stgsy2_(trans, &ifunc, m, n, &a[a_offset], lda, &b[b_offset], ldb,
00391 &c__[c_offset], ldc, &d__[d_offset], ldd, &e[e_offset],
00392 lde, &f[f_offset], ldf, scale, &dsum, &dscale, &iwork[1],
00393 &pq, info);
00394 if (dscale != 0.f) {
00395 if (*ijob == 1 || *ijob == 3) {
00396 *dif = sqrt((real) ((*m << 1) * *n)) / (dscale * sqrt(
00397 dsum));
00398 } else {
00399 *dif = sqrt((real) pq) / (dscale * sqrt(dsum));
00400 }
00401 }
00402 /* Round 1 of 2: save C, F and scale, switch ifunc to ijob, zero C and F. Round 2: copy the saved values back. */
00403 if (isolve == 2 && iround == 1) {
00404 if (notran) {
00405 ifunc = *ijob;
00406 }
00407 scale2 = *scale;
00408 slacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m);
00409 slacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m);
00410 slaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc);
00411 slaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf);
00412 } else if (isolve == 2 && iround == 2) {
00413 slacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc);
00414 slacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf);
00415 *scale = scale2;
00416 }
00417
00418 }
00419
00420 return 0;
00421 }
00422
00423 /* Blocked path. Partition the rows: iwork[1..p] receive the first row of each block, stepping by mb. */
00424 /* If A(i, i-1) is nonzero the boundary moves one row further (presumably to keep a 2-by-2 diagonal block whole - confirm). */
00425 p = 0;
00426 i__ = 1;
00427 L40:
00428 if (i__ > *m) {
00429 goto L50;
00430 }
00431 ++p;
00432 iwork[p] = i__;
00433 i__ += mb;
00434 if (i__ >= *m) {
00435 goto L50;
00436 }
00437 if (a[i__ + (i__ - 1) * a_dim1] != 0.f) {
00438 ++i__;
00439 }
00440 goto L40;
00441 L50:
00442 /* Terminate the row list with m + 1 and drop the last block if it would be empty. */
00443 iwork[p + 1] = *m + 1;
00444 if (iwork[p] == iwork[p + 1]) {
00445 --p;
00446 }
00447
00448 /* Partition the columns the same way, using nb and B(j, j-1); starts go to iwork[p+2..q]. */
00449
00450 q = p + 1;
00451 j = 1;
00452 L60:
00453 if (j > *n) {
00454 goto L70;
00455 }
00456 ++q;
00457 iwork[q] = j;
00458 j += nb;
00459 if (j >= *n) {
00460 goto L70;
00461 }
00462 if (b[j + (j - 1) * b_dim1] != 0.f) {
00463 ++j;
00464 }
00465 goto L60;
00466 L70:
00467 /* Terminate the column list with n + 1 and drop the last block if it would be empty. */
00468 iwork[q + 1] = *n + 1;
00469 if (iwork[q] == iwork[q + 1]) {
00470 --q;
00471 }
00472
00473 if (notran) {
00474 /* trans = "N": run isolve rounds over all block pairs. */
00475 i__1 = isolve;
00476 for (iround = 1; iround <= i__1; ++iround) {
00477
00478 /* Column blocks are visited left to right (j = p+2 .. q) and, for each, */
00479 /* row blocks bottom to top (i = p .. 1). Each pair covers rows is..ie */
00480 /* and columns js..je; mb and nb are overwritten with that block's size. */
00481 /* dsum, dscale and pq accumulate across the stgsy2_ calls of one round. */
00482
00483 dscale = 0.f;
00484 dsum = 1.f;
00485 pq = 0;
00486 *scale = 1.f;
00487 i__2 = q;
00488 for (j = p + 2; j <= i__2; ++j) {
00489 js = iwork[j];
00490 je = iwork[j + 1] - 1;
00491 nb = je - js + 1;
00492 for (i__ = p; i__ >= 1; --i__) {
00493 is = iwork[i__];
00494 ie = iwork[i__ + 1] - 1;
00495 mb = ie - is + 1;
00496 ppqq = 0;
00497 stgsy2_(trans, &ifunc, &mb, &nb, &a[is + is * a_dim1],
00498 lda, &b[js + js * b_dim1], ldb, &c__[is + js *
00499 c_dim1], ldc, &d__[is + is * d_dim1], ldd, &e[js
00500 + js * e_dim1], lde, &f[is + js * f_dim1], ldf, &
00501 scaloc, &dsum, &dscale, &iwork[q + 2], &ppqq, &
00502 linfo);
00503 if (linfo > 0) {
00504 *info = linfo;
00505 }
00506 /* If this block was rescaled (scaloc != 1), scale every entry of C and F outside it by the same factor. */
00507 pq += ppqq;
00508 if (scaloc != 1.f) {
00509 i__3 = js - 1;
00510 for (k = 1; k <= i__3; ++k) {
00511 sscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1);
00512 sscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1);
00513
00514 }
00515 i__3 = je;
00516 for (k = js; k <= i__3; ++k) {
00517 i__4 = is - 1;
00518 sscal_(&i__4, &scaloc, &c__[k * c_dim1 + 1], &
00519 c__1);
00520 i__4 = is - 1;
00521 sscal_(&i__4, &scaloc, &f[k * f_dim1 + 1], &c__1);
00522
00523 }
00524 i__3 = je;
00525 for (k = js; k <= i__3; ++k) {
00526 i__4 = *m - ie;
00527 sscal_(&i__4, &scaloc, &c__[ie + 1 + k * c_dim1],
00528 &c__1);
00529 i__4 = *m - ie;
00530 sscal_(&i__4, &scaloc, &f[ie + 1 + k * f_dim1], &
00531 c__1);
00532
00533 }
00534 i__3 = *n;
00535 for (k = je + 1; k <= i__3; ++k) {
00536 sscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1);
00537 sscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1);
00538
00539 }
00540 *scale *= scaloc;
00541 }
00542
00543 /* Feed the finished block into the parts still to be processed: */
00544 /* rows above:    C(1:is-1, js:je) -= A(1:is-1, is:ie) * C(is:ie, js:je), and F likewise with D; */
00545 /* columns right: C(is:ie, je+1:n) += F(is:ie, js:je) * B(js:je, je+1:n), and F likewise with E. */
00546 if (i__ > 1) {
00547 i__3 = is - 1;
00548 sgemm_("N", "N", &i__3, &nb, &mb, &c_b51, &a[is *
00549 a_dim1 + 1], lda, &c__[is + js * c_dim1], ldc,
00550 &c_b52, &c__[js * c_dim1 + 1], ldc);
00551 i__3 = is - 1;
00552 sgemm_("N", "N", &i__3, &nb, &mb, &c_b51, &d__[is *
00553 d_dim1 + 1], ldd, &c__[is + js * c_dim1], ldc,
00554 &c_b52, &f[js * f_dim1 + 1], ldf);
00555 }
00556 if (j < q) {
00557 i__3 = *n - je;
00558 sgemm_("N", "N", &mb, &i__3, &nb, &c_b52, &f[is + js *
00559 f_dim1], ldf, &b[js + (je + 1) * b_dim1],
00560 ldb, &c_b52, &c__[is + (je + 1) * c_dim1],
00561 ldc);
00562 i__3 = *n - je;
00563 sgemm_("N", "N", &mb, &i__3, &nb, &c_b52, &f[is + js *
00564 f_dim1], ldf, &e[js + (je + 1) * e_dim1],
00565 lde, &c_b52, &f[is + (je + 1) * f_dim1], ldf);
00566 }
00567
00568 }
00569
00570 }
00571 if (dscale != 0.f) {
00572 if (*ijob == 1 || *ijob == 3) {
00573 *dif = sqrt((real) ((*m << 1) * *n)) / (dscale * sqrt(
00574 dsum));
00575 } else {
00576 *dif = sqrt((real) pq) / (dscale * sqrt(dsum));
00577 }
00578 }
00579 if (isolve == 2 && iround == 1) {
00580 if (notran) {
00581 ifunc = *ijob;
00582 }
00583 scale2 = *scale;
00584 slacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m);
00585 slacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m);
00586 slaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc);
00587 slaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf);
00588 } else if (isolve == 2 && iround == 2) {
00589 slacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc);
00590 slacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf);
00591 *scale = scale2;
00592 }
00593
00594 }
00595
00596 } else {
00597
00598 /* trans = "T": a single pass (isolve stays 1 and ifunc stays 0 on this path). */
00599 /* Row blocks are visited top to bottom (i = 1 .. p) and, for each, */
00600 /* column blocks right to left (j = q .. p+2). */
00601 /* dsum, dscale and ppqq are passed to stgsy2_ without being initialised here. */
00602
00603 *scale = 1.f;
00604 i__1 = p;
00605 for (i__ = 1; i__ <= i__1; ++i__) {
00606 is = iwork[i__];
00607 ie = iwork[i__ + 1] - 1;
00608 mb = ie - is + 1;
00609 i__2 = p + 2;
00610 for (j = q; j >= i__2; --j) {
00611 js = iwork[j];
00612 je = iwork[j + 1] - 1;
00613 nb = je - js + 1;
00614 stgsy2_(trans, &ifunc, &mb, &nb, &a[is + is * a_dim1], lda, &
00615 b[js + js * b_dim1], ldb, &c__[is + js * c_dim1], ldc,
00616 &d__[is + is * d_dim1], ldd, &e[js + js * e_dim1],
00617 lde, &f[is + js * f_dim1], ldf, &scaloc, &dsum, &
00618 dscale, &iwork[q + 2], &ppqq, &linfo);
00619 if (linfo > 0) {
00620 *info = linfo;
00621 }
00622 if (scaloc != 1.f) {
00623 i__3 = js - 1;
00624 for (k = 1; k <= i__3; ++k) {
00625 sscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1);
00626 sscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1);
00627
00628 }
00629 i__3 = je;
00630 for (k = js; k <= i__3; ++k) {
00631 i__4 = is - 1;
00632 sscal_(&i__4, &scaloc, &c__[k * c_dim1 + 1], &c__1);
00633 i__4 = is - 1;
00634 sscal_(&i__4, &scaloc, &f[k * f_dim1 + 1], &c__1);
00635
00636 }
00637 i__3 = je;
00638 for (k = js; k <= i__3; ++k) {
00639 i__4 = *m - ie;
00640 sscal_(&i__4, &scaloc, &c__[ie + 1 + k * c_dim1], &
00641 c__1);
00642 i__4 = *m - ie;
00643 sscal_(&i__4, &scaloc, &f[ie + 1 + k * f_dim1], &c__1)
00644 ;
00645
00646 }
00647 i__3 = *n;
00648 for (k = je + 1; k <= i__3; ++k) {
00649 sscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1);
00650 sscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1);
00651
00652 }
00653 *scale *= scaloc;
00654 }
00655 /* Feed the finished block into the parts still to be processed: */
00656 /* columns left: F(is:ie, 1:js-1) += C(is:ie, js:je) * B(1:js-1, js:je)^T + F(is:ie, js:je) * E(1:js-1, js:je)^T; */
00657 /* rows below:   C(ie+1:m, js:je) -= A(is:ie, ie+1:m)^T * C(is:ie, js:je) + D(is:ie, ie+1:m)^T * F(is:ie, js:je). */
00658 if (j > p + 2) {
00659 i__3 = js - 1;
00660 sgemm_("N", "T", &mb, &i__3, &nb, &c_b52, &c__[is + js *
00661 c_dim1], ldc, &b[js * b_dim1 + 1], ldb, &c_b52, &
00662 f[is + f_dim1], ldf);
00663 i__3 = js - 1;
00664 sgemm_("N", "T", &mb, &i__3, &nb, &c_b52, &f[is + js *
00665 f_dim1], ldf, &e[js * e_dim1 + 1], lde, &c_b52, &
00666 f[is + f_dim1], ldf);
00667 }
00668 if (i__ < p) {
00669 i__3 = *m - ie;
00670 sgemm_("T", "N", &i__3, &nb, &mb, &c_b51, &a[is + (ie + 1)
00671 * a_dim1], lda, &c__[is + js * c_dim1], ldc, &
00672 c_b52, &c__[ie + 1 + js * c_dim1], ldc);
00673 i__3 = *m - ie;
00674 sgemm_("T", "N", &i__3, &nb, &mb, &c_b51, &d__[is + (ie +
00675 1) * d_dim1], ldd, &f[is + js * f_dim1], ldf, &
00676 c_b52, &c__[ie + 1 + js * c_dim1], ldc);
00677 }
00678
00679 }
00680
00681 }
00682
00683 }
00684 /* Blocked path only: store lwmin in work[1] again, since work may have been used as scratch for the C / F copies. */
00685 work[1] = (real) lwmin;
00686
00687 return 0;
00688
00689 /* End of STGSYL */
00690
00691 }