00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015
00016 int cgemm_(char *transa, char *transb, integer *m, integer *
00017 n, integer *k, complex *alpha, complex *a, integer *lda, complex *b,
00018 integer *ldb, complex *beta, complex *c__, integer *ldc)
00019 {
00020
00021 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
00022 i__3, i__4, i__5, i__6;
00023 complex q__1, q__2, q__3, q__4;
00024
00025
00026 void r_cnjg(complex *, complex *);
00027
00028
00029 integer i__, j, l, info;
00030 logical nota, notb;
00031 complex temp;
00032 logical conja, conjb;
00033 integer ncola;
00034 extern logical lsame_(char *, char *);
00035 integer nrowa, nrowb;
00036 extern int xerbla_(char *, integer *);
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181 a_dim1 = *lda;
00182 a_offset = 1 + a_dim1;
00183 a -= a_offset;
00184 b_dim1 = *ldb;
00185 b_offset = 1 + b_dim1;
00186 b -= b_offset;
00187 c_dim1 = *ldc;
00188 c_offset = 1 + c_dim1;
00189 c__ -= c_offset;
00190
00191
00192 nota = lsame_(transa, "N");
00193 notb = lsame_(transb, "N");
00194 conja = lsame_(transa, "C");
00195 conjb = lsame_(transb, "C");
00196 if (nota) {
00197 nrowa = *m;
00198 ncola = *k;
00199 } else {
00200 nrowa = *k;
00201 ncola = *m;
00202 }
00203 if (notb) {
00204 nrowb = *k;
00205 } else {
00206 nrowb = *n;
00207 }
00208
00209
00210
00211 info = 0;
00212 if (! nota && ! conja && ! lsame_(transa, "T")) {
00213 info = 1;
00214 } else if (! notb && ! conjb && ! lsame_(transb, "T")) {
00215 info = 2;
00216 } else if (*m < 0) {
00217 info = 3;
00218 } else if (*n < 0) {
00219 info = 4;
00220 } else if (*k < 0) {
00221 info = 5;
00222 } else if (*lda < max(1,nrowa)) {
00223 info = 8;
00224 } else if (*ldb < max(1,nrowb)) {
00225 info = 10;
00226 } else if (*ldc < max(1,*m)) {
00227 info = 13;
00228 }
00229 if (info != 0) {
00230 xerbla_("CGEMM ", &info);
00231 return 0;
00232 }
00233
00234
00235
00236 if (*m == 0 || *n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0)
00237 && (beta->r == 1.f && beta->i == 0.f)) {
00238 return 0;
00239 }
00240
00241
00242
00243 if (alpha->r == 0.f && alpha->i == 0.f) {
00244 if (beta->r == 0.f && beta->i == 0.f) {
00245 i__1 = *n;
00246 for (j = 1; j <= i__1; ++j) {
00247 i__2 = *m;
00248 for (i__ = 1; i__ <= i__2; ++i__) {
00249 i__3 = i__ + j * c_dim1;
00250 c__[i__3].r = 0.f, c__[i__3].i = 0.f;
00251
00252 }
00253
00254 }
00255 } else {
00256 i__1 = *n;
00257 for (j = 1; j <= i__1; ++j) {
00258 i__2 = *m;
00259 for (i__ = 1; i__ <= i__2; ++i__) {
00260 i__3 = i__ + j * c_dim1;
00261 i__4 = i__ + j * c_dim1;
00262 q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i,
00263 q__1.i = beta->r * c__[i__4].i + beta->i * c__[
00264 i__4].r;
00265 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00266
00267 }
00268
00269 }
00270 }
00271 return 0;
00272 }
00273
00274
00275
00276 if (notb) {
00277 if (nota) {
00278
00279
00280
00281 i__1 = *n;
00282 for (j = 1; j <= i__1; ++j) {
00283 if (beta->r == 0.f && beta->i == 0.f) {
00284 i__2 = *m;
00285 for (i__ = 1; i__ <= i__2; ++i__) {
00286 i__3 = i__ + j * c_dim1;
00287 c__[i__3].r = 0.f, c__[i__3].i = 0.f;
00288
00289 }
00290 } else if (beta->r != 1.f || beta->i != 0.f) {
00291 i__2 = *m;
00292 for (i__ = 1; i__ <= i__2; ++i__) {
00293 i__3 = i__ + j * c_dim1;
00294 i__4 = i__ + j * c_dim1;
00295 q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
00296 .i, q__1.i = beta->r * c__[i__4].i + beta->i *
00297 c__[i__4].r;
00298 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00299
00300 }
00301 }
00302 i__2 = *k;
00303 for (l = 1; l <= i__2; ++l) {
00304 i__3 = l + j * b_dim1;
00305 if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
00306 i__3 = l + j * b_dim1;
00307 q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i,
00308 q__1.i = alpha->r * b[i__3].i + alpha->i * b[
00309 i__3].r;
00310 temp.r = q__1.r, temp.i = q__1.i;
00311 i__3 = *m;
00312 for (i__ = 1; i__ <= i__3; ++i__) {
00313 i__4 = i__ + j * c_dim1;
00314 i__5 = i__ + j * c_dim1;
00315 i__6 = i__ + l * a_dim1;
00316 q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
00317 q__2.i = temp.r * a[i__6].i + temp.i * a[
00318 i__6].r;
00319 q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
00320 .i + q__2.i;
00321 c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
00322
00323 }
00324 }
00325
00326 }
00327
00328 }
00329 } else if (conja) {
00330
00331
00332
00333 i__1 = *n;
00334 for (j = 1; j <= i__1; ++j) {
00335 i__2 = *m;
00336 for (i__ = 1; i__ <= i__2; ++i__) {
00337 temp.r = 0.f, temp.i = 0.f;
00338 i__3 = *k;
00339 for (l = 1; l <= i__3; ++l) {
00340 r_cnjg(&q__3, &a[l + i__ * a_dim1]);
00341 i__4 = l + j * b_dim1;
00342 q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i,
00343 q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
00344 .r;
00345 q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
00346 temp.r = q__1.r, temp.i = q__1.i;
00347
00348 }
00349 if (beta->r == 0.f && beta->i == 0.f) {
00350 i__3 = i__ + j * c_dim1;
00351 q__1.r = alpha->r * temp.r - alpha->i * temp.i,
00352 q__1.i = alpha->r * temp.i + alpha->i *
00353 temp.r;
00354 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00355 } else {
00356 i__3 = i__ + j * c_dim1;
00357 q__2.r = alpha->r * temp.r - alpha->i * temp.i,
00358 q__2.i = alpha->r * temp.i + alpha->i *
00359 temp.r;
00360 i__4 = i__ + j * c_dim1;
00361 q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
00362 .i, q__3.i = beta->r * c__[i__4].i + beta->i *
00363 c__[i__4].r;
00364 q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
00365 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00366 }
00367
00368 }
00369
00370 }
00371 } else {
00372
00373
00374
00375 i__1 = *n;
00376 for (j = 1; j <= i__1; ++j) {
00377 i__2 = *m;
00378 for (i__ = 1; i__ <= i__2; ++i__) {
00379 temp.r = 0.f, temp.i = 0.f;
00380 i__3 = *k;
00381 for (l = 1; l <= i__3; ++l) {
00382 i__4 = l + i__ * a_dim1;
00383 i__5 = l + j * b_dim1;
00384 q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
00385 .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
00386 .i * b[i__5].r;
00387 q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
00388 temp.r = q__1.r, temp.i = q__1.i;
00389
00390 }
00391 if (beta->r == 0.f && beta->i == 0.f) {
00392 i__3 = i__ + j * c_dim1;
00393 q__1.r = alpha->r * temp.r - alpha->i * temp.i,
00394 q__1.i = alpha->r * temp.i + alpha->i *
00395 temp.r;
00396 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00397 } else {
00398 i__3 = i__ + j * c_dim1;
00399 q__2.r = alpha->r * temp.r - alpha->i * temp.i,
00400 q__2.i = alpha->r * temp.i + alpha->i *
00401 temp.r;
00402 i__4 = i__ + j * c_dim1;
00403 q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
00404 .i, q__3.i = beta->r * c__[i__4].i + beta->i *
00405 c__[i__4].r;
00406 q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
00407 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00408 }
00409
00410 }
00411
00412 }
00413 }
00414 } else if (nota) {
00415 if (conjb) {
00416
00417
00418
00419 i__1 = *n;
00420 for (j = 1; j <= i__1; ++j) {
00421 if (beta->r == 0.f && beta->i == 0.f) {
00422 i__2 = *m;
00423 for (i__ = 1; i__ <= i__2; ++i__) {
00424 i__3 = i__ + j * c_dim1;
00425 c__[i__3].r = 0.f, c__[i__3].i = 0.f;
00426
00427 }
00428 } else if (beta->r != 1.f || beta->i != 0.f) {
00429 i__2 = *m;
00430 for (i__ = 1; i__ <= i__2; ++i__) {
00431 i__3 = i__ + j * c_dim1;
00432 i__4 = i__ + j * c_dim1;
00433 q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
00434 .i, q__1.i = beta->r * c__[i__4].i + beta->i *
00435 c__[i__4].r;
00436 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00437
00438 }
00439 }
00440 i__2 = *k;
00441 for (l = 1; l <= i__2; ++l) {
00442 i__3 = j + l * b_dim1;
00443 if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
00444 r_cnjg(&q__2, &b[j + l * b_dim1]);
00445 q__1.r = alpha->r * q__2.r - alpha->i * q__2.i,
00446 q__1.i = alpha->r * q__2.i + alpha->i *
00447 q__2.r;
00448 temp.r = q__1.r, temp.i = q__1.i;
00449 i__3 = *m;
00450 for (i__ = 1; i__ <= i__3; ++i__) {
00451 i__4 = i__ + j * c_dim1;
00452 i__5 = i__ + j * c_dim1;
00453 i__6 = i__ + l * a_dim1;
00454 q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
00455 q__2.i = temp.r * a[i__6].i + temp.i * a[
00456 i__6].r;
00457 q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
00458 .i + q__2.i;
00459 c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
00460
00461 }
00462 }
00463
00464 }
00465
00466 }
00467 } else {
00468
00469
00470
00471 i__1 = *n;
00472 for (j = 1; j <= i__1; ++j) {
00473 if (beta->r == 0.f && beta->i == 0.f) {
00474 i__2 = *m;
00475 for (i__ = 1; i__ <= i__2; ++i__) {
00476 i__3 = i__ + j * c_dim1;
00477 c__[i__3].r = 0.f, c__[i__3].i = 0.f;
00478
00479 }
00480 } else if (beta->r != 1.f || beta->i != 0.f) {
00481 i__2 = *m;
00482 for (i__ = 1; i__ <= i__2; ++i__) {
00483 i__3 = i__ + j * c_dim1;
00484 i__4 = i__ + j * c_dim1;
00485 q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
00486 .i, q__1.i = beta->r * c__[i__4].i + beta->i *
00487 c__[i__4].r;
00488 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00489
00490 }
00491 }
00492 i__2 = *k;
00493 for (l = 1; l <= i__2; ++l) {
00494 i__3 = j + l * b_dim1;
00495 if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
00496 i__3 = j + l * b_dim1;
00497 q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i,
00498 q__1.i = alpha->r * b[i__3].i + alpha->i * b[
00499 i__3].r;
00500 temp.r = q__1.r, temp.i = q__1.i;
00501 i__3 = *m;
00502 for (i__ = 1; i__ <= i__3; ++i__) {
00503 i__4 = i__ + j * c_dim1;
00504 i__5 = i__ + j * c_dim1;
00505 i__6 = i__ + l * a_dim1;
00506 q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
00507 q__2.i = temp.r * a[i__6].i + temp.i * a[
00508 i__6].r;
00509 q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
00510 .i + q__2.i;
00511 c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
00512
00513 }
00514 }
00515
00516 }
00517
00518 }
00519 }
00520 } else if (conja) {
00521 if (conjb) {
00522
00523
00524
00525 i__1 = *n;
00526 for (j = 1; j <= i__1; ++j) {
00527 i__2 = *m;
00528 for (i__ = 1; i__ <= i__2; ++i__) {
00529 temp.r = 0.f, temp.i = 0.f;
00530 i__3 = *k;
00531 for (l = 1; l <= i__3; ++l) {
00532 r_cnjg(&q__3, &a[l + i__ * a_dim1]);
00533 r_cnjg(&q__4, &b[j + l * b_dim1]);
00534 q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i =
00535 q__3.r * q__4.i + q__3.i * q__4.r;
00536 q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
00537 temp.r = q__1.r, temp.i = q__1.i;
00538
00539 }
00540 if (beta->r == 0.f && beta->i == 0.f) {
00541 i__3 = i__ + j * c_dim1;
00542 q__1.r = alpha->r * temp.r - alpha->i * temp.i,
00543 q__1.i = alpha->r * temp.i + alpha->i *
00544 temp.r;
00545 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00546 } else {
00547 i__3 = i__ + j * c_dim1;
00548 q__2.r = alpha->r * temp.r - alpha->i * temp.i,
00549 q__2.i = alpha->r * temp.i + alpha->i *
00550 temp.r;
00551 i__4 = i__ + j * c_dim1;
00552 q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
00553 .i, q__3.i = beta->r * c__[i__4].i + beta->i *
00554 c__[i__4].r;
00555 q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
00556 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00557 }
00558
00559 }
00560
00561 }
00562 } else {
00563
00564
00565
00566 i__1 = *n;
00567 for (j = 1; j <= i__1; ++j) {
00568 i__2 = *m;
00569 for (i__ = 1; i__ <= i__2; ++i__) {
00570 temp.r = 0.f, temp.i = 0.f;
00571 i__3 = *k;
00572 for (l = 1; l <= i__3; ++l) {
00573 r_cnjg(&q__3, &a[l + i__ * a_dim1]);
00574 i__4 = j + l * b_dim1;
00575 q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i,
00576 q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
00577 .r;
00578 q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
00579 temp.r = q__1.r, temp.i = q__1.i;
00580
00581 }
00582 if (beta->r == 0.f && beta->i == 0.f) {
00583 i__3 = i__ + j * c_dim1;
00584 q__1.r = alpha->r * temp.r - alpha->i * temp.i,
00585 q__1.i = alpha->r * temp.i + alpha->i *
00586 temp.r;
00587 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00588 } else {
00589 i__3 = i__ + j * c_dim1;
00590 q__2.r = alpha->r * temp.r - alpha->i * temp.i,
00591 q__2.i = alpha->r * temp.i + alpha->i *
00592 temp.r;
00593 i__4 = i__ + j * c_dim1;
00594 q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
00595 .i, q__3.i = beta->r * c__[i__4].i + beta->i *
00596 c__[i__4].r;
00597 q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
00598 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00599 }
00600
00601 }
00602
00603 }
00604 }
00605 } else {
00606 if (conjb) {
00607
00608
00609
00610 i__1 = *n;
00611 for (j = 1; j <= i__1; ++j) {
00612 i__2 = *m;
00613 for (i__ = 1; i__ <= i__2; ++i__) {
00614 temp.r = 0.f, temp.i = 0.f;
00615 i__3 = *k;
00616 for (l = 1; l <= i__3; ++l) {
00617 i__4 = l + i__ * a_dim1;
00618 r_cnjg(&q__3, &b[j + l * b_dim1]);
00619 q__2.r = a[i__4].r * q__3.r - a[i__4].i * q__3.i,
00620 q__2.i = a[i__4].r * q__3.i + a[i__4].i *
00621 q__3.r;
00622 q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
00623 temp.r = q__1.r, temp.i = q__1.i;
00624
00625 }
00626 if (beta->r == 0.f && beta->i == 0.f) {
00627 i__3 = i__ + j * c_dim1;
00628 q__1.r = alpha->r * temp.r - alpha->i * temp.i,
00629 q__1.i = alpha->r * temp.i + alpha->i *
00630 temp.r;
00631 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00632 } else {
00633 i__3 = i__ + j * c_dim1;
00634 q__2.r = alpha->r * temp.r - alpha->i * temp.i,
00635 q__2.i = alpha->r * temp.i + alpha->i *
00636 temp.r;
00637 i__4 = i__ + j * c_dim1;
00638 q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
00639 .i, q__3.i = beta->r * c__[i__4].i + beta->i *
00640 c__[i__4].r;
00641 q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
00642 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00643 }
00644
00645 }
00646
00647 }
00648 } else {
00649
00650
00651
00652 i__1 = *n;
00653 for (j = 1; j <= i__1; ++j) {
00654 i__2 = *m;
00655 for (i__ = 1; i__ <= i__2; ++i__) {
00656 temp.r = 0.f, temp.i = 0.f;
00657 i__3 = *k;
00658 for (l = 1; l <= i__3; ++l) {
00659 i__4 = l + i__ * a_dim1;
00660 i__5 = j + l * b_dim1;
00661 q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
00662 .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
00663 .i * b[i__5].r;
00664 q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
00665 temp.r = q__1.r, temp.i = q__1.i;
00666
00667 }
00668 if (beta->r == 0.f && beta->i == 0.f) {
00669 i__3 = i__ + j * c_dim1;
00670 q__1.r = alpha->r * temp.r - alpha->i * temp.i,
00671 q__1.i = alpha->r * temp.i + alpha->i *
00672 temp.r;
00673 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00674 } else {
00675 i__3 = i__ + j * c_dim1;
00676 q__2.r = alpha->r * temp.r - alpha->i * temp.i,
00677 q__2.i = alpha->r * temp.i + alpha->i *
00678 temp.r;
00679 i__4 = i__ + j * c_dim1;
00680 q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
00681 .i, q__3.i = beta->r * c__[i__4].i + beta->i *
00682 c__[i__4].r;
00683 q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
00684 c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
00685 }
00686
00687 }
00688
00689 }
00690 }
00691 }
00692
00693 return 0;
00694
00695
00696
00697 }