Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015
00016 int sgemm_(char *transa, char *transb, integer *m, integer *
00017 n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *
00018 ldb, real *beta, real *c__, integer *ldc)
00019 {
00020
00021 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
00022 i__3;
00023
00024
00025 integer i__, j, l, info;
00026 logical nota, notb;
00027 real temp;
00028 integer ncola;
00029 extern logical lsame_(char *, char *);
00030 integer nrowa, nrowb;
00031 extern int xerbla_(char *, integer *);
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174 a_dim1 = *lda;
00175 a_offset = 1 + a_dim1;
00176 a -= a_offset;
00177 b_dim1 = *ldb;
00178 b_offset = 1 + b_dim1;
00179 b -= b_offset;
00180 c_dim1 = *ldc;
00181 c_offset = 1 + c_dim1;
00182 c__ -= c_offset;
00183
00184
00185 nota = lsame_(transa, "N");
00186 notb = lsame_(transb, "N");
00187 if (nota) {
00188 nrowa = *m;
00189 ncola = *k;
00190 } else {
00191 nrowa = *k;
00192 ncola = *m;
00193 }
00194 if (notb) {
00195 nrowb = *k;
00196 } else {
00197 nrowb = *n;
00198 }
00199
00200
00201
00202 info = 0;
00203 if (! nota && ! lsame_(transa, "C") && ! lsame_(
00204 transa, "T")) {
00205 info = 1;
00206 } else if (! notb && ! lsame_(transb, "C") && !
00207 lsame_(transb, "T")) {
00208 info = 2;
00209 } else if (*m < 0) {
00210 info = 3;
00211 } else if (*n < 0) {
00212 info = 4;
00213 } else if (*k < 0) {
00214 info = 5;
00215 } else if (*lda < max(1,nrowa)) {
00216 info = 8;
00217 } else if (*ldb < max(1,nrowb)) {
00218 info = 10;
00219 } else if (*ldc < max(1,*m)) {
00220 info = 13;
00221 }
00222 if (info != 0) {
00223 xerbla_("SGEMM ", &info);
00224 return 0;
00225 }
00226
00227
00228
00229 if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
00230 return 0;
00231 }
00232
00233
00234
00235 if (*alpha == 0.f) {
00236 if (*beta == 0.f) {
00237 i__1 = *n;
00238 for (j = 1; j <= i__1; ++j) {
00239 i__2 = *m;
00240 for (i__ = 1; i__ <= i__2; ++i__) {
00241 c__[i__ + j * c_dim1] = 0.f;
00242
00243 }
00244
00245 }
00246 } else {
00247 i__1 = *n;
00248 for (j = 1; j <= i__1; ++j) {
00249 i__2 = *m;
00250 for (i__ = 1; i__ <= i__2; ++i__) {
00251 c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
00252
00253 }
00254
00255 }
00256 }
00257 return 0;
00258 }
00259
00260
00261
00262 if (notb) {
00263 if (nota) {
00264
00265
00266
00267 i__1 = *n;
00268 for (j = 1; j <= i__1; ++j) {
00269 if (*beta == 0.f) {
00270 i__2 = *m;
00271 for (i__ = 1; i__ <= i__2; ++i__) {
00272 c__[i__ + j * c_dim1] = 0.f;
00273
00274 }
00275 } else if (*beta != 1.f) {
00276 i__2 = *m;
00277 for (i__ = 1; i__ <= i__2; ++i__) {
00278 c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
00279
00280 }
00281 }
00282 i__2 = *k;
00283 for (l = 1; l <= i__2; ++l) {
00284 if (b[l + j * b_dim1] != 0.f) {
00285 temp = *alpha * b[l + j * b_dim1];
00286 i__3 = *m;
00287 for (i__ = 1; i__ <= i__3; ++i__) {
00288 c__[i__ + j * c_dim1] += temp * a[i__ + l *
00289 a_dim1];
00290
00291 }
00292 }
00293
00294 }
00295
00296 }
00297 } else {
00298
00299
00300
00301 i__1 = *n;
00302 for (j = 1; j <= i__1; ++j) {
00303 i__2 = *m;
00304 for (i__ = 1; i__ <= i__2; ++i__) {
00305 temp = 0.f;
00306 i__3 = *k;
00307 for (l = 1; l <= i__3; ++l) {
00308 temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
00309
00310 }
00311 if (*beta == 0.f) {
00312 c__[i__ + j * c_dim1] = *alpha * temp;
00313 } else {
00314 c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
00315 i__ + j * c_dim1];
00316 }
00317
00318 }
00319
00320 }
00321 }
00322 } else {
00323 if (nota) {
00324
00325
00326
00327 i__1 = *n;
00328 for (j = 1; j <= i__1; ++j) {
00329 if (*beta == 0.f) {
00330 i__2 = *m;
00331 for (i__ = 1; i__ <= i__2; ++i__) {
00332 c__[i__ + j * c_dim1] = 0.f;
00333
00334 }
00335 } else if (*beta != 1.f) {
00336 i__2 = *m;
00337 for (i__ = 1; i__ <= i__2; ++i__) {
00338 c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
00339
00340 }
00341 }
00342 i__2 = *k;
00343 for (l = 1; l <= i__2; ++l) {
00344 if (b[j + l * b_dim1] != 0.f) {
00345 temp = *alpha * b[j + l * b_dim1];
00346 i__3 = *m;
00347 for (i__ = 1; i__ <= i__3; ++i__) {
00348 c__[i__ + j * c_dim1] += temp * a[i__ + l *
00349 a_dim1];
00350
00351 }
00352 }
00353
00354 }
00355
00356 }
00357 } else {
00358
00359
00360
00361 i__1 = *n;
00362 for (j = 1; j <= i__1; ++j) {
00363 i__2 = *m;
00364 for (i__ = 1; i__ <= i__2; ++i__) {
00365 temp = 0.f;
00366 i__3 = *k;
00367 for (l = 1; l <= i__3; ++l) {
00368 temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
00369
00370 }
00371 if (*beta == 0.f) {
00372 c__[i__ + j * c_dim1] = *alpha * temp;
00373 } else {
00374 c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
00375 i__ + j * c_dim1];
00376 }
00377
00378 }
00379
00380 }
00381 }
00382 }
00383
00384 return 0;
00385
00386
00387
00388 }