00001
00002
00003
00004
00005
00006
00007 #define _FLOAT
00008
00009 #include <iostream>
00010
00011 #include <Eigen/Core>
00012 #include "BenchTimer.h"
00013
00014
00015 extern "C" {
00016 #include <cblas.h>
00017 }
00018 #include <string>
00019
00020 #ifdef _FLOAT
00021 typedef float Scalar;
00022 #define CBLAS_GEMM cblas_sgemm
00023 #else
00024 typedef double Scalar;
00025 #define CBLAS_GEMM cblas_dgemm
00026 #endif
00027
00028
00029 typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
00030 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
00031 void check_product(int M, int N, int K);
00032 void check_product(void);
00033
00034 int main(int argc, char *argv[])
00035 {
00036
00037 #ifdef __GNUC__
00038 {
00039 int aux;
00040 asm(
00041 "stmxcsr %[aux] \n\t"
00042 "orl $32832, %[aux] \n\t"
00043 "ldmxcsr %[aux] \n\t"
00044 : : [aux] "m" (aux));
00045 }
00046 #endif
00047
00048 int nbtries=1, nbloops=1, M, N, K;
00049
00050 if (argc==2)
00051 {
00052 if (std::string(argv[1])=="check")
00053 check_product();
00054 else
00055 M = N = K = atoi(argv[1]);
00056 }
00057 else if ((argc==3) && (std::string(argv[1])=="auto"))
00058 {
00059 M = N = K = atoi(argv[2]);
00060 nbloops = 1000000000/(M*M*M);
00061 if (nbloops<1)
00062 nbloops = 1;
00063 nbtries = 6;
00064 }
00065 else if (argc==4)
00066 {
00067 M = N = K = atoi(argv[1]);
00068 nbloops = atoi(argv[2]);
00069 nbtries = atoi(argv[3]);
00070 }
00071 else if (argc==6)
00072 {
00073 M = atoi(argv[1]);
00074 N = atoi(argv[2]);
00075 K = atoi(argv[3]);
00076 nbloops = atoi(argv[4]);
00077 nbtries = atoi(argv[5]);
00078 }
00079 else
00080 {
00081 std::cout << "Usage: " << argv[0] << " size \n";
00082 std::cout << "Usage: " << argv[0] << " auto size\n";
00083 std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
00084 std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
00085 std::cout << "Usage: " << argv[0] << " check\n";
00086 std::cout << "Options:\n";
00087 std::cout << " size unique size of the 2 matrices (integer)\n";
00088 std::cout << " auto automatically set the number of repetitions and tries\n";
00089 std::cout << " nbloops number of times the GEMM routines is executed\n";
00090 std::cout << " nbtries number of times the loop is benched (return the best try)\n";
00091 std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n";
00092 std::cout << " check check eigen product using cblas as a reference\n";
00093 exit(1);
00094 }
00095
00096 double nbmad = double(M) * double(N) * double(K) * double(nbloops);
00097
00098 if (!(std::string(argv[1])=="auto"))
00099 std::cout << M << " x " << N << " x " << K << "\n";
00100
00101 Scalar alpha, beta;
00102 MyMatrix ma(M,K), mb(K,N), mc(M,N);
00103 ma = MyMatrix::Random(M,K);
00104 mb = MyMatrix::Random(K,N);
00105 mc = MyMatrix::Random(M,N);
00106
00107 Eigen::BenchTimer timer;
00108
00109
00110 alpha = 1;
00111 beta = 1;
00112
00113
00114
00115 if (!(std::string(argv[1])=="auto"))
00116 {
00117 timer.reset();
00118 for (uint k=0 ; k<nbtries ; ++k)
00119 {
00120 timer.start();
00121 for (uint j=0 ; j<nbloops ; ++j)
00122 #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
00123 CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
00124 #else
00125 CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
00126 #endif
00127 timer.stop();
00128 }
00129 if (!(std::string(argv[1])=="auto"))
00130 std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
00131 else
00132 std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
00133 }
00134
00135
00136 ma = MyMatrix::Random(M,K);
00137 mb = MyMatrix::Random(K,N);
00138 mc = MyMatrix::Random(M,N);
00139
00140
00141
00142 {
00143 timer.reset();
00144 for (uint k=0 ; k<nbtries ; ++k)
00145 {
00146 timer.start();
00147 bench_eigengemm(mc, ma, mb, nbloops);
00148 timer.stop();
00149 }
00150 if (!(std::string(argv[1])=="auto"))
00151 std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
00152 else
00153 std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
00154 }
00155
00156 std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
00157 std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
00158
00159
00160 return 0;
00161 }
00162
00163 using namespace Eigen;
00164
00165 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
00166 {
00167 for (uint j=0 ; j<nbloops ; ++j)
00168 mc.noalias() += ma * mb;
00169 }
00170
00171 #define MYVERIFY(A,M) if (!(A)) { \
00172 std::cout << "FAIL: " << M << "\n"; \
00173 }
00174 void check_product(int M, int N, int K)
00175 {
00176 MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
00177 ma = MyMatrix::Random(M,K);
00178 mb = MyMatrix::Random(K,N);
00179 maT = ma.transpose();
00180 mbT = mb.transpose();
00181 mc = MyMatrix::Random(M,N);
00182
00183 MyMatrix::Scalar eps = 1e-4;
00184
00185 meigen = mref = mc;
00186 CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
00187 meigen += ma * mb;
00188 MYVERIFY(meigen.isApprox(mref, eps),". * .");
00189
00190 meigen = mref = mc;
00191 CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
00192 meigen += maT.transpose() * mb;
00193 MYVERIFY(meigen.isApprox(mref, eps),"T * .");
00194
00195 meigen = mref = mc;
00196 CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
00197 meigen += (maT.transpose()) * (mbT.transpose());
00198 MYVERIFY(meigen.isApprox(mref, eps),"T * T");
00199
00200 meigen = mref = mc;
00201 CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
00202 meigen += ma * mbT.transpose();
00203 MYVERIFY(meigen.isApprox(mref, eps),". * T");
00204 }
00205
00206 void check_product(void)
00207 {
00208 int M, N, K;
00209 for (uint i=0; i<1000; ++i)
00210 {
00211 M = internal::random<int>(1,64);
00212 N = internal::random<int>(1,768);
00213 K = internal::random<int>(1,768);
00214 M = (0 + M) * 1;
00215 std::cout << M << " x " << N << " x " << K << "\n";
00216 check_product(M, N, K);
00217 }
00218 }
00219