benchBlasGemm.cpp
Go to the documentation of this file.
00001 // g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
00002 // possible options:
00003 //    -DEIGEN_DONT_VECTORIZE
00004 //    -msse2
00005 
00006 // #define EIGEN_DEFAULT_TO_ROW_MAJOR
00007 #define _FLOAT
00008 
00009 #include <iostream>
00010 
00011 #include <Eigen/Core>
00012 #include "BenchTimer.h"
00013 
00014 // include the BLAS headers
00015 extern "C" {
00016 #include <cblas.h>
00017 }
00018 #include <string>
00019 
00020 #ifdef _FLOAT
00021 typedef float Scalar;
00022 #define CBLAS_GEMM cblas_sgemm
00023 #else
00024 typedef double Scalar;
00025 #define CBLAS_GEMM cblas_dgemm
00026 #endif
00027 
00028 
00029 typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
00030 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
00031 void check_product(int M, int N, int K);
00032 void check_product(void);
00033 
00034 int main(int argc, char *argv[])
00035 {
00036   // disable SSE exceptions
00037   #ifdef __GNUC__
00038   {
00039     int aux;
00040     asm(
00041     "stmxcsr   %[aux]           \n\t"
00042     "orl       $32832, %[aux]   \n\t"
00043     "ldmxcsr   %[aux]           \n\t"
00044     : : [aux] "m" (aux));
00045   }
00046   #endif
00047 
00048   int nbtries=1, nbloops=1, M, N, K;
00049 
00050   if (argc==2)
00051   {
00052     if (std::string(argv[1])=="check")
00053       check_product();
00054     else
00055       M = N = K = atoi(argv[1]);
00056   }
00057   else if ((argc==3) && (std::string(argv[1])=="auto"))
00058   {
00059     M = N = K = atoi(argv[2]);
00060     nbloops = 1000000000/(M*M*M);
00061     if (nbloops<1)
00062       nbloops = 1;
00063     nbtries = 6;
00064   }
00065   else if (argc==4)
00066   {
00067     M = N = K = atoi(argv[1]);
00068     nbloops = atoi(argv[2]);
00069     nbtries = atoi(argv[3]);
00070   }
00071   else if (argc==6)
00072   {
00073     M = atoi(argv[1]);
00074     N = atoi(argv[2]);
00075     K = atoi(argv[3]);
00076     nbloops = atoi(argv[4]);
00077     nbtries = atoi(argv[5]);
00078   }
00079   else
00080   {
00081     std::cout << "Usage: " << argv[0] << " size  \n";
00082     std::cout << "Usage: " << argv[0] << " auto size\n";
00083     std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
00084     std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
00085     std::cout << "Usage: " << argv[0] << " check\n";
00086     std::cout << "Options:\n";
00087     std::cout << "    size       unique size of the 2 matrices (integer)\n";
00088     std::cout << "    auto       automatically set the number of repetitions and tries\n";
00089     std::cout << "    nbloops    number of times the GEMM routines is executed\n";
00090     std::cout << "    nbtries    number of times the loop is benched (return the best try)\n";
00091     std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n";
00092     std::cout << "    check      check eigen product using cblas as a reference\n";
00093     exit(1);
00094   }
00095 
00096   double nbmad = double(M) * double(N) * double(K) * double(nbloops);
00097 
00098   if (!(std::string(argv[1])=="auto"))
00099     std::cout << M << " x " << N << " x " << K << "\n";
00100 
00101   Scalar alpha, beta;
00102   MyMatrix ma(M,K), mb(K,N), mc(M,N);
00103   ma = MyMatrix::Random(M,K);
00104   mb = MyMatrix::Random(K,N);
00105   mc = MyMatrix::Random(M,N);
00106 
00107   Eigen::BenchTimer timer;
00108 
00109   // we simply compute c += a*b, so:
00110   alpha = 1;
00111   beta = 1;
00112 
00113   // bench cblas
00114   // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
00115   if (!(std::string(argv[1])=="auto"))
00116   {
00117     timer.reset();
00118     for (uint k=0 ; k<nbtries ; ++k)
00119     {
00120         timer.start();
00121         for (uint j=0 ; j<nbloops ; ++j)
00122               #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
00123               CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
00124               #else
00125               CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
00126               #endif
00127         timer.stop();
00128     }
00129     if (!(std::string(argv[1])=="auto"))
00130       std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
00131     else
00132         std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
00133   }
00134 
00135   // clear
00136   ma = MyMatrix::Random(M,K);
00137   mb = MyMatrix::Random(K,N);
00138   mc = MyMatrix::Random(M,N);
00139 
00140   // eigen
00141 //   if (!(std::string(argv[1])=="auto"))
00142   {
00143       timer.reset();
00144       for (uint k=0 ; k<nbtries ; ++k)
00145       {
00146           timer.start();
00147           bench_eigengemm(mc, ma, mb, nbloops);
00148           timer.stop();
00149       }
00150       if (!(std::string(argv[1])=="auto"))
00151         std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
00152       else
00153         std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
00154   }
00155 
00156   std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
00157   std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
00158   
00159 
00160   return 0;
00161 }
00162 
00163 using namespace Eigen;
00164 
00165 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
00166 {
00167   for (uint j=0 ; j<nbloops ; ++j)
00168       mc.noalias() += ma * mb;
00169 }
00170 
00171 #define MYVERIFY(A,M) if (!(A)) { \
00172     std::cout << "FAIL: " << M << "\n"; \
00173   }
00174 void check_product(int M, int N, int K)
00175 {
00176   MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
00177   ma = MyMatrix::Random(M,K);
00178   mb = MyMatrix::Random(K,N);
00179   maT = ma.transpose();
00180   mbT = mb.transpose();
00181   mc = MyMatrix::Random(M,N);
00182 
00183   MyMatrix::Scalar eps = 1e-4;
00184 
00185   meigen = mref = mc;
00186   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
00187   meigen += ma * mb;
00188   MYVERIFY(meigen.isApprox(mref, eps),". * .");
00189 
00190   meigen = mref = mc;
00191   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
00192   meigen += maT.transpose() * mb;
00193   MYVERIFY(meigen.isApprox(mref, eps),"T * .");
00194 
00195   meigen = mref = mc;
00196   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
00197   meigen += (maT.transpose()) * (mbT.transpose());
00198   MYVERIFY(meigen.isApprox(mref, eps),"T * T");
00199 
00200   meigen = mref = mc;
00201   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
00202   meigen += ma * mbT.transpose();
00203   MYVERIFY(meigen.isApprox(mref, eps),". * T");
00204 }
00205 
00206 void check_product(void)
00207 {
00208   int M, N, K;
00209   for (uint i=0; i<1000; ++i)
00210   {
00211     M = internal::random<int>(1,64);
00212     N = internal::random<int>(1,768);
00213     K = internal::random<int>(1,768);
00214     M = (0 + M) * 1;
00215     std::cout << M << " x " << N << " x " << K << "\n";
00216     check_product(M, N, K);
00217   }
00218 }
00219 


libicr
Author(s): Robert Krug
autogenerated on Mon Jan 6 2014 11:32:30