benchBlasGemm.cpp
Go to the documentation of this file.
1 // g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
2 // possible options:
3 // -DEIGEN_DONT_VECTORIZE
4 // -msse2
5 
6 // #define EIGEN_DEFAULT_TO_ROW_MAJOR
7 #define _FLOAT
8 
9 #include <iostream>
10 
11 #include <Eigen/Core>
12 #include "BenchTimer.h"
13 
14 // include the BLAS headers
15 extern "C" {
16 #include <cblas.h>
17 }
18 #include <string>
19 
20 #ifdef _FLOAT
21 typedef float Scalar;
22 #define CBLAS_GEMM cblas_sgemm
23 #else
24 typedef double Scalar;
25 #define CBLAS_GEMM cblas_dgemm
26 #endif
27 
28 
30 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
31 void check_product(int M, int N, int K);
32 void check_product(void);
33 
34 int main(int argc, char *argv[])
35 {
36  // disable SSE exceptions
37  #ifdef __GNUC__
38  {
39  int aux;
40  asm(
41  "stmxcsr %[aux] \n\t"
42  "orl $32832, %[aux] \n\t"
43  "ldmxcsr %[aux] \n\t"
44  : : [aux] "m" (aux));
45  }
46  #endif
47 
48  int nbtries=1, nbloops=1, M, N, K;
49 
50  if (argc==2)
51  {
52  if (std::string(argv[1])=="check")
53  check_product();
54  else
55  M = N = K = atoi(argv[1]);
56  }
57  else if ((argc==3) && (std::string(argv[1])=="auto"))
58  {
59  M = N = K = atoi(argv[2]);
60  nbloops = 1000000000/(M*M*M);
61  if (nbloops<1)
62  nbloops = 1;
63  nbtries = 6;
64  }
65  else if (argc==4)
66  {
67  M = N = K = atoi(argv[1]);
68  nbloops = atoi(argv[2]);
69  nbtries = atoi(argv[3]);
70  }
71  else if (argc==6)
72  {
73  M = atoi(argv[1]);
74  N = atoi(argv[2]);
75  K = atoi(argv[3]);
76  nbloops = atoi(argv[4]);
77  nbtries = atoi(argv[5]);
78  }
79  else
80  {
81  std::cout << "Usage: " << argv[0] << " size \n";
82  std::cout << "Usage: " << argv[0] << " auto size\n";
83  std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
84  std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
85  std::cout << "Usage: " << argv[0] << " check\n";
86  std::cout << "Options:\n";
87  std::cout << " size unique size of the 2 matrices (integer)\n";
88  std::cout << " auto automatically set the number of repetitions and tries\n";
89  std::cout << " nbloops number of times the GEMM routines is executed\n";
90  std::cout << " nbtries number of times the loop is benched (return the best try)\n";
91  std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n";
92  std::cout << " check check eigen product using cblas as a reference\n";
93  exit(1);
94  }
95 
96  double nbmad = double(M) * double(N) * double(K) * double(nbloops);
97 
98  if (!(std::string(argv[1])=="auto"))
99  std::cout << M << " x " << N << " x " << K << "\n";
100 
101  Scalar alpha, beta;
102  MyMatrix ma(M,K), mb(K,N), mc(M,N);
103  ma = MyMatrix::Random(M,K);
104  mb = MyMatrix::Random(K,N);
105  mc = MyMatrix::Random(M,N);
106 
108 
109  // we simply compute c += a*b, so:
110  alpha = 1;
111  beta = 1;
112 
113  // bench cblas
114  // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
115  if (!(std::string(argv[1])=="auto"))
116  {
117  timer.reset();
118  for (uint k=0 ; k<nbtries ; ++k)
119  {
120  timer.start();
121  for (uint j=0 ; j<nbloops ; ++j)
122  #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
123  CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
124  #else
125  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
126  #endif
127  timer.stop();
128  }
129  if (!(std::string(argv[1])=="auto"))
130  std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
131  else
132  std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
133  }
134 
135  // clear
136  ma = MyMatrix::Random(M,K);
137  mb = MyMatrix::Random(K,N);
138  mc = MyMatrix::Random(M,N);
139 
140  // eigen
141 // if (!(std::string(argv[1])=="auto"))
142  {
143  timer.reset();
144  for (uint k=0 ; k<nbtries ; ++k)
145  {
146  timer.start();
147  bench_eigengemm(mc, ma, mb, nbloops);
148  timer.stop();
149  }
150  if (!(std::string(argv[1])=="auto"))
151  std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
152  else
153  std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
154  }
155 
156  std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
157  std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
158 
159 
160  return 0;
161 }
162 
163 using namespace Eigen;
164 
165 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
166 {
167  for (uint j=0 ; j<nbloops ; ++j)
168  mc.noalias() += ma * mb;
169 }
170 
171 #define MYVERIFY(A,M) if (!(A)) { \
172  std::cout << "FAIL: " << M << "\n"; \
173  }
174 void check_product(int M, int N, int K)
175 {
176  MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
177  ma = MyMatrix::Random(M,K);
178  mb = MyMatrix::Random(K,N);
179  maT = ma.transpose();
180  mbT = mb.transpose();
181  mc = MyMatrix::Random(M,N);
182 
183  MyMatrix::Scalar eps = 1e-4;
184 
185  meigen = mref = mc;
186  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
187  meigen += ma * mb;
188  MYVERIFY(meigen.isApprox(mref, eps),". * .");
189 
190  meigen = mref = mc;
191  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
192  meigen += maT.transpose() * mb;
193  MYVERIFY(meigen.isApprox(mref, eps),"T * .");
194 
195  meigen = mref = mc;
196  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
197  meigen += (maT.transpose()) * (mbT.transpose());
198  MYVERIFY(meigen.isApprox(mref, eps),"T * T");
199 
200  meigen = mref = mc;
201  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
202  meigen += ma * mbT.transpose();
203  MYVERIFY(meigen.isApprox(mref, eps),". * T");
204 }
205 
206 void check_product(void)
207 {
208  int M, N, K;
209  for (uint i=0; i<1000; ++i)
210  {
211  M = internal::random<int>(1,64);
212  N = internal::random<int>(1,768);
213  K = internal::random<int>(1,768);
214  M = (0 + M) * 1;
215  std::cout << M << " x " << N << " x " << K << "\n";
216  check_product(M, N, K);
217  }
218 }
219 
Eigen
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
alpha
RealScalar alpha
Definition: level1_cplx_impl.h:147
e
Array< double, 1, 3 > e(1./3., 0.5, 2.)
MYVERIFY
#define MYVERIFY(A, M)
Definition: benchBlasGemm.cpp:171
timer
static BenchTimer timer
Definition: benchmark-blocking-sizes.cpp:31
Scalar
float Scalar
Definition: benchBlasGemm.cpp:21
beta
double beta(double a, double b)
Definition: beta.c:61
bench_eigengemm
void bench_eigengemm(MyMatrix &mc, const MyMatrix &ma, const MyMatrix &mb, int nbloops)
Definition: benchBlasGemm.cpp:165
Eigen::BenchTimer::value
double value(int TIMER=CPU_TIMER) const
Definition: BenchTimer.h:104
MyMatrix
Eigen::Matrix< Scalar, Eigen::Dynamic, Eigen::Dynamic > MyMatrix
Definition: benchBlasGemm.cpp:29
j
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2
Eigen::BenchTimer::start
void start()
Definition: BenchTimer.h:81
Eigen::BenchTimer
Definition: BenchTimer.h:59
Eigen::BenchTimer::reset
void reset()
Definition: BenchTimer.h:75
K
#define K
Definition: igam.h:8
BenchTimer.h
CBLAS_GEMM
#define CBLAS_GEMM
Definition: benchBlasGemm.cpp:22
Eigen::PlainObjectBase::data
EIGEN_DEVICE_FUNC const EIGEN_STRONG_INLINE Scalar * data() const
Definition: PlainObjectBase.h:247
Eigen::BenchTimer::stop
void stop()
Definition: BenchTimer.h:86
Eigen::Matrix< Scalar, Eigen::Dynamic, Eigen::Dynamic >
N
#define N
Definition: igam.h:9
Eigen::PlainObjectBase< Matrix< Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols > >::Scalar
internal::traits< Matrix< Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols > >::Scalar Scalar
Definition: PlainObjectBase.h:106
Eigen::l1CacheSize
std::ptrdiff_t l1CacheSize()
Definition: products/GeneralBlockPanelKernel.h:2607
check_product
void check_product(int M, int N, int K)
Definition: benchBlasGemm.cpp:174
i
int i
Definition: BiCGSTAB_step_by_step.cpp:9
main
int main(int argc, char *argv[])
Definition: benchBlasGemm.cpp:34
floor
const EIGEN_DEVICE_FUNC FloorReturnType floor() const
Definition: ArrayCwiseUnaryOps.h:481
Scalar
SCALAR Scalar
Definition: bench_gemm.cpp:46
Eigen::l2CacheSize
std::ptrdiff_t l2CacheSize()
Definition: products/GeneralBlockPanelKernel.h:2616
M
Matrix< RealScalar, Dynamic, Dynamic > M
Definition: bench_gemm.cpp:51


gtsam
Author(s):
autogenerated on Sat Nov 16 2024 04:01:54