benchVecAdd.cpp
Go to the documentation of this file.
1 
2 #include <iostream>
3 #include <Eigen/Core>
4 #include <bench/BenchTimer.h>
5 using namespace Eigen;
6 
// Problem-size knob: main() derives the edge length as SIZE * 8 and the total
// element count as the square of that edge. May be overridden on the compiler
// command line (e.g. -DSIZE=100).
#if !defined(SIZE)
#  define SIZE 50
#endif

// Number of times each benchVec() kernel repeats its addition per timed call.
// May be overridden on the compiler command line.
#if !defined(REPEAT)
#  define REPEAT 10000
#endif

// Element type used by the raw-pointer benchmark buffers.
typedef float Scalar;
16 
17 __attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
18 __attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
19 __attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
20 
21 int main(int argc, char* argv[])
22 {
23  int size = SIZE * 8;
24  int size2 = size * size;
25  Scalar* a = internal::aligned_new<Scalar>(size2);
26  Scalar* b = internal::aligned_new<Scalar>(size2+4)+1;
27  Scalar* c = internal::aligned_new<Scalar>(size2);
28 
29  for (int i=0; i<size; ++i)
30  {
31  a[i] = b[i] = c[i] = 0;
32  }
33 
35 
36  timer.reset();
37  for (int k=0; k<10; ++k)
38  {
39  timer.start();
40  benchVec(a, b, c, size2);
41  timer.stop();
42  }
43  std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
44  return 0;
45  for (int innersize = size; innersize>2 ; --innersize)
46  {
47  if (size2%innersize==0)
48  {
49  int outersize = size2/innersize;
50  MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
51  MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
52  MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
53  timer.reset();
54  for (int k=0; k<3; ++k)
55  {
56  timer.start();
57  benchVec(ma, mb, mc);
58  timer.stop();
59  }
60  std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
61  }
62  }
63 
64  VectorXf va = Map<VectorXf>(a, size2);
65  VectorXf vb = Map<VectorXf>(b, size2);
66  VectorXf vc = Map<VectorXf>(c, size2);
67  timer.reset();
68  for (int k=0; k<3; ++k)
69  {
70  timer.start();
71  benchVec(va, vb, vc);
72  timer.stop();
73  }
74  std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
75 
76  return 0;
77 }
78 
79 void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c)
80 {
81  for (int k=0; k<REPEAT; ++k)
82  a = a + b;
83 }
84 
85 void benchVec(VectorXf& a, VectorXf& b, VectorXf& c)
86 {
87  for (int k=0; k<REPEAT; ++k)
88  a = a + b;
89 }
90 
91 void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
92 {
93  typedef internal::packet_traits<Scalar>::type PacketScalar;
94  const int PacketSize = internal::packet_traits<Scalar>::size;
95  PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
96  for (int k=0; k<REPEAT; ++k)
97  for (int i=0; i<size; i+=PacketSize*8)
98  {
99 // a0 = internal::pload(&a[i]);
100 // b0 = internal::pload(&b[i]);
101 // a1 = internal::pload(&a[i+1*PacketSize]);
102 // b1 = internal::pload(&b[i+1*PacketSize]);
103 // a2 = internal::pload(&a[i+2*PacketSize]);
104 // b2 = internal::pload(&b[i+2*PacketSize]);
105 // a3 = internal::pload(&a[i+3*PacketSize]);
106 // b3 = internal::pload(&b[i+3*PacketSize]);
107 // internal::pstore(&a[i], internal::padd(a0, b0));
108 // a0 = internal::pload(&a[i+4*PacketSize]);
109 // b0 = internal::pload(&b[i+4*PacketSize]);
110 //
111 // internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1));
112 // a1 = internal::pload(&a[i+5*PacketSize]);
113 // b1 = internal::pload(&b[i+5*PacketSize]);
114 //
115 // internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2));
116 // a2 = internal::pload(&a[i+6*PacketSize]);
117 // b2 = internal::pload(&b[i+6*PacketSize]);
118 //
119 // internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3));
120 // a3 = internal::pload(&a[i+7*PacketSize]);
121 // b3 = internal::pload(&b[i+7*PacketSize]);
122 //
123 // internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0));
124 // internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1));
125 // internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
126 // internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));
127 
128  internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu(&a[i+2*PacketSize]), internal::ploadu(&b[i+2*PacketSize])));
129  internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu(&a[i+3*PacketSize]), internal::ploadu(&b[i+3*PacketSize])));
130  internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu(&a[i+4*PacketSize]), internal::ploadu(&b[i+4*PacketSize])));
131  internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu(&a[i+5*PacketSize]), internal::ploadu(&b[i+5*PacketSize])));
132  internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu(&a[i+6*PacketSize]), internal::ploadu(&b[i+6*PacketSize])));
133  internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu(&a[i+7*PacketSize]), internal::ploadu(&b[i+7*PacketSize])));
134  }
135 }
SCALAR Scalar
Definition: bench_gemm.cpp:33
Scalar * b
Definition: benchVecAdd.cpp:17
#define REPEAT
Definition: benchVecAdd.cpp:12
A matrix or vector expression mapping an existing array of data.
Definition: Map.h:94
Scalar Scalar * c
Definition: benchVecAdd.cpp:17
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
Vector2 b3(3,-6)
Array33i a
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
#define SIZE
Definition: benchVecAdd.cpp:8
void benchVec(MatrixXf &a, MatrixXf &b, MatrixXf &c)
Definition: benchVecAdd.cpp:79
int main(int argc, char *argv[])
Definition: benchVecAdd.cpp:21
EIGEN_DEVICE_FUNC Packet ploadu(const typename unpacket_traits< Packet >::type *from)
Vector2 b2(4,-5)
float Scalar
Definition: benchVecAdd.cpp:15
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Vector2 b1(2,-1)
double value(int TIMER=CPU_TIMER) const
Definition: BenchTimer.h:100
__attribute__((noinline)) void benchVec(Scalar *a
static BenchTimer timer


gtsam
Author(s):
autogenerated on Sat May 8 2021 02:41:42