00001
00002 #include <iostream>
00003 #include <Eigen/Core>
00004 #include <bench/BenchTimer.h>
00005 using namespace Eigen;
00006
00007 #ifndef SIZE
00008 #define SIZE 50
00009 #endif
00010
00011 #ifndef REPEAT
00012 #define REPEAT 10000
00013 #endif
00014
00015 typedef float Scalar;
00016
00017 __attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
00018 __attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
00019 __attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
00020
00021 int main(int argc, char* argv[])
00022 {
00023 int size = SIZE * 8;
00024 int size2 = size * size;
00025 Scalar* a = internal::aligned_new<Scalar>(size2);
00026 Scalar* b = internal::aligned_new<Scalar>(size2+4)+1;
00027 Scalar* c = internal::aligned_new<Scalar>(size2);
00028
00029 for (int i=0; i<size; ++i)
00030 {
00031 a[i] = b[i] = c[i] = 0;
00032 }
00033
00034 BenchTimer timer;
00035
00036 timer.reset();
00037 for (int k=0; k<10; ++k)
00038 {
00039 timer.start();
00040 benchVec(a, b, c, size2);
00041 timer.stop();
00042 }
00043 std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
00044 return 0;
00045 for (int innersize = size; innersize>2 ; --innersize)
00046 {
00047 if (size2%innersize==0)
00048 {
00049 int outersize = size2/innersize;
00050 MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
00051 MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
00052 MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
00053 timer.reset();
00054 for (int k=0; k<3; ++k)
00055 {
00056 timer.start();
00057 benchVec(ma, mb, mc);
00058 timer.stop();
00059 }
00060 std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
00061 }
00062 }
00063
00064 VectorXf va = Map<VectorXf>(a, size2);
00065 VectorXf vb = Map<VectorXf>(b, size2);
00066 VectorXf vc = Map<VectorXf>(c, size2);
00067 timer.reset();
00068 for (int k=0; k<3; ++k)
00069 {
00070 timer.start();
00071 benchVec(va, vb, vc);
00072 timer.stop();
00073 }
00074 std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
00075
00076 return 0;
00077 }
00078
00079 void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c)
00080 {
00081 for (int k=0; k<REPEAT; ++k)
00082 a = a + b;
00083 }
00084
00085 void benchVec(VectorXf& a, VectorXf& b, VectorXf& c)
00086 {
00087 for (int k=0; k<REPEAT; ++k)
00088 a = a + b;
00089 }
00090
00091 void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
00092 {
00093 typedef internal::packet_traits<Scalar>::type PacketScalar;
00094 const int PacketSize = internal::packet_traits<Scalar>::size;
00095 PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
00096 for (int k=0; k<REPEAT; ++k)
00097 for (int i=0; i<size; i+=PacketSize*8)
00098 {
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128 internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu(&a[i+2*PacketSize]), internal::ploadu(&b[i+2*PacketSize])));
00129 internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu(&a[i+3*PacketSize]), internal::ploadu(&b[i+3*PacketSize])));
00130 internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu(&a[i+4*PacketSize]), internal::ploadu(&b[i+4*PacketSize])));
00131 internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu(&a[i+5*PacketSize]), internal::ploadu(&b[i+5*PacketSize])));
00132 internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu(&a[i+6*PacketSize]), internal::ploadu(&b[i+6*PacketSize])));
00133 internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu(&a[i+7*PacketSize]), internal::ploadu(&b[i+7*PacketSize])));
00134 }
00135 }