benchVecAdd.cpp
Go to the documentation of this file.
00001 
00002 #include <iostream>
00003 #include <Eigen/Core>
00004 #include <bench/BenchTimer.h>
00005 using namespace Eigen;
00006 
00007 #ifndef SIZE
00008 #define SIZE 50
00009 #endif
00010 
00011 #ifndef REPEAT
00012 #define REPEAT 10000
00013 #endif
00014 
00015 typedef float Scalar;
00016 
00017 __attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
00018 __attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
00019 __attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
00020 
00021 int main(int argc, char* argv[])
00022 {
00023     int size = SIZE * 8;
00024     int size2 = size * size;
00025     Scalar* a = internal::aligned_new<Scalar>(size2);
00026     Scalar* b = internal::aligned_new<Scalar>(size2+4)+1;
00027     Scalar* c = internal::aligned_new<Scalar>(size2); 
00028     
00029     for (int i=0; i<size; ++i)
00030     {
00031         a[i] = b[i] = c[i] = 0;
00032     }
00033     
00034     BenchTimer timer;
00035     
00036     timer.reset();
00037     for (int k=0; k<10; ++k)
00038     {
00039         timer.start();
00040         benchVec(a, b, c, size2);
00041         timer.stop();
00042     }
00043     std::cout << timer.value() << "s  " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
00044     return 0;
00045     for (int innersize = size; innersize>2 ; --innersize)
00046     {
00047         if (size2%innersize==0)
00048         {
00049             int outersize = size2/innersize;
00050             MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
00051             MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
00052             MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
00053             timer.reset();
00054             for (int k=0; k<3; ++k)
00055             {
00056                 timer.start();
00057                 benchVec(ma, mb, mc);
00058                 timer.stop();
00059             }
00060             std::cout << innersize << " x " << outersize << "  " << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
00061         }
00062     }
00063     
00064     VectorXf va = Map<VectorXf>(a, size2);
00065     VectorXf vb = Map<VectorXf>(b, size2);
00066     VectorXf vc = Map<VectorXf>(c, size2);
00067     timer.reset();
00068     for (int k=0; k<3; ++k)
00069     {
00070         timer.start();
00071         benchVec(va, vb, vc);
00072         timer.stop();
00073     }
00074     std::cout << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
00075 
00076     return 0;
00077 }
00078 
00079 void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c)
00080 {
00081     for (int k=0; k<REPEAT; ++k)
00082         a = a + b;
00083 }
00084 
00085 void benchVec(VectorXf& a, VectorXf& b, VectorXf& c)
00086 {
00087     for (int k=0; k<REPEAT; ++k)
00088         a = a + b;
00089 }
00090 
00091 void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
00092 {
00093     typedef internal::packet_traits<Scalar>::type PacketScalar;
00094     const int PacketSize = internal::packet_traits<Scalar>::size;
00095     PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
00096     for (int k=0; k<REPEAT; ++k)
00097         for (int i=0; i<size; i+=PacketSize*8)
00098         {
00099 //             a0 = internal::pload(&a[i]);
00100 //             b0 = internal::pload(&b[i]);
00101 //             a1 = internal::pload(&a[i+1*PacketSize]);
00102 //             b1 = internal::pload(&b[i+1*PacketSize]);
00103 //             a2 = internal::pload(&a[i+2*PacketSize]);
00104 //             b2 = internal::pload(&b[i+2*PacketSize]);
00105 //             a3 = internal::pload(&a[i+3*PacketSize]);
00106 //             b3 = internal::pload(&b[i+3*PacketSize]);
00107 //             internal::pstore(&a[i], internal::padd(a0, b0));
00108 //             a0 = internal::pload(&a[i+4*PacketSize]);
00109 //             b0 = internal::pload(&b[i+4*PacketSize]);
00110 //             
00111 //             internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1));
00112 //             a1 = internal::pload(&a[i+5*PacketSize]);
00113 //             b1 = internal::pload(&b[i+5*PacketSize]);
00114 //             
00115 //             internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2));
00116 //             a2 = internal::pload(&a[i+6*PacketSize]);
00117 //             b2 = internal::pload(&b[i+6*PacketSize]);
00118 //             
00119 //             internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3));
00120 //             a3 = internal::pload(&a[i+7*PacketSize]);
00121 //             b3 = internal::pload(&b[i+7*PacketSize]);
00122 //             
00123 //             internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0));
00124 //             internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1));
00125 //             internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
00126 //             internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));
00127             
00128             internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu(&a[i+2*PacketSize]), internal::ploadu(&b[i+2*PacketSize])));
00129             internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu(&a[i+3*PacketSize]), internal::ploadu(&b[i+3*PacketSize])));
00130             internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu(&a[i+4*PacketSize]), internal::ploadu(&b[i+4*PacketSize])));
00131             internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu(&a[i+5*PacketSize]), internal::ploadu(&b[i+5*PacketSize])));
00132             internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu(&a[i+6*PacketSize]), internal::ploadu(&b[i+6*PacketSize])));
00133             internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu(&a[i+7*PacketSize]), internal::ploadu(&b[i+7*PacketSize])));
00134         }
00135 }


libicr
Author(s): Robert Krug
autogenerated on Mon Jan 6 2014 11:32:30