// Shared pseudo-random engine for this benchmark. It is default-constructed
// (no explicit seed), so it starts from the standard library's default seed
// and produces the same sequence on every run. main() passes it to its
// integer distributions to draw random (i, j) index pairs.
31 static std::mt19937
rng;
// Uniform distribution of doubles (the default result type) over the
// half-open interval [-1.0, 0.0).
// NOTE(review): no use of `uniform` is visible in this excerpt -- presumably
// it supplies the values assigned to matrix elements; confirm against the
// loop bodies.
32 static std::uniform_real_distribution<>
uniform(-1.0, 0.0);
41 int main(
int argc,
char* argv[]) {
44 cout <<
"\nTiming matrix_block:" << endl;
48 volatile size_t m=500;
49 volatile size_t n=300;
50 volatile size_t nReps = 1000;
52 std::uniform_int_distribution<size_t> uniform_i(0,
m-1);
53 std::uniform_int_distribution<size_t> uniform_j(0,
n-1);
59 cout <<
" Basic: " << (
int)
m <<
"x" << (
int)
n << endl;
60 cout <<
" Full: mat(" << 0 <<
":" << (
int)
m <<
", " << 0 <<
":" << (
int)
n <<
")" << endl;
61 cout <<
" Top: mat(" << 0 <<
":" << (
int)
n <<
", " << 0 <<
":" << (
int)
n <<
")" << endl;
66 double basicTime, fullTime, topTime, blockTime;
68 cout <<
"Row-major matrix, row-major assignment:" << endl;
71 for(
size_t rep=0; rep<1000; ++rep)
77 for(
size_t rep=0; rep<nReps; ++rep)
83 basicTime = basicTimeNode->secs();
85 cout <<
" Basic: " << double(1000000 * basicTime /
double(
mat.rows()*
mat.cols()*nReps)) <<
" μs/element" << endl;
88 for(
size_t rep=0; rep<nReps; ++rep)
89 for(
size_t i=0;
i<(
size_t)full.rows(); ++
i)
90 for(
size_t j=0;
j<(
size_t)full.cols(); ++
j)
94 fullTime = fullTimeNode->secs();
96 cout <<
" Full: " << double(1000000 * fullTime /
double(full.rows()*full.cols()*nReps)) <<
" μs/element" << endl;
99 for(
size_t rep=0; rep<nReps; ++rep)
100 for(
size_t i=0;
i<(
size_t)top.rows(); ++
i)
101 for(
size_t j=0;
j<(
size_t)top.cols(); ++
j)
105 topTime = topTimeNode->secs();
107 cout <<
" Top: " << double(1000000 * topTime /
double(top.rows()*top.cols()*nReps)) <<
" μs/element" << endl;
110 for(
size_t rep=0; rep<nReps; ++rep)
116 blockTime = blockTimeNode->secs();
118 cout <<
" Block: " << double(1000000 * blockTime /
double(
block.rows()*
block.cols()*nReps)) <<
" μs/element" << endl;
124 double basicTime, fullTime, topTime, blockTime;
126 cout <<
"Row-major matrix, column-major assignment:" << endl;
129 for(
size_t rep=0; rep<1000; ++rep)
135 for(
size_t rep=0; rep<nReps; ++rep)
141 basicTime = basicTimeNode->secs();
143 cout <<
" Basic: " << double(1000000 * basicTime /
double(
mat.rows()*
mat.cols()*nReps)) <<
" μs/element" << endl;
146 for(
size_t rep=0; rep<nReps; ++rep)
147 for(
size_t j=0;
j<(
size_t)full.cols(); ++
j)
148 for(
size_t i=0;
i<(
size_t)full.rows(); ++
i)
152 fullTime = fullTimeNode->secs();
154 cout <<
" Full: " << double(1000000 * fullTime /
double(full.rows()*full.cols()*nReps)) <<
" μs/element" << endl;
157 for(
size_t rep=0; rep<nReps; ++rep)
158 for(
size_t j=0;
j<(
size_t)top.cols(); ++
j)
159 for(
size_t i=0;
i<(
size_t)top.rows(); ++
i)
163 topTime = topTimeNode->secs();
165 cout <<
" Top: " << double(1000000 * topTime /
double(top.rows()*top.cols()*nReps)) <<
" μs/element" << endl;
168 for(
size_t rep=0; rep<nReps; ++rep)
174 blockTime = blockTimeNode->secs();
176 cout <<
" Block: " << double(1000000 * blockTime /
double(
block.rows()*
block.cols()*nReps)) <<
" μs/element" << endl;
182 double basicTime, fullTime, topTime, blockTime;
183 typedef std::pair<size_t,size_t> ij_t;
184 std::vector<ij_t> ijs(100000);
186 cout <<
"Row-major matrix, random assignment:" << endl;
189 for (
auto& ij : ijs) ij = {uniform_i(
rng), uniform_j(
rng)};
190 for(
size_t rep=0; rep<1000; ++rep)
194 for (
auto& ij : ijs) ij = {uniform_i(
rng), uniform_j(
rng)};
195 for(
size_t rep=0; rep<1000; ++rep)
199 basicTime = basicTimeNode->secs();
201 cout <<
" Basic: " << double(1000000 * basicTime /
double(ijs.size()*nReps)) <<
" μs/element" << endl;
204 for (
auto& ij : ijs) ij = {uniform_i(
rng), uniform_j(
rng)};
205 for(
size_t rep=0; rep<1000; ++rep)
209 fullTime = fullTimeNode->secs();
211 cout <<
" Full: " << double(1000000 * fullTime /
double(ijs.size()*nReps)) <<
" μs/element" << endl;
214 for (
auto& ij : ijs) ij = {uniform_i(
rng) % top.rows(), uniform_j(
rng)};
215 for(
size_t rep=0; rep<1000; ++rep)
219 topTime = topTimeNode->secs();
221 cout <<
" Top: " << double(1000000 * topTime /
double(ijs.size()*nReps)) <<
" μs/element" << endl;
226 for(
size_t rep=0; rep<1000; ++rep)
230 blockTime = blockTimeNode->secs();
232 cout <<
" Block: " << double(1000000 * blockTime /
double(ijs.size()*nReps)) <<
" μs/element" << endl;