tensor_benchmarks_cpu.cc
Go to the documentation of this file.
1 #define EIGEN_USE_THREADS
2 
3 #include <string>
4 
5 #include "tensor_benchmarks.h"
6 
// Declares two objects in the enclosing scope: an Eigen::ThreadPool named
// `pool` and an Eigen::ThreadPoolDevice named `device` that is constructed
// from a pointer to that pool. Both receive the same thread count.
// The expansion is a pair of declarations (not a single statement), so it has
// to be used where declarations are allowed and at most once per scope; code
// following the macro refers to the objects by the fixed names above.
#define CREATE_THREAD_POOL(NUM_THREADS) \
  Eigen::ThreadPool pool(NUM_THREADS); \
  Eigen::ThreadPoolDevice device(&pool, NUM_THREADS);
10 
11 // Simple functions
// Generates one thread-pool benchmark and registers it.
//   FUNC    - name of the BenchmarkSuite member function to invoke; it is
//             called with the iteration count as its only argument.
//   THREADS - thread count handed to CREATE_THREAD_POOL; it is also pasted
//             into the function name, so it must be a single token.
// The generated function is named BM_<FUNC>_<THREADS>T and is registered via
// BENCHMARK_RANGE with the bounds 10 and 5000 (presumably the range of N
// values the harness sweeps - confirm against the benchmark framework).
#define BM_FuncCPU(FUNC, THREADS) \
  static void BM_##FUNC##_##THREADS##T(int iters, int N) { \
    /* Stop the timer before the pool and suite are constructed. */ \
    StopBenchmarkTiming(); \
    CREATE_THREAD_POOL(THREADS); \
    typedef BenchmarkSuite<Eigen::ThreadPoolDevice, float> PoolSuite; \
    PoolSuite bench(device, N); \
    bench.FUNC(iters); \
  } \
  BENCHMARK_RANGE(BM_##FUNC##_##THREADS##T, 10, 5000);
20 
// Instantiations of BM_FuncCPU: every suite function listed below is
// benchmarked three times, with thread pools of 4, 8 and 12 threads.
// Each line expands to a static function BM_<func>_<threads>T plus its
// BENCHMARK_RANGE registration.
21 BM_FuncCPU(memcpy, 4);
22 BM_FuncCPU(memcpy, 8);
23 BM_FuncCPU(memcpy, 12);
24 
25 BM_FuncCPU(typeCasting, 4);
26 BM_FuncCPU(typeCasting, 8);
27 BM_FuncCPU(typeCasting, 12);
28 
29 BM_FuncCPU(random, 4);
30 BM_FuncCPU(random, 8);
31 BM_FuncCPU(random, 12);
32 
33 BM_FuncCPU(slicing, 4);
34 BM_FuncCPU(slicing, 8);
35 BM_FuncCPU(slicing, 12);
36 
37 BM_FuncCPU(rowChip, 4);
38 BM_FuncCPU(rowChip, 8);
39 BM_FuncCPU(rowChip, 12);
40 
41 BM_FuncCPU(colChip, 4);
42 BM_FuncCPU(colChip, 8);
43 BM_FuncCPU(colChip, 12);
44 
45 BM_FuncCPU(shuffling, 4);
46 BM_FuncCPU(shuffling, 8);
47 BM_FuncCPU(shuffling, 12);
48 
49 BM_FuncCPU(padding, 4);
50 BM_FuncCPU(padding, 8);
51 BM_FuncCPU(padding, 12);
52 
53 BM_FuncCPU(striding, 4);
54 BM_FuncCPU(striding, 8);
55 BM_FuncCPU(striding, 12);
56 
57 BM_FuncCPU(broadcasting, 4);
58 BM_FuncCPU(broadcasting, 8);
59 BM_FuncCPU(broadcasting, 12);
60 
61 BM_FuncCPU(coeffWiseOp, 4);
62 BM_FuncCPU(coeffWiseOp, 8);
63 BM_FuncCPU(coeffWiseOp, 12);
64 
65 BM_FuncCPU(algebraicFunc, 4);
66 BM_FuncCPU(algebraicFunc, 8);
67 BM_FuncCPU(algebraicFunc, 12);
68 
69 BM_FuncCPU(transcendentalFunc, 4);
70 BM_FuncCPU(transcendentalFunc, 8);
71 BM_FuncCPU(transcendentalFunc, 12);
72 
73 BM_FuncCPU(rowReduction, 4);
74 BM_FuncCPU(rowReduction, 8);
75 BM_FuncCPU(rowReduction, 12);
76 
77 BM_FuncCPU(colReduction, 4);
78 BM_FuncCPU(colReduction, 8);
79 BM_FuncCPU(colReduction, 12);
80 
81 
82 // Contractions
// Generates and registers a benchmark whose suite is built from three
// explicit dimensions instead of a single size.
//   FUNC       - BenchmarkSuite member function to invoke with the iteration
//                count.
//   D1, D2, D3 - dimensions forwarded to the BenchmarkSuite constructor. Each
//                is pasted into the function name AND evaluated inside the
//                function body, so passing the token N selects the generated
//                function's own `int N` parameter, while a literal such as 64
//                fixes that dimension.
//   THREADS    - thread count; must be a single token because it is pasted
//                into the name. When it equals 1 the suite runs on an
//                Eigen::DefaultDevice and no thread pool is created;
//                otherwise a pool of THREADS threads backs an
//                Eigen::ThreadPoolDevice.
// The generated function is named BM_<FUNC>_<D1>x<D2>x<D3>_<THREADS>T and is
// registered through BENCHMARK_RANGE with the bounds 10 and 5000.
#define BM_FuncWithInputDimsCPU(FUNC, D1, D2, D3, THREADS) \
  static void BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T(int iters, int N) { \
    /* Stop the timer before any device or suite is constructed. */ \
    StopBenchmarkTiming(); \
    if (THREADS != 1) { \
      /* Multi-threaded variant: the suite runs on a thread pool device. */ \
      CREATE_THREAD_POOL(THREADS); \
      typedef BenchmarkSuite<Eigen::ThreadPoolDevice, float> PoolSuite; \
      PoolSuite bench(device, D1, D2, D3); \
      bench.FUNC(iters); \
    } else { \
      /* THREADS == 1: use the default device, no pool involved. */ \
      Eigen::DefaultDevice device; \
      typedef BenchmarkSuite<Eigen::DefaultDevice, float> DefaultSuite; \
      DefaultSuite bench(device, D1, D2, D3); \
      bench.FUNC(iters); \
    } \
  } \
  BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T, 10, 5000);
97 
98 
// Contraction benchmarks. Each group of five lines fixes one combination of
// the three dimension arguments (the token N, i.e. the generated function's
// size parameter, or a literal 64 or 1) and runs it with 1, 4, 8, 12 and 16
// threads. The 1-thread variants take the Eigen::DefaultDevice branch of
// BM_FuncWithInputDimsCPU; all others use a thread pool.
99 BM_FuncWithInputDimsCPU(contraction, N, N, N, 1);
100 BM_FuncWithInputDimsCPU(contraction, N, N, N, 4);
101 BM_FuncWithInputDimsCPU(contraction, N, N, N, 8);
102 BM_FuncWithInputDimsCPU(contraction, N, N, N, 12);
103 BM_FuncWithInputDimsCPU(contraction, N, N, N, 16);
104 
105 BM_FuncWithInputDimsCPU(contraction, 64, N, N, 1);
106 BM_FuncWithInputDimsCPU(contraction, 64, N, N, 4);
107 BM_FuncWithInputDimsCPU(contraction, 64, N, N, 8);
108 BM_FuncWithInputDimsCPU(contraction, 64, N, N, 12);
109 BM_FuncWithInputDimsCPU(contraction, 64, N, N, 16);
110 
111 BM_FuncWithInputDimsCPU(contraction, N, 64, N, 1);
112 BM_FuncWithInputDimsCPU(contraction, N, 64, N, 4);
113 BM_FuncWithInputDimsCPU(contraction, N, 64, N, 8);
114 BM_FuncWithInputDimsCPU(contraction, N, 64, N, 12);
115 BM_FuncWithInputDimsCPU(contraction, N, 64, N, 16);
116 
117 BM_FuncWithInputDimsCPU(contraction, N, N, 64, 1);
118 BM_FuncWithInputDimsCPU(contraction, N, N, 64, 4);
119 BM_FuncWithInputDimsCPU(contraction, N, N, 64, 8);
120 BM_FuncWithInputDimsCPU(contraction, N, N, 64, 12);
121 BM_FuncWithInputDimsCPU(contraction, N, N, 64, 16);
122 
123 BM_FuncWithInputDimsCPU(contraction, 1, N, N, 1);
124 BM_FuncWithInputDimsCPU(contraction, 1, N, N, 4);
125 BM_FuncWithInputDimsCPU(contraction, 1, N, N, 8);
126 BM_FuncWithInputDimsCPU(contraction, 1, N, N, 12);
127 BM_FuncWithInputDimsCPU(contraction, 1, N, N, 16);
128 
129 BM_FuncWithInputDimsCPU(contraction, N, N, 1, 1);
130 BM_FuncWithInputDimsCPU(contraction, N, N, 1, 4);
131 BM_FuncWithInputDimsCPU(contraction, N, N, 1, 8);
132 BM_FuncWithInputDimsCPU(contraction, N, N, 1, 12);
133 BM_FuncWithInputDimsCPU(contraction, N, N, 1, 16);
134 
135 
136 // Convolutions
// Generates and registers a thread-pool benchmark for a suite function that
// takes two extra dimension arguments.
//   FUNC       - BenchmarkSuite member function; it is called as
//                FUNC(iters, DIM1, DIM2).
//   DIM1, DIM2 - values forwarded to FUNC after the iteration count; both are
//                also pasted into the function name, so each must be a single
//                token.
//   THREADS    - thread count handed to CREATE_THREAD_POOL; also pasted into
//                the name.
// The generated function is named BM_<FUNC>_<DIM1>x<DIM2>_<THREADS>T and is
// registered through BENCHMARK_RANGE with the bounds 128 and 5000 (note the
// lower bound differs from the other benchmark macros in this file).
#define BM_FuncWithKernelDimsCPU(FUNC, DIM1, DIM2, THREADS) \
  static void BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T(int iters, int N) { \
    /* Stop the timer before the pool and suite are constructed. */ \
    StopBenchmarkTiming(); \
    CREATE_THREAD_POOL(THREADS); \
    typedef BenchmarkSuite<Eigen::ThreadPoolDevice, float> PoolSuite; \
    PoolSuite bench(device, N); \
    bench.FUNC(iters, DIM1, DIM2); \
  } \
  BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T, 128, 5000);
145 
// Convolution benchmarks. Each group of three lines fixes one pair of
// dimension arguments (7x1, 1x7, 7x4, 4x7, 7x64, 64x7) forwarded to
// convolution() and runs it with thread pools of 4, 8 and 12 threads.
146 BM_FuncWithKernelDimsCPU(convolution, 7, 1, 4);
147 BM_FuncWithKernelDimsCPU(convolution, 7, 1, 8);
148 BM_FuncWithKernelDimsCPU(convolution, 7, 1, 12);
149 
150 BM_FuncWithKernelDimsCPU(convolution, 1, 7, 4);
151 BM_FuncWithKernelDimsCPU(convolution, 1, 7, 8);
152 BM_FuncWithKernelDimsCPU(convolution, 1, 7, 12);
153 
154 BM_FuncWithKernelDimsCPU(convolution, 7, 4, 4);
155 BM_FuncWithKernelDimsCPU(convolution, 7, 4, 8);
156 BM_FuncWithKernelDimsCPU(convolution, 7, 4, 12);
157 
158 BM_FuncWithKernelDimsCPU(convolution, 4, 7, 4);
159 BM_FuncWithKernelDimsCPU(convolution, 4, 7, 8);
160 BM_FuncWithKernelDimsCPU(convolution, 4, 7, 12);
161 
162 BM_FuncWithKernelDimsCPU(convolution, 7, 64, 4);
163 BM_FuncWithKernelDimsCPU(convolution, 7, 64, 8);
164 BM_FuncWithKernelDimsCPU(convolution, 7, 64, 12);
165 
166 BM_FuncWithKernelDimsCPU(convolution, 64, 7, 4);
167 BM_FuncWithKernelDimsCPU(convolution, 64, 7, 8);
168 BM_FuncWithKernelDimsCPU(convolution, 64, 7, 12);
#define BM_FuncWithKernelDimsCPU(FUNC, DIM1, DIM2, THREADS)
#define N
Definition: gksort.c:12
#define BM_FuncWithInputDimsCPU(FUNC, D1, D2, D3, THREADS)
const mpreal random(unsigned int seed=0)
Definition: mpreal.h:2614
#define BM_FuncCPU(FUNC, THREADS)


gtsam
Author(s):
autogenerated on Sat May 8 2021 02:45:09