gtsam
3rdparty
Eigen
bench
tensors
tensor_benchmarks_cpu.cc
Go to the documentation of this file.
1
#define EIGEN_USE_THREADS
2
3
#include <string>
4
5
#include "
tensor_benchmarks.h
"
6
7
// CREATE_THREAD_POOL(threads) declares two locals in the scope where it is
// expanded: an Eigen::ThreadPool named `pool` constructed from `threads`, and
// an Eigen::ThreadPoolDevice named `device` constructed from `&pool` and the
// same `threads` value.
// The expansion already ends with a semicolon, so a call site written as
// `CREATE_THREAD_POOL(x);` yields one additional empty statement.
#define CREATE_THREAD_POOL(threads) \
8
Eigen::ThreadPool pool(threads); \
9
Eigen::ThreadPoolDevice device(&pool, threads);
10
11
// Simple functions
12
// BM_FuncCPU(FUNC, THREADS) defines a static function named
// BM_<FUNC>_<THREADS>T(int iters, int N) and passes it to BENCHMARK_RANGE
// together with the values 10 and 5000.
// The generated function calls StopBenchmarkTiming(), creates `pool` and
// `device` through CREATE_THREAD_POOL(THREADS), builds a
// BenchmarkSuite<Eigen::ThreadPoolDevice, float> from (device, N), and then
// calls suite.FUNC(iters).
// NOTE(review): timing is presumably restarted inside the suite method, and
// 10/5000 are presumably the bounds for N; both live in tensor_benchmarks.h /
// the benchmark harness, which are not visible here -- confirm.
#define BM_FuncCPU(FUNC, THREADS) \
13
static void BM_##FUNC##_##THREADS##T(int iters, int N) { \
14
StopBenchmarkTiming(); \
15
CREATE_THREAD_POOL(THREADS); \
16
BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, N); \
17
suite.FUNC(iters); \
18
} \
19
BENCHMARK_RANGE(BM_##FUNC##_##THREADS##T, 10, 5000);
20
21
// Instantiate BM_FuncCPU for each suite method below with THREADS = 4, 8 and
// 12. Each line generates and registers one function, e.g. BM_memcpy_4T.
BM_FuncCPU
(memcpy, 4);
22
BM_FuncCPU
(memcpy, 8);
23
BM_FuncCPU
(memcpy, 12);
24
25
BM_FuncCPU
(typeCasting, 4);
26
BM_FuncCPU
(typeCasting, 8);
27
BM_FuncCPU
(typeCasting, 12);
28
29
BM_FuncCPU
(random, 4);
30
BM_FuncCPU
(random, 8);
31
BM_FuncCPU
(random, 12);
32
33
BM_FuncCPU
(slicing, 4);
34
BM_FuncCPU
(slicing, 8);
35
BM_FuncCPU
(slicing, 12);
36
37
BM_FuncCPU
(rowChip, 4);
38
BM_FuncCPU
(rowChip, 8);
39
BM_FuncCPU
(rowChip, 12);
40
41
BM_FuncCPU
(colChip, 4);
42
BM_FuncCPU
(colChip, 8);
43
BM_FuncCPU
(colChip, 12);
44
45
BM_FuncCPU
(shuffling, 4);
46
BM_FuncCPU
(shuffling, 8);
47
BM_FuncCPU
(shuffling, 12);
48
49
BM_FuncCPU
(padding, 4);
50
BM_FuncCPU
(padding, 8);
51
BM_FuncCPU
(padding, 12);
52
53
BM_FuncCPU
(striding, 4);
54
BM_FuncCPU
(striding, 8);
55
BM_FuncCPU
(striding, 12);
56
57
BM_FuncCPU
(broadcasting, 4);
58
BM_FuncCPU
(broadcasting, 8);
59
BM_FuncCPU
(broadcasting, 12);
60
61
BM_FuncCPU
(coeffWiseOp, 4);
62
BM_FuncCPU
(coeffWiseOp, 8);
63
BM_FuncCPU
(coeffWiseOp, 12);
64
65
BM_FuncCPU
(algebraicFunc, 4);
66
BM_FuncCPU
(algebraicFunc, 8);
67
BM_FuncCPU
(algebraicFunc, 12);
68
69
BM_FuncCPU
(transcendentalFunc, 4);
70
BM_FuncCPU
(transcendentalFunc, 8);
71
BM_FuncCPU
(transcendentalFunc, 12);
72
73
BM_FuncCPU
(rowReduction, 4);
74
BM_FuncCPU
(rowReduction, 8);
75
BM_FuncCPU
(rowReduction, 12);
76
77
BM_FuncCPU
(colReduction, 4);
78
BM_FuncCPU
(colReduction, 8);
79
BM_FuncCPU
(colReduction, 12);
80
81
82
// Contractions
83
// BM_FuncWithInputDimsCPU(FUNC, D1, D2, D3, THREADS) defines a static function
// named BM_<FUNC>_<D1>x<D2>x<D3>_<THREADS>T(int iters, int N) and passes it to
// BENCHMARK_RANGE together with the values 10 and 5000.
// The generated function calls StopBenchmarkTiming() and then branches on
// THREADS:
//   - THREADS == 1: uses an Eigen::DefaultDevice and a
//     BenchmarkSuite<Eigen::DefaultDevice, float>;
//   - otherwise: creates `pool`/`device` via CREATE_THREAD_POOL(THREADS) and
//     uses a BenchmarkSuite<Eigen::ThreadPoolDevice, float>.
// In both branches the suite is constructed from (device, D1, D2, D3) and
// suite.FUNC(iters) is called.
// D1, D2 and D3 are pasted textually into the function name and are also used
// as constructor arguments, so an argument spelled `N` refers to the generated
// function's own `int N` parameter, while a literal such as 64 is a fixed
// value.
#define BM_FuncWithInputDimsCPU(FUNC, D1, D2, D3, THREADS) \
84
static void BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T(int iters, int N) { \
85
StopBenchmarkTiming(); \
86
if (THREADS == 1) { \
87
Eigen::DefaultDevice device; \
88
BenchmarkSuite<Eigen::DefaultDevice, float> suite(device, D1, D2, D3); \
89
suite.FUNC(iters); \
90
} else { \
91
CREATE_THREAD_POOL(THREADS); \
92
BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, D1, D2, D3); \
93
suite.FUNC(iters); \
94
} \
95
} \
96
BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T, 10, 5000);
97
98
99
// contraction with D1 = D2 = D3 = N, for THREADS = 1, 4, 8, 12 and 16
// (generates BM_contraction_NxNxN_<THREADS>T).
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
,
N
, 1);
100
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
,
N
, 4);
101
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
,
N
, 8);
102
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
,
N
, 12);
103
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
,
N
, 16);
104
105
// contraction with D1 fixed at 64 and D2 = D3 = N, for THREADS = 1, 4, 8, 12
// and 16 (generates BM_contraction_64xNxN_<THREADS>T).
BM_FuncWithInputDimsCPU
(
contraction
, 64,
N
,
N
, 1);
106
BM_FuncWithInputDimsCPU
(
contraction
, 64,
N
,
N
, 4);
107
BM_FuncWithInputDimsCPU
(
contraction
, 64,
N
,
N
, 8);
108
BM_FuncWithInputDimsCPU
(
contraction
, 64,
N
,
N
, 12);
109
BM_FuncWithInputDimsCPU
(
contraction
, 64,
N
,
N
, 16);
110
111
// contraction with D2 fixed at 64 and D1 = D3 = N, for THREADS = 1, 4, 8, 12
// and 16 (generates BM_contraction_Nx64xN_<THREADS>T).
BM_FuncWithInputDimsCPU
(
contraction
,
N
, 64,
N
, 1);
112
BM_FuncWithInputDimsCPU
(
contraction
,
N
, 64,
N
, 4);
113
BM_FuncWithInputDimsCPU
(
contraction
,
N
, 64,
N
, 8);
114
BM_FuncWithInputDimsCPU
(
contraction
,
N
, 64,
N
, 12);
115
BM_FuncWithInputDimsCPU
(
contraction
,
N
, 64,
N
, 16);
116
117
// contraction with D3 fixed at 64 and D1 = D2 = N, for THREADS = 1, 4, 8, 12
// and 16 (generates BM_contraction_NxNx64_<THREADS>T).
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
, 64, 1);
118
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
, 64, 4);
119
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
, 64, 8);
120
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
, 64, 12);
121
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
, 64, 16);
122
123
// contraction with D1 fixed at 1 and D2 = D3 = N, for THREADS = 1, 4, 8, 12
// and 16 (generates BM_contraction_1xNxN_<THREADS>T).
BM_FuncWithInputDimsCPU
(
contraction
, 1,
N
,
N
, 1);
124
BM_FuncWithInputDimsCPU
(
contraction
, 1,
N
,
N
, 4);
125
BM_FuncWithInputDimsCPU
(
contraction
, 1,
N
,
N
, 8);
126
BM_FuncWithInputDimsCPU
(
contraction
, 1,
N
,
N
, 12);
127
BM_FuncWithInputDimsCPU
(
contraction
, 1,
N
,
N
, 16);
128
129
// contraction with D3 fixed at 1 and D1 = D2 = N, for THREADS = 1, 4, 8, 12
// and 16 (generates BM_contraction_NxNx1_<THREADS>T).
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
, 1, 1);
130
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
, 1, 4);
131
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
, 1, 8);
132
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
, 1, 12);
133
BM_FuncWithInputDimsCPU
(
contraction
,
N
,
N
, 1, 16);
134
135
136
// Convolutions
137
// BM_FuncWithKernelDimsCPU(FUNC, DIM1, DIM2, THREADS) defines a static
// function named BM_<FUNC>_<DIM1>x<DIM2>_<THREADS>T(int iters, int N) and
// passes it to BENCHMARK_RANGE together with the values 128 and 5000.
// The generated function calls StopBenchmarkTiming(), creates `pool` and
// `device` through CREATE_THREAD_POOL(THREADS), builds a
// BenchmarkSuite<Eigen::ThreadPoolDevice, float> from (device, N), and then
// calls suite.FUNC(iters, DIM1, DIM2).
// NOTE(review): DIM1/DIM2 are presumably the kernel extents, going by the
// macro name; the suite method itself is not visible here -- confirm.
#define BM_FuncWithKernelDimsCPU(FUNC, DIM1, DIM2, THREADS) \
138
static void BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T(int iters, int N) { \
139
StopBenchmarkTiming(); \
140
CREATE_THREAD_POOL(THREADS); \
141
BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, N); \
142
suite.FUNC(iters, DIM1, DIM2); \
143
} \
144
BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T, 128, 5000);
145
146
// Instantiate BM_FuncWithKernelDimsCPU for `convolution` with the
// (DIM1, DIM2) pairs 7x1, 1x7, 7x4, 4x7, 7x64 and 64x7, each with
// THREADS = 4, 8 and 12 (e.g. BM_convolution_7x1_4T).
BM_FuncWithKernelDimsCPU
(convolution, 7, 1, 4);
147
BM_FuncWithKernelDimsCPU
(convolution, 7, 1, 8);
148
BM_FuncWithKernelDimsCPU
(convolution, 7, 1, 12);
149
150
BM_FuncWithKernelDimsCPU
(convolution, 1, 7, 4);
151
BM_FuncWithKernelDimsCPU
(convolution, 1, 7, 8);
152
BM_FuncWithKernelDimsCPU
(convolution, 1, 7, 12);
153
154
BM_FuncWithKernelDimsCPU
(convolution, 7, 4, 4);
155
BM_FuncWithKernelDimsCPU
(convolution, 7, 4, 8);
156
BM_FuncWithKernelDimsCPU
(convolution, 7, 4, 12);
157
158
BM_FuncWithKernelDimsCPU
(convolution, 4, 7, 4);
159
BM_FuncWithKernelDimsCPU
(convolution, 4, 7, 8);
160
BM_FuncWithKernelDimsCPU
(convolution, 4, 7, 12);
161
162
BM_FuncWithKernelDimsCPU
(convolution, 7, 64, 4);
163
BM_FuncWithKernelDimsCPU
(convolution, 7, 64, 8);
164
BM_FuncWithKernelDimsCPU
(convolution, 7, 64, 12);
165
166
BM_FuncWithKernelDimsCPU
(convolution, 64, 7, 4);
167
BM_FuncWithKernelDimsCPU
(convolution, 64, 7, 8);
168
BM_FuncWithKernelDimsCPU
(convolution, 64, 7, 12);
BM_FuncWithInputDimsCPU
#define BM_FuncWithInputDimsCPU(FUNC, D1, D2, D3, THREADS)
Definition:
tensor_benchmarks_cpu.cc:83
BM_FuncCPU
#define BM_FuncCPU(FUNC, THREADS)
Definition:
tensor_benchmarks_cpu.cc:12
tensor_benchmarks.h
contraction
void contraction(const Device &device_, TensorIndex num_iters, TensorIndex m_, TensorIndex k_, TensorIndex n_)
Definition:
tensor_contract_sycl_bench.cc:49
BM_FuncWithKernelDimsCPU
#define BM_FuncWithKernelDimsCPU(FUNC, DIM1, DIM2, THREADS)
Definition:
tensor_benchmarks_cpu.cc:137
N
#define N
Definition:
igam.h:9
gtsam
Author(s):
autogenerated on Wed Jan 1 2025 04:04:23