Macros | Functions
tensor_benchmarks_cpu.cc File Reference
#include <string>
#include "tensor_benchmarks.h"
Include dependency graph for tensor_benchmarks_cpu.cc:

Go to the source code of this file.

Macros

#define BM_FuncCPU(FUNC, THREADS)
 
#define BM_FuncWithInputDimsCPU(FUNC, D1, D2, D3, THREADS)
 
#define BM_FuncWithKernelDimsCPU(FUNC, DIM1, DIM2, THREADS)
 
#define CREATE_THREAD_POOL(threads)
 
#define EIGEN_USE_THREADS
 

Functions

 BM_FuncCPU (algebraicFunc, 12)
 
 BM_FuncCPU (algebraicFunc, 4)
 
 BM_FuncCPU (algebraicFunc, 8)
 
 BM_FuncCPU (broadcasting, 12)
 
 BM_FuncCPU (broadcasting, 4)
 
 BM_FuncCPU (broadcasting, 8)
 
 BM_FuncCPU (coeffWiseOp, 12)
 
 BM_FuncCPU (coeffWiseOp, 4)
 
 BM_FuncCPU (coeffWiseOp, 8)
 
 BM_FuncCPU (colChip, 12)
 
 BM_FuncCPU (colChip, 4)
 
 BM_FuncCPU (colChip, 8)
 
 BM_FuncCPU (colReduction, 12)
 
 BM_FuncCPU (colReduction, 4)
 
 BM_FuncCPU (colReduction, 8)
 
 BM_FuncCPU (memcpy, 12)
 
 BM_FuncCPU (memcpy, 4)
 
 BM_FuncCPU (memcpy, 8)
 
 BM_FuncCPU (padding, 12)
 
 BM_FuncCPU (padding, 4)
 
 BM_FuncCPU (padding, 8)
 
 BM_FuncCPU (random, 12)
 
 BM_FuncCPU (random, 4)
 
 BM_FuncCPU (random, 8)
 
 BM_FuncCPU (rowChip, 12)
 
 BM_FuncCPU (rowChip, 4)
 
 BM_FuncCPU (rowChip, 8)
 
 BM_FuncCPU (rowReduction, 12)
 
 BM_FuncCPU (rowReduction, 4)
 
 BM_FuncCPU (rowReduction, 8)
 
 BM_FuncCPU (shuffling, 12)
 
 BM_FuncCPU (shuffling, 4)
 
 BM_FuncCPU (shuffling, 8)
 
 BM_FuncCPU (slicing, 12)
 
 BM_FuncCPU (slicing, 4)
 
 BM_FuncCPU (slicing, 8)
 
 BM_FuncCPU (striding, 12)
 
 BM_FuncCPU (striding, 4)
 
 BM_FuncCPU (striding, 8)
 
 BM_FuncCPU (transcendentalFunc, 12)
 
 BM_FuncCPU (transcendentalFunc, 4)
 
 BM_FuncCPU (transcendentalFunc, 8)
 
 BM_FuncCPU (typeCasting, 12)
 
 BM_FuncCPU (typeCasting, 4)
 
 BM_FuncCPU (typeCasting, 8)
 
 BM_FuncWithInputDimsCPU (contraction, 1, N, N, 1)
 
 BM_FuncWithInputDimsCPU (contraction, 1, N, N, 12)
 
 BM_FuncWithInputDimsCPU (contraction, 1, N, N, 16)
 
 BM_FuncWithInputDimsCPU (contraction, 1, N, N, 4)
 
 BM_FuncWithInputDimsCPU (contraction, 1, N, N, 8)
 
 BM_FuncWithInputDimsCPU (contraction, 64, N, N, 1)
 
 BM_FuncWithInputDimsCPU (contraction, 64, N, N, 12)
 
 BM_FuncWithInputDimsCPU (contraction, 64, N, N, 16)
 
 BM_FuncWithInputDimsCPU (contraction, 64, N, N, 4)
 
 BM_FuncWithInputDimsCPU (contraction, 64, N, N, 8)
 
 BM_FuncWithInputDimsCPU (contraction, N, 64, N, 1)
 
 BM_FuncWithInputDimsCPU (contraction, N, 64, N, 12)
 
 BM_FuncWithInputDimsCPU (contraction, N, 64, N, 16)
 
 BM_FuncWithInputDimsCPU (contraction, N, 64, N, 4)
 
 BM_FuncWithInputDimsCPU (contraction, N, 64, N, 8)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, 1, 1)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, 1, 12)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, 1, 16)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, 1, 4)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, 1, 8)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, 64, 1)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, 64, 12)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, 64, 16)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, 64, 4)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, 64, 8)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, N, 1)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, N, 12)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, N, 16)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, N, 4)
 
 BM_FuncWithInputDimsCPU (contraction, N, N, N, 8)
 
 BM_FuncWithKernelDimsCPU (convolution, 1, 7, 12)
 
 BM_FuncWithKernelDimsCPU (convolution, 1, 7, 4)
 
 BM_FuncWithKernelDimsCPU (convolution, 1, 7, 8)
 
 BM_FuncWithKernelDimsCPU (convolution, 4, 7, 12)
 
 BM_FuncWithKernelDimsCPU (convolution, 4, 7, 4)
 
 BM_FuncWithKernelDimsCPU (convolution, 4, 7, 8)
 
 BM_FuncWithKernelDimsCPU (convolution, 64, 7, 12)
 
 BM_FuncWithKernelDimsCPU (convolution, 64, 7, 4)
 
 BM_FuncWithKernelDimsCPU (convolution, 64, 7, 8)
 
 BM_FuncWithKernelDimsCPU (convolution, 7, 1, 12)
 
 BM_FuncWithKernelDimsCPU (convolution, 7, 1, 4)
 
 BM_FuncWithKernelDimsCPU (convolution, 7, 1, 8)
 
 BM_FuncWithKernelDimsCPU (convolution, 7, 4, 12)
 
 BM_FuncWithKernelDimsCPU (convolution, 7, 4, 4)
 
 BM_FuncWithKernelDimsCPU (convolution, 7, 4, 8)
 
 BM_FuncWithKernelDimsCPU (convolution, 7, 64, 12)
 
 BM_FuncWithKernelDimsCPU (convolution, 7, 64, 4)
 
 BM_FuncWithKernelDimsCPU (convolution, 7, 64, 8)
 

Macro Definition Documentation

◆ BM_FuncCPU

#define BM_FuncCPU (   FUNC,
  THREADS 
)
Value:
static void BM_##FUNC##_##THREADS##T(int iters, int N) { \
StopBenchmarkTiming(); \
CREATE_THREAD_POOL(THREADS); \
BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, N); \
suite.FUNC(iters); \
} \
BENCHMARK_RANGE(BM_##FUNC##_##THREADS##T, 10, 5000);

Definition at line 12 of file tensor_benchmarks_cpu.cc.

◆ BM_FuncWithInputDimsCPU

#define BM_FuncWithInputDimsCPU (   FUNC,
  D1,
  D2,
  D3,
  THREADS 
)
Value:
static void BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T(int iters, int N) { \
StopBenchmarkTiming(); \
if (THREADS == 1) { \
Eigen::DefaultDevice device; \
BenchmarkSuite<Eigen::DefaultDevice, float> suite(device, D1, D2, D3); \
suite.FUNC(iters); \
} else { \
CREATE_THREAD_POOL(THREADS); \
BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, D1, D2, D3); \
suite.FUNC(iters); \
} \
} \
BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T, 10, 5000);

Definition at line 83 of file tensor_benchmarks_cpu.cc.

◆ BM_FuncWithKernelDimsCPU

#define BM_FuncWithKernelDimsCPU (   FUNC,
  DIM1,
  DIM2,
  THREADS 
)
Value:
static void BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T(int iters, int N) { \
StopBenchmarkTiming(); \
CREATE_THREAD_POOL(THREADS); \
BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, N); \
suite.FUNC(iters, DIM1, DIM2); \
} \
BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T, 128, 5000);

Definition at line 137 of file tensor_benchmarks_cpu.cc.

◆ CREATE_THREAD_POOL

#define CREATE_THREAD_POOL (   threads)
Value:
Eigen::ThreadPool pool(threads); \
Eigen::ThreadPoolDevice device(&pool, threads);

Definition at line 7 of file tensor_benchmarks_cpu.cc.

◆ EIGEN_USE_THREADS

#define EIGEN_USE_THREADS

Definition at line 1 of file tensor_benchmarks_cpu.cc.

Function Documentation

◆ BM_FuncCPU() [1/45]

BM_FuncCPU ( algebraicFunc  ,
12   
)

◆ BM_FuncCPU() [2/45]

BM_FuncCPU ( algebraicFunc  ,
4   
)

◆ BM_FuncCPU() [3/45]

BM_FuncCPU ( algebraicFunc  ,
8   
)

◆ BM_FuncCPU() [4/45]

BM_FuncCPU ( broadcasting  ,
12   
)

◆ BM_FuncCPU() [5/45]

BM_FuncCPU ( broadcasting  ,
4   
)

◆ BM_FuncCPU() [6/45]

BM_FuncCPU ( broadcasting  ,
8   
)

◆ BM_FuncCPU() [7/45]

BM_FuncCPU ( coeffWiseOp  ,
12   
)

◆ BM_FuncCPU() [8/45]

BM_FuncCPU ( coeffWiseOp  ,
4   
)

◆ BM_FuncCPU() [9/45]

BM_FuncCPU ( coeffWiseOp  ,
8   
)

◆ BM_FuncCPU() [10/45]

BM_FuncCPU ( colChip  ,
12   
)

◆ BM_FuncCPU() [11/45]

BM_FuncCPU ( colChip  ,
4   
)

◆ BM_FuncCPU() [12/45]

BM_FuncCPU ( colChip  ,
8   
)

◆ BM_FuncCPU() [13/45]

BM_FuncCPU ( colReduction  ,
12   
)

◆ BM_FuncCPU() [14/45]

BM_FuncCPU ( colReduction  ,
4   
)

◆ BM_FuncCPU() [15/45]

BM_FuncCPU ( colReduction  ,
8   
)

◆ BM_FuncCPU() [16/45]

BM_FuncCPU ( memcpy  ,
12   
)

◆ BM_FuncCPU() [17/45]

BM_FuncCPU ( memcpy  ,
4   
)

◆ BM_FuncCPU() [18/45]

BM_FuncCPU ( memcpy  ,
8   
)

◆ BM_FuncCPU() [19/45]

BM_FuncCPU ( padding  ,
12   
)

◆ BM_FuncCPU() [20/45]

BM_FuncCPU ( padding  ,
4   
)

◆ BM_FuncCPU() [21/45]

BM_FuncCPU ( padding  ,
8   
)

◆ BM_FuncCPU() [22/45]

BM_FuncCPU ( random  ,
12   
)

◆ BM_FuncCPU() [23/45]

BM_FuncCPU ( random  ,
4   
)

◆ BM_FuncCPU() [24/45]

BM_FuncCPU ( random  ,
8   
)

◆ BM_FuncCPU() [25/45]

BM_FuncCPU ( rowChip  ,
12   
)

◆ BM_FuncCPU() [26/45]

BM_FuncCPU ( rowChip  ,
4   
)

◆ BM_FuncCPU() [27/45]

BM_FuncCPU ( rowChip  ,
8   
)

◆ BM_FuncCPU() [28/45]

BM_FuncCPU ( rowReduction  ,
12   
)

◆ BM_FuncCPU() [29/45]

BM_FuncCPU ( rowReduction  ,
4   
)

◆ BM_FuncCPU() [30/45]

BM_FuncCPU ( rowReduction  ,
8   
)

◆ BM_FuncCPU() [31/45]

BM_FuncCPU ( shuffling  ,
12   
)

◆ BM_FuncCPU() [32/45]

BM_FuncCPU ( shuffling  ,
4   
)

◆ BM_FuncCPU() [33/45]

BM_FuncCPU ( shuffling  ,
8   
)

◆ BM_FuncCPU() [34/45]

BM_FuncCPU ( slicing  ,
12   
)

◆ BM_FuncCPU() [35/45]

BM_FuncCPU ( slicing  ,
4   
)

◆ BM_FuncCPU() [36/45]

BM_FuncCPU ( slicing  ,
8   
)

◆ BM_FuncCPU() [37/45]

BM_FuncCPU ( striding  ,
12   
)

◆ BM_FuncCPU() [38/45]

BM_FuncCPU ( striding  ,
4   
)

◆ BM_FuncCPU() [39/45]

BM_FuncCPU ( striding  ,
8   
)

◆ BM_FuncCPU() [40/45]

BM_FuncCPU ( transcendentalFunc  ,
12   
)

◆ BM_FuncCPU() [41/45]

BM_FuncCPU ( transcendentalFunc  ,
4   
)

◆ BM_FuncCPU() [42/45]

BM_FuncCPU ( transcendentalFunc  ,
8   
)

◆ BM_FuncCPU() [43/45]

BM_FuncCPU ( typeCasting  ,
12   
)

◆ BM_FuncCPU() [44/45]

BM_FuncCPU ( typeCasting  ,
4   
)

◆ BM_FuncCPU() [45/45]

BM_FuncCPU ( typeCasting  ,
8   
)

◆ BM_FuncWithInputDimsCPU() [1/30]

BM_FuncWithInputDimsCPU ( contraction  ,
1  ,
N  ,
N  ,
1   
)

◆ BM_FuncWithInputDimsCPU() [2/30]

BM_FuncWithInputDimsCPU ( contraction  ,
1  ,
N  ,
N  ,
12   
)

◆ BM_FuncWithInputDimsCPU() [3/30]

BM_FuncWithInputDimsCPU ( contraction  ,
1  ,
N  ,
N  ,
16   
)

◆ BM_FuncWithInputDimsCPU() [4/30]

BM_FuncWithInputDimsCPU ( contraction  ,
1  ,
N  ,
N  ,
4   
)

◆ BM_FuncWithInputDimsCPU() [5/30]

BM_FuncWithInputDimsCPU ( contraction  ,
1  ,
N  ,
N  ,
8   
)

◆ BM_FuncWithInputDimsCPU() [6/30]

BM_FuncWithInputDimsCPU ( contraction  ,
64  ,
N  ,
N  ,
1   
)

◆ BM_FuncWithInputDimsCPU() [7/30]

BM_FuncWithInputDimsCPU ( contraction  ,
64  ,
N  ,
N  ,
12   
)

◆ BM_FuncWithInputDimsCPU() [8/30]

BM_FuncWithInputDimsCPU ( contraction  ,
64  ,
N  ,
N  ,
16   
)

◆ BM_FuncWithInputDimsCPU() [9/30]

BM_FuncWithInputDimsCPU ( contraction  ,
64  ,
N  ,
N  ,
4   
)

◆ BM_FuncWithInputDimsCPU() [10/30]

BM_FuncWithInputDimsCPU ( contraction  ,
64  ,
N  ,
N  ,
8   
)

◆ BM_FuncWithInputDimsCPU() [11/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
64  ,
N  ,
1   
)

◆ BM_FuncWithInputDimsCPU() [12/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
64  ,
N  ,
12   
)

◆ BM_FuncWithInputDimsCPU() [13/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
64  ,
N  ,
16   
)

◆ BM_FuncWithInputDimsCPU() [14/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
64  ,
N  ,
4   
)

◆ BM_FuncWithInputDimsCPU() [15/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
64  ,
N  ,
8   
)

◆ BM_FuncWithInputDimsCPU() [16/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
1  ,
1   
)

◆ BM_FuncWithInputDimsCPU() [17/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
1  ,
12   
)

◆ BM_FuncWithInputDimsCPU() [18/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
1  ,
16   
)

◆ BM_FuncWithInputDimsCPU() [19/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
1  ,
4   
)

◆ BM_FuncWithInputDimsCPU() [20/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
1  ,
8   
)

◆ BM_FuncWithInputDimsCPU() [21/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
64  ,
1   
)

◆ BM_FuncWithInputDimsCPU() [22/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
64  ,
12   
)

◆ BM_FuncWithInputDimsCPU() [23/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
64  ,
16   
)

◆ BM_FuncWithInputDimsCPU() [24/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
64  ,
4   
)

◆ BM_FuncWithInputDimsCPU() [25/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
64  ,
8   
)

◆ BM_FuncWithInputDimsCPU() [26/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
N  ,
1   
)

◆ BM_FuncWithInputDimsCPU() [27/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
N  ,
12   
)

◆ BM_FuncWithInputDimsCPU() [28/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
N  ,
16   
)

◆ BM_FuncWithInputDimsCPU() [29/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
N  ,
4   
)

◆ BM_FuncWithInputDimsCPU() [30/30]

BM_FuncWithInputDimsCPU ( contraction  ,
N  ,
N  ,
N  ,
8   
)

◆ BM_FuncWithKernelDimsCPU() [1/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
1  ,
7  ,
12   
)

◆ BM_FuncWithKernelDimsCPU() [2/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
1  ,
7  ,
4   
)

◆ BM_FuncWithKernelDimsCPU() [3/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
1  ,
7  ,
8   
)

◆ BM_FuncWithKernelDimsCPU() [4/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
4  ,
7  ,
12   
)

◆ BM_FuncWithKernelDimsCPU() [5/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
4  ,
7  ,
4   
)

◆ BM_FuncWithKernelDimsCPU() [6/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
4  ,
7  ,
8   
)

◆ BM_FuncWithKernelDimsCPU() [7/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
64  ,
7  ,
12   
)

◆ BM_FuncWithKernelDimsCPU() [8/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
64  ,
7  ,
4   
)

◆ BM_FuncWithKernelDimsCPU() [9/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
64  ,
7  ,
8   
)

◆ BM_FuncWithKernelDimsCPU() [10/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
7  ,
1  ,
12   
)

◆ BM_FuncWithKernelDimsCPU() [11/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
7  ,
1  ,
4   
)

◆ BM_FuncWithKernelDimsCPU() [12/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
7  ,
1  ,
8   
)

◆ BM_FuncWithKernelDimsCPU() [13/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
7  ,
4  ,
12   
)

◆ BM_FuncWithKernelDimsCPU() [14/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
7  ,
4  ,
4   
)

◆ BM_FuncWithKernelDimsCPU() [15/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
7  ,
4  ,
8   
)

◆ BM_FuncWithKernelDimsCPU() [16/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
7  ,
64  ,
12   
)

◆ BM_FuncWithKernelDimsCPU() [17/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
7  ,
64  ,
4   
)

◆ BM_FuncWithKernelDimsCPU() [18/18]

BM_FuncWithKernelDimsCPU ( convolution  ,
7  ,
64  ,
8   
)
x
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
Definition: gnuplot_common_settings.hh:12
T
Eigen::Triplet< double > T
Definition: Tutorial_sparse_example.cpp:6
Eigen::Triplet< double >
Eigen::ThreadPoolTempl
Definition: NonBlockingThreadPool.h:16
N
#define N
Definition: igam.h:9
_
constexpr descr< N - 1 > _(char const (&text)[N])
Definition: descr.h:109


gtsam
Author(s):
autogenerated on Wed Jan 1 2025 04:08:32