Go to the documentation of this file.
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
30 template <
typename ArgType>
35 template <
typename ArgType>
39 template <
typename ArgType>
44 template <
typename ArgType>
48 template <
typename SrcType,
typename TargetType>
64 bool vectorized,
double packet_size)
84 double load_cost,
double store_cost,
double compute_cost)
const {
160 template <
typename Device>
175 double output_size,
const TensorOpCost& cost_per_coeff,
int max_threads) {
176 double cost =
totalCost(output_size, cost_per_coeff);
181 numext::maxi<int>(1,
static_cast<int>(threads)));
188 double output_size,
const TensorOpCost& cost_per_coeff) {
193 double output_size,
const TensorOpCost& cost_per_coeff) {
203 const double kLoadCycles = 1.0 / 64 * 11;
204 const double kStoreCycles = 1.0 / 64 * 11;
207 cost_per_coeff.
total_cost(kLoadCycles, kStoreCycles,
214 #endif // EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost & operator*=(double rhs)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double compute_cycles() const
#define EIGEN_DEVICE_FUNC
Namespace containing all symbols from the Eigen library.
EIGEN_DEVICE_FUNC TensorOpCost()
EIGEN_DEVICE_FUNC TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles, bool vectorized, double packet_size)
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool() isfinite(const Eigen::bfloat16 &h)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator+(TensorOpCost lhs, const TensorOpCost &rhs)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*(double lhs, TensorOpCost rhs)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int ModCost()
ofstream os("timeSchurFactors.csv")
EIGEN_DEVICE_FUNC void dropMemoryCost()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_loaded() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin(const TensorOpCost &rhs) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_stored() const
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost()
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double total_cost(double load_cost, double store_cost, double compute_cost) const
#define EIGEN_STRONG_INLINE
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int DivCost()
static const int kPerThreadCycles
friend std::ostream & operator<<(std::ostream &os, const TensorOpCost &tc)
static const int kDeviceCyclesPerComputeCycle
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double totalCost(double output_size, const TensorOpCost &cost_per_coeff)
static const int kStartupCycles
static const int kTaskSize
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double taskSize(double output_size, const TensorOpCost &cost_per_coeff)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int numThreads(double output_size, const TensorOpCost &cost_per_coeff, int max_threads)
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost & operator+=(const TensorOpCost &rhs)
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T &x, const T &y)
EIGEN_DEVICE_FUNC TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*(TensorOpCost lhs, double rhs)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax(const TensorOpCost &rhs) const
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int CastCost()
gtsam
Author(s):
autogenerated on Sat Nov 16 2024 04:05:45