// NOTE(review): the leading integers (34, 35, 41, 49, ...) look like listing
// line numbers fused into the text, and several numbered lines are absent
// (e.g. 42-48 and 52) — confirm against the original header before relying on
// this fragment.
//
// Include guard for EIGEN_ASSIGN_VML_H, followed by the start of a template
// parameterised on a destination type (Dst) and a source type (Src).
34 #ifndef EIGEN_ASSIGN_VML_H 35 #define EIGEN_ASSIGN_VML_H 41 template<
typename Dst,
typename Src>
// InnerSize: Dst::SizeAtCompileTime when Dst is a compile-time vector;
// otherwise Dst::ColsAtCompileTime if Dst::Flags has RowMajorBit set, else
// Dst::RowsAtCompileTime.
49 InnerSize =
int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
50 : int(Dst::Flags)&
RowMajorBit ? int(Dst::ColsAtCompileTime)
51 : int(Dst::RowsAtCompileTime),
// Tail of a second selection with the same row-major test, choosing between
// Dst::MaxColsAtCompileTime and Dst::MaxRowsAtCompileTime. The enumerator name
// and first operand are not visible here — presumably the "max" counterpart of
// InnerSize; verify.
53 : int(Dst::Flags)&
RowMajorBit ? int(Dst::MaxColsAtCompileTime)
54 : int(Dst::MaxRowsAtCompileTime),
// NOTE(review): this region is a collapsed rendering of several multi-line
// macro definitions. The embedded integers (69, 70, 71, ...) look like listing
// line numbers, and some numbered lines are absent (e.g. 72, 92, 100-105), so
// the #else/#endif of the EIGEN_FAST_MATH test, the `else` branches and the
// closing braces of the macros are not visible — confirm against the original
// header.
//
// Helper macros visible below:
//  - EIGEN_PP_EXPAND(ARG) expands to ARG.
//  - EIGEN_VMLMODE_EXPAND_LA expands to `, VML_HA` when EIGEN_FAST_MATH is
//    undefined or not equal to 1. A second definition expanding to `, VML_LA`
//    follows, presumably in the (not visible) #else branch.
//  - EIGEN_VMLMODE_EXPAND__ expands to nothing.
//  - EIGEN_VMLMODE_PREFIX(VMLMODE) pastes EIGEN_VMLMODE_PREFIX_ and VMLMODE
//    via EIGEN_CAT, giving `vm` for LA and `v` for _.
//
// EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)
//   Declares an Assignment specialization for
//   CwiseUnaryOp<scalar_EIGENOP_op<EIGENTYPE>, SrcXprNested> with
//   assign_op<EIGENTYPE,EIGENTYPE> and Dense2Dense, enabled only when
//   vml_assign_traits<DstXprType,SrcXprNested>::EnableVml holds.
//   run() asserts that dst and src have matching rows/cols. When Traversal is
//   LinearTraversal it makes a single VMLOP call over dst.size() elements,
//   using the data() pointers of the nested expression and of dst. The
//   remaining visible code loops over dst.outerSize() and calls VMLOP on
//   dst.innerSize() elements per outer index, taking the slice start from
//   coeffRef(outer,0) or coeffRef(0,outer) depending on IsRowMajor. The mode
//   argument, if any, is appended through EIGEN_VMLMODE_EXPAND_##VMLMODE.
//
// EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL / _CPLX / (both)
//   Instantiate the unary macro for float/double (VML names prefixed with
//   s/d) and for scomplex/dcomplex as MKL_Complex8/MKL_Complex16 (c/z).
//
// EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)
//   Same pattern for a CwiseBinaryOp whose right operand is a
//   CwiseNullaryOp<scalar_constant_op<EIGENTYPE>, Plain>. The exponent is read
//   from src.rhs().functor().m_other, reinterpreted as VMLTYPE, and passed to
//   VMLOP together with the src.lhs() data and the dst pointer.
69 #define EIGEN_PP_EXPAND(ARG) ARG 70 #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) 71 #define EIGEN_VMLMODE_EXPAND_LA , VML_HA 73 #define EIGEN_VMLMODE_EXPAND_LA , VML_LA 76 #define EIGEN_VMLMODE_EXPAND__ 78 #define EIGEN_VMLMODE_PREFIX_LA vm 79 #define EIGEN_VMLMODE_PREFIX__ v 80 #define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE) 82 #define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ 83 template< typename DstXprType, typename SrcXprNested> \ 84 struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \ 85 Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \ 86 typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \ 87 static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &) { \ 88 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ 89 if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \ 90 VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ 91 (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ 93 const Index outerSize = dst.outerSize(); \ 94 for(Index outer = 0; outer < outerSize; ++outer) { \ 95 const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ 96 &(src.nestedExpression().coeffRef(0, outer)); \ 97 EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ 98 VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \ 99 (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ 106 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ 107 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \ 108 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE) 110 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \ 111 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \ 112 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE) 114 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \ 115 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ 116 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) 140 #define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ 141 template< typename DstXprType, typename SrcXprNested, typename Plain> \ 142 struct Assignment<DstXprType, CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \ 143 const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, assign_op<EIGENTYPE,EIGENTYPE>, \ 144 Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \ 145 typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \ 146 const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \ 147 static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &) { \ 148 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ 149 VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \ 150 
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \ 152 VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \ 153 (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ 155 const Index outerSize = dst.outerSize(); \ 156 for(Index outer = 0; outer < outerSize; ++outer) { \ 157 const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) : \ 158 &(src.lhs().coeffRef(0, outer)); \ 159 EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ 160 VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \ 161 (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ 176 #endif // EIGEN_ASSIGN_VML_H
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half &a, const half &b)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)
EIGEN_DEVICE_FUNC const ExpReturnType exp() const
const unsigned int DirectAccessBit
EIGEN_DEVICE_FUNC const TanhReturnType tanh() const
EIGEN_DEVICE_FUNC const LogReturnType log() const
EIGEN_DEVICE_FUNC const SqrtReturnType sqrt() const
EIGEN_DEVICE_FUNC const CoshReturnType cosh() const
const unsigned int RowMajorBit
EIGEN_DEVICE_FUNC const RoundReturnType round() const
EIGEN_DEVICE_FUNC const CosReturnType cos() const
EIGEN_DEVICE_FUNC const CeilReturnType ceil() const
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE)
std::complex< float > scomplex
EIGEN_DEVICE_FUNC const SinhReturnType sinh() const
std::complex< double > dcomplex
EIGEN_DEVICE_FUNC const AtanReturnType atan() const
EIGEN_DEVICE_FUNC const TanReturnType tan() const
EIGEN_DEVICE_FUNC const AcosReturnType acos() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArgReturnType arg() const
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
EIGEN_DEVICE_FUNC const FloorReturnType floor() const
EIGEN_DEVICE_FUNC const Log10ReturnType log10() const
EIGEN_DEVICE_FUNC const SinReturnType sin() const
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)
const unsigned int LinearAccessBit
EIGEN_DEVICE_FUNC const AsinReturnType asin() const
EIGEN_DEVICE_FUNC const SquareReturnType square() const