00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #ifndef EIGEN_ASSIGN_VML_H
00034 #define EIGEN_ASSIGN_VML_H
00035
00036 namespace Eigen {
00037
00038 namespace internal {
00039
// Primary template of the VML dispatch table.
// A functor type Op has no VML counterpart unless a specialization of
// vml_call<Op> says otherwise by setting IsSupported to a non-zero value
// and providing a static run() function.
template<typename Op>
struct vml_call
{
  enum { IsSupported = 0 };
};
00042
00043 template<typename Dst, typename Src, typename UnaryOp>
00044 class vml_assign_traits
00045 {
00046 private:
00047 enum {
00048 DstHasDirectAccess = Dst::Flags & DirectAccessBit,
00049 SrcHasDirectAccess = Src::Flags & DirectAccessBit,
00050
00051 StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
00052 InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
00053 : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
00054 : int(Dst::RowsAtCompileTime),
00055 InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
00056 : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
00057 : int(Dst::MaxRowsAtCompileTime),
00058 MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
00059
00060 MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
00061 && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
00062 MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
00063 VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
00064 LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
00065 MayEnableVml = MightEnableVml && LargeEnough,
00066 MayLinearize = MayEnableVml && MightLinearize
00067 };
00068 public:
00069 enum {
00070 Traversal = MayLinearize ? LinearVectorizedTraversal
00071 : MayEnableVml ? InnerVectorizedTraversal
00072 : DefaultTraversal
00073 };
00074 };
00075
00076 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
00077 int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
00078 struct vml_assign_impl
00079 : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
00080 {
00081 };
00082
00083 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
00084 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
00085 {
00086 typedef typename Derived1::Scalar Scalar;
00087 typedef typename Derived1::Index Index;
00088 static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
00089 {
00090
00091
00092 const Index innerSize = dst.innerSize();
00093 const Index outerSize = dst.outerSize();
00094 for(Index outer = 0; outer < outerSize; ++outer) {
00095 const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
00096 &(src.nestedExpression().coeffRef(0, outer));
00097 Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
00098 vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
00099 }
00100 }
00101 };
00102
00103 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
00104 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
00105 {
00106 static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
00107 {
00108
00109
00110 vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
00111 }
00112 };
00113
00114
00115
00116 #define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
00117 template<typename Derived1, typename Derived2, typename UnaryOp> \
00118 struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
00119 static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
00120 vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
00121 } \
00122 };
00123
00124 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
00125 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
00126 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
00127 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
00128 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
00129 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
00130 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
00131 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
00132 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
00133 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
00134 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
00135
00136
// Accuracy mode passed to the mode-taking VML entry points:
// VML_LA when EIGEN_FAST_MATH is defined and equal to 1, VML_HA in every other case.
#if defined(EIGEN_FAST_MATH) && (EIGEN_FAST_MATH == 1)
#define EIGEN_MKL_VML_MODE VML_LA
#else
#define EIGEN_MKL_VML_MODE VML_HA
#endif
00142
// Specializes vml_call for scalar_<EIGENOP>_op<EIGENTYPE>: marks the functor as
// supported and maps run() onto VMLOP(size, src, dst), with the pointers
// reinterpreted as VMLTYPE. The functor argument itself is not used.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)              \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> >                          \
  {                                                                                       \
    enum { IsSupported = 1 };                                                             \
    static inline void run(const scalar_##EIGENOP##_op<EIGENTYPE>& ,                      \
                           int size, const EIGENTYPE* src, EIGENTYPE* dst)                \
    {                                                                                     \
      const VMLTYPE* vmlIn  = reinterpret_cast<const VMLTYPE*>(src);                      \
      VMLTYPE*       vmlOut = reinterpret_cast<VMLTYPE*>(dst);                            \
      VMLOP(size, vmlIn, vmlOut);                                                         \
    }                                                                                     \
  };
00151
// Same as a plain unary VML binding, but for VML entry points that take an
// explicit mode argument: run() calls VMLOP(size, src, dst, mode) with mode
// set to EIGEN_MKL_VML_MODE. The functor argument itself is not used.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)           \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> >                          \
  {                                                                                       \
    enum { IsSupported = 1 };                                                             \
    static inline void run(const scalar_##EIGENOP##_op<EIGENTYPE>& ,                      \
                           int size, const EIGENTYPE* src, EIGENTYPE* dst)                \
    {                                                                                     \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                             \
      const VMLTYPE* vmlIn  = reinterpret_cast<const VMLTYPE*>(src);                      \
      VMLTYPE*       vmlOut = reinterpret_cast<VMLTYPE*>(dst);                            \
      VMLOP(size, vmlIn, vmlOut, vmlMode);                                                \
    }                                                                                     \
  };
00161
// Specializes vml_call for scalar_<EIGENOP>_op<EIGENTYPE> where the functor
// carries a constant exponent (read from func.m_exponent). VMLOP is called
// with every argument passed by address: size, source array, exponent,
// destination array and the mode (EIGEN_MKL_VML_MODE).
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)                \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> >                          \
  {                                                                                       \
    enum { IsSupported = 1 };                                                             \
    static inline void run(const scalar_##EIGENOP##_op<EIGENTYPE>& func,                  \
                           int size, const EIGENTYPE* src, EIGENTYPE* dst)                \
    {                                                                                     \
      EIGENTYPE exponent = func.m_exponent;                                               \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                             \
      const VMLTYPE* vmlIn  = reinterpret_cast<const VMLTYPE*>(src);                      \
      const VMLTYPE* vmlExp = reinterpret_cast<const VMLTYPE*>(&exponent);                \
      VMLTYPE*       vmlOut = reinterpret_cast<VMLTYPE*>(dst);                            \
      VMLOP(&size, vmlIn, vmlExp, vmlOut, &vmlMode);                                      \
    }                                                                                     \
  };
00173
// Convenience wrappers around EIGEN_MKL_VML_DECLARE_UNARY_CALL that build the
// VML function name by prefixing VMLNAME with the type letter:
//   vs<VMLNAME> for float, vd<VMLNAME> for double,
//   vc<VMLNAME> for scomplex (as MKL_Complex8), vz<VMLNAME> for dcomplex (as MKL_Complex16).

// float and double bindings.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(OPNAME, VMLNAME)                           \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(OPNAME, vs##VMLNAME, float, float)                     \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(OPNAME, vd##VMLNAME, double, double)

// Single and double precision complex bindings.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(OPNAME, VMLNAME)                        \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(OPNAME, vc##VMLNAME, scomplex, MKL_Complex8)           \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(OPNAME, vz##VMLNAME, dcomplex, MKL_Complex16)

// All four scalar types at once.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(OPNAME, VMLNAME)                                \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(OPNAME, VMLNAME)                                 \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(OPNAME, VMLNAME)
00185
00186
// Convenience wrappers around EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA (the variant
// passing an explicit mode). The VML function name is VMLNAME prefixed with:
//   vms for float, vmd for double,
//   vmc for scomplex (as MKL_Complex8), vmz for dcomplex (as MKL_Complex16).

// float and double bindings.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(OPNAME, VMLNAME)                        \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(OPNAME, vms##VMLNAME, float, float)                 \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(OPNAME, vmd##VMLNAME, double, double)

// Single and double precision complex bindings.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(OPNAME, VMLNAME)                     \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(OPNAME, vmc##VMLNAME, scomplex, MKL_Complex8)       \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(OPNAME, vmz##VMLNAME, dcomplex, MKL_Complex16)

// All four scalar types at once.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(OPNAME, VMLNAME)                             \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(OPNAME, VMLNAME)                              \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(OPNAME, VMLNAME)
00198
00199
00200 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
00201 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
00202 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
00203 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
00204 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
00205
00206 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
00207 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
00208 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
00209
00210 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
00211
00212
// Bindings of scalar_pow_op to the vm?powx_ entry points, for all four scalar types.
// NOTE(review): these are only declared when _WIN32 is defined. The
// trailing-underscore function names look like the Fortran-style interface,
// which may not be what the Windows MKL headers export - confirm whether the
// guard is meant to be "#ifndef _WIN32".
#if defined(_WIN32)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float,    float)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double,   double)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
#endif
00219
00220 }
00221
00222 }
00223
00224 #endif // EIGEN_ASSIGN_VML_H