Assign_MKL.h
Go to the documentation of this file.
00001 /*
00002  Copyright (c) 2011, Intel Corporation. All rights reserved.
00003 
00004  Redistribution and use in source and binary forms, with or without modification,
00005  are permitted provided that the following conditions are met:
00006 
00007  * Redistributions of source code must retain the above copyright notice, this
00008    list of conditions and the following disclaimer.
00009  * Redistributions in binary form must reproduce the above copyright notice,
00010    this list of conditions and the following disclaimer in the documentation
00011    and/or other materials provided with the distribution.
00012  * Neither the name of Intel Corporation nor the names of its contributors may
00013    be used to endorse or promote products derived from this software without
00014    specific prior written permission.
00015 
00016  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
00017  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00018  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00019  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
00020  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00021  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00022  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
00023  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00024  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00025  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00026 
00027  ********************************************************************************
00028  *   Content : Eigen bindings to Intel(R) MKL
00029  *   MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
00030  ********************************************************************************
00031 */
00032 
00033 #ifndef EIGEN_ASSIGN_VML_H
00034 #define EIGEN_ASSIGN_VML_H
00035 
00036 namespace Eigen { 
00037 
00038 namespace internal {
00039 
// Primary template: a unary functor has no VML counterpart unless a
// specialization says otherwise. Specializations set IsSupported to 1 and
// provide a static run() that forwards to the matching VML routine.
template<typename Op>
struct vml_call
{
  enum { IsSupported = 0 };
};
00042 
00043 template<typename Dst, typename Src, typename UnaryOp>
00044 class vml_assign_traits
00045 {
00046   private:
00047     enum {
00048       DstHasDirectAccess = Dst::Flags & DirectAccessBit,
00049       SrcHasDirectAccess = Src::Flags & DirectAccessBit,
00050 
00051       StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
00052       InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
00053                 : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
00054                 : int(Dst::RowsAtCompileTime),
00055       InnerMaxSize  = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
00056                     : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
00057                     : int(Dst::MaxRowsAtCompileTime),
00058       MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
00059 
00060       MightEnableVml =  vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
00061                      && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
00062       MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
00063       VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
00064       LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
00065       MayEnableVml = MightEnableVml && LargeEnough,
00066       MayLinearize = MayEnableVml && MightLinearize
00067     };
00068   public:
00069     enum {
00070       Traversal = MayLinearize ? LinearVectorizedTraversal
00071                 : MayEnableVml ? InnerVectorizedTraversal
00072                 : DefaultTraversal
00073     };
00074 };
00075 
00076 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
00077          int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
00078 struct vml_assign_impl
00079   : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
00080 {
00081 };
00082 
00083 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
00084 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
00085 {
00086   typedef typename Derived1::Scalar Scalar;
00087   typedef typename Derived1::Index Index;
00088   static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
00089   {
00090     // in case we want to (or have to) skip VML at runtime we can call:
00091     // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
00092     const Index innerSize = dst.innerSize();
00093     const Index outerSize = dst.outerSize();
00094     for(Index outer = 0; outer < outerSize; ++outer) {
00095       const Scalar *src_ptr = src.IsRowMajor ?  &(src.nestedExpression().coeffRef(outer,0)) :
00096                                                 &(src.nestedExpression().coeffRef(0, outer));
00097       Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
00098       vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
00099     }
00100   }
00101 };
00102 
00103 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
00104 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
00105 {
00106   static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
00107   {
00108     // in case we want to (or have to) skip VML at runtime we can call:
00109     // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
00110     vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
00111   }
00112 };
00113 
00114 // Macros
00115 
// Defines the `Specialized` flavour of assign_impl for the given
// (TRAVERSAL, UNROLLING) pair when the right-hand side is a CwiseUnaryOp.
// Its run() simply forwards to vml_assign_impl, which then chooses between a
// VML call and the BuiltIn implementation.
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING)                                         \
  template<typename DstXpr, typename SrcXpr, typename Op>                                            \
  struct assign_impl<DstXpr, Eigen::CwiseUnaryOp<Op, SrcXpr>, TRAVERSAL, UNROLLING, Specialized>     \
  {                                                                                                  \
    static inline void run(DstXpr &dst, const Eigen::CwiseUnaryOp<Op, SrcXpr> &src)                  \
    {                                                                                                \
      vml_assign_impl<DstXpr, SrcXpr, Op, TRAVERSAL, UNROLLING>::run(dst, src);                      \
    }                                                                                                \
  };
00123 
00124 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
00125 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
00126 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
00127 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
00128 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
00129 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
00130 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
00131 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
00132 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
00133 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
00134 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
00135 
00136 
// Mode value handed to the vm* routines: VML_LA is used only when
// EIGEN_FAST_MATH is defined and equal to 1; VML_HA is used in every other case.
#if defined(EIGEN_FAST_MATH) && (EIGEN_FAST_MATH == 1)
#define EIGEN_MKL_VML_MODE VML_LA
#else
#define EIGEN_MKL_VML_MODE VML_HA
#endif
00142 
// Specializes vml_call for scalar_<EIGENOP>_op<EIGENTYPE>: marks the functor as
// supported and forwards run() to VMLOP(size, src, dst). The pointers are cast
// from EIGENTYPE to VMLTYPE; the functor argument itself is not used.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)        \
  template<>                                                                        \
  struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> >                               \
  {                                                                                 \
    enum { IsSupported = 1 };                                                       \
    static inline void run(const scalar_##EIGENOP##_op<EIGENTYPE>& /*functor*/,     \
                           int count, const EIGENTYPE* input, EIGENTYPE* output)    \
    {                                                                               \
      VMLOP(count, (const VMLTYPE*)input, (VMLTYPE*)output);                        \
    }                                                                               \
  };
00151 
// Same as EIGEN_MKL_VML_DECLARE_UNARY_CALL, but for VML routines that take a
// trailing mode argument: EIGEN_MKL_VML_MODE is passed as an MKL_INT64.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)     \
  template<>                                                                        \
  struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> >                               \
  {                                                                                 \
    enum { IsSupported = 1 };                                                       \
    static inline void run(const scalar_##EIGENOP##_op<EIGENTYPE>& /*functor*/,     \
                           int count, const EIGENTYPE* input, EIGENTYPE* output)    \
    {                                                                               \
      const MKL_INT64 mode = EIGEN_MKL_VML_MODE;                                    \
      VMLOP(count, (const VMLTYPE*)input, (VMLTYPE*)output, mode);                  \
    }                                                                               \
  };
00161 
// Specializes vml_call for a power functor with a single scalar exponent.
// VMLOP is called with every argument passed by address (size, exponent and
// mode included), so local copies of the exponent and the mode are made first.
// The exponent is read from the functor's m_exponent member.
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)          \
  template<>                                                                        \
  struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> >                               \
  {                                                                                 \
    enum { IsSupported = 1 };                                                       \
    static inline void run(const scalar_##EIGENOP##_op<EIGENTYPE>& functor,         \
                           int count, const EIGENTYPE* input, EIGENTYPE* output)    \
    {                                                                               \
      MKL_INT64 mode  = EIGEN_MKL_VML_MODE;                                         \
      EIGENTYPE power = functor.m_exponent;                                         \
      VMLOP(&count,                                                                 \
            (const VMLTYPE*)input,                                                  \
            (const VMLTYPE*)&power,                                                 \
            (VMLTYPE*)output,                                                       \
            &mode);                                                                 \
    }                                                                               \
  };
00173 
// Convenience wrappers around EIGEN_MKL_VML_DECLARE_UNARY_CALL (routines
// without a mode argument). The VML routine name is built by prefixing VMLOP
// with vs (float), vd (double), vc (scomplex) or vz (dcomplex).

// float and double
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                      \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float,  float)               \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)

// scomplex and dcomplex
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)                   \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8)      \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)

// all four scalar types
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP)                           \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                            \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
00185 
00186 
// Convenience wrappers around EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA (routines
// taking a mode argument). The VML routine name is built by prefixing VMLOP
// with vms (float), vmd (double), vmc (scomplex) or vmz (dcomplex).

// float and double
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                   \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float,  float)           \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)

// scomplex and dcomplex
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)                \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8)  \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)

// all four scalar types
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP)                        \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                         \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
00198 
00199 
00200 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin,  Sin)
00201 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
00202 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos,  Cos)
00203 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
00204 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan,  Tan)
00205 //EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,  Abs)
00206 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp,  Exp)
00207 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log,  Ln)
00208 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
00209 
00210 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
00211 
00212 // The vm*powx functions are not avaibale in the windows version of MKL.
00213 #ifndef _WIN32
00214 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
00215 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
00216 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
00217 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
00218 #endif
00219 
00220 } // end namespace internal
00221 
00222 } // end namespace Eigen
00223 
00224 #endif // EIGEN_ASSIGN_VML_H


acado
Author(s): Milan Vukov, Rien Quirynen
autogenerated on Sat Jun 8 2019 19:36:43