Assign_MKL.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2011, Intel Corporation. All rights reserved.
3 
4  Redistribution and use in source and binary forms, with or without modification,
5  are permitted provided that the following conditions are met:
6 
7  * Redistributions of source code must retain the above copyright notice, this
8  list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright notice,
10  this list of conditions and the following disclaimer in the documentation
11  and/or other materials provided with the distribution.
12  * Neither the name of Intel Corporation nor the names of its contributors may
13  be used to endorse or promote products derived from this software without
14  specific prior written permission.
15 
16  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27  ********************************************************************************
28  * Content : Eigen bindings to Intel(R) MKL
29  * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
30  ********************************************************************************
31 */
32 
33 #ifndef EIGEN_ASSIGN_VML_H
34 #define EIGEN_ASSIGN_VML_H
35 
36 namespace Eigen {
37 
38 namespace internal {
39 
// Primary vml_call template: a functor type Op is reported as not handled by
// MKL VML (IsSupported = 0). The EIGEN_MKL_VML_DECLARE_* macros further down in
// this file generate full specializations that set IsSupported = 1 and add a
// static run() forwarding to the matching VML routine.
40 template<typename Op> struct vml_call
41 { enum { IsSupported = 0 }; };
42 
// Compile-time traits that decide whether, and with which traversal, an
// assignment of UnaryOp applied to Src into Dst may be routed through VML.
// NOTE(review): the declaration line naming this class (listing line 44) and
// the last arm of the Traversal conditional (listing line 72) are absent from
// this extract; restore both from the original header before compiling.
43 template<typename Dst, typename Src, typename UnaryOp>
45 {
46  private:
47  enum {
    // Non-zero when the DirectAccessBit flag is set for Dst / Src.
48  DstHasDirectAccess = Dst::Flags & DirectAccessBit,
49  SrcHasDirectAccess = Src::Flags & DirectAccessBit,
50 
    // True when Dst and Src are both row-major or both column-major.
51  StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
    // Compile-time length of one inner slice of Dst: the full size for a
    // vector, otherwise columns for row-major and rows for column-major.
    // Not referenced again in the visible part of this block.
52  InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
53  : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
54  : int(Dst::RowsAtCompileTime),
    // Same selection as InnerSize, but using the Max* compile-time bounds.
55  InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
56  : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
57  : int(Dst::MaxRowsAtCompileTime),
    // Note: initialised from SizeAtCompileTime, not MaxSizeAtCompileTime.
58  MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
59 
    // Structural preconditions: the functor has a vml_call specialization,
    // storage orders agree, both sides have direct access, and both inner
    // strides are known at compile time to be 1.
60  MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
61  && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
    // Additionally both sides carry LinearAccessBit, so the whole expression
    // can be handled as one run.
62  MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
    // Element count used for the threshold test: the whole object when
    // linearizable, one inner slice otherwise.
63  VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
    // VML is only considered when that count is Dynamic or at least
    // EIGEN_MKL_VML_THRESHOLD.
64  LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
65  MayEnableVml = MightEnableVml && LargeEnough,
66  MayLinearize = MayEnableVml && MightLinearize
67  };
68  public:
69  enum {
    // Resulting traversal: linear when the whole object can be processed in
    // one run, per-inner-slice when only the VML preconditions hold.
    // NOTE(review): the fallback arm for the "VML not enabled" case is on the
    // missing listing line 72.
70  Traversal = MayLinearize ? LinearVectorizedTraversal
71  : MayEnableVml ? InnerVectorizedTraversal
73  };
74 };
75 
// Generic case: derives from the BuiltIn assign_impl for the same
// Traversal/Unrolling and adds nothing of its own.
// NOTE(review): listing lines 77-78 (the remainder of the template parameter
// list and the struct name) are absent from this extract. The specializations
// below pass six template arguments while EIGEN_MKL_VML_SPECIALIZE_ASSIGN
// passes five, so the missing parameter presumably has a default — confirm
// against the original header.
76 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
79  : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
80 {
81 };
82 
// Specialization selected when the last template argument is
// InnerVectorizedTraversal: the functor is applied one inner slice at a time,
// with one vml_call<UnaryOp>::run() per outer index.
83 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
84 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
85 {
86  typedef typename Derived1::Scalar Scalar;
87  typedef typename Derived1::Index Index;
    // dst: destination written through coeffRef().
    // src: unary expression; its functor and nested expression are read.
88  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
89  {
90  // in case we want to (or have to) skip VML at runtime we can call:
91  // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
92  const Index innerSize = dst.innerSize();
93  const Index outerSize = dst.outerSize();
94  for(Index outer = 0; outer < outerSize; ++outer) {
    // Address of the first coefficient of the current slice: (outer,0) when
    // the operand is row-major, (0,outer) otherwise.
95  const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
96  &(src.nestedExpression().coeffRef(0, outer));
97  Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
    // NOTE(review): innerSize has type Index, while the run() overloads
    // generated by the macros in this file take `int size`; the conversion
    // narrows if Index is wider than int.
98  vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
99  }
100  }
101 };
102 
// Specialization selected when the last template argument is
// LinearVectorizedTraversal: the whole destination is processed by a single
// vml_call<UnaryOp>::run() over dst.size() elements.
103 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
104 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
105 {
    // dst: destination, accessed through data().
    // src: unary expression; its functor and the data() pointer of its nested
    // expression are read.
106  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
107  {
108  // in case we want to (or have to) skip VML at runtime we can call:
109  // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
    // NOTE(review): dst.size() is passed to a run() whose generated overloads
    // in this file take `int size`; confirm the value cannot exceed int range.
110  vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
111  }
112 };
113 
114 // Macros
115 
// Generates a partial specialization of assign_impl for sources of the form
// CwiseUnaryOp<UnaryOp, Derived2>, for one (TRAVERSAL, UNROLLING) pair and the
// `Specialized` tag. Its run() forwards to vml_assign_impl with the same
// traversal/unrolling values. Only five template arguments are given to
// vml_assign_impl here.
116 #define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
117  template<typename Derived1, typename Derived2, typename UnaryOp> \
118  struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
119  static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
120  vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
121  } \
122  };
123 
135 
136 
// Mode constant passed to the vm* routines by the *_LA and POW macros below:
// VML_HA unless EIGEN_FAST_MATH is defined and equal to 1, in which case
// VML_LA is used. (In MKL's documentation these are the high-accuracy and
// low-accuracy VML modes.)
137 #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
138 #define EIGEN_MKL_VML_MODE VML_HA
139 #else
140 #define EIGEN_MKL_VML_MODE VML_LA
141 #endif
142 
// Generates the full specialization vml_call< scalar_<EIGENOP>_op<EIGENTYPE> >.
//   EIGENOP   - middle part of the Eigen functor name (scalar_<EIGENOP>_op)
//   VMLOP     - routine to call, taking (size, src, dst)
//   EIGENTYPE - scalar type seen by Eigen
//   VMLTYPE   - type the src/dst pointers are C-cast to for the call
// The generated run() ignores the functor object and takes the element count
// as `int`.
143 #define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
144  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
145  enum { IsSupported = 1 }; \
146  static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
147  int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
148  VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \
149  } \
150  };
151 
// Same as EIGEN_MKL_VML_DECLARE_UNARY_CALL, but for routines that take a
// fourth argument: a local MKL_INT64 initialised from EIGEN_MKL_VML_MODE is
// passed by value after the destination pointer.
// The macro parameters have the same meaning as in the non-_LA variant.
152 #define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
153  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
154  enum { IsSupported = 1 }; \
155  static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
156  int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
157  MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
158  VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \
159  } \
160  };
161 
// Generates vml_call< scalar_<EIGENOP>_op<EIGENTYPE> > for a routine that takes
// a scalar exponent. Unlike the unary variants, the functor is used: its
// m_exponent member is copied into a local so that its address can be passed.
// The element count and the mode are also passed by address here.
// NOTE(review): `&size` is an `int*`; if the routine's length parameter is a
// pointer to a wider integer type in some MKL configuration this would be a
// mismatch — confirm against the MKL prototype.
162 #define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
163  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
164  enum { IsSupported = 1 }; \
165  static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
166  int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
167  EIGENTYPE exponent = func.m_exponent; \
168  MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
169  VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \
170  (VMLTYPE*)dst, &vmlMode); \
171  } \
172  };
173 
// Declares the float and double specializations for one operation; the
// routine names are formed by prefixing VMLOP with "vs" and "vd".
174 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
175  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \
176  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
177 
// Declares the scomplex and dcomplex specializations; routine names use the
// "vc" and "vz" prefixes and pointers are cast to MKL_Complex8 / MKL_Complex16.
178 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \
179  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
180  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
181 
// Declares all four specializations (real and complex) for one operation.
182 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \
183  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
184  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
185 
186 
// Mode-taking counterparts of the macros above: they expand to
// EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA and use the "vms"/"vmd" routine prefixes
// for float and double.
187 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
188  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \
189  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
190 
// Complex variants: "vmc"/"vmz" prefixes, with pointers cast to
// MKL_Complex8 / MKL_Complex16.
191 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \
192  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
193  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
194 
// Declares all four mode-taking specializations for one operation.
195 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \
196  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
197  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
198 
199 
205 //EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
209 
211 
212 // The vm*powx functions are not available in the Windows version of MKL.
// On every other platform, instantiate vml_call for scalar_pow_op over float,
// double, scomplex and dcomplex, each bound to the matching vm?powx_ routine.
213 #ifndef _WIN32
214 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
215 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
216 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
217 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
218 #endif
219 
220 } // end namespace internal
221 
222 } // end namespace Eigen
223 
224 #endif // EIGEN_ASSIGN_VML_H
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)
Definition: Assign_MKL.h:162
const CwiseUnaryOp< internal::scalar_exp_op< Scalar >, const Derived > exp() const
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP)
Definition: Assign_MKL.h:195
const internal::remove_all< typename XprType::Nested >::type & nestedExpression() const
Definition: CwiseUnaryOp.h:78
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_pow_op< typename Derived::Scalar >, const Derived > pow(const Eigen::ArrayBase< Derived > &x, const typename Derived::Scalar &exponent)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)
Definition: Assign_MKL.h:174
const CwiseUnaryOp< internal::scalar_square_op< Scalar >, const Derived > square() const
const unsigned int DirectAccessBit
Definition: Constants.h:142
Definition: LDLT.h:16
const unsigned int RowMajorBit
Definition: Constants.h:53
const CwiseUnaryOp< internal::scalar_cos_op< Scalar >, const Derived > cos() const
const CwiseUnaryOp< internal::scalar_tan_op< Scalar >, Derived > tan() const
const CwiseUnaryOp< internal::scalar_acos_op< Scalar >, const Derived > acos() const
const UnaryOp & functor() const
Definition: CwiseUnaryOp.h:74
const CwiseUnaryOp< internal::scalar_log_op< Scalar >, const Derived > log() const
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL, UNROLLING)
Definition: Assign_MKL.h:116
const CwiseUnaryOp< internal::scalar_sin_op< Scalar >, const Derived > sin() const
const CwiseUnaryOp< internal::scalar_asin_op< Scalar >, const Derived > asin() const
const int Dynamic
Definition: Constants.h:21
Generic expression where a coefficient-wise unary operator is applied to an expression.
Definition: CwiseUnaryOp.h:59
static void run(Derived1 &dst, const CwiseUnaryOp< UnaryOp, Derived2 > &src)
Definition: Assign_MKL.h:88
const unsigned int LinearAccessBit
Definition: Constants.h:117
const CwiseUnaryOp< internal::scalar_sqrt_op< Scalar >, const Derived > sqrt() const


tuw_aruco
Author(s): Lukas Pfeifhofer
autogenerated on Mon Feb 28 2022 23:57:50