CUDA/TypeCasting.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_TYPE_CASTING_CUDA_H
11 #define EIGEN_TYPE_CASTING_CUDA_H
12 
13 namespace Eigen {
14 
15 namespace internal {
16 
17 template<>
18 struct scalar_cast_op<float, Eigen::half> {
21  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
22  #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
23  return __float2half(a);
24  #else
25  return Eigen::half(a);
26  #endif
27  }
28 };
29 
30 template<>
32 { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
33 
34 
35 template<>
36 struct scalar_cast_op<int, Eigen::half> {
39  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
40  #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
41  return __float2half(static_cast<float>(a));
42  #else
43  return Eigen::half(static_cast<float>(a));
44  #endif
45  }
46 };
47 
48 template<>
50 { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
51 
52 
53 template<>
54 struct scalar_cast_op<Eigen::half, float> {
56  typedef float result_type;
57  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
58  #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
59  return __half2float(a);
60  #else
61  return static_cast<float>(a);
62  #endif
63  }
64 };
65 
66 template<>
68 { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
69 
70 
71 
72 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
73 
74 template <>
75 struct type_casting_traits<Eigen::half, float> {
76  enum {
77  VectorizedCast = 1,
78  SrcCoeffRatio = 2,
79  TgtCoeffRatio = 1
80  };
81 };
82 
83 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
84  float2 r1 = __half22float2(a);
85  float2 r2 = __half22float2(b);
86  return make_float4(r1.x, r1.y, r2.x, r2.y);
87 }
88 
89 template <>
90 struct type_casting_traits<float, Eigen::half> {
91  enum {
92  VectorizedCast = 1,
93  SrcCoeffRatio = 1,
94  TgtCoeffRatio = 2
95  };
96 };
97 
98 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
99  // Simply discard the second half of the input
100  return __floats2half2_rn(a.x, a.y);
101 }
102 
103 #elif defined EIGEN_VECTORIZE_AVX512
104 template <>
105 struct type_casting_traits<half, float> {
106  enum {
107  VectorizedCast = 1,
108  SrcCoeffRatio = 1,
109  TgtCoeffRatio = 1
110  };
111 };
112 
113 template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {
114  return half2float(a);
115 }
116 
117 template <>
118 struct type_casting_traits<float, half> {
119  enum {
120  VectorizedCast = 1,
121  SrcCoeffRatio = 1,
122  TgtCoeffRatio = 1
123  };
124 };
125 
126 template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {
127  return float2half(a);
128 }
129 
130 #elif defined EIGEN_VECTORIZE_AVX
131 
132 template <>
133 struct type_casting_traits<Eigen::half, float> {
134  enum {
135  VectorizedCast = 1,
136  SrcCoeffRatio = 1,
137  TgtCoeffRatio = 1
138  };
139 };
140 
141 template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
142  return half2float(a);
143 }
144 
145 template <>
146 struct type_casting_traits<float, Eigen::half> {
147  enum {
148  VectorizedCast = 1,
149  SrcCoeffRatio = 1,
150  TgtCoeffRatio = 1
151  };
152 };
153 
154 template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
155  return float2half(a);
156 }
157 
158 // Disable the following code since it's broken on too many platforms / compilers.
159 //#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
160 #elif 0
161 
162 template <>
163 struct type_casting_traits<Eigen::half, float> {
164  enum {
165  VectorizedCast = 1,
166  SrcCoeffRatio = 1,
167  TgtCoeffRatio = 1
168  };
169 };
170 
171 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
172  __int64_t a64 = _mm_cvtm64_si64(a.x);
173  Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64));
174  float f1 = static_cast<float>(h);
175  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
176  float f2 = static_cast<float>(h);
177  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
178  float f3 = static_cast<float>(h);
179  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
180  float f4 = static_cast<float>(h);
181  return _mm_set_ps(f4, f3, f2, f1);
182 }
183 
184 template <>
185 struct type_casting_traits<float, Eigen::half> {
186  enum {
187  VectorizedCast = 1,
188  SrcCoeffRatio = 1,
189  TgtCoeffRatio = 1
190  };
191 };
192 
193 template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {
194  EIGEN_ALIGN16 float aux[4];
195  pstore(aux, a);
196  Eigen::half h0(aux[0]);
197  Eigen::half h1(aux[1]);
198  Eigen::half h2(aux[2]);
199  Eigen::half h3(aux[3]);
200 
201  Packet4h result;
202  result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);
203  return result;
204 }
205 
206 #endif
207 
208 } // end namespace internal
209 
210 } // end namespace Eigen
211 
212 #endif // EIGEN_TYPE_CASTING_CUDA_H
#define EIGEN_STRONG_INLINE
Definition: Macros.h:493
#define EIGEN_EMPTY_STRUCT_CTOR(X)
Definition: XprHelper.h:22
Definition: LDLT.h:16
Holds information about the various numeric (i.e. scalar) types allowed by Eigen. ...
Definition: NumTraits.h:150
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
#define EIGEN_ALIGN16
Definition: Macros.h:751
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x)
Definition: Half.h:272
unsigned short x
Definition: Half.h:58
EIGEN_DEVICE_FUNC const Scalar & b


hebiros
Author(s): Xavier Artache , Matthew Tesch
autogenerated on Thu Sep 3 2020 04:09:45