10 #ifndef EIGEN_TYPE_CASTING_CUDA_H 11 #define EIGEN_TYPE_CASTING_CUDA_H 22 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 23 return __float2half(a);
40 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 41 return __float2half(static_cast<float>(a));
58 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 59 return __half2float(a);
61 return static_cast<float>(a);
72 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 83 template<> EIGEN_DEVICE_FUNC
// pcast<half2, float4>: widens two half2 packets into a single float4.
//
// Parameters:
//   a - supplies the first two lanes of the result (x, y).
//   b - supplies the last two lanes of the result (z, w).
// Returns:
//   make_float4(a.lo, a.hi, b.lo, b.hi) after each input has been converted
//   with __half22float2.
//
// NOTE(review): the bare numerals (83, 84, 85, 86) and the `#if` sharing a
// physical line with `template<>` look like residue from a rendered source
// listing rather than real code -- confirm against the upstream header.
// NOTE(review): no closing brace for this function is visible in this chunk.
EIGEN_STRONG_INLINE float4 pcast<half2, float4>(
const half2& a,
const half2&
b) {
// Convert each half2 into a float2 (two lanes per input).
84 float2 r1 = __half22float2(a);
85 float2 r2 = __half22float2(
b);
// Pack in order: both lanes of `a` first, then both lanes of `b`.
86 return make_float4(r1.x, r1.y, r2.x, r2.y);
// pcast<float4, half2>: narrows a float4 packet into one half2.
//
// Parameters:
//   a - source packet; only its x and y components are read below.
// Returns:
//   The half2 produced by __floats2half2_rn(a.x, a.y).
//
// NOTE(review): a.z and a.w are not used by this body. This looks deliberate
// (a half2 only has two lanes), but confirm that callers expect the upper
// two floats to be dropped.
// NOTE(review): the `_rn` suffix presumably means round-to-nearest, going by
// CUDA intrinsic naming -- verify in the CUDA math API docs.
// NOTE(review): the leading numerals (98, 100) look like listing residue, and
// the closing brace is not visible in this chunk.
98 template<> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE half2 pcast<float4, half2>(
const float4& a) {
// Only the first two components are forwarded to the conversion intrinsic.
100 return __floats2half2_rn(a.x, a.y);
103 #elif defined EIGEN_VECTORIZE_AVX512 114 return half2float(a);
127 return float2half(a);
130 #elif defined EIGEN_VECTORIZE_AVX 142 return half2float(a);
155 return float2half(a);
172 __int64_t a64 = _mm_cvtm64_si64(a.x);
174 float f1 =
static_cast<float>(h);
176 float f2 =
static_cast<float>(h);
178 float f3 =
static_cast<float>(h);
180 float f4 =
static_cast<float>(h);
181 return _mm_set_ps(f4, f3, f2, f1);
202 result.x = _mm_set_pi16(h3.
x, h2.
x, h1.
x, h0.
x);
212 #endif // EIGEN_TYPE_CASTING_CUDA_H #define EIGEN_STRONG_INLINE
#define EIGEN_EMPTY_STRUCT_CTOR(X)
Holds information about the various numeric (i.e. scalar) types allowed by Eigen. ...
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x)
EIGEN_DEVICE_FUNC const Scalar & b