GPU/TypeCasting.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_TYPE_CASTING_GPU_H
11 #define EIGEN_TYPE_CASTING_GPU_H
12 
13 namespace Eigen {
14 
15 namespace internal {
16 
17 #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
18  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
19 
20 
21 template <>
22 struct type_casting_traits<Eigen::half, float> {
23  enum {
24  VectorizedCast = 1,
25  SrcCoeffRatio = 1,
26  TgtCoeffRatio = 2
27  };
28 };
29 
30 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
31  float2 r1 = __half22float2(a);
32  float2 r2 = __half22float2(b);
33  return make_float4(r1.x, r1.y, r2.x, r2.y);
34 }
35 
36 
37 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) {
38  Packet4h2 r;
39  half2* r_alias=reinterpret_cast<half2*>(&r);
40  r_alias[0]=__floats2half2_rn(a.x,a.y);
41  r_alias[1]=__floats2half2_rn(a.z,a.w);
42  r_alias[2]=__floats2half2_rn(b.x,b.y);
43  r_alias[3]=__floats2half2_rn(b.z,b.w);
44  return r;
45 }
46 
47 template <>
48 struct type_casting_traits<float, Eigen::half> {
49  enum {
50  VectorizedCast = 1,
51  SrcCoeffRatio = 2,
52  TgtCoeffRatio = 1
53  };
54 };
55 
56 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) {
57  // Simply discard the second half of the input
58  float4 r;
59  const half2* a_alias=reinterpret_cast<const half2*>(&a);
60  float2 r1 = __half22float2(a_alias[0]);
61  float2 r2 = __half22float2(a_alias[1]);
62  r.x=static_cast<float>(r1.x);
63  r.y=static_cast<float>(r1.y);
64  r.z=static_cast<float>(r2.x);
65  r.w=static_cast<float>(r2.y);
66  return r;
67 }
68 
69 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
70  // Simply discard the second half of the input
71  return __floats2half2_rn(a.x, a.y);
72 }
73 
74 #endif
75 
76 } // end namespace internal
77 
78 } // end namespace Eigen
79 
80 #endif // EIGEN_TYPE_CASTING_GPU_H
EIGEN_DEVICE_FUNC
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:976
Eigen
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
r2
static const double r2
Definition: testSmartRangeFactor.cpp:32
b
Scalar * b
Definition: benchVecAdd.cpp:17
r1
static const double r1
Definition: testSmartRangeFactor.cpp:32
EIGEN_STRONG_INLINE
#define EIGEN_STRONG_INLINE
Definition: Macros.h:917
Eigen::internal::type_casting_traits::SrcCoeffRatio
@ SrcCoeffRatio
Definition: GenericPacketMath.h:151
a
ArrayXXi a
Definition: Array_initializer_list_23_cxx11.cpp:1
Eigen::internal::type_casting_traits::TgtCoeffRatio
@ TgtCoeffRatio
Definition: GenericPacketMath.h:152
gtsam.examples.DogLegOptimizerExample.float
float
Definition: DogLegOptimizerExample.py:113
internal
Definition: BandTriangularSolver.h:13
Eigen::internal::type_casting_traits::VectorizedCast
@ VectorizedCast
Definition: GenericPacketMath.h:150


gtsam
Author(s):
autogenerated on Tue Jan 7 2025 04:09:25