device_memory.cpp
Go to the documentation of this file.
2 #include <kfusion/safe_call.hpp>
3 #include <cassert>
4 #include <iostream>
5 #include <cstdlib>
6 
7 void kfusion::cuda::error(const char *error_string, const char *file, const int line, const char *func)
8 {
9  std::cout << "KinFu2 error: " << error_string << "\t" << file << ":" << line << std::endl;
10  exit(0);
11 }
12 
14 
15 #ifdef __GNUC__
16 
17  #if __GNUC__*10 + __GNUC_MINOR__ >= 42
18 
19  #if !defined WIN32 && (defined __i486__ || defined __i586__ || defined __i686__ || defined __MMX__ || defined __SSE__ || defined __ppc__)
20  #define CV_XADD __sync_fetch_and_add
21  #else
22  #include <ext/atomicity.h>
23  #define CV_XADD __gnu_cxx::__exchange_and_add
24  #endif
25  #else
26  #include <bits/atomicity.h>
27  #if __GNUC__*10 + __GNUC_MINOR__ >= 34
28  #define CV_XADD __gnu_cxx::__exchange_and_add
29  #else
30  #define CV_XADD __exchange_and_add
31  #endif
32  #endif
33 
34 #elif defined WIN32 || defined _WIN32
35  #include <intrin.h>
36  #define CV_XADD(addr,delta) _InterlockedExchangeAdd((long volatile*)(addr), (delta))
37 #else
38 
39  template<typename _Tp> static inline _Tp CV_XADD(_Tp* addr, _Tp delta)
40  { int tmp = *addr; *addr += delta; return tmp; }
41 
42 #endif
43 
45 
46 kfusion::cuda::DeviceMemory::DeviceMemory() : data_(0), sizeBytes_(0), refcount_(0) {}
47 kfusion::cuda::DeviceMemory::DeviceMemory(void *ptr_arg, size_t sizeBytes_arg) : data_(ptr_arg), sizeBytes_(sizeBytes_arg), refcount_(0){}
48 kfusion::cuda::DeviceMemory::DeviceMemory(size_t sizeBtes_arg) : data_(0), sizeBytes_(0), refcount_(0) { create(sizeBtes_arg); }
50 
52  : data_(other_arg.data_), sizeBytes_(other_arg.sizeBytes_), refcount_(other_arg.refcount_)
53 {
54  if( refcount_ )
55  CV_XADD(refcount_, 1);
56 }
57 
59 {
60  if( this != &other_arg )
61  {
62  if( other_arg.refcount_ )
63  CV_XADD(other_arg.refcount_, 1);
64  release();
65 
66  data_ = other_arg.data_;
67  sizeBytes_ = other_arg.sizeBytes_;
68  refcount_ = other_arg.refcount_;
69  }
70  return *this;
71 }
72 
73 void kfusion::cuda::DeviceMemory::create(size_t sizeBytes_arg)
74 {
75  if (sizeBytes_arg == sizeBytes_)
76  return;
77 
78  if( sizeBytes_arg > 0)
79  {
80  if( data_ )
81  release();
82 
83  sizeBytes_ = sizeBytes_arg;
84 
85  cudaSafeCall( cudaMalloc(&data_, sizeBytes_) );
86 
87  //refcount_ = (int*)cv::fastMalloc(sizeof(*refcount_));
88  refcount_ = new int;
89  *refcount_ = 1;
90  }
91 }
92 
94 {
95  if (empty())
96  other.release();
97  else
98  {
99  other.create(sizeBytes_);
100  cudaSafeCall( cudaMemcpy(other.data_, data_, sizeBytes_, cudaMemcpyDeviceToDevice) );
101  cudaSafeCall( cudaDeviceSynchronize() );
102  }
103 }
104 
106 {
107  if( refcount_ && CV_XADD(refcount_, -1) == 1 )
108  {
109  //cv::fastFree(refcount);
110  delete refcount_;
111  cudaSafeCall( cudaFree(data_) );
112  }
113  data_ = 0;
114  sizeBytes_ = 0;
115  refcount_ = 0;
116 }
117 
118 void kfusion::cuda::DeviceMemory::upload(const void *host_ptr_arg, size_t sizeBytes_arg)
119 {
120  create(sizeBytes_arg);
121  cudaSafeCall( cudaMemcpy(data_, host_ptr_arg, sizeBytes_, cudaMemcpyHostToDevice) );
122  cudaSafeCall( cudaDeviceSynchronize() );
123 }
124 
125 void kfusion::cuda::DeviceMemory::download(void *host_ptr_arg) const
126 {
127  cudaSafeCall( cudaMemcpy(host_ptr_arg, data_, sizeBytes_, cudaMemcpyDeviceToHost) );
128  cudaSafeCall( cudaDeviceSynchronize() );
129 }
130 
132 {
133  std::swap(data_, other_arg.data_);
134  std::swap(sizeBytes_, other_arg.sizeBytes_);
135  std::swap(refcount_, other_arg.refcount_);
136 }
137 
138 bool kfusion::cuda::DeviceMemory::empty() const { return !data_; }
140 
141 
143 
144 kfusion::cuda::DeviceMemory2D::DeviceMemory2D() : data_(0), step_(0), colsBytes_(0), rows_(0), refcount_(0) {}
145 
146 kfusion::cuda::DeviceMemory2D::DeviceMemory2D(int rows_arg, int colsBytes_arg)
147  : data_(0), step_(0), colsBytes_(0), rows_(0), refcount_(0)
148 {
149  create(rows_arg, colsBytes_arg);
150 }
151 
152 kfusion::cuda::DeviceMemory2D::DeviceMemory2D(int rows_arg, int colsBytes_arg, void *data_arg, size_t step_arg)
153  : data_(data_arg), step_(step_arg), colsBytes_(colsBytes_arg), rows_(rows_arg), refcount_(0) {}
154 
156 
157 
159  data_(other_arg.data_), step_(other_arg.step_), colsBytes_(other_arg.colsBytes_), rows_(other_arg.rows_), refcount_(other_arg.refcount_)
160 {
161  if( refcount_ )
162  CV_XADD(refcount_, 1);
163 }
164 
166 {
167  if( this != &other_arg )
168  {
169  if( other_arg.refcount_ )
170  CV_XADD(other_arg.refcount_, 1);
171  release();
172 
173  colsBytes_ = other_arg.colsBytes_;
174  rows_ = other_arg.rows_;
175  data_ = other_arg.data_;
176  step_ = other_arg.step_;
177 
178  refcount_ = other_arg.refcount_;
179  }
180  return *this;
181 }
182 
183 void kfusion::cuda::DeviceMemory2D::create(int rows_arg, int colsBytes_arg)
184 {
185  if (colsBytes_ == colsBytes_arg && rows_ == rows_arg)
186  return;
187 
188  if( rows_arg > 0 && colsBytes_arg > 0)
189  {
190  if( data_ )
191  release();
192 
193  colsBytes_ = colsBytes_arg;
194  rows_ = rows_arg;
195 
196  cudaSafeCall( cudaMallocPitch( (void**)&data_, &step_, colsBytes_, rows_) );
197 
198  //refcount = (int*)cv::fastMalloc(sizeof(*refcount));
199  refcount_ = new int;
200  *refcount_ = 1;
201  }
202 }
203 
205 {
206  if( refcount_ && CV_XADD(refcount_, -1) == 1 )
207  {
208  //cv::fastFree(refcount);
209  delete refcount_;
210  cudaSafeCall( cudaFree(data_) );
211  }
212 
213  colsBytes_ = 0;
214  rows_ = 0;
215  data_ = 0;
216  step_ = 0;
217  refcount_ = 0;
218 }
219 
221 {
222  if (empty())
223  other.release();
224  else
225  {
226  other.create(rows_, colsBytes_);
227  cudaSafeCall( cudaMemcpy2D(other.data_, other.step_, data_, step_, colsBytes_, rows_, cudaMemcpyDeviceToDevice) );
228  cudaSafeCall( cudaDeviceSynchronize() );
229  }
230 }
231 
232 void kfusion::cuda::DeviceMemory2D::upload(const void *host_ptr_arg, size_t host_step_arg, int rows_arg, int colsBytes_arg)
233 {
234  create(rows_arg, colsBytes_arg);
235  cudaSafeCall( cudaMemcpy2D(data_, step_, host_ptr_arg, host_step_arg, colsBytes_, rows_, cudaMemcpyHostToDevice) );
236  cudaSafeCall( cudaDeviceSynchronize() );
237 }
238 
239 void kfusion::cuda::DeviceMemory2D::download(void *host_ptr_arg, size_t host_step_arg) const
240 {
241  cudaSafeCall( cudaMemcpy2D(host_ptr_arg, host_step_arg, data_, step_, colsBytes_, rows_, cudaMemcpyDeviceToHost) );
242  cudaSafeCall( cudaDeviceSynchronize() );
243 }
244 
246 {
247  std::swap(data_, other_arg.data_);
248  std::swap(step_, other_arg.step_);
249 
250  std::swap(colsBytes_, other_arg.colsBytes_);
251  std::swap(rows_, other_arg.rows_);
252  std::swap(refcount_, other_arg.refcount_);
253 }
254 
258 size_t kfusion::cuda::DeviceMemory2D::step() const { return step_; }
void download(void *host_ptr_arg) const
Downloads data from internal buffer to CPU memory.
void release()
Decrements reference counter and releases internal buffer if needed.
int * refcount_
Pointer to reference counter in CPU memory.
size_t step() const
Returns stride between two consecutive rows in bytes for internal buffer. Step is stored always and e...
void swap(DeviceMemory &other_arg)
Performs swap of data pointed with another device memory.
int * refcount_
Pointer to reference counter in CPU memory.
void copyTo(DeviceMemory2D &other) const
Performs data copying. If destination size differs it will be reallocated.
void copyTo(DeviceMemory &other) const
Performs data copying. If destination size differs it will be reallocated.
size_t step_
Stride between two consecutive rows in bytes for internal buffer. Step is stored always and everywher...
size_t sizeBytes_
Allocated size in bytes.
void create(int rows_arg, int colsBytes_arg)
Allocates internal buffer in GPU memory. If internal buffer was created before the function recreates...
int colsBytes() const
Returns number of bytes in each row.
static _Tp CV_XADD(_Tp *addr, _Tp delta)
bool empty() const
Returns true if unallocated otherwise false.
void swap(DeviceMemory2D &other_arg)
Performs swap of data pointed with another device memory.
__kf_hdevice__ void swap(T &a, T &b)
Definition: temp_utils.hpp:10
void * data_
Device pointer.
#define cudaSafeCall(expr)
Definition: safe_call.hpp:16
void upload(const void *host_ptr_arg, size_t sizeBytes_arg)
Uploads data to internal buffer in GPU memory. It calls create() inside to ensure that intenal buffer...
bool empty() const
Returns true if unallocated otherwise false.
void upload(const void *host_ptr_arg, size_t host_step_arg, int rows_arg, int colsBytes_arg)
Uploads data to internal buffer in GPU memory. It calls create() inside to ensure that intenal buffer...
void release()
Decrements reference counter and releases internal buffer if needed.
void create(size_t sizeBytes_arg)
Allocates internal buffer in GPU memory. If internal buffer was created before the function recreates...
FILE * file
DeviceMemory()
Empty constructor.
DeviceMemory2D & operator=(const DeviceMemory2D &other_arg)
Assigment operator. Just increments reference counter.
int colsBytes_
Width of the buffer in bytes.
KF_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func="")
Error handler. All GPU functions from this subsystem call the function to report an error...
DeviceMemory & operator=(const DeviceMemory &other_arg)
Assigment operator. Just increments reference counter.
int rows() const
Returns number of rows.
void download(void *host_ptr_arg, size_t host_step_arg) const
Downloads data from internal buffer to CPU memory. User is resposible for correct host buffer size...
void * data_
Device pointer.
DeviceMemory2D()
Empty constructor.


lvr2
Author(s): Thomas Wiemann , Sebastian Pütz , Alexander Mock , Lars Kiesow , Lukas Kalbertodt , Tristan Igelbrink , Johan M. von Behren , Dominik Feldschnieders , Alexander Löhr
autogenerated on Mon Feb 28 2022 22:46:06