7 #include "../include/librealsense2/hpp/rs_sensor.hpp" 8 #include "../include/librealsense2/hpp/rs_processing.hpp" 9 #include "../include/librealsense2/rsutil.h" 28 template<rs2_distortion dist>
29 inline void distorte_x_y(
const __m128 &
x,
const __m128 &
y, __m128 * distorted_x, __m128 * distorted_y,
const rs2_intrinsics& to)
35 inline void distorte_x_y<RS2_DISTORTION_MODIFIED_BROWN_CONRADY>(
const __m128&
x,
const __m128&
y, __m128* distorted_x, __m128* distorted_y,
const rs2_intrinsics& to)
38 auto one = _mm_set_ps1(1);
39 auto two = _mm_set_ps1(2);
41 for (
int i = 0;
i < 5; ++
i)
45 auto r2_0 = _mm_add_ps(_mm_mul_ps(x, x), _mm_mul_ps(y, y));
46 auto r3_0 = _mm_add_ps(_mm_mul_ps(c[1], _mm_mul_ps(r2_0, r2_0)), _mm_mul_ps(c[4], _mm_mul_ps(r2_0, _mm_mul_ps(r2_0, r2_0))));
47 auto f_0 = _mm_add_ps(
one, _mm_add_ps(_mm_mul_ps(c[0], r2_0), r3_0));
49 auto x_f0 = _mm_mul_ps(x, f_0);
50 auto y_f0 = _mm_mul_ps(y, f_0);
52 auto r4_0 = _mm_mul_ps(c[3], _mm_add_ps(r2_0, _mm_mul_ps(two, _mm_mul_ps(x_f0, x_f0))));
53 auto d_x0 = _mm_add_ps(x_f0, _mm_add_ps(_mm_mul_ps(two, _mm_mul_ps(c[2], _mm_mul_ps(x_f0, y_f0))), r4_0));
55 auto r5_0 = _mm_mul_ps(c[2], _mm_add_ps(r2_0, _mm_mul_ps(two, _mm_mul_ps(y_f0, y_f0))));
56 auto d_y0 = _mm_add_ps(y_f0, _mm_add_ps(_mm_mul_ps(two, _mm_mul_ps(c[3], _mm_mul_ps(x_f0, y_f0))), r4_0));
63 template<rs2_distortion dist>
64 inline void get_texture_map_sse(
const uint16_t * depth,
66 const unsigned int size,
67 const float * pre_compute_x,
const float * pre_compute_y,
68 byte * pixels_ptr_int,
73 const __m128i mask0 = _mm_set_epi8((
char)0xff, (
char)0xff, (
char)7, (
char)6, (
char)0xff, (
char)0xff, (
char)5, (
char)4,
74 (
char)0xff, (
char)0xff, (
char)3, (
char)2, (
char)0xff, (
char)0xff, (
char)1, (
char)0);
75 const __m128i mask1 = _mm_set_epi8((
char)0xff, (
char)0xff, (
char)15, (
char)14, (
char)0xff, (
char)0xff, (
char)13, (
char)12,
76 (
char)0xff, (
char)0xff, (
char)11, (
char)10, (
char)0xff, (
char)0xff, (
char)9, (
char)8);
78 auto scale = _mm_set_ps1(depth_scale);
80 auto mapx = pre_compute_x;
81 auto mapy = pre_compute_y;
83 auto res =
reinterpret_cast<__m128i*
>(pixels_ptr_int);
89 for (
int i = 0;
i < 9; ++
i)
93 for (
int i = 0;
i < 3; ++
i)
97 for (
int i = 0;
i < 5; ++
i)
101 auto zero = _mm_set_ps1(0);
102 auto fx = _mm_set_ps1(to.
fx);
103 auto fy = _mm_set_ps1(to.
fy);
104 auto ppx = _mm_set_ps1(to.
ppx);
105 auto ppy = _mm_set_ps1(to.
ppy);
107 for (
unsigned int i = 0;
i <
size;
i += 8)
109 auto x0 = _mm_load_ps(mapx +
i);
110 auto x1 = _mm_load_ps(mapx +
i + 4);
112 auto y0 = _mm_load_ps(mapy +
i);
113 auto y1 = _mm_load_ps(mapy +
i + 4);
116 __m128i
d = _mm_load_si128((__m128i
const*)(depth +
i));
119 __m128i d0 = _mm_shuffle_epi8(d, mask0);
120 __m128i d1 = _mm_shuffle_epi8(d, mask1);
122 __m128 depth0 = _mm_cvtepi32_ps(d0);
123 __m128 depth1 = _mm_cvtepi32_ps(d1);
125 depth0 = _mm_mul_ps(depth0,
scale);
126 depth1 = _mm_mul_ps(depth1,
scale);
128 auto p0x = _mm_mul_ps(depth0,
x0);
129 auto p0y = _mm_mul_ps(depth0,
y0);
131 auto p1x = _mm_mul_ps(depth1,
x1);
132 auto p1y = _mm_mul_ps(depth1,
y1);
134 auto p_x0 = _mm_add_ps(_mm_mul_ps(r[0], p0x), _mm_add_ps(_mm_mul_ps(r[3], p0y), _mm_add_ps(_mm_mul_ps(r[6], depth0), t[0])));
135 auto p_y0 = _mm_add_ps(_mm_mul_ps(r[1], p0x), _mm_add_ps(_mm_mul_ps(r[4], p0y), _mm_add_ps(_mm_mul_ps(r[7], depth0), t[1])));
136 auto p_z0 = _mm_add_ps(_mm_mul_ps(r[2], p0x), _mm_add_ps(_mm_mul_ps(r[5], p0y), _mm_add_ps(_mm_mul_ps(r[8], depth0), t[2])));
138 auto p_x1 = _mm_add_ps(_mm_mul_ps(r[0], p1x), _mm_add_ps(_mm_mul_ps(r[3], p1y), _mm_add_ps(_mm_mul_ps(r[6], depth1), t[0])));
139 auto p_y1 = _mm_add_ps(_mm_mul_ps(r[1], p1x), _mm_add_ps(_mm_mul_ps(r[4], p1y), _mm_add_ps(_mm_mul_ps(r[7], depth1), t[1])));
140 auto p_z1 = _mm_add_ps(_mm_mul_ps(r[2], p1x), _mm_add_ps(_mm_mul_ps(r[5], p1y), _mm_add_ps(_mm_mul_ps(r[8], depth1), t[2])));
142 p_x0 = _mm_div_ps(p_x0, p_z0);
143 p_y0 = _mm_div_ps(p_y0, p_z0);
145 p_x1 = _mm_div_ps(p_x1, p_z1);
146 p_y1 = _mm_div_ps(p_y1, p_z1);
148 distorte_x_y<dist>(p_x0, p_y0, &p_x0, &p_y0, to);
149 distorte_x_y<dist>(p_x1, p_y1, &p_x1, &p_y1, to);
152 auto cmp = _mm_cmpneq_ps(depth0, zero);
153 p_x0 = _mm_and_ps(_mm_add_ps(_mm_mul_ps(p_x0,
fx),
ppx), cmp);
154 p_y0 = _mm_and_ps(_mm_add_ps(_mm_mul_ps(p_y0,
fy),
ppy), cmp);
157 p_x1 = _mm_add_ps(_mm_mul_ps(p_x1,
fx),
ppx);
158 p_y1 = _mm_add_ps(_mm_mul_ps(p_y1,
fy),
ppy);
160 cmp = _mm_cmpneq_ps(depth0, zero);
161 auto half = _mm_set_ps1(0.5);
162 auto u_round0 = _mm_and_ps(_mm_add_ps(p_x0, half), cmp);
163 auto v_round0 = _mm_and_ps(_mm_add_ps(p_y0, half), cmp);
165 auto uuvv1_0 = _mm_shuffle_ps(u_round0, v_round0, _MM_SHUFFLE(1, 0, 1, 0));
166 auto uuvv2_0 = _mm_shuffle_ps(u_round0, v_round0, _MM_SHUFFLE(3, 2, 3, 2));
168 auto res1_0 = _mm_shuffle_ps(uuvv1_0, uuvv1_0, _MM_SHUFFLE(3, 1, 2, 0));
169 auto res2_0 = _mm_shuffle_ps(uuvv2_0, uuvv2_0, _MM_SHUFFLE(3, 1, 2, 0));
171 auto res1_int0 = _mm_cvtps_epi32(res1_0);
172 auto res2_int0 = _mm_cvtps_epi32(res2_0);
174 _mm_stream_si128(&
res[0], res1_int0);
175 _mm_stream_si128(&
res[1], res2_int0);
178 cmp = _mm_cmpneq_ps(depth1, zero);
179 auto u_round1 = _mm_and_ps(_mm_add_ps(p_x1, half), cmp);
180 auto v_round1 = _mm_and_ps(_mm_add_ps(p_y1, half), cmp);
182 auto uuvv1_1 = _mm_shuffle_ps(u_round1, v_round1, _MM_SHUFFLE(1, 0, 1, 0));
183 auto uuvv2_1 = _mm_shuffle_ps(u_round1, v_round1, _MM_SHUFFLE(3, 2, 3, 2));
185 auto res1 = _mm_shuffle_ps(uuvv1_1, uuvv1_1, _MM_SHUFFLE(3, 1, 2, 0));
186 auto res2 = _mm_shuffle_ps(uuvv2_1, uuvv2_1, _MM_SHUFFLE(3, 1, 2, 0));
188 auto res1_int1 = _mm_cvtps_epi32(res1);
189 auto res2_int1 = _mm_cvtps_epi32(res2);
191 _mm_stream_si128(&
res[0], res1_int1);
192 _mm_stream_si128(&
res[1], res2_int1);
197 image_transform::image_transform(
const rs2_intrinsics& from,
float depth_scale)
199 _depth_scale(depth_scale),
205 void image_transform::pre_compute_x_y_map_corners()
207 pre_compute_x_y_map(_pre_compute_map_x_top_left, _pre_compute_map_y_top_left, -0.5
f);
208 pre_compute_x_y_map(_pre_compute_map_x_bottom_right, _pre_compute_map_y_bottom_right, 0.5
f);
211 void image_transform::pre_compute_x_y_map(std::vector<float>& pre_compute_map_x,
212 std::vector<float>& pre_compute_map_y,
215 pre_compute_map_x.resize(_depth.width*_depth.height);
216 pre_compute_map_y.resize(_depth.width*_depth.height);
218 for (
int h = 0;
h < _depth.height; ++
h)
220 for (
int w = 0;
w < _depth.width; ++
w)
222 const float pixel[] = { (float)
w + offset, (
float)
h + offset };
224 float x = (pixel[0] - _depth.ppx) / _depth.fx;
225 float y = (pixel[1] - _depth.ppy) / _depth.fy;
229 float r2 = x * x + y *
y;
230 float f = 1 + _depth.coeffs[0] * r2 + _depth.coeffs[1] * r2*r2 + _depth.coeffs[4] * r2*r2*r2;
231 float ux = x * f + 2 * _depth.coeffs[2] * x*y + _depth.coeffs[3] * (r2 + 2 * x*
x);
232 float uy = y * f + 2 * _depth.coeffs[3] * x*y + _depth.coeffs[2] * (r2 + 2 * y*
y);
237 pre_compute_map_x[
h*_depth.width +
w] =
x;
238 pre_compute_map_y[
h*_depth.width +
w] =
y;
249 align_depth_to_other_sse<RS2_DISTORTION_MODIFIED_BROWN_CONRADY>(z_pixels,
dest,
depth, to, from_to_other);
252 align_depth_to_other_sse(z_pixels, dest, depth, to, from_to_other);
258 const std::vector<librealsense::int2>& pixel_top_left_int,
259 const std::vector<librealsense::int2>& pixel_bottom_right_int)
261 for (
int y = 0; y < _depth.height; ++
y)
263 for (
int x = 0; x < _depth.width; ++
x)
265 auto depth_pixel_index = y * _depth.width +
x;
267 if (z_pixels[depth_pixel_index])
269 for (
int other_y = pixel_top_left_int[depth_pixel_index].y; other_y <= pixel_bottom_right_int[depth_pixel_index].y; ++other_y)
271 for (
int other_x = pixel_top_left_int[depth_pixel_index].x; other_x <= pixel_bottom_right_int[depth_pixel_index].x; ++other_x)
273 if (other_x < 0 || other_y < 0 || other_x >= to.
width || other_y >= to.
height)
275 auto other_ind = other_y * to.
width + other_x;
277 dest[other_ind] = dest[other_ind] ?
std::min(dest[other_ind], z_pixels[depth_pixel_index]) : z_pixels[depth_pixel_index];
292 align_other_to_depth_sse<RS2_DISTORTION_MODIFIED_BROWN_CONRADY>(z_pixels,
source,
dest,
bpp, to, from_to_other);
295 align_other_to_depth_sse(z_pixels, source, dest, bpp, to, from_to_other);
301 template<rs2_distortion dist>
305 get_texture_map_sse<dist>(z_pixels, _depth_scale, _depth.height*_depth.width, _pre_compute_map_x_top_left.data(),
306 _pre_compute_map_y_top_left.data(), (
byte*)_pixel_top_left_int.data(), to, from_to_other);
310 float2 pixels_per_angle_depth = { (float)depth.
width / fov[0], (
float)depth.
height / fov[1] };
313 float2 pixels_per_angle_target = { (float)to.
width / fov[0], (
float)to.
height / fov[1] };
315 if (pixels_per_angle_depth.
x < pixels_per_angle_target.
x || pixels_per_angle_depth.
y < pixels_per_angle_target.
y || is_special_resolution(depth, to))
317 get_texture_map_sse<dist>(z_pixels, _depth_scale, _depth.height*_depth.width, _pre_compute_map_x_bottom_right.data(),
318 _pre_compute_map_y_bottom_right.data(), (
byte*)_pixel_bottom_right_int.data(), to, from_to_other);
320 move_depth_to_other(z_pixels, dest, to, _pixel_top_left_int, _pixel_bottom_right_int);
324 move_depth_to_other(z_pixels, dest, to, _pixel_top_left_int, _pixel_top_left_int);
329 template<rs2_distortion dist>
330 inline void image_transform::align_other_to_depth_sse(
const uint16_t * z_pixels,
const byte * source,
byte * dest,
int bpp,
const rs2_intrinsics& to,
333 get_texture_map_sse<dist>(z_pixels, _depth_scale, _depth.height*_depth.width, _pre_compute_map_x_top_left.data(),
334 _pre_compute_map_y_top_left.data(), (
byte*)_pixel_top_left_int.data(), to, from_to_other);
336 std::vector<int2>& bottom_right = _pixel_top_left_int;
337 if (to.
height < _depth.height && to.
width < _depth.width)
339 get_texture_map_sse<dist>(z_pixels, _depth_scale, _depth.height*_depth.width, _pre_compute_map_x_bottom_right.data(),
340 _pre_compute_map_y_bottom_right.data(), (
byte*)_pixel_bottom_right_int.data(), to, from_to_other);
342 bottom_right = _pixel_bottom_right_int;
348 move_other_to_depth(z_pixels,
reinterpret_cast<const bytes<1>*
>(source),
reinterpret_cast<bytes<1>*
>(dest), to,
349 _pixel_top_left_int, bottom_right);
352 move_other_to_depth(z_pixels,
reinterpret_cast<const bytes<2>*
>(source),
reinterpret_cast<bytes<2>*
>(dest), to,
353 _pixel_top_left_int, bottom_right);
356 move_other_to_depth(z_pixels,
reinterpret_cast<const bytes<3>*
>(source),
reinterpret_cast<bytes<3>*
>(dest), to,
357 _pixel_top_left_int, bottom_right);
360 move_other_to_depth(z_pixels,
reinterpret_cast<const bytes<4>*
>(source),
reinterpret_cast<bytes<4>*
>(dest), to,
361 _pixel_top_left_int, bottom_right);
369 void image_transform::move_other_to_depth(
const uint16_t* z_pixels,
372 const std::vector<librealsense::int2>& pixel_top_left_int,
373 const std::vector<librealsense::int2>& pixel_bottom_right_int)
376 for (
int y = 0; y < _depth.height; ++
y)
378 for (
int x = 0; x < _depth.width; ++
x)
380 auto depth_pixel_index = y * _depth.width +
x;
382 if (z_pixels[depth_pixel_index])
384 for (
int other_y = pixel_top_left_int[depth_pixel_index].y; other_y <= pixel_bottom_right_int[depth_pixel_index].y; ++other_y)
386 for (
int other_x = pixel_top_left_int[depth_pixel_index].x; other_x <= pixel_bottom_right_int[depth_pixel_index].x; ++other_x)
388 if (other_x < 0 || other_y < 0 || other_x >= to.
width || other_y >= to.
height)
390 auto other_ind = other_y * to.
width + other_x;
392 dest[depth_pixel_index] = source[other_ind];
402 _stream_transform =
nullptr;
407 byte* aligned_data =
reinterpret_cast<byte*
>(
const_cast<void*
>(aligned.
get_data()));
409 memset(aligned_data, 0, aligned_profile.height() * aligned_profile.width() * aligned.
get_bytes_per_pixel());
415 auto z_to_other = depth_profile.get_extrinsics_to(other_profile);
419 if (_stream_transform ==
nullptr)
421 _stream_transform = std::make_shared<image_transform>(z_intrin, z_scale);
422 _stream_transform->pre_compute_x_y_map_corners();
424 _stream_transform->align_depth_to_other(z_pixels, reinterpret_cast<uint16_t*>(aligned_data), 2, z_intrin, other_intrin, z_to_other);
429 byte* aligned_data =
reinterpret_cast<byte*
>(
const_cast<void*
>(aligned.
get_data()));
431 memset(aligned_data, 0, aligned_profile.height() * aligned_profile.width() * aligned.
get_bytes_per_pixel());
438 auto z_to_other = depth_profile.get_extrinsics_to(other_profile);
441 auto other_pixels =
reinterpret_cast<const byte*
>(other.
get_data());
443 if (_stream_transform ==
nullptr)
445 _stream_transform = std::make_shared<image_transform>(z_intrin, z_scale);
446 _stream_transform->pre_compute_x_y_map_corners();
449 _stream_transform->align_other_to_depth(z_pixels, other_pixels, aligned_data, other.
get_bytes_per_pixel(), other_intrin, z_to_other);
GLboolean GLboolean GLboolean b
GLenum GLenum GLenum GLenum GLenum scale
static void rs2_fov(const struct rs2_intrinsics *intrin, float to_fov[2])
stream_profile get_profile() const
int get_bytes_per_pixel() const
GLint GLint GLsizei GLsizei GLsizei depth
const void * get_data() const
GLdouble GLdouble GLdouble w
GLfloat GLfloat GLfloat GLfloat h
GLint GLsizei GLsizei height
rs2_intrinsics get_intrinsics() const
rs2_stream
Streams are different types of data provided by RealSense devices.
Cross-stream extrinsics: encodes the topology describing how the different devices are oriented...
GLuint GLfloat GLfloat GLfloat x1
GLuint GLfloat GLfloat y0
GLsizei GLsizei GLchar * source
void align_other_to_depth(byte *other_aligned_to_depth, GET_DEPTH get_depth, const rs2_intrinsics &depth_intrin, const rs2_extrinsics &depth_to_other, const rs2_intrinsics &other_intrin, const byte *other_pixels, rs2_format other_format)