sse-pointcloud.cpp
Go to the documentation of this file.
1 // License: Apache 2.0. See LICENSE file in root directory.
2 // Copyright(c) 2017 Intel Corporation. All Rights Reserved.
3 
4 #include "../include/librealsense2/rs.hpp"
5 #include "../include/librealsense2/rsutil.h"
6 
8 #include "environment.h"
11 #include "option.h"
12 #include "environment.h"
13 #include "context.h"
14 
15 #include <iostream>
16 
17 #ifdef __SSSE3__
18 
19 #include <tmmintrin.h> // For SSSE3 intrinsics
20 
21 #endif
22 
23 namespace librealsense
24 {
25  pointcloud_sse::pointcloud_sse() : pointcloud("Pointcloud (SSE3)") {}
26 
28  {
31 
32  for (int h = 0; h < _depth_intrinsics->height; ++h)
33  {
34  for (int w = 0; w < _depth_intrinsics->width; ++w)
35  {
36  const float pixel[] = { (float)w, (float)h };
37 
38  float x = (pixel[0] - _depth_intrinsics->ppx) / _depth_intrinsics->fx;
39  float y = (pixel[1] - _depth_intrinsics->ppy) / _depth_intrinsics->fy;
40 
41 
43  {
44  float r2 = x * x + y * y;
45  float f = 1 + _depth_intrinsics->coeffs[0] * r2 + _depth_intrinsics->coeffs[1] * r2*r2 + _depth_intrinsics->coeffs[4] * r2*r2*r2;
46  float ux = x * f + 2 * _depth_intrinsics->coeffs[2] * x*y + _depth_intrinsics->coeffs[3] * (r2 + 2 * x*x);
47  float uy = y * f + 2 * _depth_intrinsics->coeffs[3] * x*y + _depth_intrinsics->coeffs[2] * (r2 + 2 * y*y);
48  x = ux;
49  y = uy;
50  }
51 
54  }
55  }
56  }
57 
61  float depth_scale)
62  {
63 #ifdef __SSSE3__
64 
65  auto depth_image = (const uint16_t*)depth_frame.get_data();
66 
67  float* pre_compute_x = _pre_compute_map_x.data();
68  float* pre_compute_y = _pre_compute_map_y.data();
69 
70  uint32_t size = depth_intrinsics.height * depth_intrinsics.width;
71 
72  auto point = (float*)output.get_vertices();
73 
74  //mask for shuffle
75  const __m128i mask0 = _mm_set_epi8((char)0xff, (char)0xff, (char)7, (char)6, (char)0xff, (char)0xff, (char)5, (char)4,
76  (char)0xff, (char)0xff, (char)3, (char)2, (char)0xff, (char)0xff, (char)1, (char)0);
77  const __m128i mask1 = _mm_set_epi8((char)0xff, (char)0xff, (char)15, (char)14, (char)0xff, (char)0xff, (char)13, (char)12,
78  (char)0xff, (char)0xff, (char)11, (char)10, (char)0xff, (char)0xff, (char)9, (char)8);
79 
80  auto scale = _mm_set_ps1(depth_scale);
81 
82  auto mapx = pre_compute_x;
83  auto mapy = pre_compute_y;
84 
85  for (unsigned int i = 0; i < size; i += 8)
86  {
87  auto x0 = _mm_load_ps(mapx + i);
88  auto x1 = _mm_load_ps(mapx + i + 4);
89 
90  auto y0 = _mm_load_ps(mapy + i);
91  auto y1 = _mm_load_ps(mapy + i + 4);
92 
93  __m128i d = _mm_load_si128((__m128i const*)(depth_image + i)); //d7 d7 d6 d6 d5 d5 d4 d4 d3 d3 d2 d2 d1 d1 d0 d0
94 
95  //split the depth pixel to 2 registers of 4 floats each
96  __m128i d0 = _mm_shuffle_epi8(d, mask0); // 00 00 d3 d3 00 00 d2 d2 00 00 d1 d1 00 00 d0 d0
97  __m128i d1 = _mm_shuffle_epi8(d, mask1); // 00 00 d7 d7 00 00 d6 d6 00 00 d5 d5 00 00 d4 d4
98 
99  __m128 depth0 = _mm_cvtepi32_ps(d0); //convert depth to float
100  __m128 depth1 = _mm_cvtepi32_ps(d1); //convert depth to float
101 
102  depth0 = _mm_mul_ps(depth0, scale);
103  depth1 = _mm_mul_ps(depth1, scale);
104 
105  auto p0x = _mm_mul_ps(depth0, x0);
106  auto p0y = _mm_mul_ps(depth0, y0);
107 
108  auto p1x = _mm_mul_ps(depth1, x1);
109  auto p1y = _mm_mul_ps(depth1, y1);
110 
111  //scattering of the x y z
112  auto x_y0 = _mm_shuffle_ps(p0x, p0y, _MM_SHUFFLE(2, 0, 2, 0));
113  auto z_x0 = _mm_shuffle_ps(depth0, p0x, _MM_SHUFFLE(3, 1, 2, 0));
114  auto y_z0 = _mm_shuffle_ps(p0y, depth0, _MM_SHUFFLE(3, 1, 3, 1));
115 
116  auto xyz01 = _mm_shuffle_ps(x_y0, z_x0, _MM_SHUFFLE(2, 0, 2, 0));
117  auto xyz02 = _mm_shuffle_ps(y_z0, x_y0, _MM_SHUFFLE(3, 1, 2, 0));
118  auto xyz03 = _mm_shuffle_ps(z_x0, y_z0, _MM_SHUFFLE(3, 1, 3, 1));
119 
120  auto x_y1 = _mm_shuffle_ps(p1x, p1y, _MM_SHUFFLE(2, 0, 2, 0));
121  auto z_x1 = _mm_shuffle_ps(depth1, p1x, _MM_SHUFFLE(3, 1, 2, 0));
122  auto y_z1 = _mm_shuffle_ps(p1y, depth1, _MM_SHUFFLE(3, 1, 3, 1));
123 
124  auto xyz11 = _mm_shuffle_ps(x_y1, z_x1, _MM_SHUFFLE(2, 0, 2, 0));
125  auto xyz12 = _mm_shuffle_ps(y_z1, x_y1, _MM_SHUFFLE(3, 1, 2, 0));
126  auto xyz13 = _mm_shuffle_ps(z_x1, y_z1, _MM_SHUFFLE(3, 1, 3, 1));
127 
128 
129  //store 8 points of x y z
130  _mm_stream_ps(&point[0], xyz01);
131  _mm_stream_ps(&point[4], xyz02);
132  _mm_stream_ps(&point[8], xyz03);
133  _mm_stream_ps(&point[12], xyz11);
134  _mm_stream_ps(&point[16], xyz12);
135  _mm_stream_ps(&point[20], xyz13);
136  point += 24;
137  }
138 #endif
139  return (float3*)output.get_vertices();
140  }
141 
143  const float3 * points,
144  const unsigned int width,
145  const unsigned int height,
146  const rs2_intrinsics & other_intrinsics,
147  const rs2_extrinsics & extr,
148  float2 * pixels_ptr )
149  {
150  auto tex_ptr = texture_map;
151 
152 #ifdef __SSSE3__
153  auto point = reinterpret_cast<const float*>(points);
154  auto res = reinterpret_cast<float*>(tex_ptr);
155  auto res1 = reinterpret_cast<float*>(pixels_ptr);
156 
157  __m128 r[9];
158  __m128 t[3];
159  __m128 c[5];
160 
161  for (int i = 0; i < 9; ++i)
162  {
163  r[i] = _mm_set_ps1(extr.rotation[i]);
164  }
165  for (int i = 0; i < 3; ++i)
166  {
167  t[i] = _mm_set_ps1(extr.translation[i]);
168  }
169  for (int i = 0; i < 5; ++i)
170  {
171  c[i] = _mm_set_ps1(other_intrinsics.coeffs[i]);
172  }
173 
174  auto fx = _mm_set_ps1(other_intrinsics.fx);
175  auto fy = _mm_set_ps1(other_intrinsics.fy);
176  auto ppx = _mm_set_ps1(other_intrinsics.ppx);
177  auto ppy = _mm_set_ps1(other_intrinsics.ppy);
178  auto w = _mm_set_ps1(float(other_intrinsics.width));
179  auto h = _mm_set_ps1(float(other_intrinsics.height));
180  auto mask_brown_conrady = _mm_set_ps1(RS2_DISTORTION_BROWN_CONRADY);
181  auto mask_distortion_none = _mm_set_ps1(RS2_DISTORTION_NONE);
182  auto zero = _mm_set_ps1(0);
183  auto one = _mm_set_ps1(1);
184  auto two = _mm_set_ps1(2);
185 
186  for (auto i = 0UL; i < height*width * 3; i += 12)
187  {
188  //load 4 points (x,y,z)
189  auto xyz1 = _mm_load_ps(point + i);
190  auto xyz2 = _mm_load_ps(point + i + 4);
191  auto xyz3 = _mm_load_ps(point + i + 8);
192 
193 
194  //gather x,y,z
195  auto yz = _mm_shuffle_ps(xyz1, xyz2, _MM_SHUFFLE(1, 0, 2, 1));
196  auto xy = _mm_shuffle_ps(xyz2, xyz3, _MM_SHUFFLE(2, 1, 3, 2));
197 
198  auto x = _mm_shuffle_ps(xyz1, xy, _MM_SHUFFLE(2, 0, 3, 0));
199  auto y = _mm_shuffle_ps(yz, xy, _MM_SHUFFLE(3, 1, 2, 0));
200  auto z = _mm_shuffle_ps(yz, xyz3, _MM_SHUFFLE(3, 0, 3, 1));
201 
202  auto p_x = _mm_add_ps(_mm_mul_ps(r[0], x), _mm_add_ps(_mm_mul_ps(r[3], y), _mm_add_ps(_mm_mul_ps(r[6], z), t[0])));
203  auto p_y = _mm_add_ps(_mm_mul_ps(r[1], x), _mm_add_ps(_mm_mul_ps(r[4], y), _mm_add_ps(_mm_mul_ps(r[7], z), t[1])));
204  auto p_z = _mm_add_ps(_mm_mul_ps(r[2], x), _mm_add_ps(_mm_mul_ps(r[5], y), _mm_add_ps(_mm_mul_ps(r[8], z), t[2])));
205 
206  p_x = _mm_div_ps(p_x, p_z);
207  p_y = _mm_div_ps(p_y, p_z);
208 
209  // if(model == RS2_DISTORTION_MODIFIED_BROWN_CONRADY)
210  auto dist = _mm_set_ps1( (float)other_intrinsics.model );
211 
212  auto r2 = _mm_add_ps(_mm_mul_ps(p_x, p_x), _mm_mul_ps(p_y, p_y));
213  auto r3 = _mm_add_ps(_mm_mul_ps(c[1], _mm_mul_ps(r2, r2)), _mm_mul_ps(c[4], _mm_mul_ps(r2, _mm_mul_ps(r2, r2))));
214  auto f = _mm_add_ps(one, _mm_add_ps(_mm_mul_ps(c[0], r2), r3));
215 
216  auto brown = _mm_cmpeq_ps(mask_brown_conrady, dist);
217 
218  auto x_f = _mm_mul_ps(p_x, f);
219  auto y_f = _mm_mul_ps(p_y, f);
220 
221  auto x_f_dist = _mm_or_ps(_mm_and_ps(brown, p_x), _mm_andnot_ps(brown, x_f));
222  auto y_f_dist = _mm_or_ps(_mm_and_ps(brown, p_y), _mm_andnot_ps(brown, y_f));
223 
224  auto r4 = _mm_mul_ps(c[3], _mm_add_ps(r2, _mm_mul_ps(two, _mm_mul_ps(x_f_dist, x_f_dist))));
225  auto d_x = _mm_add_ps(x_f, _mm_add_ps(_mm_mul_ps(two, _mm_mul_ps(c[2], _mm_mul_ps(x_f_dist, y_f_dist))), r4));
226 
227  auto r5 = _mm_mul_ps(c[2], _mm_add_ps(r2, _mm_mul_ps(two, _mm_mul_ps(y_f_dist, y_f_dist))));
228  auto d_y = _mm_add_ps(y_f, _mm_add_ps(_mm_mul_ps(two, _mm_mul_ps(c[3], _mm_mul_ps(x_f_dist, y_f_dist))), r5));
229 
230  auto distortion_none = _mm_cmpeq_ps(mask_distortion_none, dist);
231 
232  p_x = _mm_or_ps(_mm_and_ps(distortion_none, p_x ), _mm_andnot_ps(distortion_none, d_x));
233  p_y = _mm_or_ps(_mm_and_ps(distortion_none, p_y ), _mm_andnot_ps(distortion_none, d_y));
234 
235  //TODO: add handle to RS2_DISTORTION_FTHETA
236 
237  //zero the x and y if z is zero
238  auto cmp = _mm_cmpneq_ps(z, zero);
239  p_x = _mm_and_ps(_mm_add_ps(_mm_mul_ps(p_x, fx), ppx), cmp);
240  p_y = _mm_and_ps(_mm_add_ps(_mm_mul_ps(p_y, fy), ppy), cmp);
241 
242  //scattering of the x y before normalize and store in pixels_ptr
243  auto xx_yy01 = _mm_shuffle_ps(p_x, p_y, _MM_SHUFFLE(2, 0, 2, 0));
244  auto xx_yy23 = _mm_shuffle_ps(p_x, p_y, _MM_SHUFFLE(3, 1, 3, 1));
245 
246  auto xyxy1 = _mm_shuffle_ps(xx_yy01, xx_yy23, _MM_SHUFFLE(2, 0, 2, 0));
247  auto xyxy2 = _mm_shuffle_ps(xx_yy01, xx_yy23, _MM_SHUFFLE(3, 1, 3, 1));
248 
249  _mm_stream_ps(res1, xyxy1);
250  _mm_stream_ps(res1 + 4, xyxy2);
251  res1 += 8;
252 
253  //normalize x and y
254  p_x = _mm_div_ps(p_x, w);
255  p_y = _mm_div_ps(p_y, h);
256 
257  //scattering of the x y after normalize and store in tex_ptr
258  xx_yy01 = _mm_shuffle_ps(p_x, p_y, _MM_SHUFFLE(2, 0, 2, 0));
259  xx_yy23 = _mm_shuffle_ps(p_x, p_y, _MM_SHUFFLE(3, 1, 3, 1));
260 
261  xyxy1 = _mm_shuffle_ps(xx_yy01, xx_yy23, _MM_SHUFFLE(2, 0, 2, 0));
262  xyxy2 = _mm_shuffle_ps(xx_yy01, xx_yy23, _MM_SHUFFLE(3, 1, 3, 1));
263 
264  _mm_stream_ps(res, xyxy1);
265  _mm_stream_ps(res + 4, xyxy2);
266  res += 8;
267  }
268 #endif
269 
270  }
271 
273  const float3 * points,
274  const unsigned int width,
275  const unsigned int height,
276  const rs2_intrinsics & other_intrinsics,
277  const rs2_extrinsics & extr,
278  float2 * pixels_ptr )
279  {
280 
282  points,
283  width,
284  height,
285  other_intrinsics,
286  extr,
287  pixels_ptr );
288  }
289  }
GLint y
float translation[3]
Definition: rs_sensor.h:99
GLenum GLenum GLenum GLenum GLenum scale
Definition: glext.h:10806
const void * get_data() const
Definition: rs_frame.hpp:545
unsigned short uint16_t
Definition: stdint.h:79
float coeffs[5]
Definition: rs_types.h:67
GLdouble GLdouble GLdouble w
d
Definition: rmse.py:171
GLfloat GLfloat GLfloat GLfloat h
Definition: glext.h:1960
GLdouble GLdouble z
static const int one
point
Definition: rmse.py:166
const texture_coordinate * get_texture_coordinates() const
Definition: rs_frame.hpp:792
float rotation[9]
Definition: rs_sensor.h:98
GLdouble t
GLdouble f
GLsizeiptr size
const GLubyte * c
Definition: glext.h:12690
GLdouble GLdouble r
GLdouble x
unsigned int uint32_t
Definition: stdint.h:80
std::vector< float > _pre_compute_map_y
GLint GLsizei GLsizei height
const vertex * get_vertices() const
Definition: rs_frame.hpp:767
GLuint GLfloat x0
Definition: glext.h:9721
std::vector< float > _pre_compute_map_x
void get_texture_map_sse(float2 *texture_map, const float3 *points, const unsigned int width, const unsigned int height, const rs2_intrinsics &other_intrinsics, const rs2_extrinsics &extr, float2 *pixels_ptr)
Cross-stream extrinsics: encodes the topology describing how the different devices are oriented...
Definition: rs_sensor.h:96
const float3 * depth_to_points(rs2::points output, const rs2_intrinsics &depth_intrinsics, const rs2::depth_frame &depth_frame, float depth_scale) override
GLuint GLfloat GLfloat GLfloat x1
Definition: glext.h:9721
rs2_distortion model
Definition: rs_types.h:66
optional_value< rs2_intrinsics > _depth_intrinsics
Definition: pointcloud.h:41
rs2_extrinsics extr
Definition: test-pose.cpp:258
GLuint GLfloat GLfloat y0
Definition: glext.h:9721
Video stream intrinsics.
Definition: rs_types.h:58
int i
GLuint res
Definition: glext.h:8856
void get_texture_map(rs2::points output, const float3 *points, const unsigned int width, const unsigned int height, const rs2_intrinsics &other_intrinsics, const rs2_extrinsics &extr, float2 *pixels_ptr) override
GLdouble y1
GLint GLsizei width
GLdouble GLdouble GLint GLint const GLdouble * points


librealsense2
Author(s): Sergey Dorodnicov , Doron Hirshberg , Mark Horn , Reagan Lopez , Itay Carpis
autogenerated on Mon May 3 2021 02:50:10