4 #include "../include/librealsense2/rs.hpp" 5 #include "../include/librealsense2/rsutil.h" 19 #include <tmmintrin.h> 36 const float pixel[] = { (float)
w, (
float)
h };
44 float r2 = x * x +
y *
y;
75 const __m128i mask0 = _mm_set_epi8((
char)0xff, (
char)0xff, (
char)7, (
char)6, (
char)0xff, (
char)0xff, (
char)5, (
char)4,
76 (
char)0xff, (
char)0xff, (
char)3, (
char)2, (
char)0xff, (
char)0xff, (
char)1, (
char)0);
77 const __m128i mask1 = _mm_set_epi8((
char)0xff, (
char)0xff, (
char)15, (
char)14, (
char)0xff, (
char)0xff, (
char)13, (
char)12,
78 (
char)0xff, (
char)0xff, (
char)11, (
char)10, (
char)0xff, (
char)0xff, (
char)9, (
char)8);
80 auto scale = _mm_set_ps1(depth_scale);
82 auto mapx = pre_compute_x;
83 auto mapy = pre_compute_y;
85 for (
unsigned int i = 0;
i <
size;
i += 8)
87 auto x0 = _mm_load_ps(mapx +
i);
88 auto x1 = _mm_load_ps(mapx +
i + 4);
90 auto y0 = _mm_load_ps(mapy +
i);
91 auto y1 = _mm_load_ps(mapy +
i + 4);
93 __m128i
d = _mm_load_si128((__m128i
const*)(
depth_image +
i));
96 __m128i d0 = _mm_shuffle_epi8(d, mask0);
97 __m128i d1 = _mm_shuffle_epi8(d, mask1);
99 __m128 depth0 = _mm_cvtepi32_ps(d0);
100 __m128 depth1 = _mm_cvtepi32_ps(d1);
102 depth0 = _mm_mul_ps(depth0,
scale);
103 depth1 = _mm_mul_ps(depth1,
scale);
105 auto p0x = _mm_mul_ps(depth0,
x0);
106 auto p0y = _mm_mul_ps(depth0,
y0);
108 auto p1x = _mm_mul_ps(depth1,
x1);
109 auto p1y = _mm_mul_ps(depth1,
y1);
112 auto x_y0 = _mm_shuffle_ps(p0x, p0y, _MM_SHUFFLE(2, 0, 2, 0));
113 auto z_x0 = _mm_shuffle_ps(depth0, p0x, _MM_SHUFFLE(3, 1, 2, 0));
114 auto y_z0 = _mm_shuffle_ps(p0y, depth0, _MM_SHUFFLE(3, 1, 3, 1));
116 auto xyz01 = _mm_shuffle_ps(x_y0, z_x0, _MM_SHUFFLE(2, 0, 2, 0));
117 auto xyz02 = _mm_shuffle_ps(y_z0, x_y0, _MM_SHUFFLE(3, 1, 2, 0));
118 auto xyz03 = _mm_shuffle_ps(z_x0, y_z0, _MM_SHUFFLE(3, 1, 3, 1));
120 auto x_y1 = _mm_shuffle_ps(p1x, p1y, _MM_SHUFFLE(2, 0, 2, 0));
121 auto z_x1 = _mm_shuffle_ps(depth1, p1x, _MM_SHUFFLE(3, 1, 2, 0));
122 auto y_z1 = _mm_shuffle_ps(p1y, depth1, _MM_SHUFFLE(3, 1, 3, 1));
124 auto xyz11 = _mm_shuffle_ps(x_y1, z_x1, _MM_SHUFFLE(2, 0, 2, 0));
125 auto xyz12 = _mm_shuffle_ps(y_z1, x_y1, _MM_SHUFFLE(3, 1, 2, 0));
126 auto xyz13 = _mm_shuffle_ps(z_x1, y_z1, _MM_SHUFFLE(3, 1, 3, 1));
130 _mm_stream_ps(&
point[0], xyz01);
131 _mm_stream_ps(&
point[4], xyz02);
132 _mm_stream_ps(&
point[8], xyz03);
133 _mm_stream_ps(&
point[12], xyz11);
134 _mm_stream_ps(&
point[16], xyz12);
135 _mm_stream_ps(&
point[20], xyz13);
144 const unsigned int width,
145 const unsigned int height,
150 auto tex_ptr = texture_map;
153 auto point =
reinterpret_cast<const float*
>(
points);
154 auto res =
reinterpret_cast<float*
>(tex_ptr);
155 auto res1 =
reinterpret_cast<float*
>(pixels_ptr);
161 for (
int i = 0;
i < 9; ++
i)
165 for (
int i = 0;
i < 3; ++
i)
169 for (
int i = 0;
i < 5; ++
i)
171 c[
i] = _mm_set_ps1(other_intrinsics.
coeffs[
i]);
174 auto fx = _mm_set_ps1(other_intrinsics.
fx);
175 auto fy = _mm_set_ps1(other_intrinsics.
fy);
176 auto ppx = _mm_set_ps1(other_intrinsics.
ppx);
177 auto ppy = _mm_set_ps1(other_intrinsics.
ppy);
178 auto w = _mm_set_ps1(
float(other_intrinsics.
width));
179 auto h = _mm_set_ps1(
float(other_intrinsics.
height));
182 auto zero = _mm_set_ps1(0);
183 auto one = _mm_set_ps1(1);
184 auto two = _mm_set_ps1(2);
186 for (
auto i = 0UL;
i < height*width * 3;
i += 12)
189 auto xyz1 = _mm_load_ps(
point +
i);
190 auto xyz2 = _mm_load_ps(
point +
i + 4);
191 auto xyz3 = _mm_load_ps(
point +
i + 8);
195 auto yz = _mm_shuffle_ps(xyz1, xyz2, _MM_SHUFFLE(1, 0, 2, 1));
196 auto xy = _mm_shuffle_ps(xyz2, xyz3, _MM_SHUFFLE(2, 1, 3, 2));
198 auto x = _mm_shuffle_ps(xyz1, xy, _MM_SHUFFLE(2, 0, 3, 0));
199 auto y = _mm_shuffle_ps(yz, xy, _MM_SHUFFLE(3, 1, 2, 0));
200 auto z = _mm_shuffle_ps(yz, xyz3, _MM_SHUFFLE(3, 0, 3, 1));
202 auto p_x = _mm_add_ps(_mm_mul_ps(r[0],
x), _mm_add_ps(_mm_mul_ps(r[3],
y), _mm_add_ps(_mm_mul_ps(r[6],
z), t[0])));
203 auto p_y = _mm_add_ps(_mm_mul_ps(r[1],
x), _mm_add_ps(_mm_mul_ps(r[4],
y), _mm_add_ps(_mm_mul_ps(r[7],
z), t[1])));
204 auto p_z = _mm_add_ps(_mm_mul_ps(r[2],
x), _mm_add_ps(_mm_mul_ps(r[5],
y), _mm_add_ps(_mm_mul_ps(r[8],
z), t[2])));
206 p_x = _mm_div_ps(p_x, p_z);
207 p_y = _mm_div_ps(p_y, p_z);
210 auto dist = _mm_set_ps1( (
float)other_intrinsics.
model );
212 auto r2 = _mm_add_ps(_mm_mul_ps(p_x, p_x), _mm_mul_ps(p_y, p_y));
213 auto r3 = _mm_add_ps(_mm_mul_ps(c[1], _mm_mul_ps(r2, r2)), _mm_mul_ps(c[4], _mm_mul_ps(r2, _mm_mul_ps(r2, r2))));
214 auto f = _mm_add_ps(
one, _mm_add_ps(_mm_mul_ps(c[0], r2), r3));
216 auto brown = _mm_cmpeq_ps(mask_brown_conrady,
dist);
218 auto x_f = _mm_mul_ps(p_x,
f);
219 auto y_f = _mm_mul_ps(p_y,
f);
221 auto x_f_dist = _mm_or_ps(_mm_and_ps(brown, p_x), _mm_andnot_ps(brown, x_f));
222 auto y_f_dist = _mm_or_ps(_mm_and_ps(brown, p_y), _mm_andnot_ps(brown, y_f));
224 auto r4 = _mm_mul_ps(c[3], _mm_add_ps(r2, _mm_mul_ps(two, _mm_mul_ps(x_f_dist, x_f_dist))));
225 auto d_x = _mm_add_ps(x_f, _mm_add_ps(_mm_mul_ps(two, _mm_mul_ps(c[2], _mm_mul_ps(x_f_dist, y_f_dist))), r4));
227 auto r5 = _mm_mul_ps(c[2], _mm_add_ps(r2, _mm_mul_ps(two, _mm_mul_ps(y_f_dist, y_f_dist))));
228 auto d_y = _mm_add_ps(y_f, _mm_add_ps(_mm_mul_ps(two, _mm_mul_ps(c[3], _mm_mul_ps(x_f_dist, y_f_dist))), r5));
230 auto distortion_none = _mm_cmpeq_ps(mask_distortion_none,
dist);
232 p_x = _mm_or_ps(_mm_and_ps(distortion_none, p_x ), _mm_andnot_ps(distortion_none, d_x));
233 p_y = _mm_or_ps(_mm_and_ps(distortion_none, p_y ), _mm_andnot_ps(distortion_none, d_y));
238 auto cmp = _mm_cmpneq_ps(
z, zero);
239 p_x = _mm_and_ps(_mm_add_ps(_mm_mul_ps(p_x,
fx),
ppx), cmp);
240 p_y = _mm_and_ps(_mm_add_ps(_mm_mul_ps(p_y,
fy),
ppy), cmp);
243 auto xx_yy01 = _mm_shuffle_ps(p_x, p_y, _MM_SHUFFLE(2, 0, 2, 0));
244 auto xx_yy23 = _mm_shuffle_ps(p_x, p_y, _MM_SHUFFLE(3, 1, 3, 1));
246 auto xyxy1 = _mm_shuffle_ps(xx_yy01, xx_yy23, _MM_SHUFFLE(2, 0, 2, 0));
247 auto xyxy2 = _mm_shuffle_ps(xx_yy01, xx_yy23, _MM_SHUFFLE(3, 1, 3, 1));
249 _mm_stream_ps(res1, xyxy1);
250 _mm_stream_ps(res1 + 4, xyxy2);
254 p_x = _mm_div_ps(p_x,
w);
255 p_y = _mm_div_ps(p_y,
h);
258 xx_yy01 = _mm_shuffle_ps(p_x, p_y, _MM_SHUFFLE(2, 0, 2, 0));
259 xx_yy23 = _mm_shuffle_ps(p_x, p_y, _MM_SHUFFLE(3, 1, 3, 1));
261 xyxy1 = _mm_shuffle_ps(xx_yy01, xx_yy23, _MM_SHUFFLE(2, 0, 2, 0));
262 xyxy2 = _mm_shuffle_ps(xx_yy01, xx_yy23, _MM_SHUFFLE(3, 1, 3, 1));
264 _mm_stream_ps(
res, xyxy1);
265 _mm_stream_ps(
res + 4, xyxy2);
274 const unsigned int width,
275 const unsigned int height,
GLenum GLenum GLenum GLenum GLenum scale
const void * get_data() const
GLdouble GLdouble GLdouble w
GLfloat GLfloat GLfloat GLfloat h
const texture_coordinate * get_texture_coordinates() const
std::vector< float > _pre_compute_map_y
GLint GLsizei GLsizei height
const vertex * get_vertices() const
std::vector< float > _pre_compute_map_x
void get_texture_map_sse(float2 *texture_map, const float3 *points, const unsigned int width, const unsigned int height, const rs2_intrinsics &other_intrinsics, const rs2_extrinsics &extr, float2 *pixels_ptr)
Cross-stream extrinsics: encodes the topology describing how the different devices are oriented...
const float3 * depth_to_points(rs2::points output, const rs2_intrinsics &depth_intrinsics, const rs2::depth_frame &depth_frame, float depth_scale) override
GLuint GLfloat GLfloat GLfloat x1
optional_value< rs2_intrinsics > _depth_intrinsics
void preprocess() override
GLuint GLfloat GLfloat y0
void get_texture_map(rs2::points output, const float3 *points, const unsigned int width, const unsigned int height, const rs2_intrinsics &other_intrinsics, const rs2_extrinsics &extr, float2 *pixels_ptr) override
GLdouble GLdouble GLint GLint const GLdouble * points