// Preamble: both STB_IMAGE_* macros are defined before the third-party stb_image header is
// included, followed by the CUDA conversion header and <tmmintrin.h> (the header that declares
// the _mm_shuffle_epi8 / _mm_alignr_epi8 intrinsics used later in this file).
// On Android, and on Linux or Apple builds that are not x86_64, has_avx() is a stub that always
// returns false.
10 #define STB_IMAGE_STATIC 11 #define STB_IMAGE_IMPLEMENTATION 12 #include "../third-party/stb_image.h" 15 #include "cuda/cuda-conversion.cuh" 18 #include <tmmintrin.h> 21 #if defined (ANDROID) || (defined (__linux__) && !defined (__x86_64__)) || (defined (__APPLE__) && !defined (__x86_64__)) 23 bool has_avx() {
return false; }
// Two ways of filling info[0..3] are visible: a cpuid(info, x) macro that forwards to __cpuidex
// with sub-leaf 0, and a __cpuid_count call with sub-leaf 0. The directives that choose between
// them are not visible in this view.
29 #define cpuid(info, x) __cpuidex(info, x, 0) 33 __cpuid_count(info_type, 0, info[0], info[1], info[2], info[3]);
// Queries CPUID leaf 0x80000000 and reports whether bit 28 of info[2] is set.
// NOTE(review): the AVX feature flag is documented as CPUID leaf 1, ECX bit 28; bit 28 of the
// leaf 0x80000000 result does not carry that meaning. Confirm which leaf was intended.
41 cpuid(info, 0x80000000);
42 return (info[2] & ((
int)1 << 28)) != 0;
// Hands the YUY2 unpack for FORMAT to the CUDA implementation in namespace rscuda, forwarding
// the destination d, the source s and the count n unchanged.
// NOTE(review): presumably compiled only in CUDA-enabled builds; the guarding directive is not
// visible in this view.
59 rscuda::unpack_yuy2_cuda<FORMAT>(
d,
s,
n);
// SSSE3 implementation, compiled when __SSSE3__ is defined and ANDROID is not.
// The source buffer and the first destination buffer are viewed as arrays of 16-byte vectors.
62 #if defined __SSSE3__ && ! defined ANDROID 78 auto src =
reinterpret_cast<const __m128i *
>(
s);
79 auto dst =
reinterpret_cast<__m128i *
>(d[0]);
// n / 16 iterations, each reading two source vectors (32 bytes). Every visible load and store is
// indexed by i, so iterations do not touch each other's data under the OpenMP parallel-for.
81 #pragma omp parallel for 82 for (
int i = 0;
i <
n / 16;
i++)
// Constants. The colour coefficients are pre-shifted left by 4 and the samples are shifted left
// by 4 further down, so _mm_mulhi_epi16 (which keeps the upper 16 bits of each 32-bit product)
// yields (sample * coefficient) >> 8.
// evens_odds moves the even-indexed bytes of a vector into its low half and the odd-indexed
// bytes into its high half.
84 const __m128i zero = _mm_set1_epi8(0);
85 const __m128i n100 = _mm_set1_epi16(100 << 4);
86 const __m128i n208 = _mm_set1_epi16(208 << 4);
87 const __m128i n298 = _mm_set1_epi16(298 << 4);
88 const __m128i n409 = _mm_set1_epi16(409 << 4);
89 const __m128i n516 = _mm_set1_epi16(516 << 4);
90 const __m128i evens_odds = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
// Load the 32 source bytes belonging to iteration i.
93 __m128i s0 = _mm_loadu_si128(&
src[
i * 2]);
94 __m128i s1 = _mm_loadu_si128(&
src[i * 2 + 1]);
// One-vector-per-iteration output (one byte per pixel; the selecting condition is not visible).
// y0 = odd-indexed bytes of s0 in the low half, even-indexed bytes in the high half;
// y1 = even-indexed bytes of s1 in the low half, odd-indexed bytes in the high half.
// _mm_alignr_epi8(y0, y1, 8) returns the high half of y1 followed by the low half of y0.
// NOTE(review): that is the odd-indexed bytes of s1 followed by those of s0, whereas the 16-bit
// and colour paths below take luma from the even-indexed bytes. Verify the operand order of the
// alignr (swapping the operands would yield the even-indexed bytes of s0 then s1).
99 __m128i
y0 = _mm_shuffle_epi8(s0, _mm_setr_epi8(1, 3, 5, 7, 9, 11, 13, 15, 0, 2, 4, 6, 8, 10, 12, 14));
100 __m128i
y1 = _mm_shuffle_epi8(s1, _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15));
101 _mm_storeu_si128(&
dst[i], _mm_alignr_epi8(y0, y1, 8));
// 16-bit and colour paths: gather the even-indexed bytes (luma) into the low 8 bytes, bytes
// 1,5,9,13 into the next 4 bytes and bytes 3,7,11,15 into the top 4 bytes.
106 const __m128i evens_odd1s_odd3s = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 5, 9, 13, 3, 7, 11, 15);
107 __m128i yyyyyyyyuuuuvvvv0 = _mm_shuffle_epi8(s0, evens_odd1s_odd3s);
108 __m128i yyyyyyyyuuuuvvvv8 = _mm_shuffle_epi8(s1, evens_odd1s_odd3s);
// Zero-extend the eight luma bytes of each vector to 16-bit lanes.
111 __m128i y16__0_7 = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv0, zero);
112 __m128i y16__8_F = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv8, zero);
// 16-bit luminance output: each sample is shifted into the high byte of its 16-bit lane and two
// vectors are stored per iteration.
117 _mm_storeu_si128(&
dst[i * 2], _mm_slli_epi16(y16__0_7, 8));
118 _mm_storeu_si128(&
dst[i * 2 + 1], _mm_slli_epi16(y16__8_F, 8));
// Chroma: unpackhi_epi32 interleaves the upper two 32-bit lanes of both shuffled vectors, giving
// eight "u" bytes followed by eight "v" bytes. Unpacking each half with itself duplicates every
// byte (one chroma sample per pixel pair); the results are then zero-extended to 16-bit lanes.
123 __m128i uv = _mm_unpackhi_epi32(yyyyyyyyuuuuvvvv0, yyyyyyyyuuuuvvvv8);
124 __m128i u = _mm_unpacklo_epi8(uv, uv);
125 __m128i
v = _mm_unpackhi_epi8(uv, uv);
126 __m128i u16__0_7 = _mm_unpacklo_epi8(u, zero);
127 __m128i u16__8_F = _mm_unpackhi_epi8(u, zero);
128 __m128i v16__0_7 = _mm_unpacklo_epi8(v, zero);
129 __m128i v16__8_F = _mm_unpackhi_epi8(v, zero);
// Colour conversion for pixels 0..7: c = (y - 16) << 4, d = (u - 128) << 4, e = (v - 128) << 4,
// then r = 298c + 409e, g = 298c - 100d - 208e, b = 298c + 516d (each product taken >> 8 via
// mulhi), clamped to [0, 255] with max/min.
132 __m128i c16__0_7 = _mm_slli_epi16(_mm_subs_epi16(y16__0_7, _mm_set1_epi16(16)), 4);
133 __m128i d16__0_7 = _mm_slli_epi16(_mm_subs_epi16(u16__0_7, _mm_set1_epi16(128)), 4);
134 __m128i e16__0_7 = _mm_slli_epi16(_mm_subs_epi16(v16__0_7, _mm_set1_epi16(128)), 4);
135 __m128i r16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(e16__0_7, n409))))));
136 __m128i g16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n100)), _mm_mulhi_epi16(e16__0_7, n208))))));
137 __m128i b16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n516))))));
// The same conversion for pixels 8..F.
140 __m128i c16__8_F = _mm_slli_epi16(_mm_subs_epi16(y16__8_F, _mm_set1_epi16(16)), 4);
141 __m128i d16__8_F = _mm_slli_epi16(_mm_subs_epi16(u16__8_F, _mm_set1_epi16(128)), 4);
142 __m128i e16__8_F = _mm_slli_epi16(_mm_subs_epi16(v16__8_F, _mm_set1_epi16(128)), 4);
143 __m128i r16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(e16__8_F, n409))))));
144 __m128i g16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n100)), _mm_mulhi_epi16(e16__8_F, n208))))));
145 __m128i b16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n516))))));
// R,G,B,A assembly: evens_odds packs the low byte of every 16-bit lane into the low half, r/g and
// b/0xFF bytes are interleaved pairwise, and the 16-bit unpacks merge the pairs into r,g,b,a
// quads for four pixels per vector.
150 __m128i rg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds));
151 __m128i ba8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_set1_epi8(-1));
152 __m128i rgba_0_3 = _mm_unpacklo_epi16(rg8__0_7, ba8__0_7);
153 __m128i rgba_4_7 = _mm_unpackhi_epi16(rg8__0_7, ba8__0_7);
155 __m128i rg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds));
156 __m128i ba8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_set1_epi8(-1));
157 __m128i rgba_8_B = _mm_unpacklo_epi16(rg8__8_F, ba8__8_F);
158 __m128i rgba_C_F = _mm_unpackhi_epi16(rg8__8_F, ba8__8_F);
// Four-byte-per-pixel store: four vectors (64 bytes) per iteration.
163 _mm_storeu_si128(&
dst[i * 4], rgba_0_3);
164 _mm_storeu_si128(&
dst[i * 4 + 1], rgba_4_7);
165 _mm_storeu_si128(&
dst[i * 4 + 2], rgba_8_B);
166 _mm_storeu_si128(&
dst[i * 4 + 3], rgba_C_F);
// Three-byte-per-pixel packing: each shuffle collects the four alpha bytes into one 4-byte slot
// (bytes 0-3, 4-7, 8-11 and 12-15 respectively) and keeps the twelve colour bytes in order.
172 __m128i rgb0 = _mm_shuffle_epi8(rgba_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
173 __m128i rgb1 = _mm_shuffle_epi8(rgba_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
174 __m128i rgb2 = _mm_shuffle_epi8(rgba_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
175 __m128i rgb3 = _mm_shuffle_epi8(rgba_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
// The alignr calls with offsets 4, 8 and 12 skip the alpha slots and stitch the 48 colour bytes
// into three vectors.
178 _mm_storeu_si128(&
dst[i * 3], _mm_alignr_epi8(rgb1, rgb0, 4));
179 _mm_storeu_si128(&
dst[i * 3 + 1], _mm_alignr_epi8(rgb2, rgb1, 8));
180 _mm_storeu_si128(&
dst[i * 3 + 2], _mm_alignr_epi8(rgb3, rgb2, 12));
// B,G,R,A assembly: identical to the R,G,B,A assembly with the blue and red inputs exchanged.
187 __m128i bg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds));
188 __m128i ra8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_set1_epi8(-1));
189 __m128i bgra_0_3 = _mm_unpacklo_epi16(bg8__0_7, ra8__0_7);
190 __m128i bgra_4_7 = _mm_unpackhi_epi16(bg8__0_7, ra8__0_7);
192 __m128i bg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds));
193 __m128i ra8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_set1_epi8(-1));
194 __m128i bgra_8_B = _mm_unpacklo_epi16(bg8__8_F, ra8__8_F);
195 __m128i bgra_C_F = _mm_unpackhi_epi16(bg8__8_F, ra8__8_F);
// Four-byte-per-pixel store of the b,g,r,a quads.
200 _mm_storeu_si128(&
dst[i * 4], bgra_0_3);
201 _mm_storeu_si128(&
dst[i * 4 + 1], bgra_4_7);
202 _mm_storeu_si128(&
dst[i * 4 + 2], bgra_8_B);
203 _mm_storeu_si128(&
dst[i * 4 + 3], bgra_C_F);
// Three-byte-per-pixel packing of the b,g,r,a quads, using the same alpha-slot shuffles.
209 __m128i bgr0 = _mm_shuffle_epi8(bgra_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
210 __m128i bgr1 = _mm_shuffle_epi8(bgra_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
211 __m128i bgr2 = _mm_shuffle_epi8(bgra_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
212 __m128i bgr3 = _mm_shuffle_epi8(bgra_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
// Stitch the 48 colour bytes into three vectors, skipping the alpha slots.
215 _mm_storeu_si128(&
dst[i * 3], _mm_alignr_epi8(bgr1, bgr0, 4));
216 _mm_storeu_si128(&
dst[i * 3 + 1], _mm_alignr_epi8(bgr2, bgr1, 8));
217 _mm_storeu_si128(&
dst[i * 3 + 2], _mm_alignr_epi8(bgr3, bgr2, 12));
// Portable fallback for builds without the SSSE3 path. Each pass consumes 32 source bytes and
// subtracts 16 from n; the loop ends only when n is exactly 0.
// NOTE(review): presumably n must be a multiple of 16 - otherwise n steps past zero without ever
// equalling it. Confirm against the callers.
222 #else // Generic code for when SSSE3 is not available. 225 for (;
n;
n -= 16,
src += 32)
// Even-indexed source bytes of the group, in order (the luma samples; presumably the
// one-byte-per-pixel output - the enclosing statement is not visible).
230 src[0],
src[2], src[4], src[6],
231 src[8], src[10], src[12], src[14],
232 src[16], src[18], src[20], src[22],
233 src[24], src[26], src[28], src[30],
// The same samples, each preceded by a zero byte (presumably the 16-bit output, with the sample
// in the second byte of each pair).
244 0,
src[0], 0,
src[2], 0, src[4], 0, src[6],
245 0, src[8], 0, src[10], 0, src[12], 0, src[14],
246 0, src[16], 0, src[18], 0, src[20], 0, src[22],
247 0, src[24], 0, src[26], 0, src[28], 0, src[30],
// Even-indexed bytes again - presumably the per-pixel luma table for the colour conversion.
255 src[0],
src[2], src[4], src[6],
256 src[8], src[10], src[12], src[14],
257 src[16], src[18], src[20], src[22],
258 src[24], src[26], src[28], src[30],
// Bytes 1, 5, 9, ... each listed twice: one value shared by each pair of pixels.
260 src[1],
src[1], src[5], src[5],
261 src[9], src[9], src[13], src[13],
262 src[17], src[17], src[21], src[21],
263 src[25], src[25], src[29], src[29],
// Bytes 3, 7, 11, ... each listed twice, likewise shared by each pair of pixels.
265 src[3],
src[3], src[7], src[7],
266 src[11], src[11], src[15], src[15],
267 src[19], src[19], src[23], src[23],
268 src[27], src[27], src[31], src[31],
// Per-pixel colour conversion for the 16 pixels of the group.
272 for (
int i = 0;
i < 16;
i++)
// clamp() saturates its argument to [0, 255] through a temporary t that must already be declared
// in scope. The +128 rounds each sum before the >> 8.
279 #define clamp(x) ((t=(x)) > 255 ? 255 : t < 0 ? 0 : t) 280 r[
i] =
clamp((298 * c + 409 * e + 128) >> 8);
281 g[
i] =
clamp((298 * c - 100 * d - 208 * e + 128) >> 8);
282 b[
i] =
clamp((298 * c + 516 * d + 128) >> 8);
// r,g,b triples for the 16 pixels.
289 r[0], g[0], b[0], r[1], g[1], b[1],
290 r[2], g[2], b[2], r[3], g[3], b[3],
291 r[4], g[4], b[4], r[5], g[5], b[5],
292 r[6], g[6], b[6], r[7], g[7], b[7],
293 r[8], g[8], b[8], r[9], g[9], b[9],
294 r[10], g[10], b[10], r[11], g[11], b[11],
295 r[12], g[12], b[12], r[13], g[13], b[13],
296 r[14], g[14], b[14], r[15], g[15], b[15],
// b,g,r triples for the 16 pixels.
306 b[0], g[0], r[0], b[1], g[1], r[1],
307 b[2], g[2], r[2], b[3], g[3], r[3],
308 b[4], g[4], r[4], b[5], g[5], r[5],
309 b[6], g[6], r[6], b[7], g[7], r[7],
310 b[8], g[8], r[8], b[9], g[9], r[9],
311 b[10], g[10], r[10], b[11], g[11], r[11],
312 b[12], g[12], r[12], b[13], g[13], r[13],
313 b[14], g[14], r[14], b[15], g[15], r[15],
// r,g,b quads with a constant fourth byte of 255.
323 r[0], g[0], b[0], 255, r[1], g[1], b[1], 255,
324 r[2], g[2], b[2], 255, r[3], g[3], b[3], 255,
325 r[4], g[4], b[4], 255, r[5], g[5], b[5], 255,
326 r[6], g[6], b[6], 255, r[7], g[7], b[7], 255,
327 r[8], g[8], b[8], 255, r[9], g[9], b[9], 255,
328 r[10], g[10], b[10], 255, r[11], g[11], b[11], 255,
329 r[12], g[12], b[12], 255, r[13], g[13], b[13], 255,
330 r[14], g[14], b[14], 255, r[15], g[15], b[15], 255,
// b,g,r quads with a constant fourth byte of 255.
340 b[0], g[0], r[0], 255, b[1], g[1], r[1], 255,
341 b[2], g[2], r[2], 255, b[3], g[3], r[3], 255,
342 b[4], g[4], r[4], 255, b[5], g[5], r[5], 255,
343 b[6], g[6], r[6], 255, b[7], g[7], r[7], 255,
344 b[8], g[8], r[8], 255, b[9], g[9], r[9], 255,
345 b[10], g[10], r[10], 255, b[11], g[11], r[11], 255,
346 b[12], g[12], r[12], 255, b[13], g[13], r[13], 255,
347 b[14], g[14], r[14], 255, b[15], g[15], r[15], 255,
// Run-time to compile-time dispatch: each call instantiates unpack_yuy2 for one output format
// (Y8, Y16, RGB8, RGBA8, BGR8, BGRA8) and forwards the same five arguments. The statements that
// select between the calls are not visible in this view.
362 unpack_yuy2<RS2_FORMAT_Y8>(
d,
s,
w,
h, actual_size);
365 unpack_yuy2<RS2_FORMAT_Y16>(
d,
s,
w,
h, actual_size);
368 unpack_yuy2<RS2_FORMAT_RGB8>(
d,
s,
w,
h, actual_size);
371 unpack_yuy2<RS2_FORMAT_RGBA8>(
d,
s,
w,
h, actual_size);
374 unpack_yuy2<RS2_FORMAT_BGR8>(
d,
s,
w,
h, actual_size);
377 unpack_yuy2<RS2_FORMAT_BGRA8>(
d,
s,
w,
h, actual_size);
// Logged when none of the instantiations above applies (presumably the fall-through branch).
380 LOG_ERROR(
"Unsupported format for YUY2 conversion.");
// SSSE3 implementation of the UYVY unpack. The source buffer and the first destination buffer
// are viewed as 16-byte vectors and walked with post-increments rather than indexed.
396 auto src =
reinterpret_cast<const __m128i *
>(
s);
397 auto dst =
reinterpret_cast<__m128i *
>(d[0]);
// Constants. The colour coefficients are pre-shifted left by 4 and the samples are shifted left
// by 4 further down, so _mm_mulhi_epi16 (which keeps the upper 16 bits of each 32-bit product)
// yields (sample * coefficient) >> 8.
// evens_odds moves the even-indexed bytes of a vector into its low half and the odd-indexed
// bytes into its high half.
400 const __m128i zero = _mm_set1_epi8(0);
401 const __m128i n100 = _mm_set1_epi16(100 << 4);
402 const __m128i n208 = _mm_set1_epi16(208 << 4);
403 const __m128i n298 = _mm_set1_epi16(298 << 4);
404 const __m128i n409 = _mm_set1_epi16(409 << 4);
405 const __m128i n516 = _mm_set1_epi16(516 << 4);
406 const __m128i evens_odds = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
// Load 32 source bytes, advancing src by two vectors.
409 __m128i s0 = _mm_loadu_si128(
src++);
410 __m128i s1 = _mm_loadu_si128(
src++);
// Despite its name, this mask gathers the odd-indexed bytes (luma) into the low 8 bytes, then
// bytes 0,4,8,12 into the next 4 bytes and bytes 2,6,10,14 into the top 4 bytes.
414 const __m128i evens_odd1s_odd3s = _mm_setr_epi8(1, 3, 5, 7, 9, 11, 13, 15, 0, 4, 8, 12, 2, 6, 10, 14);
415 __m128i yyyyyyyyuuuuvvvv0 = _mm_shuffle_epi8(s0, evens_odd1s_odd3s);
416 __m128i yyyyyyyyuuuuvvvv8 = _mm_shuffle_epi8(s1, evens_odd1s_odd3s);
// Zero-extend the eight luma bytes of each vector to 16-bit lanes.
419 __m128i y16__0_7 = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv0, zero);
420 __m128i y16__8_F = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv8, zero);
// Chroma: unpackhi_epi32 interleaves the upper two 32-bit lanes of both shuffled vectors, giving
// eight "u" bytes followed by eight "v" bytes. Unpacking each half with itself duplicates every
// byte (one chroma sample per pixel pair); the results are then zero-extended to 16-bit lanes.
424 __m128i uv = _mm_unpackhi_epi32(yyyyyyyyuuuuvvvv0, yyyyyyyyuuuuvvvv8);
425 __m128i u = _mm_unpacklo_epi8(uv, uv);
426 __m128i
v = _mm_unpackhi_epi8(uv, uv);
427 __m128i u16__0_7 = _mm_unpacklo_epi8(u, zero);
428 __m128i u16__8_F = _mm_unpackhi_epi8(u, zero);
429 __m128i v16__0_7 = _mm_unpacklo_epi8(v, zero);
430 __m128i v16__8_F = _mm_unpackhi_epi8(v, zero);
// Colour conversion for pixels 0..7: c = (y - 16) << 4, d = (u - 128) << 4, e = (v - 128) << 4,
// then r = 298c + 409e, g = 298c - 100d - 208e, b = 298c + 516d (each product taken >> 8 via
// mulhi), clamped to [0, 255] with max/min.
433 __m128i c16__0_7 = _mm_slli_epi16(_mm_subs_epi16(y16__0_7, _mm_set1_epi16(16)), 4);
434 __m128i d16__0_7 = _mm_slli_epi16(_mm_subs_epi16(u16__0_7, _mm_set1_epi16(128)), 4);
435 __m128i e16__0_7 = _mm_slli_epi16(_mm_subs_epi16(v16__0_7, _mm_set1_epi16(128)), 4);
436 __m128i r16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(e16__0_7, n409))))));
437 __m128i g16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n100)), _mm_mulhi_epi16(e16__0_7, n208))))));
438 __m128i b16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n516))))));
// The same conversion for pixels 8..F.
441 __m128i c16__8_F = _mm_slli_epi16(_mm_subs_epi16(y16__8_F, _mm_set1_epi16(16)), 4);
442 __m128i d16__8_F = _mm_slli_epi16(_mm_subs_epi16(u16__8_F, _mm_set1_epi16(128)), 4);
443 __m128i e16__8_F = _mm_slli_epi16(_mm_subs_epi16(v16__8_F, _mm_set1_epi16(128)), 4);
444 __m128i r16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(e16__8_F, n409))))));
445 __m128i g16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n100)), _mm_mulhi_epi16(e16__8_F, n208))))));
446 __m128i b16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n516))))));
// R,G,B,A assembly: evens_odds packs the low byte of every 16-bit lane into the low half, r/g and
// b/0xFF bytes are interleaved pairwise, and the 16-bit unpacks merge the pairs into r,g,b,a
// quads for four pixels per vector.
451 __m128i rg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds));
452 __m128i ba8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_set1_epi8(-1));
453 __m128i rgba_0_3 = _mm_unpacklo_epi16(rg8__0_7, ba8__0_7);
454 __m128i rgba_4_7 = _mm_unpackhi_epi16(rg8__0_7, ba8__0_7);
456 __m128i rg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds));
457 __m128i ba8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_set1_epi8(-1));
458 __m128i rgba_8_B = _mm_unpacklo_epi16(rg8__8_F, ba8__8_F);
459 __m128i rgba_C_F = _mm_unpackhi_epi16(rg8__8_F, ba8__8_F);
// Four-byte-per-pixel store: four vectors written, dst advanced after each.
464 _mm_storeu_si128(
dst++, rgba_0_3);
465 _mm_storeu_si128(
dst++, rgba_4_7);
466 _mm_storeu_si128(
dst++, rgba_8_B);
467 _mm_storeu_si128(
dst++, rgba_C_F);
// Three-byte-per-pixel packing: each shuffle collects the four alpha bytes into one 4-byte slot
// (bytes 0-3, 4-7, 8-11 and 12-15 respectively) and keeps the twelve colour bytes in order.
473 __m128i rgb0 = _mm_shuffle_epi8(rgba_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
474 __m128i rgb1 = _mm_shuffle_epi8(rgba_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
475 __m128i rgb2 = _mm_shuffle_epi8(rgba_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
476 __m128i rgb3 = _mm_shuffle_epi8(rgba_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
// The alignr calls with offsets 4, 8 and 12 skip the alpha slots and stitch the 48 colour bytes
// into three vectors.
479 _mm_storeu_si128(
dst++, _mm_alignr_epi8(rgb1, rgb0, 4));
480 _mm_storeu_si128(
dst++, _mm_alignr_epi8(rgb2, rgb1, 8));
481 _mm_storeu_si128(
dst++, _mm_alignr_epi8(rgb3, rgb2, 12));
// B,G,R,A assembly: identical to the R,G,B,A assembly with the blue and red inputs exchanged.
488 __m128i bg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds));
489 __m128i ra8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_set1_epi8(-1));
490 __m128i bgra_0_3 = _mm_unpacklo_epi16(bg8__0_7, ra8__0_7);
491 __m128i bgra_4_7 = _mm_unpackhi_epi16(bg8__0_7, ra8__0_7);
493 __m128i bg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds));
494 __m128i ra8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_set1_epi8(-1));
495 __m128i bgra_8_B = _mm_unpacklo_epi16(bg8__8_F, ra8__8_F);
496 __m128i bgra_C_F = _mm_unpackhi_epi16(bg8__8_F, ra8__8_F);
// Four-byte-per-pixel store of the b,g,r,a quads.
501 _mm_storeu_si128(
dst++, bgra_0_3);
502 _mm_storeu_si128(
dst++, bgra_4_7);
503 _mm_storeu_si128(
dst++, bgra_8_B);
504 _mm_storeu_si128(
dst++, bgra_C_F);
// Three-byte-per-pixel packing of the b,g,r,a quads, using the same alpha-slot shuffles.
510 __m128i bgr0 = _mm_shuffle_epi8(bgra_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
511 __m128i bgr1 = _mm_shuffle_epi8(bgra_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
512 __m128i bgr2 = _mm_shuffle_epi8(bgra_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
513 __m128i bgr3 = _mm_shuffle_epi8(bgra_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
// Stitch the 48 colour bytes into three vectors, skipping the alpha slots.
516 _mm_storeu_si128(
dst++, _mm_alignr_epi8(bgr1, bgr0, 4));
517 _mm_storeu_si128(
dst++, _mm_alignr_epi8(bgr2, bgr1, 8));
518 _mm_storeu_si128(
dst++, _mm_alignr_epi8(bgr3, bgr2, 12));
// Portable fallback for builds without the SSSE3 path. Each pass consumes 32 source bytes and
// subtracts 16 from n; the loop ends only when n is exactly 0.
// NOTE(review): presumably n must be a multiple of 16 - otherwise n steps past zero without ever
// equalling it. Confirm against the callers.
522 #else // Generic code for when SSSE3 is not available. 525 for (;
n;
n -= 16,
src += 32)
// Odd-indexed source bytes of the group, in order - presumably the per-pixel luma table.
528 src[1],
src[3], src[5], src[7],
529 src[9], src[11], src[13], src[15],
530 src[17], src[19], src[21], src[23],
531 src[25], src[27], src[29], src[31],
// Bytes 0, 4, 8, ... each listed twice: one value shared by each pair of pixels.
533 src[0],
src[0], src[4], src[4],
534 src[8], src[8], src[12], src[12],
535 src[16], src[16], src[20], src[20],
536 src[24], src[24], src[28], src[28],
// Bytes 2, 6, 10, ... each listed twice, likewise shared by each pair of pixels.
538 src[2],
src[2], src[6], src[6],
539 src[10], src[10], src[14], src[14],
540 src[18], src[18], src[22], src[22],
541 src[26], src[26], src[30], src[30],
// Per-pixel colour conversion for the 16 pixels of the group.
545 for (
int i = 0;
i < 16;
i++)
// clamp() saturates its argument to [0, 255] through a temporary t that must already be declared
// in scope. The +128 rounds each sum before the >> 8.
552 #define clamp(x) ((t=(x)) > 255 ? 255 : t < 0 ? 0 : t) 553 r[
i] =
clamp((298 * c + 409 * e + 128) >> 8);
554 g[
i] =
clamp((298 * c - 100 * d - 208 * e + 128) >> 8);
555 b[
i] =
clamp((298 * c + 516 * d + 128) >> 8);
// r,g,b triples for the 16 pixels.
562 r[0], g[0], b[0], r[1], g[1], b[1],
563 r[2], g[2], b[2], r[3], g[3], b[3],
564 r[4], g[4], b[4], r[5], g[5], b[5],
565 r[6], g[6], b[6], r[7], g[7], b[7],
566 r[8], g[8], b[8], r[9], g[9], b[9],
567 r[10], g[10], b[10], r[11], g[11], b[11],
568 r[12], g[12], b[12], r[13], g[13], b[13],
569 r[14], g[14], b[14], r[15], g[15], b[15],
// b,g,r triples for the 16 pixels.
579 b[0], g[0], r[0], b[1], g[1], r[1],
580 b[2], g[2], r[2], b[3], g[3], r[3],
581 b[4], g[4], r[4], b[5], g[5], r[5],
582 b[6], g[6], r[6], b[7], g[7], r[7],
583 b[8], g[8], r[8], b[9], g[9], r[9],
584 b[10], g[10], r[10], b[11], g[11], r[11],
585 b[12], g[12], r[12], b[13], g[13], r[13],
586 b[14], g[14], r[14], b[15], g[15], r[15],
// r,g,b quads with a constant fourth byte of 255.
596 r[0], g[0], b[0], 255, r[1], g[1], b[1], 255,
597 r[2], g[2], b[2], 255, r[3], g[3], b[3], 255,
598 r[4], g[4], b[4], 255, r[5], g[5], b[5], 255,
599 r[6], g[6], b[6], 255, r[7], g[7], b[7], 255,
600 r[8], g[8], b[8], 255, r[9], g[9], b[9], 255,
601 r[10], g[10], b[10], 255, r[11], g[11], b[11], 255,
602 r[12], g[12], b[12], 255, r[13], g[13], b[13], 255,
603 r[14], g[14], b[14], 255, r[15], g[15], b[15], 255,
// b,g,r quads with a constant fourth byte of 255.
613 b[0], g[0], r[0], 255, b[1], g[1], r[1], 255,
614 b[2], g[2], r[2], 255, b[3], g[3], r[3], 255,
615 b[4], g[4], r[4], 255, b[5], g[5], r[5], 255,
616 b[6], g[6], r[6], 255, b[7], g[7], r[7], 255,
617 b[8], g[8], r[8], 255, b[9], g[9], r[9], 255,
618 b[10], g[10], r[10], 255, b[11], g[11], r[11], 255,
619 b[12], g[12], r[12], 255, b[13], g[13], r[13], 255,
620 b[14], g[14], r[14], 255, b[15], g[15], r[15], 255,
// Run-time to compile-time dispatch: each call instantiates unpack_uyvy for one output format
// (RGB8, RGBA8, BGR8, BGRA8) and forwards the same five arguments. The statements that select
// between the calls are not visible in this view.
635 unpack_uyvy<RS2_FORMAT_RGB8>(
d,
s,
w,
h, actual_size);
638 unpack_uyvy<RS2_FORMAT_RGBA8>(
d,
s,
w,
h, actual_size);
641 unpack_uyvy<RS2_FORMAT_BGR8>(
d,
s,
w,
h, actual_size);
644 unpack_uyvy<RS2_FORMAT_BGRA8>(
d,
s,
w,
h, actual_size);
// Logged when none of the instantiations above applies (presumably the fall-through branch).
647 LOG_ERROR(
"Unsupported format for UYVY conversion.");
// Fragments of the MJPEG path; the statements around them are not visible in this view.
// Proceeds only when uncompressed_rgb is non-zero (presumably the buffer returned by the image
// decoder - confirm against the missing lines).
659 if (uncompressed_rgb)
// Byte count of the decoded image: width * height * bytes-per-pixel, all taken from local
// variables whose assignment is not visible here.
661 auto uncompressed_size = w * h *
bpp;
// Byte-wise view of the first destination buffer.
676 auto out =
reinterpret_cast<uint8_t *
>(dest[0]);
// Forwards six arguments to unpack_mjpeg unchanged (presumably from a process_function override
// with the same parameter names).
697 unpack_mjpeg(dest, source, width, height, actual_size, input_size);
GLsizei GLsizei GLchar * source
void unpack_uyvyc(rs2_format dst_format, rs2_stream dst_stream, byte *const d[], const byte *s, int w, int h, int actual_size)
void unpack_rgb_from_bgr(byte *const dest[], const byte *source, int width, int height, int actual_size)
rs2_format _target_format
STBIDEF stbi_uc * stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
GLdouble GLdouble GLdouble w
def info(name, value, persistent=False)
#define assert(condition)
NLOHMANN_BASIC_JSON_TPL_DECLARATION void swap(nlohmann::NLOHMANN_BASIC_JSON_TPL &j1, nlohmann::NLOHMANN_BASIC_JSON_TPL &j2) noexcept(//NOLINT(readability-inconsistent-declaration-parameter-name) is_nothrow_move_constructible< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value &&//NOLINT(misc-redundant-expression) is_nothrow_move_assignable< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value)
exchanges the values of two JSON objects
void process_function(byte *const dest[], const byte *source, int width, int height, int actual_size, int input_size) override
GLint GLsizei GLsizei height
void process_function(byte *const dest[], const byte *source, int width, int height, int actual_size, int input_size) override
rs2_stream _target_stream
rs2_format
A stream's format identifies how binary data is encoded within a frame.
void process_function(byte *const dest[], const byte *source, int width, int height, int actual_size, int input_size) override
rs2_stream
Streams are different types of data provided by RealSense devices.
void unpack_uyvy(byte *const d[], const byte *s, int width, int height, int actual_size)
GLboolean GLboolean GLboolean b
void unpack_yuy2(byte *const d[], const byte *s, int width, int height, int actual_size)
void unpack_mjpeg(byte *const dest[], const byte *source, int width, int height, int actual_size, int input_size)
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
void process_function(byte *const dest[], const byte *source, int width, int height, int actual_size, int input_size) override
void copy(void *dst, void const *src, size_t size)