10 #define STB_IMAGE_STATIC
11 #define STB_IMAGE_IMPLEMENTATION
12 #include "../third-party/stb_image.h"
15 #include "cuda/cuda-conversion.cuh"
18 #include <tmmintrin.h>
21 #if defined (ANDROID) || (defined (__linux__) && !defined (__x86_64__)) || (defined (__APPLE__) && !defined (__x86_64__))
// AVX availability probe for targets where AVX can never be present
// (Android, non-x86_64 Linux, non-x86_64 Apple): always reports false
// without touching CPUID, so callers fall through to the scalar paths.
bool has_avx() { return false; }
29 #define cpuid(info, x) __cpuidex(info, x, 0)
42 return (
info[2] & ((
int)1 << 28)) != 0;
59 rscuda::unpack_yuy2_cuda<FORMAT>(
d,
s,
n);
62 #if defined __SSSE3__ && ! defined ANDROID
78 auto src =
reinterpret_cast<const __m128i *
>(
s);
79 auto dst =
reinterpret_cast<__m128i *
>(
d[0]);
81 #pragma omp parallel for
82 for (
int i = 0;
i <
n / 16;
i++)
84 const __m128i zero = _mm_set1_epi8(0);
85 const __m128i n100 = _mm_set1_epi16(100 << 4);
86 const __m128i n208 = _mm_set1_epi16(208 << 4);
87 const __m128i n298 = _mm_set1_epi16(298 << 4);
88 const __m128i n409 = _mm_set1_epi16(409 << 4);
89 const __m128i n516 = _mm_set1_epi16(516 << 4);
90 const __m128i evens_odds = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
93 __m128i s0 = _mm_loadu_si128(&
src[
i * 2]);
94 __m128i s1 = _mm_loadu_si128(&
src[
i * 2 + 1]);
99 __m128i
y0 = _mm_shuffle_epi8(s0, _mm_setr_epi8(1, 3, 5, 7, 9, 11, 13, 15, 0, 2, 4, 6, 8, 10, 12, 14));
100 __m128i
y1 = _mm_shuffle_epi8(s1, _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15));
101 _mm_storeu_si128(&
dst[
i], _mm_alignr_epi8(
y0,
y1, 8));
106 const __m128i evens_odd1s_odd3s = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 5, 9, 13, 3, 7, 11, 15);
107 __m128i yyyyyyyyuuuuvvvv0 = _mm_shuffle_epi8(s0, evens_odd1s_odd3s);
108 __m128i yyyyyyyyuuuuvvvv8 = _mm_shuffle_epi8(s1, evens_odd1s_odd3s);
111 __m128i y16__0_7 = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv0, zero);
112 __m128i y16__8_F = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv8, zero);
117 _mm_storeu_si128(&
dst[
i * 2], _mm_slli_epi16(y16__0_7, 8));
118 _mm_storeu_si128(&
dst[
i * 2 + 1], _mm_slli_epi16(y16__8_F, 8));
123 __m128i uv = _mm_unpackhi_epi32(yyyyyyyyuuuuvvvv0, yyyyyyyyuuuuvvvv8);
124 __m128i u = _mm_unpacklo_epi8(uv, uv);
125 __m128i
v = _mm_unpackhi_epi8(uv, uv);
126 __m128i u16__0_7 = _mm_unpacklo_epi8(u, zero);
127 __m128i u16__8_F = _mm_unpackhi_epi8(u, zero);
128 __m128i v16__0_7 = _mm_unpacklo_epi8(
v, zero);
129 __m128i v16__8_F = _mm_unpackhi_epi8(
v, zero);
132 __m128i c16__0_7 = _mm_slli_epi16(_mm_subs_epi16(y16__0_7, _mm_set1_epi16(16)), 4);
133 __m128i d16__0_7 = _mm_slli_epi16(_mm_subs_epi16(u16__0_7, _mm_set1_epi16(128)), 4);
134 __m128i e16__0_7 = _mm_slli_epi16(_mm_subs_epi16(v16__0_7, _mm_set1_epi16(128)), 4);
135 __m128i r16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(e16__0_7, n409))))));
136 __m128i g16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n100)), _mm_mulhi_epi16(e16__0_7, n208))))));
137 __m128i b16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n516))))));
140 __m128i c16__8_F = _mm_slli_epi16(_mm_subs_epi16(y16__8_F, _mm_set1_epi16(16)), 4);
141 __m128i d16__8_F = _mm_slli_epi16(_mm_subs_epi16(u16__8_F, _mm_set1_epi16(128)), 4);
142 __m128i e16__8_F = _mm_slli_epi16(_mm_subs_epi16(v16__8_F, _mm_set1_epi16(128)), 4);
143 __m128i r16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(e16__8_F, n409))))));
144 __m128i g16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n100)), _mm_mulhi_epi16(e16__8_F, n208))))));
145 __m128i b16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n516))))));
150 __m128i rg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds));
151 __m128i ba8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_set1_epi8(-1));
152 __m128i rgba_0_3 = _mm_unpacklo_epi16(rg8__0_7, ba8__0_7);
153 __m128i rgba_4_7 = _mm_unpackhi_epi16(rg8__0_7, ba8__0_7);
155 __m128i rg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds));
156 __m128i ba8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_set1_epi8(-1));
157 __m128i rgba_8_B = _mm_unpacklo_epi16(rg8__8_F, ba8__8_F);
158 __m128i rgba_C_F = _mm_unpackhi_epi16(rg8__8_F, ba8__8_F);
163 _mm_storeu_si128(&
dst[
i * 4], rgba_0_3);
164 _mm_storeu_si128(&
dst[
i * 4 + 1], rgba_4_7);
165 _mm_storeu_si128(&
dst[
i * 4 + 2], rgba_8_B);
166 _mm_storeu_si128(&
dst[
i * 4 + 3], rgba_C_F);
172 __m128i rgb0 = _mm_shuffle_epi8(rgba_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
173 __m128i rgb1 = _mm_shuffle_epi8(rgba_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
174 __m128i rgb2 = _mm_shuffle_epi8(rgba_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
175 __m128i rgb3 = _mm_shuffle_epi8(rgba_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
178 _mm_storeu_si128(&
dst[
i * 3], _mm_alignr_epi8(rgb1, rgb0, 4));
179 _mm_storeu_si128(&
dst[
i * 3 + 1], _mm_alignr_epi8(rgb2, rgb1, 8));
180 _mm_storeu_si128(&
dst[
i * 3 + 2], _mm_alignr_epi8(rgb3, rgb2, 12));
187 __m128i bg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds));
188 __m128i ra8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_set1_epi8(-1));
189 __m128i bgra_0_3 = _mm_unpacklo_epi16(bg8__0_7, ra8__0_7);
190 __m128i bgra_4_7 = _mm_unpackhi_epi16(bg8__0_7, ra8__0_7);
192 __m128i bg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds));
193 __m128i ra8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_set1_epi8(-1));
194 __m128i bgra_8_B = _mm_unpacklo_epi16(bg8__8_F, ra8__8_F);
195 __m128i bgra_C_F = _mm_unpackhi_epi16(bg8__8_F, ra8__8_F);
200 _mm_storeu_si128(&
dst[
i * 4], bgra_0_3);
201 _mm_storeu_si128(&
dst[
i * 4 + 1], bgra_4_7);
202 _mm_storeu_si128(&
dst[
i * 4 + 2], bgra_8_B);
203 _mm_storeu_si128(&
dst[
i * 4 + 3], bgra_C_F);
209 __m128i bgr0 = _mm_shuffle_epi8(bgra_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
210 __m128i bgr1 = _mm_shuffle_epi8(bgra_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
211 __m128i bgr2 = _mm_shuffle_epi8(bgra_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
212 __m128i bgr3 = _mm_shuffle_epi8(bgra_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
215 _mm_storeu_si128(&
dst[
i * 3], _mm_alignr_epi8(bgr1, bgr0, 4));
216 _mm_storeu_si128(&
dst[
i * 3 + 1], _mm_alignr_epi8(bgr2, bgr1, 8));
217 _mm_storeu_si128(&
dst[
i * 3 + 2], _mm_alignr_epi8(bgr3, bgr2, 12));
222 #else // Generic code for when SSSE3 is not available.
225 for (;
n;
n -= 16,
src += 32)
272 for (
int i = 0;
i < 16;
i++)
279 #define clamp(x) ((t=(x)) > 255 ? 255 : t < 0 ? 0 : t)
280 r[
i] =
clamp((298 *
c + 409 *
e + 128) >> 8);
281 g[
i] =
clamp((298 *
c - 100 *
d - 208 *
e + 128) >> 8);
282 b[
i] =
clamp((298 *
c + 516 *
d + 128) >> 8);
289 r[0],
g[0],
b[0],
r[1],
g[1],
b[1],
290 r[2],
g[2],
b[2],
r[3],
g[3],
b[3],
291 r[4],
g[4],
b[4],
r[5],
g[5],
b[5],
292 r[6],
g[6],
b[6],
r[7],
g[7],
b[7],
293 r[8],
g[8],
b[8],
r[9],
g[9],
b[9],
294 r[10],
g[10],
b[10],
r[11],
g[11],
b[11],
295 r[12],
g[12],
b[12],
r[13],
g[13],
b[13],
296 r[14],
g[14],
b[14],
r[15],
g[15],
b[15],
306 b[0],
g[0],
r[0],
b[1],
g[1],
r[1],
307 b[2],
g[2],
r[2],
b[3],
g[3],
r[3],
308 b[4],
g[4],
r[4],
b[5],
g[5],
r[5],
309 b[6],
g[6],
r[6],
b[7],
g[7],
r[7],
310 b[8],
g[8],
r[8],
b[9],
g[9],
r[9],
311 b[10],
g[10],
r[10],
b[11],
g[11],
r[11],
312 b[12],
g[12],
r[12],
b[13],
g[13],
r[13],
313 b[14],
g[14],
r[14],
b[15],
g[15],
r[15],
323 r[0],
g[0],
b[0], 255,
r[1],
g[1],
b[1], 255,
324 r[2],
g[2],
b[2], 255,
r[3],
g[3],
b[3], 255,
325 r[4],
g[4],
b[4], 255,
r[5],
g[5],
b[5], 255,
326 r[6],
g[6],
b[6], 255,
r[7],
g[7],
b[7], 255,
327 r[8],
g[8],
b[8], 255,
r[9],
g[9],
b[9], 255,
328 r[10],
g[10],
b[10], 255,
r[11],
g[11],
b[11], 255,
329 r[12],
g[12],
b[12], 255,
r[13],
g[13],
b[13], 255,
330 r[14],
g[14],
b[14], 255,
r[15],
g[15],
b[15], 255,
340 b[0],
g[0],
r[0], 255,
b[1],
g[1],
r[1], 255,
341 b[2],
g[2],
r[2], 255,
b[3],
g[3],
r[3], 255,
342 b[4],
g[4],
r[4], 255,
b[5],
g[5],
r[5], 255,
343 b[6],
g[6],
r[6], 255,
b[7],
g[7],
r[7], 255,
344 b[8],
g[8],
r[8], 255,
b[9],
g[9],
r[9], 255,
345 b[10],
g[10],
r[10], 255,
b[11],
g[11],
r[11], 255,
346 b[12],
g[12],
r[12], 255,
b[13],
g[13],
r[13], 255,
347 b[14],
g[14],
r[14], 255,
b[15],
g[15],
r[15], 255,
362 unpack_yuy2<RS2_FORMAT_Y8>(
d,
s,
w,
h, actual_size);
365 unpack_yuy2<RS2_FORMAT_Y16>(
d,
s,
w,
h, actual_size);
368 unpack_yuy2<RS2_FORMAT_RGB8>(
d,
s,
w,
h, actual_size);
371 unpack_yuy2<RS2_FORMAT_RGBA8>(
d,
s,
w,
h, actual_size);
374 unpack_yuy2<RS2_FORMAT_BGR8>(
d,
s,
w,
h, actual_size);
377 unpack_yuy2<RS2_FORMAT_BGRA8>(
d,
s,
w,
h, actual_size);
380 LOG_ERROR(
"Unsupported format for YUY2 conversion.");
396 auto src =
reinterpret_cast<const __m128i *
>(
s);
397 auto dst =
reinterpret_cast<__m128i *
>(
d[0]);
400 const __m128i zero = _mm_set1_epi8(0);
401 const __m128i n100 = _mm_set1_epi16(100 << 4);
402 const __m128i n208 = _mm_set1_epi16(208 << 4);
403 const __m128i n298 = _mm_set1_epi16(298 << 4);
404 const __m128i n409 = _mm_set1_epi16(409 << 4);
405 const __m128i n516 = _mm_set1_epi16(516 << 4);
406 const __m128i evens_odds = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
409 __m128i s0 = _mm_loadu_si128(
src++);
410 __m128i s1 = _mm_loadu_si128(
src++);
414 const __m128i evens_odd1s_odd3s = _mm_setr_epi8(1, 3, 5, 7, 9, 11, 13, 15, 0, 4, 8, 12, 2, 6, 10, 14);
415 __m128i yyyyyyyyuuuuvvvv0 = _mm_shuffle_epi8(s0, evens_odd1s_odd3s);
416 __m128i yyyyyyyyuuuuvvvv8 = _mm_shuffle_epi8(s1, evens_odd1s_odd3s);
419 __m128i y16__0_7 = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv0, zero);
420 __m128i y16__8_F = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv8, zero);
424 __m128i uv = _mm_unpackhi_epi32(yyyyyyyyuuuuvvvv0, yyyyyyyyuuuuvvvv8);
425 __m128i u = _mm_unpacklo_epi8(uv, uv);
426 __m128i
v = _mm_unpackhi_epi8(uv, uv);
427 __m128i u16__0_7 = _mm_unpacklo_epi8(u, zero);
428 __m128i u16__8_F = _mm_unpackhi_epi8(u, zero);
429 __m128i v16__0_7 = _mm_unpacklo_epi8(
v, zero);
430 __m128i v16__8_F = _mm_unpackhi_epi8(
v, zero);
433 __m128i c16__0_7 = _mm_slli_epi16(_mm_subs_epi16(y16__0_7, _mm_set1_epi16(16)), 4);
434 __m128i d16__0_7 = _mm_slli_epi16(_mm_subs_epi16(u16__0_7, _mm_set1_epi16(128)), 4);
435 __m128i e16__0_7 = _mm_slli_epi16(_mm_subs_epi16(v16__0_7, _mm_set1_epi16(128)), 4);
436 __m128i r16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(e16__0_7, n409))))));
437 __m128i g16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n100)), _mm_mulhi_epi16(e16__0_7, n208))))));
438 __m128i b16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n516))))));
441 __m128i c16__8_F = _mm_slli_epi16(_mm_subs_epi16(y16__8_F, _mm_set1_epi16(16)), 4);
442 __m128i d16__8_F = _mm_slli_epi16(_mm_subs_epi16(u16__8_F, _mm_set1_epi16(128)), 4);
443 __m128i e16__8_F = _mm_slli_epi16(_mm_subs_epi16(v16__8_F, _mm_set1_epi16(128)), 4);
444 __m128i r16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(e16__8_F, n409))))));
445 __m128i g16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n100)), _mm_mulhi_epi16(e16__8_F, n208))))));
446 __m128i b16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n516))))));
451 __m128i rg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds));
452 __m128i ba8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_set1_epi8(-1));
453 __m128i rgba_0_3 = _mm_unpacklo_epi16(rg8__0_7, ba8__0_7);
454 __m128i rgba_4_7 = _mm_unpackhi_epi16(rg8__0_7, ba8__0_7);
456 __m128i rg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds));
457 __m128i ba8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_set1_epi8(-1));
458 __m128i rgba_8_B = _mm_unpacklo_epi16(rg8__8_F, ba8__8_F);
459 __m128i rgba_C_F = _mm_unpackhi_epi16(rg8__8_F, ba8__8_F);
464 _mm_storeu_si128(
dst++, rgba_0_3);
465 _mm_storeu_si128(
dst++, rgba_4_7);
466 _mm_storeu_si128(
dst++, rgba_8_B);
467 _mm_storeu_si128(
dst++, rgba_C_F);
473 __m128i rgb0 = _mm_shuffle_epi8(rgba_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
474 __m128i rgb1 = _mm_shuffle_epi8(rgba_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
475 __m128i rgb2 = _mm_shuffle_epi8(rgba_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
476 __m128i rgb3 = _mm_shuffle_epi8(rgba_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
479 _mm_storeu_si128(
dst++, _mm_alignr_epi8(rgb1, rgb0, 4));
480 _mm_storeu_si128(
dst++, _mm_alignr_epi8(rgb2, rgb1, 8));
481 _mm_storeu_si128(
dst++, _mm_alignr_epi8(rgb3, rgb2, 12));
488 __m128i bg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds));
489 __m128i ra8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_set1_epi8(-1));
490 __m128i bgra_0_3 = _mm_unpacklo_epi16(bg8__0_7, ra8__0_7);
491 __m128i bgra_4_7 = _mm_unpackhi_epi16(bg8__0_7, ra8__0_7);
493 __m128i bg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds));
494 __m128i ra8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_set1_epi8(-1));
495 __m128i bgra_8_B = _mm_unpacklo_epi16(bg8__8_F, ra8__8_F);
496 __m128i bgra_C_F = _mm_unpackhi_epi16(bg8__8_F, ra8__8_F);
501 _mm_storeu_si128(
dst++, bgra_0_3);
502 _mm_storeu_si128(
dst++, bgra_4_7);
503 _mm_storeu_si128(
dst++, bgra_8_B);
504 _mm_storeu_si128(
dst++, bgra_C_F);
510 __m128i bgr0 = _mm_shuffle_epi8(bgra_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
511 __m128i bgr1 = _mm_shuffle_epi8(bgra_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
512 __m128i bgr2 = _mm_shuffle_epi8(bgra_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
513 __m128i bgr3 = _mm_shuffle_epi8(bgra_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
516 _mm_storeu_si128(
dst++, _mm_alignr_epi8(bgr1, bgr0, 4));
517 _mm_storeu_si128(
dst++, _mm_alignr_epi8(bgr2, bgr1, 8));
518 _mm_storeu_si128(
dst++, _mm_alignr_epi8(bgr3, bgr2, 12));
522 #else // Generic code for when SSSE3 is not available.
525 for (;
n;
n -= 16,
src += 32)
545 for (
int i = 0;
i < 16;
i++)
552 #define clamp(x) ((t=(x)) > 255 ? 255 : t < 0 ? 0 : t)
553 r[
i] =
clamp((298 *
c + 409 *
e + 128) >> 8);
554 g[
i] =
clamp((298 *
c - 100 *
d - 208 *
e + 128) >> 8);
555 b[
i] =
clamp((298 *
c + 516 *
d + 128) >> 8);
562 r[0],
g[0],
b[0],
r[1],
g[1],
b[1],
563 r[2],
g[2],
b[2],
r[3],
g[3],
b[3],
564 r[4],
g[4],
b[4],
r[5],
g[5],
b[5],
565 r[6],
g[6],
b[6],
r[7],
g[7],
b[7],
566 r[8],
g[8],
b[8],
r[9],
g[9],
b[9],
567 r[10],
g[10],
b[10],
r[11],
g[11],
b[11],
568 r[12],
g[12],
b[12],
r[13],
g[13],
b[13],
569 r[14],
g[14],
b[14],
r[15],
g[15],
b[15],
579 b[0],
g[0],
r[0],
b[1],
g[1],
r[1],
580 b[2],
g[2],
r[2],
b[3],
g[3],
r[3],
581 b[4],
g[4],
r[4],
b[5],
g[5],
r[5],
582 b[6],
g[6],
r[6],
b[7],
g[7],
r[7],
583 b[8],
g[8],
r[8],
b[9],
g[9],
r[9],
584 b[10],
g[10],
r[10],
b[11],
g[11],
r[11],
585 b[12],
g[12],
r[12],
b[13],
g[13],
r[13],
586 b[14],
g[14],
r[14],
b[15],
g[15],
r[15],
596 r[0],
g[0],
b[0], 255,
r[1],
g[1],
b[1], 255,
597 r[2],
g[2],
b[2], 255,
r[3],
g[3],
b[3], 255,
598 r[4],
g[4],
b[4], 255,
r[5],
g[5],
b[5], 255,
599 r[6],
g[6],
b[6], 255,
r[7],
g[7],
b[7], 255,
600 r[8],
g[8],
b[8], 255,
r[9],
g[9],
b[9], 255,
601 r[10],
g[10],
b[10], 255,
r[11],
g[11],
b[11], 255,
602 r[12],
g[12],
b[12], 255,
r[13],
g[13],
b[13], 255,
603 r[14],
g[14],
b[14], 255,
r[15],
g[15],
b[15], 255,
613 b[0],
g[0],
r[0], 255,
b[1],
g[1],
r[1], 255,
614 b[2],
g[2],
r[2], 255,
b[3],
g[3],
r[3], 255,
615 b[4],
g[4],
r[4], 255,
b[5],
g[5],
r[5], 255,
616 b[6],
g[6],
r[6], 255,
b[7],
g[7],
r[7], 255,
617 b[8],
g[8],
r[8], 255,
b[9],
g[9],
r[9], 255,
618 b[10],
g[10],
r[10], 255,
b[11],
g[11],
r[11], 255,
619 b[12],
g[12],
r[12], 255,
b[13],
g[13],
r[13], 255,
620 b[14],
g[14],
r[14], 255,
b[15],
g[15],
r[15], 255,
635 unpack_uyvy<RS2_FORMAT_RGB8>(
d,
s,
w,
h, actual_size);
638 unpack_uyvy<RS2_FORMAT_RGBA8>(
d,
s,
w,
h, actual_size);
641 unpack_uyvy<RS2_FORMAT_BGR8>(
d,
s,
w,
h, actual_size);
644 unpack_uyvy<RS2_FORMAT_BGRA8>(
d,
s,
w,
h, actual_size);
647 LOG_ERROR(
"Unsupported format for UYVY conversion.");
659 if (uncompressed_rgb)
661 auto uncompressed_size =
w *
h *
bpp;