color-formats-converter.cpp
Go to the documentation of this file.
1 // License: Apache 2.0. See LICENSE file in root directory.
2 // Copyright(c) 2019 Intel Corporation. All Rights Reserved.
3 
5 
6 #include "option.h"
7 #include "image-avx.h"
8 #include "image.h"
9 
10 #define STB_IMAGE_STATIC
11 #define STB_IMAGE_IMPLEMENTATION
12 #include "../third-party/stb_image.h"
13 
14 #ifdef RS2_USE_CUDA
15 #include "cuda/cuda-conversion.cuh"
16 #endif
17 #ifdef __SSSE3__
18 #include <tmmintrin.h> // For SSSE3 intrinsics
19 #endif
20 
21 #if defined (ANDROID) || (defined (__linux__) && !defined (__x86_64__)) || (defined (__APPLE__) && !defined (__x86_64__))
22 
// Build targets selected by the surrounding #if (Android, and Linux / Apple
// builds that are not x86_64) perform no CPUID query and always report that
// AVX is unavailable.
bool has_avx()
{
    return false;
}
24 
25 #else
26 
27 #ifdef _WIN32
28 #include <intrin.h>
29 #define cpuid(info, x) __cpuidex(info, x, 0)
30 #else
31 #include <cpuid.h>
// Runs the CPUID instruction for leaf `info_type` with sub-leaf 0 and stores
// the four result registers in info[0..3] (EAX, EBX, ECX, EDX in that order).
void cpuid(int info[4], int info_type)
{
    int & eax = info[0];
    int & ebx = info[1];
    int & ecx = info[2];
    int & edx = info[3];

    __cpuid_count(info_type, 0, eax, ebx, ecx, edx);
}
35 #endif
36 
37 bool has_avx()
38 {
39  int info[4];
40  cpuid(info, 0);
41  cpuid(info, 0x80000000);
42  return (info[2] & ((int)1 << 28)) != 0;
43 }
44 
45 #endif
46 
47 namespace librealsense
48 {
50  // YUY2 unpacking routines //
52  // This templated function unpacks YUY2 into Y8/Y16/RGB8/RGBA8/BGR8/BGRA8, depending on the compile-time parameter FORMAT.
53  // It is expected that all branching outside of the loop control variable will be removed due to constant-folding.
54  template<rs2_format FORMAT> void unpack_yuy2( uint8_t * const d[], const uint8_t * s, int width, int height, int actual_size)
55  {
56  auto n = width * height;
57  assert(n % 16 == 0); // All currently supported color resolutions are multiples of 16 pixels. Could easily extend support to other resolutions by copying final n<16 pixels into a zero-padded buffer and recursively calling self for final iteration.
58 #ifdef RS2_USE_CUDA
59  rscuda::unpack_yuy2_cuda<FORMAT>(d, s, n);
60  return;
61 #endif
62 #if defined __SSSE3__ && ! defined ANDROID
63  static bool do_avx = has_avx();
64 #ifdef __AVX2__
65 
66  if (do_avx)
67  {
68  if (FORMAT == RS2_FORMAT_Y8) unpack_yuy2_avx_y8(d, s, n);
69  if (FORMAT == RS2_FORMAT_Y16) unpack_yuy2_avx_y16(d, s, n);
70  if (FORMAT == RS2_FORMAT_RGB8) unpack_yuy2_avx_rgb8(d, s, n);
71  if (FORMAT == RS2_FORMAT_RGBA8) unpack_yuy2_avx_rgba8(d, s, n);
72  if (FORMAT == RS2_FORMAT_BGR8) unpack_yuy2_avx_bgr8(d, s, n);
73  if (FORMAT == RS2_FORMAT_BGRA8) unpack_yuy2_avx_bgra8(d, s, n);
74  }
75  else
76 #endif
77  {
78  auto src = reinterpret_cast<const __m128i *>(s);
79  auto dst = reinterpret_cast<__m128i *>(d[0]);
80 
81 #pragma omp parallel for
82  for (int i = 0; i < n / 16; i++)
83  {
84  const __m128i zero = _mm_set1_epi8(0);
85  const __m128i n100 = _mm_set1_epi16(100 << 4);
86  const __m128i n208 = _mm_set1_epi16(208 << 4);
87  const __m128i n298 = _mm_set1_epi16(298 << 4);
88  const __m128i n409 = _mm_set1_epi16(409 << 4);
89  const __m128i n516 = _mm_set1_epi16(516 << 4);
90  const __m128i evens_odds = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
91 
92  // Load 8 YUY2 pixels each into two 16-byte registers
93  __m128i s0 = _mm_loadu_si128(&src[i * 2]);
94  __m128i s1 = _mm_loadu_si128(&src[i * 2 + 1]);
95 
96  if (FORMAT == RS2_FORMAT_Y8)
97  {
98  const __m128i vmask = _mm_set1_epi16( 0x00ff );
99  s0 = _mm_and_si128( s0, vmask ); // mask unwanted bytes
100  s1 = _mm_and_si128( s1, vmask );
101  // Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation
102  _mm_storeu_si128( &dst[i], _mm_packus_epi16( s0, s1 ) );
103  continue;
104  }
105 
106  // Shuffle all Y components to the low order bytes of the register, and all U/V components to the high order bytes
107  const __m128i evens_odd1s_odd3s = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 5, 9, 13, 3, 7, 11, 15); // to get yyyyyyyyuuuuvvvv
108  __m128i yyyyyyyyuuuuvvvv0 = _mm_shuffle_epi8(s0, evens_odd1s_odd3s);
109  __m128i yyyyyyyyuuuuvvvv8 = _mm_shuffle_epi8(s1, evens_odd1s_odd3s);
110 
111  // Retrieve all 16 Y components as 16-bit values (8 components per register))
112  __m128i y16__0_7 = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv0, zero); // convert to 16 bit
113  __m128i y16__8_F = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv8, zero); // convert to 16 bit
114 
115  if (FORMAT == RS2_FORMAT_Y16)
116  {
117  // Output 16 pixels (32 bytes) at once
118  _mm_storeu_si128(&dst[i * 2], _mm_slli_epi16(y16__0_7, 8));
119  _mm_storeu_si128(&dst[i * 2 + 1], _mm_slli_epi16(y16__8_F, 8));
120  continue;
121  }
122 
123  // Retrieve all 16 U and V components as 16-bit values (8 components per register)
124  __m128i uv = _mm_unpackhi_epi32(yyyyyyyyuuuuvvvv0, yyyyyyyyuuuuvvvv8); // uuuuuuuuvvvvvvvv
125  __m128i u = _mm_unpacklo_epi8(uv, uv); // uu uu uu uu uu uu uu uu u's duplicated
126  __m128i v = _mm_unpackhi_epi8(uv, uv); // vv vv vv vv vv vv vv vv
127  __m128i u16__0_7 = _mm_unpacklo_epi8(u, zero); // convert to 16 bit
128  __m128i u16__8_F = _mm_unpackhi_epi8(u, zero); // convert to 16 bit
129  __m128i v16__0_7 = _mm_unpacklo_epi8(v, zero); // convert to 16 bit
130  __m128i v16__8_F = _mm_unpackhi_epi8(v, zero); // convert to 16 bit
131 
132  // Compute R, G, B values for first 8 pixels
133  __m128i c16__0_7 = _mm_slli_epi16(_mm_subs_epi16(y16__0_7, _mm_set1_epi16(16)), 4);
134  __m128i d16__0_7 = _mm_slli_epi16(_mm_subs_epi16(u16__0_7, _mm_set1_epi16(128)), 4); // perhaps could have done these u,v to d,e before the duplication
135  __m128i e16__0_7 = _mm_slli_epi16(_mm_subs_epi16(v16__0_7, _mm_set1_epi16(128)), 4);
136  __m128i r16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(e16__0_7, n409)))))); // (298 * c + 409 * e + 128) ; //
137  __m128i g16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n100)), _mm_mulhi_epi16(e16__0_7, n208)))))); // (298 * c - 100 * d - 208 * e + 128)
138  __m128i b16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n516)))))); // clampbyte((298 * c + 516 * d + 128) >> 8);
139 
140  // Compute R, G, B values for second 8 pixels
141  __m128i c16__8_F = _mm_slli_epi16(_mm_subs_epi16(y16__8_F, _mm_set1_epi16(16)), 4);
142  __m128i d16__8_F = _mm_slli_epi16(_mm_subs_epi16(u16__8_F, _mm_set1_epi16(128)), 4); // perhaps could have done these u,v to d,e before the duplication
143  __m128i e16__8_F = _mm_slli_epi16(_mm_subs_epi16(v16__8_F, _mm_set1_epi16(128)), 4);
144  __m128i r16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(e16__8_F, n409)))))); // (298 * c + 409 * e + 128) ; //
145  __m128i g16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n100)), _mm_mulhi_epi16(e16__8_F, n208)))))); // (298 * c - 100 * d - 208 * e + 128)
146  __m128i b16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n516)))))); // clampbyte((298 * c + 516 * d + 128) >> 8);
147 
149  {
150  // Shuffle separate R, G, B values into four registers storing four pixels each in (R, G, B, A) order
151  __m128i rg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds)); // hi to take the odds which are the upper bytes we care about
152  __m128i ba8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_set1_epi8(-1));
153  __m128i rgba_0_3 = _mm_unpacklo_epi16(rg8__0_7, ba8__0_7);
154  __m128i rgba_4_7 = _mm_unpackhi_epi16(rg8__0_7, ba8__0_7);
155 
156  __m128i rg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds)); // hi to take the odds which are the upper bytes we care about
157  __m128i ba8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_set1_epi8(-1));
158  __m128i rgba_8_B = _mm_unpacklo_epi16(rg8__8_F, ba8__8_F);
159  __m128i rgba_C_F = _mm_unpackhi_epi16(rg8__8_F, ba8__8_F);
160 
161  if (FORMAT == RS2_FORMAT_RGBA8)
162  {
163  // Store 16 pixels (64 bytes) at once
164  _mm_storeu_si128(&dst[i * 4], rgba_0_3);
165  _mm_storeu_si128(&dst[i * 4 + 1], rgba_4_7);
166  _mm_storeu_si128(&dst[i * 4 + 2], rgba_8_B);
167  _mm_storeu_si128(&dst[i * 4 + 3], rgba_C_F);
168  }
169 
170  if (FORMAT == RS2_FORMAT_RGB8)
171  {
172  // Shuffle rgb triples to the start and end of each register
173  __m128i rgb0 = _mm_shuffle_epi8(rgba_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
174  __m128i rgb1 = _mm_shuffle_epi8(rgba_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
175  __m128i rgb2 = _mm_shuffle_epi8(rgba_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
176  __m128i rgb3 = _mm_shuffle_epi8(rgba_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
177 
178  // Align registers and store 16 pixels (48 bytes) at once
179  _mm_storeu_si128(&dst[i * 3], _mm_alignr_epi8(rgb1, rgb0, 4));
180  _mm_storeu_si128(&dst[i * 3 + 1], _mm_alignr_epi8(rgb2, rgb1, 8));
181  _mm_storeu_si128(&dst[i * 3 + 2], _mm_alignr_epi8(rgb3, rgb2, 12));
182  }
183  }
184 
186  {
187  // Shuffle separate R, G, B values into four registers storing four pixels each in (B, G, R, A) order
188  __m128i bg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds)); // hi to take the odds which are the upper bytes we care about
189  __m128i ra8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_set1_epi8(-1));
190  __m128i bgra_0_3 = _mm_unpacklo_epi16(bg8__0_7, ra8__0_7);
191  __m128i bgra_4_7 = _mm_unpackhi_epi16(bg8__0_7, ra8__0_7);
192 
193  __m128i bg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds)); // hi to take the odds which are the upper bytes we care about
194  __m128i ra8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_set1_epi8(-1));
195  __m128i bgra_8_B = _mm_unpacklo_epi16(bg8__8_F, ra8__8_F);
196  __m128i bgra_C_F = _mm_unpackhi_epi16(bg8__8_F, ra8__8_F);
197 
198  if (FORMAT == RS2_FORMAT_BGRA8)
199  {
200  // Store 16 pixels (64 bytes) at once
201  _mm_storeu_si128(&dst[i * 4], bgra_0_3);
202  _mm_storeu_si128(&dst[i * 4 + 1], bgra_4_7);
203  _mm_storeu_si128(&dst[i * 4 + 2], bgra_8_B);
204  _mm_storeu_si128(&dst[i * 4 + 3], bgra_C_F);
205  }
206 
207  if (FORMAT == RS2_FORMAT_BGR8)
208  {
209  // Shuffle rgb triples to the start and end of each register
210  __m128i bgr0 = _mm_shuffle_epi8(bgra_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
211  __m128i bgr1 = _mm_shuffle_epi8(bgra_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
212  __m128i bgr2 = _mm_shuffle_epi8(bgra_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
213  __m128i bgr3 = _mm_shuffle_epi8(bgra_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
214 
215  // Align registers and store 16 pixels (48 bytes) at once
216  _mm_storeu_si128(&dst[i * 3], _mm_alignr_epi8(bgr1, bgr0, 4));
217  _mm_storeu_si128(&dst[i * 3 + 1], _mm_alignr_epi8(bgr2, bgr1, 8));
218  _mm_storeu_si128(&dst[i * 3 + 2], _mm_alignr_epi8(bgr3, bgr2, 12));
219  }
220  }
221  }
222  }
223 #else // Generic code for when SSSE3 is not available.
224  auto src = reinterpret_cast<const uint8_t *>(s);
225  auto dst = reinterpret_cast<uint8_t *>(d[0]);
226  for (; n; n -= 16, src += 32)
227  {
228  if (FORMAT == RS2_FORMAT_Y8)
229  {
230  uint8_t out[16] = {
231  src[0], src[2], src[4], src[6],
232  src[8], src[10], src[12], src[14],
233  src[16], src[18], src[20], src[22],
234  src[24], src[26], src[28], src[30],
235  };
236  std::memcpy( dst, out, sizeof out );
237  dst += sizeof out;
238  continue;
239  }
240 
241  if (FORMAT == RS2_FORMAT_Y16)
242  {
243  // Y16 is little-endian. We output Y << 8.
244  uint8_t out[32] = {
245  0, src[0], 0, src[2], 0, src[4], 0, src[6],
246  0, src[8], 0, src[10], 0, src[12], 0, src[14],
247  0, src[16], 0, src[18], 0, src[20], 0, src[22],
248  0, src[24], 0, src[26], 0, src[28], 0, src[30],
249  };
250  std::memcpy(dst, out, sizeof out);
251  dst += sizeof out;
252  continue;
253  }
254 
255  int16_t y[16] = {
256  src[0], src[2], src[4], src[6],
257  src[8], src[10], src[12], src[14],
258  src[16], src[18], src[20], src[22],
259  src[24], src[26], src[28], src[30],
260  }, u[16] = {
261  src[1], src[1], src[5], src[5],
262  src[9], src[9], src[13], src[13],
263  src[17], src[17], src[21], src[21],
264  src[25], src[25], src[29], src[29],
265  }, v[16] = {
266  src[3], src[3], src[7], src[7],
267  src[11], src[11], src[15], src[15],
268  src[19], src[19], src[23], src[23],
269  src[27], src[27], src[31], src[31],
270  };
271 
272  uint8_t r[16], g[16], b[16];
273  for (int i = 0; i < 16; i++)
274  {
275  int32_t c = y[i] - 16;
276  int32_t d = u[i] - 128;
277  int32_t e = v[i] - 128;
278 
279  int32_t t;
280 #define clamp(x) ((t=(x)) > 255 ? 255 : t < 0 ? 0 : t)
281  r[i] = clamp((298 * c + 409 * e + 128) >> 8);
282  g[i] = clamp((298 * c - 100 * d - 208 * e + 128) >> 8);
283  b[i] = clamp((298 * c + 516 * d + 128) >> 8);
284 #undef clamp
285  }
286 
287  if (FORMAT == RS2_FORMAT_RGB8)
288  {
289  uint8_t out[16 * 3] = {
290  r[0], g[0], b[0], r[1], g[1], b[1],
291  r[2], g[2], b[2], r[3], g[3], b[3],
292  r[4], g[4], b[4], r[5], g[5], b[5],
293  r[6], g[6], b[6], r[7], g[7], b[7],
294  r[8], g[8], b[8], r[9], g[9], b[9],
295  r[10], g[10], b[10], r[11], g[11], b[11],
296  r[12], g[12], b[12], r[13], g[13], b[13],
297  r[14], g[14], b[14], r[15], g[15], b[15],
298  };
299  std::memcpy( dst, out, sizeof out );
300  dst += sizeof out;
301  continue;
302  }
303 
304  if (FORMAT == RS2_FORMAT_BGR8)
305  {
306  uint8_t out[16 * 3] = {
307  b[0], g[0], r[0], b[1], g[1], r[1],
308  b[2], g[2], r[2], b[3], g[3], r[3],
309  b[4], g[4], r[4], b[5], g[5], r[5],
310  b[6], g[6], r[6], b[7], g[7], r[7],
311  b[8], g[8], r[8], b[9], g[9], r[9],
312  b[10], g[10], r[10], b[11], g[11], r[11],
313  b[12], g[12], r[12], b[13], g[13], r[13],
314  b[14], g[14], r[14], b[15], g[15], r[15],
315  };
316  std::memcpy( dst, out, sizeof out );
317  dst += sizeof out;
318  continue;
319  }
320 
321  if (FORMAT == RS2_FORMAT_RGBA8)
322  {
323  uint8_t out[16 * 4] = {
324  r[0], g[0], b[0], 255, r[1], g[1], b[1], 255,
325  r[2], g[2], b[2], 255, r[3], g[3], b[3], 255,
326  r[4], g[4], b[4], 255, r[5], g[5], b[5], 255,
327  r[6], g[6], b[6], 255, r[7], g[7], b[7], 255,
328  r[8], g[8], b[8], 255, r[9], g[9], b[9], 255,
329  r[10], g[10], b[10], 255, r[11], g[11], b[11], 255,
330  r[12], g[12], b[12], 255, r[13], g[13], b[13], 255,
331  r[14], g[14], b[14], 255, r[15], g[15], b[15], 255,
332  };
333  std::memcpy( dst, out, sizeof out );
334  dst += sizeof out;
335  continue;
336  }
337 
338  if (FORMAT == RS2_FORMAT_BGRA8)
339  {
340  uint8_t out[16 * 4] = {
341  b[0], g[0], r[0], 255, b[1], g[1], r[1], 255,
342  b[2], g[2], r[2], 255, b[3], g[3], r[3], 255,
343  b[4], g[4], r[4], 255, b[5], g[5], r[5], 255,
344  b[6], g[6], r[6], 255, b[7], g[7], r[7], 255,
345  b[8], g[8], r[8], 255, b[9], g[9], r[9], 255,
346  b[10], g[10], r[10], 255, b[11], g[11], r[11], 255,
347  b[12], g[12], r[12], 255, b[13], g[13], r[13], 255,
348  b[14], g[14], r[14], 255, b[15], g[15], r[15], 255,
349  };
350  std::memcpy( dst, out, sizeof out );
351  dst += sizeof out;
352  continue;
353  }
354  }
355 #endif
356  }
357 
358  template<rs2_format FORMAT>
359  void m420_parse_one_line(const uint8_t * y_one_line, const uint8_t * uv_one_line, uint8_t** dst, int width)
360  {
361  // building 16 pixels at each iteration
362  for (int y_pix = 0, uv_pix = 0; y_pix < width; y_pix += 16, uv_pix += 16)
363  {
364  // grabbing matching y,u,v values
365  uint8_t y[16] = { 0 };
366  std::memcpy( y, &y_one_line[y_pix], 16 );
367 
368  uint8_t u[16] = {
369  uv_one_line[uv_pix + 0], uv_one_line[uv_pix + 0], uv_one_line[uv_pix + 2], uv_one_line[uv_pix + 2],
370  uv_one_line[uv_pix + 4], uv_one_line[uv_pix + 4], uv_one_line[uv_pix + 6], uv_one_line[uv_pix + 6],
371  uv_one_line[uv_pix + 8], uv_one_line[uv_pix + 8], uv_one_line[uv_pix + 10], uv_one_line[uv_pix + 10],
372  uv_one_line[uv_pix + 12], uv_one_line[uv_pix + 12], uv_one_line[uv_pix + 14], uv_one_line[uv_pix + 14]
373  };
374 
375  uint8_t v[16] = {
376  uv_one_line[uv_pix + 1], uv_one_line[uv_pix + 1], uv_one_line[uv_pix + 3], uv_one_line[uv_pix + 3],
377  uv_one_line[uv_pix + 5], uv_one_line[uv_pix + 5], uv_one_line[uv_pix + 7], uv_one_line[uv_pix + 7],
378  uv_one_line[uv_pix + 9], uv_one_line[uv_pix + 9], uv_one_line[uv_pix + 11], uv_one_line[uv_pix + 11],
379  uv_one_line[uv_pix + 13], uv_one_line[uv_pix + 13], uv_one_line[uv_pix + 15], uv_one_line[uv_pix + 15]
380  };
381 
382  // converting y,u,v values to r,g,b values
383  uint8_t r[16], g[16], b[16];
384  for (int i = 0; i < 16; i++)
385  {
386  int32_t c = y[i] - 16;
387  int32_t d = u[i] - 128;
388  int32_t e = v[i] - 128;
389 
390  int32_t t;
391 #define clamp(x) ((t=(x)) > 255 ? 255 : t < 0 ? 0 : t)
392  r[i] = clamp((298 * c + 409 * e + 128) >> 8);
393  g[i] = clamp((298 * c - 100 * d - 208 * e + 128) >> 8);
394  b[i] = clamp((298 * c + 516 * d + 128) >> 8);
395 #undef clamp
396  }
397 
398  // outputting r,g,b values in the order needed for each format
399  if (FORMAT == RS2_FORMAT_RGB8)
400  {
401  uint8_t out[16 * 3] = {
402  r[0], g[0], b[0], r[1], g[1], b[1],
403  r[2], g[2], b[2], r[3], g[3], b[3],
404  r[4], g[4], b[4], r[5], g[5], b[5],
405  r[6], g[6], b[6], r[7], g[7], b[7],
406  r[8], g[8], b[8], r[9], g[9], b[9],
407  r[10], g[10], b[10], r[11], g[11], b[11],
408  r[12], g[12], b[12], r[13], g[13], b[13],
409  r[14], g[14], b[14], r[15], g[15], b[15]
410  };
411  std::memcpy( *dst, out, sizeof( out ) );
412  *dst += sizeof out;
413  continue;
414  }
415 
416  if (FORMAT == RS2_FORMAT_BGR8)
417  {
418  uint8_t out[16 * 3] = {
419  b[0], g[0], r[0], b[1], g[1], r[1],
420  b[2], g[2], r[2], b[3], g[3], r[3],
421  b[4], g[4], r[4], b[5], g[5], r[5],
422  b[6], g[6], r[6], b[7], g[7], r[7],
423  b[8], g[8], r[8], b[9], g[9], r[9],
424  b[10], g[10], r[10], b[11], g[11], r[11],
425  b[12], g[12], r[12], b[13], g[13], r[13],
426  b[14], g[14], r[14], b[15], g[15], r[15],
427  };
428  std::memcpy( *dst, out, sizeof out );
429  *dst += sizeof out;
430  continue;
431  }
432 
433  if (FORMAT == RS2_FORMAT_RGBA8)
434  {
435  uint8_t out[16 * 4] = {
436  r[0], g[0], b[0], 255, r[1], g[1], b[1], 255,
437  r[2], g[2], b[2], 255, r[3], g[3], b[3], 255,
438  r[4], g[4], b[4], 255, r[5], g[5], b[5], 255,
439  r[6], g[6], b[6], 255, r[7], g[7], b[7], 255,
440  r[8], g[8], b[8], 255, r[9], g[9], b[9], 255,
441  r[10], g[10], b[10], 255, r[11], g[11], b[11], 255,
442  r[12], g[12], b[12], 255, r[13], g[13], b[13], 255,
443  r[14], g[14], b[14], 255, r[15], g[15], b[15], 255,
444  };
445  std::memcpy( *dst, out, sizeof out );
446  *dst += sizeof out;
447  continue;
448  }
449 
450  if (FORMAT == RS2_FORMAT_BGRA8)
451  {
452  uint8_t out[16 * 4] = {
453  b[0], g[0], r[0], 255, b[1], g[1], r[1], 255,
454  b[2], g[2], r[2], 255, b[3], g[3], r[3], 255,
455  b[4], g[4], r[4], 255, b[5], g[5], r[5], 255,
456  b[6], g[6], r[6], 255, b[7], g[7], r[7], 255,
457  b[8], g[8], r[8], 255, b[9], g[9], r[9], 255,
458  b[10], g[10], r[10], 255, b[11], g[11], r[11], 255,
459  b[12], g[12], r[12], 255, b[13], g[13], r[13], 255,
460  b[14], g[14], r[14], 255, b[15], g[15], r[15], 255,
461  };
462  std::memcpy( *dst, out, sizeof out );
463  *dst += sizeof out;
464  continue;
465  }
466  }
467  }
468 
469 #if defined __SSSE3__ && ! defined ANDROID
470  // This method receives 1 line of y and one line of uv.
471  // source_chunks_y // yyyyyyyyyyyyyyyy
472  // source_chunks_uv // uvuvuvuvuvuvuvuv
473  // Each coupling is done as: 2 bytes of y coupled with 2 bytes of uv (one u, and one v)
474  template<rs2_format FORMAT>
475  void m420_sse_parse_one_line(const __m128i* source_chunks_y, const __m128i* source_chunks_uv, __m128i* dst, int line_length)
476  {
477 #pragma omp parallel for
478  for (int i = 0; i < line_length; ++i)
479  {
480  const __m128i zero = _mm_set1_epi8(0);
481  __m128i y16__0_7 = _mm_unpacklo_epi8(source_chunks_y[i], zero);
482  __m128i y16__8_F = _mm_unpackhi_epi8(source_chunks_y[i], zero);
483 
484  const __m128i evens_odds = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); // to get uuuuuuuuvvvvvvvv
485 
486  __m128i uuuuuuuuvvvvvvvv = _mm_shuffle_epi8(source_chunks_uv[i], evens_odds);
487  __m128i u = _mm_unpacklo_epi8(uuuuuuuuvvvvvvvv, uuuuuuuuvvvvvvvv); // uu duplicated
488  __m128i v = _mm_unpackhi_epi8(uuuuuuuuvvvvvvvv, uuuuuuuuvvvvvvvv); // vv duplicated
489 
490  __m128i u16__0_7 = _mm_unpacklo_epi8(u, zero); // convert to 16 bit
491  __m128i u16__8_F = _mm_unpackhi_epi8(u, zero); // convert to 16 bit
492  __m128i v16__0_7 = _mm_unpacklo_epi8(v, zero); // convert to 16 bit
493  __m128i v16__8_F = _mm_unpackhi_epi8(v, zero); // convert to 16 bit
494 
495  const __m128i n100 = _mm_set1_epi16(100 << 4);
496  const __m128i n208 = _mm_set1_epi16(208 << 4);
497  const __m128i n298 = _mm_set1_epi16(298 << 4);
498  const __m128i n409 = _mm_set1_epi16(409 << 4);
499  const __m128i n516 = _mm_set1_epi16(516 << 4);
500 
501  __m128i c16__0_7 = _mm_slli_epi16(_mm_subs_epi16(y16__0_7, _mm_set1_epi16(16)), 4);
502  __m128i d16__0_7 = _mm_slli_epi16(_mm_subs_epi16(u16__0_7, _mm_set1_epi16(128)), 4); // perhaps could have done these u,v to d,e before the duplication
503  __m128i e16__0_7 = _mm_slli_epi16(_mm_subs_epi16(v16__0_7, _mm_set1_epi16(128)), 4);
504  __m128i r16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(e16__0_7, n409)))))); // (298 * c + 409 * e + 128) ; //
505  __m128i g16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n100)), _mm_mulhi_epi16(e16__0_7, n208)))))); // (298 * c - 100 * d - 208 * e + 128)
506  __m128i b16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n516)))))); // clampbyte((298 * c + 516 * d + 128) >> 8);
507 
508  // Compute R, G, B values for second 8 pixels
509  __m128i c16__8_F = _mm_slli_epi16(_mm_subs_epi16(y16__8_F, _mm_set1_epi16(16)), 4);
510  __m128i d16__8_F = _mm_slli_epi16(_mm_subs_epi16(u16__8_F, _mm_set1_epi16(128)), 4); // perhaps could have done these u,v to d,e before the duplication
511  __m128i e16__8_F = _mm_slli_epi16(_mm_subs_epi16(v16__8_F, _mm_set1_epi16(128)), 4);
512  __m128i r16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(e16__8_F, n409)))))); // (298 * c + 409 * e + 128) ; //
513  __m128i g16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n100)), _mm_mulhi_epi16(e16__8_F, n208)))))); // (298 * c - 100 * d - 208 * e + 128)
514  __m128i b16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n516)))))); // clampbyte((298 * c + 516 * d + 128) >> 8);
515 
516 
517 
519  {
520  // Shuffle separate R, G, B values into four registers storing four pixels each in (R, G, B, A) order
521  __m128i rg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds)); // hi to take the odds which are the upper bytes we care about
522  __m128i ba8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_set1_epi8(-1));
523  __m128i rgba_0_3 = _mm_unpacklo_epi16(rg8__0_7, ba8__0_7);
524  __m128i rgba_4_7 = _mm_unpackhi_epi16(rg8__0_7, ba8__0_7);
525 
526  __m128i rg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds)); // hi to take the odds which are the upper bytes we care about
527  __m128i ba8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_set1_epi8(-1));
528  __m128i rgba_8_B = _mm_unpacklo_epi16(rg8__8_F, ba8__8_F);
529  __m128i rgba_C_F = _mm_unpackhi_epi16(rg8__8_F, ba8__8_F);
530 
531  if (FORMAT == RS2_FORMAT_RGBA8)
532  {
533  // Store 16 pixels (64 bytes) at once
534  _mm_storeu_si128(&dst[i * 4], rgba_0_3);
535  _mm_storeu_si128(&dst[i * 4 + 1], rgba_4_7);
536  _mm_storeu_si128(&dst[i * 4 + 2], rgba_8_B);
537  _mm_storeu_si128(&dst[i * 4 + 3], rgba_C_F);
538 
539  continue;
540  }
541 
542  if (FORMAT == RS2_FORMAT_RGB8)
543  {
544  // Shuffle rgb triples to the start and end of each register
545  __m128i rgb0 = _mm_shuffle_epi8(rgba_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
546  __m128i rgb1 = _mm_shuffle_epi8(rgba_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
547  __m128i rgb2 = _mm_shuffle_epi8(rgba_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
548  __m128i rgb3 = _mm_shuffle_epi8(rgba_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
549 
550  // Align registers and store 16 pixels (48 bytes) at once
551  _mm_storeu_si128(&dst[i * 3], _mm_alignr_epi8(rgb1, rgb0, 4));
552  _mm_storeu_si128(&dst[i * 3 + 1], _mm_alignr_epi8(rgb2, rgb1, 8));
553  _mm_storeu_si128(&dst[i * 3 + 2], _mm_alignr_epi8(rgb3, rgb2, 12));
554 
555  continue;
556  }
557  }
558 
560  {
561  // Shuffle separate R, G, B values into four registers storing four pixels each in (B, G, R, A) order
562  __m128i bg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds)); // hi to take the odds which are the upper bytes we care about
563  __m128i ra8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_set1_epi8(-1));
564  __m128i bgra_0_3 = _mm_unpacklo_epi16(bg8__0_7, ra8__0_7);
565  __m128i bgra_4_7 = _mm_unpackhi_epi16(bg8__0_7, ra8__0_7);
566 
567  __m128i bg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds)); // hi to take the odds which are the upper bytes we care about
568  __m128i ra8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_set1_epi8(-1));
569  __m128i bgra_8_B = _mm_unpacklo_epi16(bg8__8_F, ra8__8_F);
570  __m128i bgra_C_F = _mm_unpackhi_epi16(bg8__8_F, ra8__8_F);
571 
572  if (FORMAT == RS2_FORMAT_BGRA8)
573  {
574  // Store 16 pixels (64 bytes) at once
575  _mm_storeu_si128(&dst[i * 4], bgra_0_3);
576  _mm_storeu_si128(&dst[i * 4 + 1], bgra_4_7);
577  _mm_storeu_si128(&dst[i * 4 + 2], bgra_8_B);
578  _mm_storeu_si128(&dst[i * 4 + 3], bgra_C_F);
579 
580  continue;
581  }
582 
583  if (FORMAT == RS2_FORMAT_BGR8)
584  {
585  // Shuffle rgb triples to the start and end of each register
586  __m128i bgr0 = _mm_shuffle_epi8(bgra_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
587  __m128i bgr1 = _mm_shuffle_epi8(bgra_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
588  __m128i bgr2 = _mm_shuffle_epi8(bgra_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
589  __m128i bgr3 = _mm_shuffle_epi8(bgra_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
590 
591  // Align registers and store 16 pixels (48 bytes) at once
592  _mm_storeu_si128(&dst[i * 3], _mm_alignr_epi8(bgr1, bgr0, 4));
593  _mm_storeu_si128(&dst[i * 3 + 1], _mm_alignr_epi8(bgr2, bgr1, 8));
594  _mm_storeu_si128(&dst[i * 3 + 2], _mm_alignr_epi8(bgr3, bgr2, 12));
595 
596  continue;
597  }
598  }
599  }
600  }
601 #endif
602 
604  // M420 unpacking routines //
606  // This templated function unpacks M420 into Y8/Y16/RGB8/RGBA8/BGR8/BGRA8, depending on the compile-time parameter FORMAT.
607  // It is expected that all branching outside of the loop control variable will be removed due to constant-folding.
608  // The M420 is a standard format - see: https://www.kernel.org/doc/html/v4.10/media/uapi/v4l/pixfmt-m420.html
609  // Its configuration is as following: 2 lines of Y then one line of UV (line size is width)
610  // There is one Y value for each pixel, and one pair of U,V values for 4 pixels.
611  // For example: for the first 3 lines of the frame:
612  // Y0 Y1 Y2 Y3 .... Yw-1 (Yw:Ywidth)
613  // Yw Yw+1 Yw+2 Yw+3 .... Y2w-1
614  // U0 V0 U1 V1
615  // The first pixel is (Y0, U0, V0), second pixel is (Y1, U0, V0)
616  // The first pixel in the second line is (Yw, U0, V0) second pixel in second line is (Yw+1, U0, V0)
617  // The third pixel in second line is (Yw+2, U1, V1)
618  template<rs2_format FORMAT> void unpack_m420( uint8_t * const d[], const uint8_t * s, int width, int height, int actual_size)
619  {
620  auto n = width * height;
621  assert(n % 16 == 0); // All currently supported color resolutions are multiples of 16 pixels. Could easily extend support to other resolutions by copying final n<16 pixels into a zero-padded buffer and recursively calling self for final iteration.
622 
623 #if defined __SSSE3__ && ! defined ANDROID
624  static bool do_avx = has_avx();
625 
626  auto src = reinterpret_cast<const __m128i*>(s);
627  auto dst = reinterpret_cast<__m128i*>(d[0]);
628 
629  __m128i* source_chunks_y = new __m128i[2 * width / 16];
630  __m128i* source_chunks_uv = new __m128i[width / 16];
631 
632 #pragma omp parallel for
633  for (int j = 0; j < height / 2; ++j)
634  {
635 #pragma omp parallel for
636  for (int i = 0; i < 2 * width / 16; ++i)
637  {
638  auto offset_to_current_2_y_lines_for_src = (3 * width * j) / 16;
639 
640  source_chunks_y[i] = _mm_loadu_si128(&src[offset_to_current_2_y_lines_for_src + i]);
641 
642  if (FORMAT == RS2_FORMAT_Y8)
643  {
644  auto offset_to_current_2_y_lines_for_dst = (2 * width * j) / 16;
645  // Align all Y components and output 2 lines of Y at once
646  _mm_storeu_si128(&dst[offset_to_current_2_y_lines_for_dst + i], source_chunks_y[i]);
647  continue;
648  }
649 
650  if (FORMAT == RS2_FORMAT_Y16)
651  {
652  auto bpp = 2;
653  auto offset_to_current_2_y_lines_for_dst = (2 * width * j) / 16 * bpp;
654  const __m128i zero = _mm_set1_epi8(0);
655  __m128i y16__0_7 = _mm_unpacklo_epi8(source_chunks_y[i], zero);
656  __m128i y16__8_F = _mm_unpackhi_epi8(source_chunks_y[i], zero);
657  __m128i y16_0_7_epi_16 = _mm_slli_epi16(y16__0_7, 8);
658  __m128i y16_8_F_epi_16 = _mm_slli_epi16(y16__8_F, 8);
659  // Align all Y components and output 2 _m128i of Y at once
660  _mm_storeu_si128(&dst[offset_to_current_2_y_lines_for_dst + i * 2], y16_0_7_epi_16);
661  _mm_storeu_si128(&dst[offset_to_current_2_y_lines_for_dst + i * 2 + 1], y16_8_F_epi_16);
662  continue;
663  }
664 
665  auto offset_to_current_uv_line_for_src = offset_to_current_2_y_lines_for_src + 2 * width / 16;
666  if (i < width / 16)
667  source_chunks_uv[i] = _mm_load_si128(&src[offset_to_current_uv_line_for_src + i]);
668  }
669 
671  {
672  int bpp = 3;
674  bpp = 4;
675 
676  auto offset_to_current_first_line_for_dst = (2 * width * j) / 16 * bpp;
677  auto offset_to_current_second_line_for_dst = offset_to_current_first_line_for_dst + width * bpp / 16;
678 
679  auto line_length = width / 16;
680  auto first_line_y = source_chunks_y;
681  auto second_line_y = source_chunks_y + line_length;
682 
683  m420_sse_parse_one_line<FORMAT>(first_line_y, source_chunks_uv, &dst[offset_to_current_first_line_for_dst], line_length);
684  m420_sse_parse_one_line<FORMAT>(second_line_y, source_chunks_uv, &dst[offset_to_current_second_line_for_dst], line_length);
685  }
686  }
687 
688  delete[] source_chunks_y;
689  delete[] source_chunks_uv;
690 
691 #else
692  auto src = reinterpret_cast<const uint8_t*>(s);
693  auto dst = reinterpret_cast<uint8_t*>(d[0]);
694 
695  auto src_height = height * 12 >> 3;
696 
697  if (FORMAT == RS2_FORMAT_Y8)
698  {
699  for (int k = 0; k < src_height; k += 3)
700  {
701  // fill the destination with y values
702  // while y is on 2 lines, and uv on the third line
703  auto start_of_y = src + k * width;
704  std::memcpy( dst, start_of_y, 2 * width );
705  dst += 2 * width;
706  }
707  return;
708  }
709  if (FORMAT == RS2_FORMAT_Y16)
710  {
711  for (int k = 0; k < src_height; k += 3)
712  {
713  // fill the destination with y values
714  // while y is on 2 lines, and uv on the third line
715  auto start_of_y = src + k * width;
716 
717  for (int pix = 0; pix < 2 * width; pix += 16)
718  {
719  uint16_t y[16];
720  for (int dst_idx = 0, src_idx = 0; dst_idx < 16; dst_idx += 1, ++src_idx)
721  {
722  y[dst_idx] = start_of_y[src_idx + pix] << 8;
723  }
724  std::memcpy( dst, y, sizeof y );
725  dst += sizeof y;
726  }
727  }
728  return;
729  }
730 
731  for (int k = 0; k < src_height; k += 3)
732  {
733  // fill the y_buffer and uv_buffer
734  // while y is on 2 lines, and uv on the third line
735  auto start_of_y = src + k * width;
736  auto start_of_second_line = start_of_y + width;
737  auto end_of_y = start_of_second_line + width;
738  auto start_of_uv = end_of_y;
739  auto end_of_uv = start_of_uv + width;
740 
741  m420_parse_one_line<FORMAT>(start_of_y, start_of_uv, &dst, width);
742  m420_parse_one_line<FORMAT>(start_of_second_line, start_of_uv, &dst, width);
743  }
744  return;
745 #endif // __SSSE3__
746  }
747 
748  void unpack_yuy2(rs2_format dst_format, rs2_stream dst_stream, uint8_t * const d[], const uint8_t * s, int w, int h, int actual_size)
749  {
750  switch (dst_format)
751  {
752  case RS2_FORMAT_RGB8:
753  unpack_yuy2<RS2_FORMAT_RGB8>(d, s, w, h, actual_size);
754  break;
755  case RS2_FORMAT_Y8:
756  unpack_yuy2<RS2_FORMAT_Y8>(d, s, w, h, actual_size);
757  break;
758  case RS2_FORMAT_RGBA8:
759  unpack_yuy2<RS2_FORMAT_RGBA8>(d, s, w, h, actual_size);
760  break;
761  case RS2_FORMAT_BGR8:
762  unpack_yuy2<RS2_FORMAT_BGR8>(d, s, w, h, actual_size);
763  break;
764  case RS2_FORMAT_BGRA8:
765  unpack_yuy2<RS2_FORMAT_BGRA8>(d, s, w, h, actual_size);
766  break;
767  case RS2_FORMAT_Y16:
768  unpack_yuy2<RS2_FORMAT_Y16>( d, s, w, h, actual_size );
769  break;
770  default:
771  LOG_ERROR("Unsupported format for YUY2 conversion.");
772  break;
773  }
774  }
775 
776  void unpack_m420(rs2_format dst_format, rs2_stream dst_stream, uint8_t * const d[], const uint8_t * s, int w, int h, int actual_size)
777  {
778  LOG_DEBUG("unpack m420 called with dst_format: " << rs2_format_to_string(dst_format));
779  switch (dst_format)
780  {
781  case RS2_FORMAT_Y8:
782  unpack_m420<RS2_FORMAT_Y8>(d, s, w, h, actual_size);
783  break;
784  case RS2_FORMAT_Y16:
785  unpack_m420<RS2_FORMAT_Y16>(d, s, w, h, actual_size);
786  break;
787  case RS2_FORMAT_RGB8:
788  unpack_m420<RS2_FORMAT_RGB8>(d, s, w, h, actual_size);
789  break;
790  case RS2_FORMAT_RGBA8:
791  unpack_m420<RS2_FORMAT_RGBA8>(d, s, w, h, actual_size);
792  break;
793  case RS2_FORMAT_BGR8:
794  unpack_m420<RS2_FORMAT_BGR8>(d, s, w, h, actual_size);
795  break;
796  case RS2_FORMAT_BGRA8:
797  unpack_m420<RS2_FORMAT_BGRA8>(d, s, w, h, actual_size);
798  break;
799  default:
800  LOG_ERROR("Unsupported format for M420 conversion.");
801  break;
802  }
803  }
804 
806  // UYVY unpacking routines //
808  // This templated function unpacks UYVY into RGB8/RGBA8/BGR8/BGRA8, depending on the compile-time parameter FORMAT.
809  // It is expected that all branching outside of the loop control variable will be removed due to constant-folding.
810  template<rs2_format FORMAT> void unpack_uyvy( uint8_t * const d[], const uint8_t * s, int width, int height, int actual_size)
811  {
812  auto n = width * height;
813  assert(n % 16 == 0); // All currently supported color resolutions are multiples of 16 pixels. Could easily extend support to other resolutions by copying final n<16 pixels into a zero-padded buffer and recursively calling self for final iteration.
814 #ifdef __SSSE3__
815  auto src = reinterpret_cast<const __m128i *>(s);
816  auto dst = reinterpret_cast<__m128i *>(d[0]);
817  for (; n; n -= 16)
818  {
819  const __m128i zero = _mm_set1_epi8(0);
820  const __m128i n100 = _mm_set1_epi16(100 << 4);
821  const __m128i n208 = _mm_set1_epi16(208 << 4);
822  const __m128i n298 = _mm_set1_epi16(298 << 4);
823  const __m128i n409 = _mm_set1_epi16(409 << 4);
824  const __m128i n516 = _mm_set1_epi16(516 << 4);
825  const __m128i evens_odds = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
826 
827  // Load 8 UYVY pixels each into two 16-byte registers
828  __m128i s0 = _mm_loadu_si128(src++);
829  __m128i s1 = _mm_loadu_si128(src++);
830 
831 
832  // Shuffle all Y components to the low order bytes of the register, and all U/V components to the high order bytes
833  const __m128i evens_odd1s_odd3s = _mm_setr_epi8(1, 3, 5, 7, 9, 11, 13, 15, 0, 4, 8, 12, 2, 6, 10, 14); // to get yyyyyyyyuuuuvvvv
834  __m128i yyyyyyyyuuuuvvvv0 = _mm_shuffle_epi8(s0, evens_odd1s_odd3s);
835  __m128i yyyyyyyyuuuuvvvv8 = _mm_shuffle_epi8(s1, evens_odd1s_odd3s);
836 
837  // Retrieve all 16 Y components as 16-bit values (8 components per register))
838  __m128i y16__0_7 = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv0, zero); // convert to 16 bit
839  __m128i y16__8_F = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv8, zero); // convert to 16 bit
840 
841 
842  // Retrieve all 16 U and V components as 16-bit values (8 components per register)
843  __m128i uv = _mm_unpackhi_epi32(yyyyyyyyuuuuvvvv0, yyyyyyyyuuuuvvvv8); // uuuuuuuuvvvvvvvv
844  __m128i u = _mm_unpacklo_epi8(uv, uv); // uu uu uu uu uu uu uu uu u's duplicated
845  __m128i v = _mm_unpackhi_epi8(uv, uv); // vv vv vv vv vv vv vv vv
846  __m128i u16__0_7 = _mm_unpacklo_epi8(u, zero); // convert to 16 bit
847  __m128i u16__8_F = _mm_unpackhi_epi8(u, zero); // convert to 16 bit
848  __m128i v16__0_7 = _mm_unpacklo_epi8(v, zero); // convert to 16 bit
849  __m128i v16__8_F = _mm_unpackhi_epi8(v, zero); // convert to 16 bit
850 
851  // Compute R, G, B values for first 8 pixels
852  __m128i c16__0_7 = _mm_slli_epi16(_mm_subs_epi16(y16__0_7, _mm_set1_epi16(16)), 4);
853  __m128i d16__0_7 = _mm_slli_epi16(_mm_subs_epi16(u16__0_7, _mm_set1_epi16(128)), 4); // perhaps could have done these u,v to d,e before the duplication
854  __m128i e16__0_7 = _mm_slli_epi16(_mm_subs_epi16(v16__0_7, _mm_set1_epi16(128)), 4);
855  __m128i r16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(e16__0_7, n409)))))); // (298 * c + 409 * e + 128) ; //
856  __m128i g16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n100)), _mm_mulhi_epi16(e16__0_7, n208)))))); // (298 * c - 100 * d - 208 * e + 128)
857  __m128i b16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n516)))))); // clampbyte((298 * c + 516 * d + 128) >> 8);
858 
859  // Compute R, G, B values for second 8 pixels
860  __m128i c16__8_F = _mm_slli_epi16(_mm_subs_epi16(y16__8_F, _mm_set1_epi16(16)), 4);
861  __m128i d16__8_F = _mm_slli_epi16(_mm_subs_epi16(u16__8_F, _mm_set1_epi16(128)), 4); // perhaps could have done these u,v to d,e before the duplication
862  __m128i e16__8_F = _mm_slli_epi16(_mm_subs_epi16(v16__8_F, _mm_set1_epi16(128)), 4);
863  __m128i r16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(e16__8_F, n409)))))); // (298 * c + 409 * e + 128) ; //
864  __m128i g16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n100)), _mm_mulhi_epi16(e16__8_F, n208)))))); // (298 * c - 100 * d - 208 * e + 128)
865  __m128i b16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n516)))))); // clampbyte((298 * c + 516 * d + 128) >> 8);
866 
868  {
869  // Shuffle separate R, G, B values into four registers storing four pixels each in (R, G, B, A) order
870  __m128i rg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds)); // hi to take the odds which are the upper bytes we care about
871  __m128i ba8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_set1_epi8(-1));
872  __m128i rgba_0_3 = _mm_unpacklo_epi16(rg8__0_7, ba8__0_7);
873  __m128i rgba_4_7 = _mm_unpackhi_epi16(rg8__0_7, ba8__0_7);
874 
875  __m128i rg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds)); // hi to take the odds which are the upper bytes we care about
876  __m128i ba8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_set1_epi8(-1));
877  __m128i rgba_8_B = _mm_unpacklo_epi16(rg8__8_F, ba8__8_F);
878  __m128i rgba_C_F = _mm_unpackhi_epi16(rg8__8_F, ba8__8_F);
879 
880  if (FORMAT == RS2_FORMAT_RGBA8)
881  {
882  // Store 16 pixels (64 bytes) at once
883  _mm_storeu_si128(dst++, rgba_0_3);
884  _mm_storeu_si128(dst++, rgba_4_7);
885  _mm_storeu_si128(dst++, rgba_8_B);
886  _mm_storeu_si128(dst++, rgba_C_F);
887  }
888 
889  if (FORMAT == RS2_FORMAT_RGB8)
890  {
891  // Shuffle rgb triples to the start and end of each register
892  __m128i rgb0 = _mm_shuffle_epi8(rgba_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
893  __m128i rgb1 = _mm_shuffle_epi8(rgba_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
894  __m128i rgb2 = _mm_shuffle_epi8(rgba_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
895  __m128i rgb3 = _mm_shuffle_epi8(rgba_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
896 
897  // Align registers and store 16 pixels (48 bytes) at once
898  _mm_storeu_si128(dst++, _mm_alignr_epi8(rgb1, rgb0, 4));
899  _mm_storeu_si128(dst++, _mm_alignr_epi8(rgb2, rgb1, 8));
900  _mm_storeu_si128(dst++, _mm_alignr_epi8(rgb3, rgb2, 12));
901  }
902  }
903 
905  {
906  // Shuffle separate R, G, B values into four registers storing four pixels each in (B, G, R, A) order
907  __m128i bg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds)); // hi to take the odds which are the upper bytes we care about
908  __m128i ra8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_set1_epi8(-1));
909  __m128i bgra_0_3 = _mm_unpacklo_epi16(bg8__0_7, ra8__0_7);
910  __m128i bgra_4_7 = _mm_unpackhi_epi16(bg8__0_7, ra8__0_7);
911 
912  __m128i bg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds)); // hi to take the odds which are the upper bytes we care about
913  __m128i ra8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_set1_epi8(-1));
914  __m128i bgra_8_B = _mm_unpacklo_epi16(bg8__8_F, ra8__8_F);
915  __m128i bgra_C_F = _mm_unpackhi_epi16(bg8__8_F, ra8__8_F);
916 
917  if (FORMAT == RS2_FORMAT_BGRA8)
918  {
919  // Store 16 pixels (64 bytes) at once
920  _mm_storeu_si128(dst++, bgra_0_3);
921  _mm_storeu_si128(dst++, bgra_4_7);
922  _mm_storeu_si128(dst++, bgra_8_B);
923  _mm_storeu_si128(dst++, bgra_C_F);
924  }
925 
926  if (FORMAT == RS2_FORMAT_BGR8)
927  {
928  // Shuffle rgb triples to the start and end of each register
929  __m128i bgr0 = _mm_shuffle_epi8(bgra_0_3, _mm_setr_epi8(3, 7, 11, 15, 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
930  __m128i bgr1 = _mm_shuffle_epi8(bgra_4_7, _mm_setr_epi8(0, 1, 2, 4, 3, 7, 11, 15, 5, 6, 8, 9, 10, 12, 13, 14));
931  __m128i bgr2 = _mm_shuffle_epi8(bgra_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 3, 7, 11, 15, 10, 12, 13, 14));
932  __m128i bgr3 = _mm_shuffle_epi8(bgra_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15));
933 
934  // Align registers and store 16 pixels (48 bytes) at once
935  _mm_storeu_si128(dst++, _mm_alignr_epi8(bgr1, bgr0, 4));
936  _mm_storeu_si128(dst++, _mm_alignr_epi8(bgr2, bgr1, 8));
937  _mm_storeu_si128(dst++, _mm_alignr_epi8(bgr3, bgr2, 12));
938  }
939  }
940  }
941 #else // Generic code for when SSSE3 is not available.
942  auto src = reinterpret_cast<const uint8_t *>(s);
943  auto dst = reinterpret_cast<uint8_t *>(d[0]);
944  for (; n; n -= 16, src += 32)
945  {
946  int16_t y[16] = {
947  src[1], src[3], src[5], src[7],
948  src[9], src[11], src[13], src[15],
949  src[17], src[19], src[21], src[23],
950  src[25], src[27], src[29], src[31],
951  }, u[16] = {
952  src[0], src[0], src[4], src[4],
953  src[8], src[8], src[12], src[12],
954  src[16], src[16], src[20], src[20],
955  src[24], src[24], src[28], src[28],
956  }, v[16] = {
957  src[2], src[2], src[6], src[6],
958  src[10], src[10], src[14], src[14],
959  src[18], src[18], src[22], src[22],
960  src[26], src[26], src[30], src[30],
961  };
962 
963  uint8_t r[16], g[16], b[16];
964  for (int i = 0; i < 16; i++)
965  {
966  int32_t c = y[i] - 16;
967  int32_t d = u[i] - 128;
968  int32_t e = v[i] - 128;
969 
970  int32_t t;
971 #define clamp(x) ((t=(x)) > 255 ? 255 : t < 0 ? 0 : t)
972  r[i] = clamp((298 * c + 409 * e + 128) >> 8);
973  g[i] = clamp((298 * c - 100 * d - 208 * e + 128) >> 8);
974  b[i] = clamp((298 * c + 516 * d + 128) >> 8);
975 #undef clamp
976  }
977 
978  if (FORMAT == RS2_FORMAT_RGB8)
979  {
980  uint8_t out[16 * 3] = {
981  r[0], g[0], b[0], r[1], g[1], b[1],
982  r[2], g[2], b[2], r[3], g[3], b[3],
983  r[4], g[4], b[4], r[5], g[5], b[5],
984  r[6], g[6], b[6], r[7], g[7], b[7],
985  r[8], g[8], b[8], r[9], g[9], b[9],
986  r[10], g[10], b[10], r[11], g[11], b[11],
987  r[12], g[12], b[12], r[13], g[13], b[13],
988  r[14], g[14], b[14], r[15], g[15], b[15],
989  };
990  std::memcpy( dst, out, sizeof out );
991  dst += sizeof out;
992  continue;
993  }
994 
995  if (FORMAT == RS2_FORMAT_BGR8)
996  {
997  uint8_t out[16 * 3] = {
998  b[0], g[0], r[0], b[1], g[1], r[1],
999  b[2], g[2], r[2], b[3], g[3], r[3],
1000  b[4], g[4], r[4], b[5], g[5], r[5],
1001  b[6], g[6], r[6], b[7], g[7], r[7],
1002  b[8], g[8], r[8], b[9], g[9], r[9],
1003  b[10], g[10], r[10], b[11], g[11], r[11],
1004  b[12], g[12], r[12], b[13], g[13], r[13],
1005  b[14], g[14], r[14], b[15], g[15], r[15],
1006  };
1007  std::memcpy( dst, out, sizeof out );
1008  dst += sizeof out;
1009  continue;
1010  }
1011 
1012  if (FORMAT == RS2_FORMAT_RGBA8)
1013  {
1014  uint8_t out[16 * 4] = {
1015  r[0], g[0], b[0], 255, r[1], g[1], b[1], 255,
1016  r[2], g[2], b[2], 255, r[3], g[3], b[3], 255,
1017  r[4], g[4], b[4], 255, r[5], g[5], b[5], 255,
1018  r[6], g[6], b[6], 255, r[7], g[7], b[7], 255,
1019  r[8], g[8], b[8], 255, r[9], g[9], b[9], 255,
1020  r[10], g[10], b[10], 255, r[11], g[11], b[11], 255,
1021  r[12], g[12], b[12], 255, r[13], g[13], b[13], 255,
1022  r[14], g[14], b[14], 255, r[15], g[15], b[15], 255,
1023  };
1024  std::memcpy( dst, out, sizeof out );
1025  dst += sizeof out;
1026  continue;
1027  }
1028 
1029  if (FORMAT == RS2_FORMAT_BGRA8)
1030  {
1031  uint8_t out[16 * 4] = {
1032  b[0], g[0], r[0], 255, b[1], g[1], r[1], 255,
1033  b[2], g[2], r[2], 255, b[3], g[3], r[3], 255,
1034  b[4], g[4], r[4], 255, b[5], g[5], r[5], 255,
1035  b[6], g[6], r[6], 255, b[7], g[7], r[7], 255,
1036  b[8], g[8], r[8], 255, b[9], g[9], r[9], 255,
1037  b[10], g[10], r[10], 255, b[11], g[11], r[11], 255,
1038  b[12], g[12], r[12], 255, b[13], g[13], r[13], 255,
1039  b[14], g[14], r[14], 255, b[15], g[15], r[15], 255,
1040  };
1041  std::memcpy( dst, out, sizeof out );
1042  dst += sizeof out;
1043  continue;
1044  }
1045  }
1046 #endif
1047  }
1048 
1049  void unpack_uyvyc(rs2_format dst_format, rs2_stream dst_stream, uint8_t * const d[], const uint8_t * s, int w, int h, int actual_size)
1050  {
1051  switch (dst_format)
1052  {
1053  case RS2_FORMAT_RGB8:
1054  unpack_uyvy<RS2_FORMAT_RGB8>(d, s, w, h, actual_size);
1055  break;
1056  case RS2_FORMAT_RGBA8:
1057  unpack_uyvy<RS2_FORMAT_RGBA8>(d, s, w, h, actual_size);
1058  break;
1059  case RS2_FORMAT_BGR8:
1060  unpack_uyvy<RS2_FORMAT_BGR8>(d, s, w, h, actual_size);
1061  break;
1062  case RS2_FORMAT_BGRA8:
1063  unpack_uyvy<RS2_FORMAT_BGRA8>(d, s, w, h, actual_size);
1064  break;
1065  default:
1066  LOG_ERROR("Unsupported format for UYVY conversion.");
1067  break;
1068  }
1069  }
1070 
1072  // MJPEG unpacking routines //
1074  void unpack_mjpeg( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size, int input_size)
1075  {
1076  int w, h, bpp;
1077  auto uncompressed_rgb = stbi_load_from_memory(source, actual_size, &w, &h, &bpp, false);
1078  if (uncompressed_rgb)
1079  {
1080  auto uncompressed_size = w * h * bpp;
1081  std::memcpy( dest[0], uncompressed_rgb, uncompressed_size );
1082  stbi_image_free(uncompressed_rgb);
1083  }
1084  else
1085  LOG_ERROR("jpeg decode failed");
1086  }
1087 
1089  // BGR unpacking routines //
// Converts a tightly packed 3-byte-per-pixel image between BGR and RGB channel order.
//   dest         - destination plane array; only dest[0] is written
//   source       - source pixels, 3 bytes each
//   width,height - image dimensions in pixels
//   actual_size  - not used by this routine
// The pixels are first copied verbatim, then the first and third byte of every
// pixel are exchanged in place in the destination.
void unpack_rgb_from_bgr( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size)
{
    const int pixels = width * height;
    uint8_t * const converted = dest[0];

    std::memcpy( converted, source, pixels * 3 );

    uint8_t * px = converted;
    for (int remaining = pixels; remaining > 0; --remaining, px += 3)
    {
        const uint8_t first = px[0];
        px[0] = px[2];
        px[2] = first;
    }
}
1103 
1104  void yuy2_converter::process_function( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size, int input_size)
1105  {
1107  }
1108 
1109  void uyvy_converter::process_function( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size, int input_size)
1110  {
1112  }
1113 
1114  void mjpeg_converter::process_function( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size, int input_size)
1115  {
1116  unpack_mjpeg(dest, source, width, height, actual_size, input_size);
1117  }
1118 
1119  void bgr_to_rgb::process_function( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size, int input_size)
1120  {
1121  unpack_rgb_from_bgr(dest, source, width, height, actual_size);
1122  }
1123 
1124  void m420_converter::process_function( uint8_t * const dest[], const uint8_t * source, int width, int height, int actual_size, int input_size)
1125  {
1127  }
1128 }
librealsense::functional_processing_block::_target_stream
rs2_stream _target_stream
Definition: synthetic-stream.h:161
librealsense
Definition: algo.h:18
count
GLint GLsizei count
Definition: glad/glad/glad.h:2301
uint8_t
unsigned char uint8_t
Definition: stdint.h:78
RS2_FORMAT_RGBA8
@ RS2_FORMAT_RGBA8
Definition: rs_sensor.h:70
librealsense::unpack_uyvy
void unpack_uyvy(uint8_t *const d[], const uint8_t *s, int width, int height, int actual_size)
Definition: color-formats-converter.cpp:810
test-librs-connections.n
n
Definition: test-librs-connections.py:38
librealsense::unpack_uyvyc
void unpack_uyvyc(rs2_format dst_format, rs2_stream dst_stream, uint8_t *const d[], const uint8_t *s, int w, int h, int actual_size)
Definition: color-formats-converter.cpp:1049
uint16_t
unsigned short uint16_t
Definition: stdint.h:79
b
GLboolean GLboolean GLboolean b
Definition: glad/glad/glad.h:3064
librealsense::mjpeg_converter::process_function
void process_function(uint8_t *const dest[], const uint8_t *source, int width, int height, int actual_size, int input_size) override
Definition: color-formats-converter.cpp:1114
RS2_FORMAT_RGB8
@ RS2_FORMAT_RGB8
Definition: rs_sensor.h:68
v
GLdouble v
Definition: glad/glad/glad.h:2144
LOG_DEBUG
#define LOG_DEBUG(...)
Definition: easyloggingpp.h:70
test-control-reply.s1
s1
Definition: test-control-reply.py:25
cpuid
void cpuid(int info[4], int info_type)
Definition: color-formats-converter.cpp:32
opencv_pointcloud_viewer.out
out
Definition: opencv_pointcloud_viewer.py:276
int16_t
signed short int16_t
Definition: stdint.h:76
width
GLint GLsizei width
Definition: glad/glad/glad.h:1397
librealsense::functional_processing_block::_target_format
rs2_format _target_format
Definition: synthetic-stream.h:160
r
GLdouble GLdouble r
Definition: glad/glad/glad.h:1853
rs2_format
rs2_format
A stream's format identifies how binary data is encoded within a frame.
Definition: rs_sensor.h:61
stbi_load_from_memory
STBIDEF stbi_uc * stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
librealsense::bgr_to_rgb::process_function
void process_function(uint8_t *const dest[], const uint8_t *source, int width, int height, int actual_size, int input_size) override
Definition: color-formats-converter.cpp:1119
librealsense::unpack_m420
void unpack_m420(uint8_t *const d[], const uint8_t *s, int width, int height, int actual_size)
Definition: color-formats-converter.cpp:618
height
GLint GLsizei GLsizei height
Definition: glad/glad/glad.h:1397
i
int i
Definition: rs-pcl-color.cpp:54
g
GLboolean GLboolean g
Definition: glad/glad/glad.h:3064
j
GLint j
Definition: glad/glad/glad.h:2165
w
GLdouble GLdouble GLdouble w
Definition: glad/glad/glad.h:1757
librealsense::m420_converter::process_function
void process_function(uint8_t *const dest[], const uint8_t *source, int width, int height, int actual_size, int input_size) override
Definition: color-formats-converter.cpp:1124
unit-test-config.src
string src
Definition: unit-test-config.py:88
image.h
librealsense::unpack_mjpeg
void unpack_mjpeg(uint8_t *const dest[], const uint8_t *source, int width, int height, int actual_size, int input_size)
Definition: color-formats-converter.cpp:1074
librealsense::unpack_yuy2
void unpack_yuy2(uint8_t *const d[], const uint8_t *s, int width, int height, int actual_size)
Definition: color-formats-converter.cpp:54
color-formats-converter.h
source
GLsizei GLsizei GLchar * source
Definition: glad/glad/glad.h:2828
fps.info
info
Definition: fps.py:50
dest
char * dest
Definition: lz4.h:697
has_avx
bool has_avx()
Definition: color-formats-converter.cpp:37
image-avx.h
realsense_device_manager.c
c
Definition: realsense_device_manager.py:322
assert
#define assert(condition)
Definition: lz4.c:245
librealsense::uyvy_converter::process_function
void process_function(uint8_t *const dest[], const uint8_t *source, int width, int height, int actual_size, int input_size) override
Definition: color-formats-converter.cpp:1109
librealsense::yuy2_converter::process_function
void process_function(uint8_t *const dest[], const uint8_t *source, int width, int height, int actual_size, int input_size) override
Definition: color-formats-converter.cpp:1104
int32_t
signed int int32_t
Definition: stdint.h:77
t
GLdouble t
Definition: glad/glad/glad.h:1829
RS2_FORMAT_Y16
@ RS2_FORMAT_Y16
Definition: rs_sensor.h:73
librealsense::unpack_rgb_from_bgr
void unpack_rgb_from_bgr(uint8_t *const dest[], const uint8_t *source, int width, int height, int actual_size)
Definition: color-formats-converter.cpp:1091
RS2_FORMAT_Y8
@ RS2_FORMAT_Y8
Definition: rs_sensor.h:72
librealsense::m420_parse_one_line
void m420_parse_one_line(const uint8_t *y_one_line, const uint8_t *uv_one_line, uint8_t **dst, int width)
Definition: color-formats-converter.cpp:359
rmse.e
e
Definition: rmse.py:177
FORMAT
#define FORMAT
Definition: rs-color.c:19
option.h
clamp
#define clamp(x)
RS2_FORMAT_BGRA8
@ RS2_FORMAT_BGRA8
Definition: rs_sensor.h:71
dst
char * dst
Definition: lz4.h:724
rs2_format_to_string
const char * rs2_format_to_string(rs2_format format)
Definition: to-string.cpp:767
s
GLdouble s
Definition: glad/glad/glad.h:2441
rs2_stream
rs2_stream
Streams are different types of data provided by RealSense devices.
Definition: rs_sensor.h:43
stbi_image_free
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
RS2_FORMAT_BGR8
@ RS2_FORMAT_BGR8
Definition: rs_sensor.h:69
rmse.d
d
Definition: rmse.py:171
test-unit-transform.bpp
bpp
Definition: test-unit-transform.py:41
LOG_ERROR
#define LOG_ERROR(...)
Definition: easyloggingpp.h:73
sw.h
int h
Definition: sw-dev/sw.py:11
y
GLint y
Definition: glad/glad/glad.h:1397


librealsense2
Author(s): LibRealSense ROS Team
autogenerated on Fri Aug 2 2024 08:30:01