librealsense: image.cpp Source File

Go to the documentation of this file.
00001 // License: Apache 2.0. See LICENSE file in root directory.
00002 // Copyright(c) 2015 Intel Corporation. All Rights Reserved.
00003 
00004 #include "image.h"
00005 #include "../include/librealsense/rsutil.h" // For projection/deprojection logic
00006 
00007 #include <cstring> // For memcpy
00008 #include <cmath>
00009 #include <algorithm>
00010 #ifdef __SSSE3__
00011 #include <tmmintrin.h> // For SSE3 intrinsic used in unpack_yuy2_sse
00012 #endif
00013 
00014 #pragma pack(push, 1) // All structs in this file are assumed to be byte-packed
00015 namespace rsimpl
00016 {
00017 
00019     // Image size computation //
00021 
00022     size_t get_image_size(int width, int height, rs_format format)
00023     {
00024         if (format == RS_FORMAT_YUYV) assert(width % 2 == 0);
00025         if (format == RS_FORMAT_RAW10) assert(width % 4 == 0);
00026         return width * height * get_image_bpp(format) / 8;
00027     }
00028 
00029     int get_image_bpp(rs_format format)
00030     {
00031         switch (format)
00032         {
00033         case RS_FORMAT_Z16: return  16;
00034         case RS_FORMAT_DISPARITY16: return 16;
00035         case RS_FORMAT_XYZ32F: return 12 * 8;
00036         case RS_FORMAT_YUYV:  return 16;
00037         case RS_FORMAT_RGB8: return 24;
00038         case RS_FORMAT_BGR8: return 24;
00039         case RS_FORMAT_RGBA8: return 32;
00040         case RS_FORMAT_BGRA8: return 32;
00041         case RS_FORMAT_Y8: return 8;
00042         case RS_FORMAT_Y16: return 16;
00043         case RS_FORMAT_RAW10: return 10;
00044         case RS_FORMAT_RAW16: return 16;
00045         case RS_FORMAT_RAW8: return 8;
00046         default: assert(false); return 0;
00047         }
00048     }
00050     // Naive unpacking routines //
00052     
00053     template<size_t SIZE> void copy_pixels(byte * const dest[], const byte * source, int count)
00054     {
00055         memcpy(dest[0], source, SIZE * count);
00056     }
00057 
00058     void copy_raw10(byte * const dest[], const byte * source, int count)
00059     {
00060         memcpy(dest[0], source, (5 * (count/4)));
00061     }
00062 
00063     template<class SOURCE, class UNPACK> void unpack_pixels(byte * const dest[], int count, const SOURCE * source, UNPACK unpack)
00064     {
00065         auto out = reinterpret_cast<decltype(unpack(SOURCE())) *>(dest[0]);
00066         for(int i=0; i<count; ++i) *out++ = unpack(*source++);
00067     }
00068 
00069     void unpack_y16_from_y8    (byte * const d[], const byte * s, int n) { unpack_pixels(d, n, reinterpret_cast<const uint8_t  *>(s), [](uint8_t  pixel) -> uint16_t { return pixel | pixel << 8; }); }
00070     void unpack_y16_from_y16_10(byte * const d[], const byte * s, int n) { unpack_pixels(d, n, reinterpret_cast<const uint16_t *>(s), [](uint16_t pixel) -> uint16_t { return pixel << 6; }); }
00071     void unpack_y8_from_y16_10 (byte * const d[], const byte * s, int n) { unpack_pixels(d, n, reinterpret_cast<const uint16_t *>(s), [](uint16_t pixel) -> uint8_t  { return pixel >> 2; }); }
00072     void unpack_rw10_from_rw8 (byte *  const d[], const byte * s, int n)
00073     {
00074 #ifdef __SSSE3__
00075         auto src = reinterpret_cast<const __m128i *>(s);
00076         auto dst = reinterpret_cast<__m128i *>(d[0]);
00077 
00078         __m128i* xin = (__m128i*)src;
00079         __m128i* xout = (__m128i*) dst;
00080         for (int i = 0; i < n; i += 16, ++xout, xin += 2)
00081         {
00082             __m128i  in1_16 = _mm_load_si128((__m128i*)(xin));
00083             __m128i  in2_16 = _mm_load_si128((__m128i*)(xin + 1));
00084             __m128i  out1_16 = _mm_srli_epi16(in1_16, 2);
00085             __m128i  out2_16 = _mm_srli_epi16(in2_16, 2);
00086             __m128i  out8 = _mm_packus_epi16(out1_16, out2_16);
00087             _mm_store_si128(xout, out8);
00088         }
00089 #else  // Generic code for when SSSE3 is not available.
00090         unsigned short* from = (unsigned short*)s;
00091         byte* to = d[0];
00092 
00093         for(int i = 0; i < n; ++i)
00094         {
00095           byte temp = (byte)(*from >> 2);
00096           *to = temp;
00097           ++from;
00098           ++to;
00099         }
00100 #endif
00101     }
00102 
00104     // YUY2 unpacking routines //
00106     
00107     // This templated function unpacks YUY2 into Y8/Y16/RGB8/RGBA8/BGR8/BGRA8, depending on the compile-time parameter FORMAT.
00108     // It is expected that all branching outside of the loop control variable will be removed due to constant-folding.
00109     template<rs_format FORMAT> void unpack_yuy2(byte * const d [], const byte * s, int n)
00110     {
00111         assert(n % 16 == 0); // All currently supported color resolutions are multiples of 16 pixels. Could easily extend support to other resolutions by copying final n<16 pixels into a zero-padded buffer and recursively calling self for final iteration.
00112 #ifdef __SSSE3__
00113         auto src = reinterpret_cast<const __m128i *>(s);
00114         auto dst = reinterpret_cast<__m128i *>(d[0]);
00115         for(; n; n -= 16)
00116         {
00117             const __m128i zero = _mm_set1_epi8(0);
00118             const __m128i n100 = _mm_set1_epi16(100 << 4);
00119             const __m128i n208 = _mm_set1_epi16(208 << 4);
00120             const __m128i n298 = _mm_set1_epi16(298 << 4);
00121             const __m128i n409 = _mm_set1_epi16(409 << 4);
00122             const __m128i n516 = _mm_set1_epi16(516 << 4);
00123             const __m128i evens_odds = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);            
00124 
00125             // Load 8 YUY2 pixels each into two 16-byte registers
00126             __m128i s0 = _mm_loadu_si128(src++);
00127             __m128i s1 = _mm_loadu_si128(src++);
00128 
00129             if(FORMAT == RS_FORMAT_Y8)
00130             {   
00131                 // Align all Y components and output 16 pixels (16 bytes) at once
00132                 __m128i y0 = _mm_shuffle_epi8(s0, _mm_setr_epi8(1, 3, 5, 7, 9, 11, 13, 15,   0, 2, 4, 6, 8, 10, 12, 14));
00133                 __m128i y1 = _mm_shuffle_epi8(s1, _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14,   1, 3, 5, 7, 9, 11, 13, 15));
00134                 _mm_storeu_si128(dst++, _mm_alignr_epi8(y0, y1, 8));
00135                 continue;
00136             }
00137 
00138             // Shuffle all Y components to the low order bytes of the register, and all U/V components to the high order bytes
00139             const __m128i evens_odd1s_odd3s = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 5, 9, 13, 3, 7, 11, 15); // to get yyyyyyyyuuuuvvvv
00140             __m128i yyyyyyyyuuuuvvvv0 = _mm_shuffle_epi8(s0, evens_odd1s_odd3s);
00141             __m128i yyyyyyyyuuuuvvvv8 = _mm_shuffle_epi8(s1, evens_odd1s_odd3s);
00142 
00143             // Retrieve all 16 Y components as 16-bit values (8 components per register))
00144             __m128i y16__0_7 = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv0, zero);         // convert to 16 bit
00145             __m128i y16__8_F = _mm_unpacklo_epi8(yyyyyyyyuuuuvvvv8, zero);         // convert to 16 bit
00146 
00147             if(FORMAT == RS_FORMAT_Y16)
00148             {      
00149                 // Output 16 pixels (32 bytes) at once
00150                 _mm_storeu_si128(dst++, _mm_slli_epi16(y16__0_7, 8));
00151                 _mm_storeu_si128(dst++, _mm_slli_epi16(y16__8_F, 8));
00152                 continue;
00153             }
00154 
00155             // Retrieve all 16 U and V components as 16-bit values (8 components per register)
00156             __m128i uv = _mm_unpackhi_epi32(yyyyyyyyuuuuvvvv0, yyyyyyyyuuuuvvvv8); // uuuuuuuuvvvvvvvv
00157             __m128i u = _mm_unpacklo_epi8(uv, uv);                                 //  uu uu uu uu uu uu uu uu  u's duplicated
00158             __m128i v = _mm_unpackhi_epi8(uv, uv);                                 //  vv vv vv vv vv vv vv vv            
00159             __m128i u16__0_7 = _mm_unpacklo_epi8(u, zero);                         // convert to 16 bit
00160             __m128i u16__8_F = _mm_unpackhi_epi8(u, zero);                         // convert to 16 bit
00161             __m128i v16__0_7 = _mm_unpacklo_epi8(v, zero);                         // convert to 16 bit
00162             __m128i v16__8_F = _mm_unpackhi_epi8(v, zero);                         // convert to 16 bit
00163 
00164             // Compute R, G, B values for first 8 pixels
00165             __m128i c16__0_7 = _mm_slli_epi16(_mm_subs_epi16(y16__0_7, _mm_set1_epi16(16)), 4);
00166             __m128i d16__0_7 = _mm_slli_epi16(_mm_subs_epi16(u16__0_7, _mm_set1_epi16(128)), 4); // perhaps could have done these u,v to d,e before the duplication
00167             __m128i e16__0_7 = _mm_slli_epi16(_mm_subs_epi16(v16__0_7, _mm_set1_epi16(128)), 4);
00168             __m128i r16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(e16__0_7, n409))))));                                                 // (298 * c + 409 * e + 128) ; //
00169             __m128i g16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n100)), _mm_mulhi_epi16(e16__0_7, n208)))))); // (298 * c - 100 * d - 208 * e + 128)
00170             __m128i b16__0_7 = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__0_7, n298), _mm_mulhi_epi16(d16__0_7, n516))))));                                                 // clampbyte((298 * c + 516 * d + 128) >> 8);
00171 
00172             // Compute R, G, B values for second 8 pixels
00173             __m128i c16__8_F = _mm_slli_epi16(_mm_subs_epi16(y16__8_F, _mm_set1_epi16(16)), 4);
00174             __m128i d16__8_F = _mm_slli_epi16(_mm_subs_epi16(u16__8_F, _mm_set1_epi16(128)), 4); // perhaps could have done these u,v to d,e before the duplication
00175             __m128i e16__8_F = _mm_slli_epi16(_mm_subs_epi16(v16__8_F, _mm_set1_epi16(128)), 4);           
00176             __m128i r16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(e16__8_F, n409))))));                                                 // (298 * c + 409 * e + 128) ; //
00177             __m128i g16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_sub_epi16(_mm_sub_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n100)), _mm_mulhi_epi16(e16__8_F, n208)))))); // (298 * c - 100 * d - 208 * e + 128)
00178             __m128i b16__8_F = _mm_min_epi16(_mm_set1_epi16(255), _mm_max_epi16(zero, ((_mm_add_epi16(_mm_mulhi_epi16(c16__8_F, n298), _mm_mulhi_epi16(d16__8_F, n516))))));                                                 // clampbyte((298 * c + 516 * d + 128) >> 8);
00179 
00180             if (FORMAT == RS_FORMAT_RGB8 || FORMAT == RS_FORMAT_RGBA8)
00181             {
00182                 // Shuffle separate R, G, B values into four registers storing four pixels each in (R, G, B, A) order
00183                 __m128i rg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds)); // hi to take the odds which are the upper bytes we care about
00184                 __m128i ba8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_set1_epi8(-1));
00185                 __m128i rgba_0_3 = _mm_unpacklo_epi16(rg8__0_7, ba8__0_7);
00186                 __m128i rgba_4_7 = _mm_unpackhi_epi16(rg8__0_7, ba8__0_7);
00187 
00188                 __m128i rg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds)); // hi to take the odds which are the upper bytes we care about
00189                 __m128i ba8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_set1_epi8(-1));
00190                 __m128i rgba_8_B = _mm_unpacklo_epi16(rg8__8_F, ba8__8_F);
00191                 __m128i rgba_C_F = _mm_unpackhi_epi16(rg8__8_F, ba8__8_F);
00192 
00193                 if(FORMAT == RS_FORMAT_RGBA8)
00194                 {
00195                     // Store 16 pixels (64 bytes) at once
00196                     _mm_storeu_si128(dst++, rgba_0_3);
00197                     _mm_storeu_si128(dst++, rgba_4_7);
00198                     _mm_storeu_si128(dst++, rgba_8_B);
00199                     _mm_storeu_si128(dst++, rgba_C_F);
00200                 }
00201 
00202                 if(FORMAT == RS_FORMAT_RGB8)
00203                 {
00204                     // Shuffle rgb triples to the start and end of each register
00205                     __m128i rgb0 = _mm_shuffle_epi8(rgba_0_3, _mm_setr_epi8(  3, 7, 11, 15,   0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
00206                     __m128i rgb1 = _mm_shuffle_epi8(rgba_4_7, _mm_setr_epi8(0, 1, 2, 4,   3, 7, 11, 15,   5, 6, 8, 9, 10, 12, 13, 14));
00207                     __m128i rgb2 = _mm_shuffle_epi8(rgba_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9,   3, 7, 11, 15,   10, 12, 13, 14));
00208                     __m128i rgb3 = _mm_shuffle_epi8(rgba_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,   3, 7, 11, 15  ));
00209 
00210                     // Align registers and store 16 pixels (48 bytes) at once
00211                     _mm_storeu_si128(dst++, _mm_alignr_epi8(rgb1, rgb0, 4));
00212                     _mm_storeu_si128(dst++, _mm_alignr_epi8(rgb2, rgb1, 8));
00213                     _mm_storeu_si128(dst++, _mm_alignr_epi8(rgb3, rgb2, 12));                
00214                 }
00215             }
00216 
00217             if (FORMAT == RS_FORMAT_BGR8 || FORMAT == RS_FORMAT_BGRA8)
00218             {
00219                 // Shuffle separate R, G, B values into four registers storing four pixels each in (B, G, R, A) order
00220                 __m128i bg8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__0_7, evens_odds), _mm_shuffle_epi8(g16__0_7, evens_odds)); // hi to take the odds which are the upper bytes we care about
00221                 __m128i ra8__0_7 = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__0_7, evens_odds), _mm_set1_epi8(-1));
00222                 __m128i bgra_0_3 = _mm_unpacklo_epi16(bg8__0_7, ra8__0_7);
00223                 __m128i bgra_4_7 = _mm_unpackhi_epi16(bg8__0_7, ra8__0_7);
00224 
00225                 __m128i bg8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(b16__8_F, evens_odds), _mm_shuffle_epi8(g16__8_F, evens_odds)); // hi to take the odds which are the upper bytes we care about
00226                 __m128i ra8__8_F = _mm_unpacklo_epi8(_mm_shuffle_epi8(r16__8_F, evens_odds), _mm_set1_epi8(-1));
00227                 __m128i bgra_8_B = _mm_unpacklo_epi16(bg8__8_F, ra8__8_F);
00228                 __m128i bgra_C_F = _mm_unpackhi_epi16(bg8__8_F, ra8__8_F);
00229 
00230                 if(FORMAT == RS_FORMAT_BGRA8)
00231                 {
00232                     // Store 16 pixels (64 bytes) at once
00233                     _mm_storeu_si128(dst++, bgra_0_3);
00234                     _mm_storeu_si128(dst++, bgra_4_7);
00235                     _mm_storeu_si128(dst++, bgra_8_B);
00236                     _mm_storeu_si128(dst++, bgra_C_F);
00237                 }
00238 
00239                 if(FORMAT == RS_FORMAT_BGR8)
00240                 {
00241                     // Shuffle rgb triples to the start and end of each register
00242                     __m128i bgr0 = _mm_shuffle_epi8(bgra_0_3, _mm_setr_epi8(  3, 7, 11, 15,   0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14));
00243                     __m128i bgr1 = _mm_shuffle_epi8(bgra_4_7, _mm_setr_epi8(0, 1, 2, 4,   3, 7, 11, 15,   5, 6, 8, 9, 10, 12, 13, 14));
00244                     __m128i bgr2 = _mm_shuffle_epi8(bgra_8_B, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9,   3, 7, 11, 15,   10, 12, 13, 14));
00245                     __m128i bgr3 = _mm_shuffle_epi8(bgra_C_F, _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,   3, 7, 11, 15  ));
00246 
00247                     // Align registers and store 16 pixels (48 bytes) at once
00248                     _mm_storeu_si128(dst++, _mm_alignr_epi8(bgr1, bgr0, 4));
00249                     _mm_storeu_si128(dst++, _mm_alignr_epi8(bgr2, bgr1, 8));
00250                     _mm_storeu_si128(dst++, _mm_alignr_epi8(bgr3, bgr2, 12));                
00251                 }
00252             }
00253         }    
00254 #else  // Generic code for when SSSE3 is not available.
00255         auto src = reinterpret_cast<const uint8_t *>(s);
00256         auto dst = reinterpret_cast<uint8_t *>(d[0]);
00257         for(; n; n -= 16, src += 32)
00258         {
00259             if(FORMAT == RS_FORMAT_Y8)
00260             {
00261                 uint8_t out[16] = {
00262                     src[ 0], src[ 2], src[ 4], src[ 6],
00263                     src[ 8], src[10], src[12], src[14],
00264                     src[16], src[18], src[20], src[22],
00265                     src[24], src[26], src[28], src[30],
00266                 };
00267                 memcpy(dst, out, sizeof out);
00268                 dst += sizeof out;
00269                 continue;
00270             }
00271 
00272             if(FORMAT == RS_FORMAT_Y16)
00273             {
00274                 // Y16 is little-endian.  We output Y << 8.
00275                 uint8_t out[32] = {
00276                     0, src[ 0], 0, src[ 2], 0, src[ 4], 0, src[ 6],
00277                     0, src[ 8], 0, src[10], 0, src[12], 0, src[14],
00278                     0, src[16], 0, src[18], 0, src[20], 0, src[22],
00279                     0, src[24], 0, src[26], 0, src[28], 0, src[30],
00280                 };
00281                 memcpy(dst, out, sizeof out);
00282                 dst += sizeof out;
00283                 continue;
00284             }
00285 
00286             int16_t y[16] = {
00287                 src[ 0], src[ 2], src[ 4], src[ 6],
00288                 src[ 8], src[10], src[12], src[14],
00289                 src[16], src[18], src[20], src[22],
00290                 src[24], src[26], src[28], src[30],
00291             }, u[16] = {
00292                 src[ 1], src[ 1], src[ 5], src[ 5],
00293                 src[ 9], src[ 9], src[13], src[13],
00294                 src[17], src[17], src[21], src[21],
00295                 src[25], src[25], src[29], src[29],
00296             }, v[16] = {
00297                 src[ 3], src[ 3], src[ 7], src[ 7],
00298                 src[11], src[11], src[15], src[15],
00299                 src[19], src[19], src[23], src[23],
00300                 src[27], src[27], src[31], src[31],
00301             };
00302 
00303             uint8_t r[16], g[16], b[16];
00304             for(int i = 0; i < 16; i++)
00305             {
00306                 int32_t c = y[i] - 16;
00307                 int32_t d = u[i] - 128;
00308                 int32_t e = v[i] - 128;
00309 
00310                 int32_t t;
00311                 #define clamp(x)  ((t=(x)) > 255 ? 255 : t < 0 ? 0 : t)
00312                 r[i] = clamp((298 * c           + 409 * e + 128) >> 8);
00313                 g[i] = clamp((298 * c - 100 * d - 409 * e + 128) >> 8);
00314                 b[i] = clamp((298 * c + 516 * d           + 128) >> 8);
00315                 #undef clamp
00316             }
00317 
00318             if(FORMAT == RS_FORMAT_RGB8)
00319             {
00320                 uint8_t out[16*3] = {
00321                     r[ 0], g[ 0], b[ 0], r[ 1], g[ 1], b[ 1],
00322                     r[ 2], g[ 2], b[ 2], r[ 3], g[ 3], b[ 3],
00323                     r[ 4], g[ 4], b[ 4], r[ 5], g[ 5], b[ 5],
00324                     r[ 6], g[ 6], b[ 6], r[ 7], g[ 7], b[ 7],
00325                     r[ 8], g[ 8], b[ 8], r[ 9], g[ 9], b[ 9],
00326                     r[10], g[10], b[10], r[11], g[11], b[11],
00327                     r[12], g[12], b[12], r[13], g[13], b[13],
00328                     r[14], g[14], b[14], r[15], g[15], b[15],
00329                 };
00330                 memcpy(dst, out, sizeof out);
00331                 dst += sizeof out;
00332                 continue;
00333             }
00334 
00335             if(FORMAT == RS_FORMAT_BGR8)
00336             {
00337                 uint8_t out[16*3] = {
00338                     b[ 0], g[ 0], r[ 0], b[ 1], g[ 1], r[ 1],
00339                     b[ 2], g[ 2], r[ 2], b[ 3], g[ 3], r[ 3],
00340                     b[ 4], g[ 4], r[ 4], b[ 5], g[ 5], r[ 5],
00341                     b[ 6], g[ 6], r[ 6], b[ 7], g[ 7], r[ 7],
00342                     b[ 8], g[ 8], r[ 8], b[ 9], g[ 9], r[ 9],
00343                     b[10], g[10], r[10], b[11], g[11], r[11],
00344                     b[12], g[12], r[12], b[13], g[13], r[13],
00345                     b[14], g[14], r[14], b[15], g[15], r[15],
00346                 };
00347                 memcpy(dst, out, sizeof out);
00348                 dst += sizeof out;
00349                 continue;
00350             }
00351 
00352             if(FORMAT == RS_FORMAT_RGBA8)
00353             {
00354                 uint8_t out[16*4] = {
00355                     r[ 0], g[ 0], b[ 0], 255, r[ 1], g[ 1], b[ 1], 255,
00356                     r[ 2], g[ 2], b[ 2], 255, r[ 3], g[ 3], b[ 3], 255,
00357                     r[ 4], g[ 4], b[ 4], 255, r[ 5], g[ 5], b[ 5], 255,
00358                     r[ 6], g[ 6], b[ 6], 255, r[ 7], g[ 7], b[ 7], 255,
00359                     r[ 8], g[ 8], b[ 8], 255, r[ 9], g[ 9], b[ 9], 255,
00360                     r[10], g[10], b[10], 255, r[11], g[11], b[11], 255,
00361                     r[12], g[12], b[12], 255, r[13], g[13], b[13], 255,
00362                     r[14], g[14], b[14], 255, r[15], g[15], b[15], 255,
00363                 };
00364                 memcpy(dst, out, sizeof out);
00365                 dst += sizeof out;
00366                 continue;
00367             }
00368 
00369             if(FORMAT == RS_FORMAT_BGRA8)
00370             {
00371                 uint8_t out[16*4] = {
00372                     b[ 0], g[ 0], r[ 0], 255, b[ 1], g[ 1], r[ 1], 255,
00373                     b[ 2], g[ 2], r[ 2], 255, b[ 3], g[ 3], r[ 3], 255,
00374                     b[ 4], g[ 4], r[ 4], 255, b[ 5], g[ 5], r[ 5], 255,
00375                     b[ 6], g[ 6], r[ 6], 255, b[ 7], g[ 7], r[ 7], 255,
00376                     b[ 8], g[ 8], r[ 8], 255, b[ 9], g[ 9], r[ 9], 255,
00377                     b[10], g[10], r[10], 255, b[11], g[11], r[11], 255,
00378                     b[12], g[12], r[12], 255, b[13], g[13], r[13], 255,
00379                     b[14], g[14], r[14], 255, b[15], g[15], r[15], 255,
00380                 };
00381                 memcpy(dst, out, sizeof out);
00382                 dst += sizeof out;
00383                 continue;
00384             }
00385         }
00386 #endif
00387     }
00388     
00390     // 2-in-1 format splitting routines //
00392 
00393     template<class SOURCE, class SPLIT_A, class SPLIT_B> void split_frame(byte * const dest[], int count, const SOURCE * source, SPLIT_A split_a, SPLIT_B split_b)
00394     {
00395         auto a = reinterpret_cast<decltype(split_a(SOURCE())) *>(dest[0]);
00396         auto b = reinterpret_cast<decltype(split_b(SOURCE())) *>(dest[1]);
00397         for(int i=0; i<count; ++i)
00398         {
00399             *a++ = split_a(*source);
00400             *b++ = split_b(*source++);
00401         }    
00402     }
00403 
00404     struct y8i_pixel { uint8_t l, r; };
00405     void unpack_y8_y8_from_y8i(byte * const dest[], const byte * source, int count)
00406     {
00407         split_frame(dest, count, reinterpret_cast<const y8i_pixel *>(source),
00408             [](const y8i_pixel & p) -> uint8_t { return p.l; },
00409             [](const y8i_pixel & p) -> uint8_t { return p.r; });
00410     }
00411 
00412     struct y12i_pixel { uint8_t rl : 8, rh : 4, ll : 4, lh : 8; int l() const { return lh << 4 | ll; } int r() const { return rh << 8 | rl; } };
00413     void unpack_y16_y16_from_y12i_10(byte * const dest[], const byte * source, int count)
00414     {
00415         split_frame(dest, count, reinterpret_cast<const y12i_pixel *>(source),
00416             [](const y12i_pixel & p) -> uint16_t { return p.l() << 6 | p.l() >> 4; },  // We want to convert 10-bit data to 16-bit data
00417             [](const y12i_pixel & p) -> uint16_t { return p.r() << 6 | p.r() >> 4; }); // Multiply by 64 1/16 to efficiently approximate 65535/1023
00418     }
00419 
00420     struct f200_inzi_pixel { uint16_t z16; uint8_t y8; };
00421     void unpack_z16_y8_from_f200_inzi(byte * const dest[], const byte * source, int count)
00422     {
00423         split_frame(dest, count, reinterpret_cast<const f200_inzi_pixel *>(source),
00424             [](const f200_inzi_pixel & p) -> uint16_t { return p.z16; },
00425             [](const f200_inzi_pixel & p) -> uint8_t { return p.y8; });
00426     }
00427 
00428     void unpack_z16_y16_from_f200_inzi(byte * const dest[], const byte * source, int count)
00429     {
00430         split_frame(dest, count, reinterpret_cast<const f200_inzi_pixel *>(source),
00431             [](const f200_inzi_pixel & p) -> uint16_t { return p.z16; },
00432             [](const f200_inzi_pixel & p) -> uint16_t { return p.y8 | p.y8 << 8; });
00433     }
00434 
00435     void unpack_z16_y8_from_sr300_inzi(byte * const dest[], const byte * source, int count)
00436     {
00437         auto in = reinterpret_cast<const uint16_t *>(source);
00438         auto out_ir = reinterpret_cast<uint8_t *>(dest[1]);            
00439         for(int i=0; i<count; ++i) *out_ir++ = *in++ >> 2;
00440         memcpy(dest[0], in, count*2);
00441     }
00442 
00443     void unpack_z16_y16_from_sr300_inzi (byte * const dest[], const byte * source, int count)
00444     {
00445         auto in = reinterpret_cast<const uint16_t *>(source);
00446         auto out_ir = reinterpret_cast<uint16_t *>(dest[1]);
00447         for(int i=0; i<count; ++i) *out_ir++ = *in++ << 6;
00448         memcpy(dest[0], in, count*2);
00449     }
00450 
00451 #pragma GCC diagnostic push
00452 #pragma GCC diagnostic ignored "-Wmultichar"
00453 #ifdef __APPLE
00454 #pragma GCC diagnostic ignored "-Wfour-char-constants"
00455 #endif
00456 
00457     // Native pixel formats //
00459     const native_pixel_format pf_raw8       = { 'RAW8', 1, 1,{  { false, &copy_pixels<1>,                   { { RS_STREAM_FISHEYE,  RS_FORMAT_RAW8 } } } } };
00460     const native_pixel_format pf_rw16       = { 'RW16', 1, 2,{  { false, &copy_pixels<2>,                   { { RS_STREAM_COLOR,    RS_FORMAT_RAW16 } } } } };
00461     const native_pixel_format pf_rw10       = { 'pRAA', 1, 1,{  { false, &copy_raw10,                       { { RS_STREAM_COLOR,    RS_FORMAT_RAW10 } } } } };
00462     const native_pixel_format pf_yuy2       = { 'YUY2', 1, 2,{  { true,  &unpack_yuy2<RS_FORMAT_RGB8 >,     { { RS_STREAM_COLOR,    RS_FORMAT_RGB8 } } },
00463                                                                 { false, &copy_pixels<2>,                   { { RS_STREAM_COLOR,    RS_FORMAT_YUYV } } },
00464                                                                 { true,  &unpack_yuy2<RS_FORMAT_RGBA8>,     { { RS_STREAM_COLOR,    RS_FORMAT_RGBA8 } } },
00465                                                                 { true,  &unpack_yuy2<RS_FORMAT_BGR8 >,     { { RS_STREAM_COLOR,    RS_FORMAT_BGR8 } } },
00466                                                                 { true,  &unpack_yuy2<RS_FORMAT_BGRA8>,     { { RS_STREAM_COLOR,    RS_FORMAT_BGRA8 } } } } };
00467     const native_pixel_format pf_y8         = { 'GREY', 1, 1,{  { false, &copy_pixels<1>,                   { { RS_STREAM_INFRARED, RS_FORMAT_Y8 } } } } };
00468     const native_pixel_format pf_y16        = { 'Y16 ', 1, 2,{  { true,  &unpack_y16_from_y16_10,           { { RS_STREAM_INFRARED, RS_FORMAT_Y16 } } } } };
00469     const native_pixel_format pf_y8i        = { 'Y8I ', 1, 2,{  { true,  &unpack_y8_y8_from_y8i,            { { RS_STREAM_INFRARED, RS_FORMAT_Y8 },{ RS_STREAM_INFRARED2, RS_FORMAT_Y8 } } } } };
00470     const native_pixel_format pf_y12i       = { 'Y12I', 1, 3,{  { true,  &unpack_y16_y16_from_y12i_10,      { { RS_STREAM_INFRARED, RS_FORMAT_Y16 },{ RS_STREAM_INFRARED2, RS_FORMAT_Y16 } } } } };
00471     const native_pixel_format pf_z16        = { 'Z16 ', 1, 2,{  { false, &copy_pixels<2>,                   { { RS_STREAM_DEPTH,    RS_FORMAT_Z16 } } },
00472                                                                 { false, &copy_pixels<2>,                   { { RS_STREAM_DEPTH,    RS_FORMAT_DISPARITY16 } } } } };
00473     const native_pixel_format pf_invz       = { 'Z16 ', 1, 2, { { false, &copy_pixels<2>,                   { { RS_STREAM_DEPTH,    RS_FORMAT_Z16 } } } } };
00474     const native_pixel_format pf_f200_invi  = { 'Y10 ', 1, 1, { { false, &copy_pixels<1>,                   { { RS_STREAM_INFRARED, RS_FORMAT_Y8 } } },
00475                                                                 { true,  &unpack_y16_from_y8,               { { RS_STREAM_INFRARED, RS_FORMAT_Y16 } } } } };
00476     const native_pixel_format pf_f200_inzi  = { 'INZI', 1, 3,{  { true,  &unpack_z16_y8_from_f200_inzi,     { { RS_STREAM_DEPTH,    RS_FORMAT_Z16 },{ RS_STREAM_INFRARED, RS_FORMAT_Y8 } } },
00477                                                                 { true,  &unpack_z16_y16_from_f200_inzi,    { { RS_STREAM_DEPTH,    RS_FORMAT_Z16 },{ RS_STREAM_INFRARED, RS_FORMAT_Y16 } } } } };
00478     const native_pixel_format pf_sr300_invi = { 'Y10 ', 1, 2,{  { true,  &unpack_y8_from_y16_10,            { { RS_STREAM_INFRARED, RS_FORMAT_Y8 } } },
00479                                                                 { true,  &unpack_y16_from_y16_10,           { { RS_STREAM_INFRARED, RS_FORMAT_Y16 } } } } };
00480     const native_pixel_format pf_sr300_inzi = { 'INZI', 2, 2,{  { true,  &unpack_z16_y8_from_sr300_inzi,    { { RS_STREAM_DEPTH,    RS_FORMAT_Z16 },{ RS_STREAM_INFRARED, RS_FORMAT_Y8 } } },
00481                                                                 { true,  &unpack_z16_y16_from_sr300_inzi,   { { RS_STREAM_DEPTH,    RS_FORMAT_Z16 },{ RS_STREAM_INFRARED, RS_FORMAT_Y16 } } } } };
00482 #pragma GCC diagnostic pop
00483 
00485     // Deprojection //
00487 
00488     template<class MAP_DEPTH> void deproject_depth(float * points, const rs_intrinsics & intrin, const uint16_t * depth, MAP_DEPTH map_depth)
00489     {
00490         for(int y=0; y<intrin.height; ++y)
00491         {
00492             for(int x=0; x<intrin.width; ++x)
00493             {
00494                 const float pixel[] = { (float) x, (float) y};
00495                 rs_deproject_pixel_to_point(points, &intrin, pixel, map_depth(*depth++));
00496                 points += 3;                
00497             }
00498         }    
00499     }
00500 
00501     void deproject_z(float * points, const rs_intrinsics & z_intrin, const uint16_t * z_pixels, float z_scale)
00502     {
00503         deproject_depth(points, z_intrin, z_pixels, [z_scale](uint16_t z) { return z_scale * z; });
00504     }
00505 
00506     void deproject_disparity(float * points, const rs_intrinsics & disparity_intrin, const uint16_t * disparity_pixels, float disparity_scale)
00507     {
00508         deproject_depth(points, disparity_intrin, disparity_pixels, [disparity_scale](uint16_t disparity) { return disparity_scale / disparity; });
00509     }
00510 
00512     // Image alignment //
00514 
00515     template<class GET_DEPTH, class TRANSFER_PIXEL> void align_images(const rs_intrinsics & depth_intrin, const rs_extrinsics & depth_to_other, const rs_intrinsics & other_intrin, GET_DEPTH get_depth, TRANSFER_PIXEL transfer_pixel)
00516     {
00517         // Iterate over the pixels of the depth image    
00518 #pragma omp parallel for schedule(dynamic)
00519         for(int depth_y = 0; depth_y < depth_intrin.height; ++depth_y)
00520         {
00521             int depth_pixel_index = depth_y * depth_intrin.width;
00522             for(int depth_x = 0; depth_x < depth_intrin.width; ++depth_x, ++depth_pixel_index)
00523             {
00524                 // Skip over depth pixels with the value of zero, we have no depth data so we will not write anything into our aligned images
00525                 if(float depth = get_depth(depth_pixel_index))
00526                 {
00527                     // Map the top-left corner of the depth pixel onto the other image
00528                     float depth_pixel[2] = {depth_x-0.5f, depth_y-0.5f}, depth_point[3], other_point[3], other_pixel[2];
00529                     rs_deproject_pixel_to_point(depth_point, &depth_intrin, depth_pixel, depth);
00530                     rs_transform_point_to_point(other_point, &depth_to_other, depth_point);
00531                     rs_project_point_to_pixel(other_pixel, &other_intrin, other_point);
00532                     const int other_x0 = static_cast<int>(other_pixel[0] + 0.5f);
00533                     const int other_y0 = static_cast<int>(other_pixel[1] + 0.5f);
00534 
00535                     // Map the bottom-right corner of the depth pixel onto the other image
00536                     depth_pixel[0] = depth_x+0.5f; depth_pixel[1] = depth_y+0.5f;
00537                     rs_deproject_pixel_to_point(depth_point, &depth_intrin, depth_pixel, depth);
00538                     rs_transform_point_to_point(other_point, &depth_to_other, depth_point);
00539                     rs_project_point_to_pixel(other_pixel, &other_intrin, other_point);
00540                     const int other_x1 = static_cast<int>(other_pixel[0] + 0.5f);
00541                     const int other_y1 = static_cast<int>(other_pixel[1] + 0.5f);
00542 
00543                     if(other_x0 < 0 || other_y0 < 0 || other_x1 >= other_intrin.width || other_y1 >= other_intrin.height) continue;
00544 
00545                     // Transfer between the depth pixels and the pixels inside the rectangle on the other image
00546                     for(int y=other_y0; y<=other_y1; ++y) for(int x=other_x0; x<=other_x1; ++x) transfer_pixel(depth_pixel_index, y * other_intrin.width + x);
00547                 }
00548             }
00549         }    
00550     }
00551 
00552     void align_z_to_other(byte * z_aligned_to_other, const uint16_t * z_pixels, float z_scale, const rs_intrinsics & z_intrin, const rs_extrinsics & z_to_other, const rs_intrinsics & other_intrin)
00553     {
00554         auto out_z = (uint16_t *)(z_aligned_to_other);
00555         align_images(z_intrin, z_to_other, other_intrin, 
00556             [z_pixels, z_scale](int z_pixel_index) { return z_scale * z_pixels[z_pixel_index]; },
00557             [out_z, z_pixels](int z_pixel_index, int other_pixel_index) { out_z[other_pixel_index] = out_z[other_pixel_index] ? std::min(out_z[other_pixel_index],z_pixels[z_pixel_index]) : z_pixels[z_pixel_index]; });
00558     }
00559 
00560     void align_disparity_to_other(byte * disparity_aligned_to_other, const uint16_t * disparity_pixels, float disparity_scale, const rs_intrinsics & disparity_intrin, const rs_extrinsics & disparity_to_other, const rs_intrinsics & other_intrin)
00561     {
00562         auto out_disparity = (uint16_t *)(disparity_aligned_to_other);
00563         align_images(disparity_intrin, disparity_to_other, other_intrin, 
00564             [disparity_pixels, disparity_scale](int disparity_pixel_index) { return disparity_scale / disparity_pixels[disparity_pixel_index]; },
00565             [out_disparity, disparity_pixels](int disparity_pixel_index, int other_pixel_index) { out_disparity[other_pixel_index] = disparity_pixels[disparity_pixel_index]; });
00566     }
00567 
00568     template<int N> struct bytes { char b[N]; };
00569     template<int N, class GET_DEPTH> void align_other_to_depth_bytes(byte * other_aligned_to_depth, GET_DEPTH get_depth, const rs_intrinsics & depth_intrin, const rs_extrinsics & depth_to_other, const rs_intrinsics & other_intrin, const byte * other_pixels)
00570     {
00571         auto in_other = (const bytes<N> *)(other_pixels);
00572         auto out_other = (bytes<N> *)(other_aligned_to_depth);
00573         align_images(depth_intrin, depth_to_other, other_intrin, get_depth,
00574             [out_other, in_other](int depth_pixel_index, int other_pixel_index) { out_other[depth_pixel_index] = in_other[other_pixel_index]; });
00575     }
00576 
00577     template<class GET_DEPTH> void align_other_to_depth(byte * other_aligned_to_depth, GET_DEPTH get_depth, const rs_intrinsics & depth_intrin, const rs_extrinsics & depth_to_other, const rs_intrinsics & other_intrin, const byte * other_pixels, rs_format other_format)
00578     {
00579         switch(other_format)
00580         {
00581         case RS_FORMAT_Y8: 
00582             align_other_to_depth_bytes<1>(other_aligned_to_depth, get_depth, depth_intrin, depth_to_other, other_intrin, other_pixels); break;
00583         case RS_FORMAT_Y16: case RS_FORMAT_Z16: 
00584             align_other_to_depth_bytes<2>(other_aligned_to_depth, get_depth, depth_intrin, depth_to_other, other_intrin, other_pixels); break;
00585         case RS_FORMAT_RGB8: case RS_FORMAT_BGR8: 
00586             align_other_to_depth_bytes<3>(other_aligned_to_depth, get_depth, depth_intrin, depth_to_other, other_intrin, other_pixels); break;
00587         case RS_FORMAT_RGBA8: case RS_FORMAT_BGRA8: 
00588             align_other_to_depth_bytes<4>(other_aligned_to_depth, get_depth, depth_intrin, depth_to_other, other_intrin, other_pixels); break;
00589         default: 
00590             assert(false); // NOTE: rs_align_other_to_depth_bytes<2>(...) is not appropriate for RS_FORMAT_YUYV/RS_FORMAT_RAW10 images, no logic prevents U/V channels from being written to one another
00591         }
00592     }
00593 
00594     void align_other_to_z(byte * other_aligned_to_z, const uint16_t * z_pixels, float z_scale, const rs_intrinsics & z_intrin, const rs_extrinsics & z_to_other, const rs_intrinsics & other_intrin, const byte * other_pixels, rs_format other_format)
00595     {
00596         align_other_to_depth(other_aligned_to_z, [z_pixels, z_scale](int z_pixel_index) { return z_scale * z_pixels[z_pixel_index]; }, z_intrin, z_to_other, other_intrin, other_pixels, other_format);
00597     }
00598 
00599     void align_other_to_disparity(byte * other_aligned_to_disparity, const uint16_t * disparity_pixels, float disparity_scale, const rs_intrinsics & disparity_intrin, const rs_extrinsics & disparity_to_other, const rs_intrinsics & other_intrin, const byte * other_pixels, rs_format other_format)
00600     {
00601         align_other_to_depth(other_aligned_to_disparity, [disparity_pixels, disparity_scale](int disparity_pixel_index) { return disparity_scale / disparity_pixels[disparity_pixel_index]; }, disparity_intrin, disparity_to_other, other_intrin, other_pixels, other_format);
00602     }
00603 
00605     // Image rectification //
00607 
00608     std::vector<int> compute_rectification_table(const rs_intrinsics & rect_intrin, const rs_extrinsics & rect_to_unrect, const rs_intrinsics & unrect_intrin)
00609     {   
00610         std::vector<int> rectification_table;
00611         rectification_table.resize(rect_intrin.width * rect_intrin.height);
00612         align_images(rect_intrin, rect_to_unrect, unrect_intrin, [](int) { return 1.0f; },
00613             [&rectification_table](int rect_pixel_index, int unrect_pixel_index) { rectification_table[rect_pixel_index] = unrect_pixel_index; });
00614         return rectification_table;
00615     }
00616 
00617     template<class T> void rectify_image_pixels(T * rect_pixels, const std::vector<int> & rectification_table, const T * unrect_pixels)
00618     {
00619         for(auto entry : rectification_table) *rect_pixels++ = unrect_pixels[entry];
00620     }
00621 
00622     void rectify_image(uint8_t * rect_pixels, const std::vector<int> & rectification_table, const uint8_t * unrect_pixels, rs_format format)
00623     {
00624         switch(format)
00625         {
00626         case RS_FORMAT_Y8: 
00627             return rectify_image_pixels((bytes<1> *)rect_pixels, rectification_table, (const bytes<1> *)unrect_pixels);
00628         case RS_FORMAT_Y16: case RS_FORMAT_Z16: 
00629             return rectify_image_pixels((bytes<2> *)rect_pixels, rectification_table, (const bytes<2> *)unrect_pixels);
00630         case RS_FORMAT_RGB8: case RS_FORMAT_BGR8: 
00631             return rectify_image_pixels((bytes<3> *)rect_pixels, rectification_table, (const bytes<3> *)unrect_pixels);
00632         case RS_FORMAT_RGBA8: case RS_FORMAT_BGRA8: 
00633             return rectify_image_pixels((bytes<4> *)rect_pixels, rectification_table, (const bytes<4> *)unrect_pixels);
00634         default: 
00635             assert(false); // NOTE: rectify_image_pixels(...) is not appropriate for RS_FORMAT_YUYV images, no logic prevents U/V channels from being written to one another
00636         }
00637     }
00638 }
00639 
00640 #pragma pack(pop)