downsample.cpp
Go to the documentation of this file.
1 // License: Apache 2.0. See LICENSE file in root directory.
2 // Copyright(c) 2019 Intel Corporation. All Rights Reserved.
3 
4 #include "downsample.h"
5 
6 #include <assert.h>
7 
8 #ifdef __SSSE3__
9 #include <emmintrin.h>
10 #include <smmintrin.h>
11 #endif
12 
13 void downsample_min_4x4(const cv::Mat& source, cv::Mat* pDest)
14 {
15  static constexpr auto DOWNSAMPLE_FACTOR = 4;
16 
17  assert(source.cols % 8 == 0);
18  assert(source.rows % 4 == 0);
19  assert(source.type() == CV_16U);
20 
21  assert(pDest->cols == source.cols / DOWNSAMPLE_FACTOR);
22  assert(pDest->rows == source.rows / DOWNSAMPLE_FACTOR);
23  assert(pDest->type() == CV_16U);
24 
25  const size_t sizeYresized = source.rows / DOWNSAMPLE_FACTOR;
26 
27 #ifdef __SSSE3__
28  __m128i ones = _mm_set1_epi16(1);
29 
30  // Note on multi-threading here, 2018-08-17
31  // This function is called for every depth image coming from RealSense
32  // without MT this function takes on Joule in average 0.47 ms
33  // with MT this function takes on Joule in average 0.15 ms
34  #pragma omp parallel for
35  for (int y = 0; y < sizeYresized; y++)
36  {
37  for (uint16_t x = 0; x < source.cols; x += 8) {
38  const int newY = y * 4;
39  // load data rows
40  __m128i A = _mm_loadu_si128((const __m128i*)&source.at<uint16_t>(newY, x));
41  __m128i B = _mm_loadu_si128((const __m128i*)&source.at<uint16_t>(newY + 1, x));
42  __m128i C = _mm_loadu_si128((const __m128i*)&source.at<uint16_t>(newY + 2, x));
43  __m128i D = _mm_loadu_si128((const __m128i*)&source.at<uint16_t>(newY + 3, x));
44 
45  // subtract 1 to shift invalid pixels to max value (16bit integer underflow)
46  A = _mm_sub_epi16(A, ones);
47  B = _mm_sub_epi16(B, ones);
48  C = _mm_sub_epi16(C, ones);
49  D = _mm_sub_epi16(D, ones);
50 
51  // calculate minimum
52  __m128i rowMin = _mm_min_epu16(D, C);
53  rowMin = _mm_min_epu16(rowMin, B);
54  rowMin = _mm_min_epu16(rowMin, A);
55 
56  __m128i shuf32 = _mm_shuffle_epi32(rowMin, _MM_SHUFFLE(2, 3, 0, 1));
57 
58  __m128i min32 = _mm_min_epu16(rowMin, shuf32);
59 
60  __m128i shuf16 = _mm_shufflelo_epi16(min32, _MM_SHUFFLE(3, 2, 0, 1));
61  shuf16 = _mm_shufflehi_epi16(shuf16, _MM_SHUFFLE(3, 2, 0, 1));
62 
63  __m128i min2 = _mm_min_epu16(min32, shuf16);
64 
65  // undo invalid pixel shifting by adding one
66  min2 = _mm_add_epi16(min2, ones);
67 
68  uint16_t minA = _mm_extract_epi16(min2, 0);
69  uint16_t minB = _mm_extract_epi16(min2, 4);
70 
71  pDest->at<uint16_t>(y, x / DOWNSAMPLE_FACTOR) = minA;
72  pDest->at<uint16_t>(y, x / DOWNSAMPLE_FACTOR + 1) = minB;
73  }
74  }
75 #else
76  const uint16_t MAX_DEPTH = 0xffff;
77 
78  // Naive implementation for reference and non-x86 platforms:
79  #pragma omp parallel for
80  for (int y = 0; y < sizeYresized; y++)
81  for (int x = 0; x < source.cols; x += DOWNSAMPLE_FACTOR)
82  {
83  uint16_t min_value = MAX_DEPTH;
84 
85  // Loop over 4x4 quad
86  for (int i = 0; i < DOWNSAMPLE_FACTOR; i++)
87  for (int j = 0; j < DOWNSAMPLE_FACTOR; j++)
88  {
89  auto pixel = source.at<uint16_t>(y * DOWNSAMPLE_FACTOR + i, x + j);
90  // Only include non-zero pixels in min calculation
91  if (pixel) min_value = std::min(min_value, pixel);
92  }
93 
94  // If no non-zero pixels were found, mark the output as zero
95  if (min_value == MAX_DEPTH) min_value = 0;
96 
97  pDest->at<uint16_t>(y, x / DOWNSAMPLE_FACTOR) = min_value;
98  }
99 #endif
100 }
101 
void downsample_min_4x4(const cv::Mat &source, cv::Mat *pDest)
Definition: downsample.cpp:13
GLint y
unsigned short uint16_t
Definition: stdint.h:79
GLdouble x
GLint j
int min(int a, int b)
Definition: lz4s.c:73
GLsizei GLsizei GLchar * source
int i
#define D(...)
Definition: usbhost.c:33


librealsense2
Author(s): Sergey Dorodnicov, Doron Hirshberg, Mark Horn, Reagan Lopez, Itay Carpis
autogenerated on Mon May 3 2021 02:47:12