.. _program_listing_file__tmp_ws_src_vitis_common_include_imgproc_xf_mean_shift_kernel.hpp: Program Listing for File xf_mean_shift_kernel.hpp ================================================= |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/imgproc/xf_mean_shift_kernel.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* * Copyright 2019 Xilinx, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _XF_MEAN_SHIFT_HPP_ #define _XF_MEAN_SHIFT_HPP_ #include "hls_stream.h" #include "../core/xf_math.h" // flag to enable regressive object feature copying #define QU_COPY 0 #define _MST_SETUP_FLAG_ 0 // change the datatypes before configuring TOTAL BINS #define _MST_TOTAL_BINS_ 512 /* LUT to store squared values which is used in kernel computation which is * used in Histogram calculation and weight function */ static unsigned int xFTrackmulKernelLut[200] = { 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256, 289, 324, 361, 400, 441, 484, 529, 576, 625, 676, 729, 784, 841, 900, 961, 1024, 1089, 1156, 1225, 1296, 1369, 1444, 1521, 1600, 1681, 1764, 1849, 1936, 2025, 2116, 2209, 2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025, 3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969, 4096, 4225, 4356, 4489, 4624, 4761, 4900, 5041, 5184, 5329, 5476, 5625, 5776, 5929, 6084, 6241, 6400, 6561, 6724, 6889, 7056, 7225, 7396, 7569, 7744, 7921, 8100, 8281, 8464, 8649, 8836, 9025, 9216, 9409, 9604, 9801, 10000, 10201, 10404, 10609, 10816, 11025, 11236, 11449, 11664, 11881, 12100, 12321, 12544, 12769, 12996, 13225, 13456, 13689, 13924, 14161, 14400, 14641, 14884, 15129, 15376, 15625, 15876, 16129, 16384, 16641, 16900, 17161, 17424, 17689, 17956, 18225, 18496, 18769, 19044, 19321, 19600, 19881, 20164, 20449, 20736, 21025, 21316, 21609, 21904, 22201, 22500, 22801, 23104, 23409, 23716, 24025, 24336, 24649, 24964, 25281, 25600, 25921, 26244, 26569, 26896, 27225, 27556, 27889, 28224, 28561, 28900, 29241, 29584, 29929, 30276, 30625, 30976, 31329, 31684, 32041, 32400, 32761, 33124, 33489, 33856, 34225, 34596, 34969, 35344, 35721, 36100, 36481, 36864, 37249, 37636, 38025, 38416, 38809, 39204, 39601}; /* LUT to store square root values, used in xFTrackmulWeight function, * to find the weight of every pixel in the object window */ const unsigned char xFTrackmulSqrtLut[100] = { 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}; /* xFTrackmulBlkReadIn: Block reading inner loop, Reads the single pack and pushes into stream * ptr --> To store the single pack of pixels * in --> input image * rows --> Image Height * cols --> Image Width * i --> To calculate the offset * input1 --> stream into which we have to push the elements * x1 --> Top left corner x-coordinate * y1 --> Top left corner y-coordinate * buf_size --> number of elements to be read in one row */ template void xFTrackmulBlkReadIn(XF_TNAME(SRC_T, NPC) ptr[1], xf::cv::Mat& _in_mat, int i, hls::stream& input1, int x1, int y1, unsigned short buf_size) { // clang-format off #pragma HLS INLINE // clang-format on int cols = _in_mat.cols; int src_off = (cols >> XF_BITSHIFT(NPC)) * (y1 + i) + (x1 >> XF_BITSHIFT(NPC)); unsigned short size = 4 << XF_BITSHIFT(NPC); loop_blockread_inner: for (int j = 0; j < buf_size; j++) { // clang-format off #pragma HLS PIPELINE II=1 #pragma HLS LOOP_TRIPCOUNT min=20 max=IN_TC // clang-format on ptr[0] = _in_mat.read(src_off + j); input1.write(ptr[0]); } } /* xFTrackmulBlkRead: Block reading outer loop * calls the inner loop over height times * input1 --> stream into which we have to push the elements * in --> input image * rows --> Image Height * cols --> Image Width * x1 --> Top left corner x-coordinate * y1 --> Top left corner y-coordinate * obj_hgt --> height of the object * obj_wdt --> width of the object * obj_num --> object number in the video */ template void xFTrackmulBlkRead(hls::stream& input1, xf::cv::Mat& _in_mat, uint16_t x1, uint16_t y1, uint16_t obj_hgt, uint16_t obj_wdt) { XF_TNAME(SRC_T, NPC) dst[1]; unsigned short h_y = obj_hgt >> 1; unsigned short h_x = obj_wdt >> 1; unsigned short buf_size = ((x1 + (h_x << 1)) >> XF_BITSHIFT(NPC)) - (x1 >> XF_BITSHIFT(NPC)); loop_blockread_outer: for (int i = 0; i < obj_hgt; i++) { // clang-format off #pragma HLS PIPELINE #pragma HLS LOOP_TRIPCOUNT min=20 max=ROWS avg=ROWS // clang-format on xFTrackmulBlkReadIn(dst, _in_mat, i, input1, x1, y1, buf_size); } } /* xFTrackmulFindbin : To find the bin corresponds to the pixel intensity value * R,G,B --> R,G,B values of a pixel * bin --> bin value corresponding to the given pixel */ static uint16_t xFTrackmulFindbin(unsigned char R, unsigned char G, unsigned char B) { uint16_t bin; uint16_t r, g, b; r = (uint16_t)R >> 5; g = (uint16_t)G >> 5; b = (uint16_t)B >> 5; bin = r + (g << 3) + (b << 6); return bin; } /* * Bin increment and distance computation module */ template void xFTrackmulFindbinIncrement(ap_uint32_t val, short& distance, uint16_t& bin, BINTYPE BIN[ROWS * COLS], uint16_t i, uint16_t j, uint16_t obj_wdt, uint16_t h_x, uint16_t h_y, unsigned int wh) { uint8_t R = val.range(7, 0); uint8_t G = val.range(15, 8); uint8_t B = val.range(23, 16); bin = xFTrackmulFindbin(R, G, B); uint16_t y = i; uint16_t x = j; int y_off = i * obj_wdt; int loc = y_off + x; BIN[loc] = bin; int a = y - h_y; int b = x - h_x; y = __ABS(a); x = __ABS(b); int xx = xFTrackmulKernelLut[x]; int yy = xFTrackmulKernelLut[y]; short K = ((xx + yy) * wh) >> 8; // K is in 0.8 format --------> original kernel K(x,y) = (x*x+y*y)/(w/2)*(h/2) if (K <= 256) // K is in 0.8 format, comparing with '1' in Q0.8 format distance = 256 - K; } /* xFTrackmulHist: Reads the values from stream and finds the histogram of the current frame * and stores in Pu or Qu depending on frame status, Stores the bin values in _BIN array * if frame_status is '0'; store in Qu else in Pu, * input --> stream which contains the input data * x1 --> Top left corner x-coordinate * obj_hgt --> height of the object * y1 --> Top left corner y-coordinate * obj_wdt --> width of the object * Qu --> object histogram * Pu --> Array to store the histogram * BIN --> An array to store bin values * frame_status --> frame number in video */ template void xFTrackmulHist(hls::stream& input, uint16_t x1, uint16_t obj_hgt, uint16_t y1, uint16_t obj_wdt, QuPuTYPE Qu[_MST_TOTAL_BINS_], QuPuTYPE Pu[_MST_TOTAL_BINS_], BINTYPE BIN[ROWS * COLS], uint8_t frame_status) { // clang-format off #pragma HLS INLINE OFF // clang-format on // clang-format off #pragma HLS ARRAY_PARTITION variable=BIN cyclic factor=2 dim=1 #pragma HLS DEPENDENCE variable=BIN array inter false // clang-format on QuPuTYPE tmp_hist1[_MST_TOTAL_BINS_]; QuPuTYPE tmp_hist2[_MST_TOTAL_BINS_]; uint16_t buf_size; uint16_t h_y = obj_hgt >> 1; uint16_t h_x = obj_wdt >> 1; // buf_size = h_y; buf_size = ((x1 + (h_x << 1)) >> XF_BITSHIFT(NPC)) - (x1 >> XF_BITSHIFT(NPC)); char shift1, shift2; unsigned int _height1 = xf::cv::Inverse(h_y, 16, &shift1); // Q(32-shift1).shift1 -----> format unsigned int _width1 = xf::cv::Inverse(h_x, 16, &shift2); // Q(32-shift2).shift2 -----> format unsigned long int temp = _height1 * _width1; // Q(64-shift1-shift2).(shift1+shift2) -----> format unsigned int wh = temp >> (shift1 + shift2 - 16); // wh = 1/(w/2*h/2) --------> 0.16 format loop_hist_init: for (uint16_t i = 0; i < _MST_TOTAL_BINS_; i++) { tmp_hist1[i] = 0; tmp_hist2[i] = 0; } loop_hist_height: for (uint16_t i = 0; i < obj_hgt; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=20 max=ROWS // clang-format on loop_hist_width: for (uint16_t j = 0; j < buf_size; j = j + 2) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS PIPELINE #pragma HLS LOOP_TRIPCOUNT min=20 max=IN_TC // clang-format on XF_SNAME(WORDWIDTH) val1 = input.read(); XF_SNAME(WORDWIDTH) val2 = input.read(); short dist1 = 0, dist2 = 0; uint16_t bin1, bin2; xFTrackmulFindbinIncrement((ap_uint32_t)val1, dist1, bin1, BIN, i, j, obj_wdt, h_x, h_y, wh); xFTrackmulFindbinIncrement((ap_uint32_t)val2, dist2, bin2, BIN, i, j + 1, obj_wdt, h_x, h_y, wh); tmp_hist1[bin1] += dist1; tmp_hist2[bin2] += dist2; } } // Accumulate the temporary histograms loop_hist_accumulate: for (uint16_t i = 0; i < _MST_TOTAL_BINS_; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=512 max=512 avg=512 #pragma HLS PIPELINE // clang-format on if (frame_status == _MST_SETUP_FLAG_) Qu[i] = tmp_hist1[i] + tmp_hist2[i]; else Pu[i] = tmp_hist1[i] + tmp_hist2[i]; } } /*xFTrackmulSqrt: Finds the square root of the given number using an xFTrackmulSqrtLut */ static int xFTrackmulSqrt(int temp) { // clang-format off #pragma HLS INLINE // clang-format on if (temp < 100) return xFTrackmulSqrtLut[temp]; else return 10; } /* xFTrackmulWeight: Calculates the displacement in the center of the rectangle using * Histograms Pu,Qu,BIN arrays * Qu --> Model histogram * Pu --> Current frame histogram * BIN --> An array which contains the bin values * dx --> displacement of center x-coordinate * dy --> displacement of center y-coordinate * x1 --> Top left corner x-coordinate * y1 --> Top left corner y-coordinate * obj_hgt --> height of the object * obj_wdt --> width of the object * C_x --> temporary histogram used in RO, PO cases for parallel processing * C_x --> object number in the video * track --> track status of the current object * rows --> height of the image * cols --> width of the image */ template void xFTrackmulWeight(QuPuTYPE Qu[_MST_TOTAL_BINS_], QuPuTYPE Pu[_MST_TOTAL_BINS_], BINTYPE BIN[ROWS * COLS], uint16_t& x1, uint16_t& y1, uint16_t obj_hgt, uint16_t obj_wdt, uint16_t& C_x, uint16_t& C_y, bool& track, uint16_t rows, uint16_t cols) { BINTYPE loc, bin; uint16_t x, y; int total_x = 0, total_y = 0, total_w = 0, K, xx, yy; short weight; short A, B; uint16_t dispx = 0, dispy = 0; short buf_size; int y_off, a, b; unsigned short int h_x = obj_wdt >> 1; unsigned short int h_y = obj_hgt >> 1; buf_size = ((y1 + (h_y << 1)) >> XF_BITSHIFT(NPC)) - (y1 >> XF_BITSHIFT(NPC)) + 1; char shift1, shift2; unsigned int _width1 = xf::cv::Inverse(h_x, 16, &shift1); // Q(32-shift1).shift1 -----> format unsigned int _height1 = xf::cv::Inverse(h_y, 16, &shift2); // Q(32-shift2).shift2 -----> format unsigned long int temp = _width1 * _height1; // Q(64-shift1-shift2).(shift1+shift2) -----> format unsigned int wh = temp >> (shift1 + shift2 - 16); // wh = 1/(w/2*h/2) --------> 0.16 format loop_weight_height: for (uint16_t i = 0; i < obj_hgt; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=20 max=ROWS avg=ROWS // clang-format on y_off = i * obj_wdt; loop_weight_width: for (uint16_t j = 0; j < buf_size; j++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS PIPELINE #pragma HLS LOOP_TRIPCOUNT min=20 max=IN_TC avg=IN_TC // clang-format on y = i; x = j; loc = y_off + x; bin = BIN[loc]; A = y - h_y; B = x - h_x; y = __ABS(A); x = __ABS(B); xx = xFTrackmulKernelLut[x]; yy = xFTrackmulKernelLut[y]; K = ((xx + yy) * wh) >> 8; // weight computation if (K > 256 || Qu[bin] == 0 || Pu[bin] >> 8 == 0) weight = 0; else { a = Qu[bin] >> 8; b = Pu[bin] >> 8; weight = xFTrackmulSqrt(a / b); } total_y += (weight * A); total_x += (weight * B); total_w += weight; } } // displacement computation if (total_w != 0) { dispx = (long int)total_x / (long int)total_w; dispy = (long int)total_y / (long int)total_w; } else { dispx = 0; dispy = 0; } // Add the displacement to the previous center C_x += dispx; C_y += dispy; // Check if the object goes out of the frame if (C_y + h_y >= rows) { C_y = rows - h_y - 1; track = 0; } if (C_x + h_x >= cols) { C_x = cols - h_x - 1; track = 0; } if (C_y - h_y <= 0) { C_y = h_y + 1; track = 0; } if (C_x - h_x <= 0) { C_x = h_x + 1; track = 0; } // Update the corners of the rectangle x1 = (uint16_t)(C_x - h_x); y1 = (uint16_t)(C_y - h_y); } /* xFTrackmulFindhist: Reads the window from DDR3 and computes the histogram and stores in Pu or Qu * depending on frameno,and stores the bin values in the array BIN * if frameno = 0; store in Qu else in Pu * in --> Input image * rows --> Image Height * cols --> Image Width * x1 --> Top left corner x-coordinate * y1 --> Top left corner y-coordinate * obj_hgt --> height of the object * obj_wdt --> width of the object * Qu --> Array to store the histograms of the objects in the first frame * Pu --> array to store the histogram of current object * BIN --> An array to store the bin values * tmp_hist --> temporary histogram used in RO, PO cases to compute histogram in parallel * obj_num --> object number in the video * frameno --> frame number in video */ template void xFTrackmulFindhist(xf::cv::Mat& _in_mat, uint16_t x1, uint16_t y1, uint16_t obj_hgt, uint16_t obj_wdt, QuPuTYPE Qu[_MST_TOTAL_BINS_], QuPuTYPE Pu[_MST_TOTAL_BINS_], BINTYPE BIN[ROWS * COLS], uint8_t frame_status) { // clang-format off #pragma HLS INLINE OFF // clang-format on hls::stream input2; // clang-format off #pragma HLS DATAFLOW // clang-format on // Read the block from DDR and push into stream xFTrackmulBlkRead(input2, _in_mat, x1, y1, obj_hgt, obj_wdt); // Read the values from stream and find the histogram xFTrackmulHist> 1), COLS, NPC, XF_WORDWIDTH(SRC_T, NPC)>(input2, x1, obj_hgt, y1, obj_wdt, Qu, Pu, BIN, frame_status); } /* * xFTrackmulKernelFunc: Kernel function which gives the next centroid given the earlier one * in --> Input image * rows --> Image Height * cols --> Image Width * x1 --> Top left corner x-coordinate * y1 --> Top left corner y-coordinate * obj_hgt --> height of the object * obj_wdt --> width of the object * dx --> New center x-coordinate * dy --> New center y-coordinate * track --> object status, indicated if its valid for tracking in consecutive frames * frame_status --> current frame status * obj_num --> current object index * iters --> Total number of iterations for the convergence */ template void xFTrackmulKernelFunc(xf::cv::Mat& _in_mat, uint16_t x1, uint16_t y1, uint16_t obj_hgt, uint16_t obj_wdt, uint16_t& dx, uint16_t& dy, bool& track, uint8_t frame_status, uint8_t obj_num, uint8_t iters) { uint16_t C_x = ((obj_wdt) >> 1) + x1; uint16_t C_y = ((obj_hgt) >> 1) + y1; uint8_t loop_count = iters << 1; // setup the object feature for the first frame if (frame_status == _MST_SETUP_FLAG_) loop_count = 1; // static array store the original object feature, // features stored when the first frame is processed and // for remaining frames data is read from it for the kernel computation static uint32_t Qu[MAXOBJS][_MST_TOTAL_BINS_]; uint32_t Pu[_MST_TOTAL_BINS_]; // storage to hold the bins values of each pixel uint16_t BIN[ROWS * COLS]; // clang-format off #pragma HLS DEPENDENCE variable=BIN array intra false // clang-format on uint16_t h_x = obj_wdt >> 1; uint16_t h_y = obj_hgt >> 1; bool flag = 0; // For other frames, find histogram as well as centroid in iterative manner loop_iterations: for (uint8_t i = 0; i < loop_count; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=1 max=MAXITERS avg=MAXITERS // clang-format on if (flag == 0) { // Find histogram of the current frame and store in array Pu[512] xFTrackmulFindhist(_in_mat, x1, y1, obj_hgt, obj_wdt, Qu[obj_num], Pu, BIN, frame_status); flag = 1; } else { // Using Pu, Qu compute weights and displacement xFTrackmulWeight(Qu[obj_num], Pu, BIN, x1, y1, obj_hgt, obj_wdt, C_x, C_y, track, _in_mat.rows, _in_mat.cols); flag = 0; } } #if QU_COPY loop_qu_copy: for (uint16_t i = 0; i < _MST_TOTAL_BINS_; i++) { Qu[obj_num][i] = Pu[i]; } #endif dx = C_x; dy = C_y; } /* * xFTrackmulFinalTop: calls the kernel function with the corresponding object coordinates and object number * in --> Input image * rows --> Image Height * cols --> Image Width * tlx --> Top left corner x-coordinate * tly --> Top left corner y-coordinate * obj_hgt --> height of the object * obj_wdt --> width of the object * dispx --> New center x-coordinate * dispy --> New center y-coordinate * status --> object status, indicated if its valid for tracking in consecutive frames * frame_status --> '0' if first frame, else '1' * no_objects --> total number of objects for tracking * iters --> Total number of iterations for the centroid convergence, optimally '4' */ template void xFMeanShiftKernel(xf::cv::Mat& _in_mat, uint16_t tlx[MAXOBJ], uint16_t tly[MAXOBJ], uint16_t obj_hgt[MAXOBJ], uint16_t obj_wdt[MAXOBJ], uint16_t dispx[MAXOBJ], uint16_t dispy[MAXOBJ], uint16_t status[MAXOBJ], uint8_t frame_status, uint8_t no_objects, uint8_t iters) { //#pragma HLS license key=IPAUVIZ_MST uint16_t dx, dy; uint8_t a; uint16_t x1, x2, y1, y2; bool track; #ifndef __SYNTHESIS__ assert((no_objects <= MAXOBJ) && "number of objects should be less than MAX_OBJECTS"); assert((NPC == XF_NPPC1) && "NPC must be XF_NPPC1"); // assert((WORDWIDTH == XF_32UW) && // "WORDWIDTH must be XF_32UW"); assert((COLS % 2 == 0) && "object width must be in multiples of two"); #endif loop_objects: for (uint8_t i = 0; i < no_objects; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=1 max=MAXOBJ // clang-format on #ifndef __SYNTHESIS__ assert((obj_wdt[i] % 2 == 0) && "object width must be in multiples of two"); #endif a = i; x1 = tlx[i]; y1 = tly[i]; x2 = obj_wdt[i]; y2 = obj_hgt[i]; track = (bool)status[i]; #ifndef __SYNTHESIS__ assert((x2 < 700) && (y2 < 700) && "object width and height should be less than 700"); assert((x2 > 20) && (y2 > 20) && "object width and height should be greater than 20"); assert((x2 <= COLS) && "The object width must be less than the MAX_WIDTH "); assert((y2 <= ROWS) && "The object height must be less than the MAX_HEIGHT "); #endif if (track) { xFTrackmulKernelFunc> XF_BITSHIFT(NPC)), COLS, SRC_T, ROWS_IMG, COLS_IMG, MAXOBJ, (MAXITERS << 1), NPC>(_in_mat, x1, y1, x2, y2, dx, dy, track, frame_status, a, iters); } else // If non-trackable displacement is Zero { dx = 0; dy = 0; } status[a] = (uint16_t)track; dispx[a] = dx; dispy[a] = dy; } } #endif // _XF_MEAN_SHIFT_HPP_