.. _program_listing_file__tmp_ws_src_vitis_common_include_imgproc_xf_box_filter.hpp: Program Listing for File xf_box_filter.hpp ========================================== |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/imgproc/xf_box_filter.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* * Copyright 2019 Xilinx, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _XF_BOX_FILTER_HPP_ #define _XF_BOX_FILTER_HPP_ #ifndef __cplusplus #error C++ is needed to include this header #endif typedef unsigned short uint16_t; typedef unsigned int uint32_t; #include "hls_stream.h" #include "../common/xf_common.hpp" #include "../common/xf_utility.hpp" #define XF_DIV_VAL_3x3 3641 // 1/9 value converted into fixed point Q1.15 format. #define XF_DIV_VAL_5x5 1311 // 1/25 value converted into fixed point Q1.15 format. #define XF_DIV_VAL_7x7 669 // 1/49 value converted into fixed point Q1.15 format. namespace xf { namespace cv { template XF_PTNAME(DEPTH) xFApplyMask3x3(XF_PTNAME(DEPTH) _i00, XF_PTNAME(DEPTH) _i01, XF_PTNAME(DEPTH) _i02, XF_PTNAME(DEPTH) _i10, XF_PTNAME(DEPTH) _i11, XF_PTNAME(DEPTH) _i12, XF_PTNAME(DEPTH) _i20, XF_PTNAME(DEPTH) _i21, XF_PTNAME(DEPTH) _i22) { // clang-format off #pragma HLS INLINE off // clang-format on XF_PTNAME(DEPTH) res; ap_int<20> res_fixed; ap_int18_t g00 = (_i00 + _i02); ap_int18_t g01 = (_i20 + _i22); ap_int18_t g10 = (_i01 + _i10); ap_int18_t g11 = (_i12 + _i21); ap_int18_t g0 = (g00 + g01); ap_int18_t g1 = (g10 + g11); ap_int18_t g2 = _i11; res_fixed = (g0 + g1 + g2); res = ((res_fixed * XF_DIV_VAL_3x3) >> 15); // 1/9 in fixed point format(Q1.15) return res; } template void xFComputeMaskValues3x3(XF_PTNAME(DEPTH) * _mask_value, XF_PTNAME(DEPTH) * _l00_buf, XF_PTNAME(DEPTH) * _l10_buf, XF_PTNAME(DEPTH) * _l20_buf) { // clang-format off #pragma HLS INLINE // clang-format on ap_uint<5> filter_loop = XF_NPIXPERCYCLE(NPC); computeMaskValueLoop: for (ap_uint<5> j = 0; j < filter_loop; j++) { // clang-format off #pragma HLS unroll // clang-format on _mask_value[j] = xFApplyMask3x3(_l00_buf[j], _l00_buf[j + 1], _l00_buf[j + 2], _l10_buf[j], _l10_buf[j + 1], _l10_buf[j + 2], _l20_buf[j], _l20_buf[j + 1], _l20_buf[j + 2]); } // end of computeMaskValueLoop } template void ProcessBox3x3(xf::cv::Mat& _src_mat, xf::cv::Mat& _dst_mat, XF_SNAME(WORDWIDTH_SRC) buf[3][COLS >> XF_BITSHIFT(NPC)], XF_PTNAME(DEPTH) l00_buf[XF_NPIXPERCYCLE(NPC) + 2], XF_PTNAME(DEPTH) l10_buf[XF_NPIXPERCYCLE(NPC) + 2], XF_PTNAME(DEPTH) l20_buf[XF_NPIXPERCYCLE(NPC) + 2], XF_PTNAME(DEPTH) mask_value[XF_NPIXPERCYCLE(NPC)], XF_SNAME(WORDWIDTH_SRC) & P0, uint16_t img_width, uint16_t img_height, uint16_t& shift, ap_uint<13> row_ind, ap_uint<2> top, ap_uint<2> mid, ap_uint<2> bottom, ap_uint<13> row, int& rd_ind, int& wr_ind) { // clang-format off #pragma HLS INLINE // clang-format on XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2; ap_uint<8> npc = XF_NPIXPERCYCLE(NPC), buf_size = XF_NPIXPERCYCLE(NPC) + 2; colLoop1: for (ap_uint<13> col = 0; col < img_width; col++) // Width of the image { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=COLS_COUNT max=COLS_COUNT #pragma HLS pipeline // clang-format on if (row < img_height) { buf[row_ind][col] = _src_mat.read(rd_ind); rd_ind++; } else buf[bottom][col] = 0; buf0 = buf[top][col]; buf1 = buf[mid][col]; buf2 = buf[bottom][col]; if (NPC == XF_NPPC8) { /* Extract the data from the packed pixels */ xfExtractPixels(&l00_buf[2], buf0, 0); xfExtractPixels(&l10_buf[2], buf1, 0); xfExtractPixels(&l20_buf[2], buf2, 0); } else { l00_buf[2] = buf0; l10_buf[2] = buf1; l20_buf[2] = buf2; } xFComputeMaskValues3x3(mask_value, l00_buf, l10_buf, l20_buf); if (col == 0) { shift = 0; P0 = 0; P0loop1: xfPackPixels(&mask_value[0], P0, 1, (npc - 1), shift); } else { xfPackPixels(&mask_value[0], P0, 0, 1, shift); _dst_mat.write(wr_ind, P0); wr_ind++; shift = 0; P0 = 0; xfPackPixels(&mask_value[0], P0, 1, (npc - 1), shift); } l00_buf[0] = l00_buf[buf_size - 2]; l00_buf[1] = l00_buf[buf_size - 1]; l10_buf[0] = l10_buf[buf_size - 2]; l10_buf[1] = l10_buf[buf_size - 1]; l20_buf[0] = l20_buf[buf_size - 2]; l20_buf[1] = l20_buf[buf_size - 1]; } // end of colLoop1 } template void xFBoxFilter3x3(xf::cv::Mat& _src_mat, xf::cv::Mat& _dst_mat, uint16_t img_height, uint16_t img_width) { ap_uint<13> row_ind = 1; ap_uint<13> row, col; uint16_t shift = 0; ap_uint<2> top, mid, bottom; ap_uint<8> buf_size = XF_NPIXPERCYCLE(NPC) + 2; int rd_ind = 0, wr_ind = 0; XF_PTNAME(DEPTH) mask_value[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=mask_value complete dim=1 // clang-format on XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2; XF_PTNAME(DEPTH) l00_buf[XF_NPIXPERCYCLE(NPC) + 2], l10_buf[XF_NPIXPERCYCLE(NPC) + 2], l20_buf[XF_NPIXPERCYCLE(NPC) + 2]; // Temporary buffers to hold image data from three rows. // clang-format off #pragma HLS ARRAY_PARTITION variable=l00_buf complete dim=1 #pragma HLS ARRAY_PARTITION variable=l10_buf complete dim=1 #pragma HLS ARRAY_PARTITION variable=l20_buf complete dim=1 // clang-format on XF_SNAME(WORDWIDTH_SRC) P0; XF_SNAME(WORDWIDTH_SRC) buf[3][COLS >> XF_BITSHIFT(NPC)]; // Line Buffer to hold image row data if (USE_URAM) { // clang-format off #pragma HLS RESOURCE variable=buf core=RAM_S2P_URAM #pragma HLS array_reshape variable=buf dim=1 factor=3 cyclic // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM #pragma HLS ARRAY_PARTITION variable=buf complete dim=1 // clang-format on } bufColLoop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=COLS_COUNT max=COLS_COUNT #pragma HLS pipeline // clang-format on buf[0][col] = 0; buf[row_ind][col] = _src_mat.read(rd_ind); rd_ind++; } row_ind++; l00_buf[0] = l00_buf[1] = 0; l10_buf[0] = l10_buf[1] = 0; l20_buf[0] = l20_buf[1] = 0; ROWLOOP: for (row = 1; row < img_height + 1; row++) // Height of the image { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on // update the row index if (row_ind == 2) { top = 0; mid = 1; bottom = 2; } else if (row_ind == 0) { top = 1; mid = 2; bottom = 0; } else if (row_ind == 1) { top = 2; mid = 0; bottom = 1; } l00_buf[0] = l00_buf[1] = 0; l10_buf[0] = l10_buf[1] = 0; l20_buf[0] = l20_buf[1] = 0; P0 = 0; ProcessBox3x3( _src_mat, _dst_mat, buf, l00_buf, l10_buf, l20_buf, mask_value, P0, img_width, img_height, shift, row_ind, top, mid, bottom, row, rd_ind, wr_ind); if (NPC == XF_NPPC1) { mask_value[0] = xFApplyMask3x3(l00_buf[buf_size - 3], // Applying Mask l00_buf[buf_size - 2], 0, l10_buf[buf_size - 3], l10_buf[buf_size - 2], 0, l20_buf[buf_size - 3], l20_buf[buf_size - 2], 0); // Take care of border at last column } else { mask_value[0] = xFApplyMask3x3(l00_buf[buf_size - 2], l00_buf[buf_size - 1], 0, l10_buf[buf_size - 2], l10_buf[buf_size - 1], 0, l20_buf[buf_size - 2], l20_buf[buf_size - 1], 0); } xfPackPixels(&mask_value[0], P0, 0, 1, shift); _dst_mat.write(wr_ind, P0); wr_ind++; shift = 0; P0 = 0; row_ind++; if (row_ind == 3) { row_ind = 0; } } // end of rowLoop } // end of function template XF_PTNAME(DEPTH_SRC) xFGradient5x5(XF_PTNAME(DEPTH_SRC) D0, XF_PTNAME(DEPTH_SRC) D1, XF_PTNAME(DEPTH_SRC) D2, XF_PTNAME(DEPTH_SRC) D3, XF_PTNAME(DEPTH_SRC) D4, XF_PTNAME(DEPTH_SRC) D5, XF_PTNAME(DEPTH_SRC) D6, XF_PTNAME(DEPTH_SRC) D7, XF_PTNAME(DEPTH_SRC) D8, XF_PTNAME(DEPTH_SRC) D9, XF_PTNAME(DEPTH_SRC) D10, XF_PTNAME(DEPTH_SRC) D11, XF_PTNAME(DEPTH_SRC) D12, XF_PTNAME(DEPTH_SRC) D13, XF_PTNAME(DEPTH_SRC) D14, XF_PTNAME(DEPTH_SRC) D15, XF_PTNAME(DEPTH_SRC) D16, XF_PTNAME(DEPTH_SRC) D17, XF_PTNAME(DEPTH_SRC) D18, XF_PTNAME(DEPTH_SRC) D19, XF_PTNAME(DEPTH_SRC) D20, XF_PTNAME(DEPTH_SRC) D21, XF_PTNAME(DEPTH_SRC) D22, XF_PTNAME(DEPTH_SRC) D23, XF_PTNAME(DEPTH_SRC) D24) { // clang-format off #pragma HLS INLINE off // clang-format on XF_PTNAME(DEPTH_SRC) g = 0; XF_PTNAME(DEPTH_DST) A00 = D0 + D1 + D2; XF_PTNAME(DEPTH_DST) A01 = D3 + D4 + D5; XF_PTNAME(DEPTH_DST) A02 = D6 + D7 + D8; XF_PTNAME(DEPTH_DST) A03 = D9 + D10 + D11; XF_PTNAME(DEPTH_DST) A04 = D12 + D13 + D14; XF_PTNAME(DEPTH_DST) A05 = D15 + D16 + D17; XF_PTNAME(DEPTH_DST) A06 = D18 + D19 + D20; XF_PTNAME(DEPTH_DST) A07 = D21 + D22; XF_PTNAME(DEPTH_DST) A08 = D23 + D24; ap_int<22> A0 = A00 + A01 + A02; ap_int<22> A1 = A03 + A04 + A05; ap_int<22> A2 = A06 + A07 + A08; ap_int<25> A = A0 + A1 + A2; g = (A * XF_DIV_VAL_5x5) >> 15; return g; } template void xFComputeMask5x5(XF_PTNAME(DEPTH_SRC) * Gradientvalues, XF_PTNAME(DEPTH_SRC) * src_buf1, XF_PTNAME(DEPTH_SRC) * src_buf2, XF_PTNAME(DEPTH_SRC) * src_buf3, XF_PTNAME(DEPTH_SRC) * src_buf4, XF_PTNAME(DEPTH_SRC) * src_buf5) { // clang-format off #pragma HLS INLINE // clang-format on Compute_Grad_Loop: for (int j = 0; j < XF_NPIXPERCYCLE(NPC); j++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=8 max=8 #pragma HLS UNROLL // clang-format on Gradientvalues[j] = xFGradient5x5( src_buf1[j], src_buf1[j + 1], src_buf1[j + 2], src_buf1[j + 3], src_buf1[j + 4], src_buf2[j], src_buf2[j + 1], src_buf2[j + 2], src_buf2[j + 3], src_buf2[j + 4], src_buf3[j], src_buf3[j + 1], src_buf3[j + 2], src_buf3[j + 3], src_buf3[j + 4], src_buf4[j], src_buf4[j + 1], src_buf4[j + 2], src_buf4[j + 3], src_buf4[j + 4], src_buf5[j], src_buf5[j + 1], src_buf5[j + 2], src_buf5[j + 3], src_buf5[j + 4]); } } template void ProcessBox5x5(xf::cv::Mat& _src_mat, xf::cv::Mat& _dst_mat, XF_SNAME(WORDWIDTH_SRC) buf[5][(COLS >> XF_BITSHIFT(NPC))], XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 4], XF_PTNAME(DEPTH) src_buf2[XF_NPIXPERCYCLE(NPC) + 4], XF_PTNAME(DEPTH) src_buf3[XF_NPIXPERCYCLE(NPC) + 4], XF_PTNAME(DEPTH) src_buf4[XF_NPIXPERCYCLE(NPC) + 4], XF_PTNAME(DEPTH) src_buf5[XF_NPIXPERCYCLE(NPC) + 4], XF_PTNAME(DEPTH) GradientValues[XF_NPIXPERCYCLE(NPC)], XF_SNAME(WORDWIDTH_DST) & inter_val, uint16_t img_width, uint16_t img_height, ap_uint<13> row_ind, uint16_t& shift, ap_uint<4> tp1, ap_uint<4> tp2, ap_uint<4> mid, ap_uint<4> bottom1, ap_uint<4> bottom2, ap_uint<13> row, int& rd_ind, int& wr_ind) { // clang-format off #pragma HLS INLINE // clang-format on ap_uint<8> buf_size = XF_NPIXPERCYCLE(NPC) + 4; ap_uint<8> step = XF_PIXELDEPTH(DEPTH); ap_uint<10> max_loop = XF_WORDDEPTH(WORDWIDTH_DST); uint16_t npc = XF_NPIXPERCYCLE(NPC); XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2, buf3, buf4; Col_Loop: for (ap_uint<13> col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on if (row < img_height) buf[row_ind][col] = _src_mat.read(rd_ind++); else buf[bottom2][col] = 0; buf0 = buf[tp1][col]; buf1 = buf[tp2][col]; buf2 = buf[mid][col]; buf3 = buf[bottom1][col]; buf4 = buf[bottom2][col]; if (NPC == XF_NPPC8) { xfExtractPixels(&src_buf1[4], buf0, 0); xfExtractPixels(&src_buf2[4], buf1, 0); xfExtractPixels(&src_buf3[4], buf2, 0); xfExtractPixels(&src_buf4[4], buf3, 0); xfExtractPixels(&src_buf5[4], buf4, 0); } else { src_buf1[4] = buf0; src_buf2[4] = buf1; src_buf3[4] = buf2; src_buf4[4] = buf3; src_buf5[4] = buf4; } xFComputeMask5x5(GradientValues, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5); for (ap_uint<4> i = 0; i < 4; i++) { // clang-format off #pragma HLS unroll // clang-format on src_buf1[i] = src_buf1[buf_size - (4 - i)]; src_buf2[i] = src_buf2[buf_size - (4 - i)]; src_buf3[i] = src_buf3[buf_size - (4 - i)]; src_buf4[i] = src_buf4[buf_size - (4 - i)]; src_buf5[i] = src_buf5[buf_size - (4 - i)]; } if (col == 0) { shift = 0; inter_val = 0; xfPackPixels(&GradientValues[0], inter_val, 2, (npc - 2), shift); } else { if ((NPC == XF_NPPC8)) { xfPackPixels(&GradientValues[0], inter_val, 0, 2, shift); _dst_mat.write(wr_ind++, inter_val); shift = 0; inter_val = 0; xfPackPixels(&GradientValues[0], inter_val, 2, (npc - 2), shift); } else { if (col >= 2) { inter_val((max_loop - 1), (max_loop - step)) = GradientValues[0]; _dst_mat.write(wr_ind++, inter_val); } } } } // Col_Loop } template void xFBoxFilter5x5(xf::cv::Mat& _src_mat, xf::cv::Mat& _dst_mat, uint16_t img_height, uint16_t img_width) { ap_uint<13> row_ind, row, col; ap_uint<4> tp1, tp2, mid, bottom1, bottom2; ap_uint<8> buf_size = XF_NPIXPERCYCLE(NPC) + 4; ap_uint<8> step = XF_PIXELDEPTH(DEPTH); ap_uint<10> max_loop = XF_WORDDEPTH(WORDWIDTH_DST); uint16_t shift = 0; int rd_ind = 0, wr_ind = 0; ap_uint<8> i; XF_PTNAME(DEPTH) GradientValues[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=GradientValues complete dim=1 // clang-format on // Temporary buffers to hold image data from five rows XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 4], src_buf2[XF_NPIXPERCYCLE(NPC) + 4], src_buf3[XF_NPIXPERCYCLE(NPC) + 4], src_buf4[XF_NPIXPERCYCLE(NPC) + 4], src_buf5[XF_NPIXPERCYCLE(NPC) + 4]; // clang-format off #pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf4 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf5 complete dim=1 // clang-format on XF_SNAME(WORDWIDTH_SRC) tmp_in; XF_SNAME(WORDWIDTH_DST) inter_val = 0; // Temporary buffer to hold image data from five rows XF_SNAME(WORDWIDTH_SRC) buf[5][(COLS >> XF_BITSHIFT(NPC))]; if (USE_URAM) { // clang-format off #pragma HLS RESOURCE variable=buf core=RAM_S2P_URAM #pragma HLS array_reshape variable=buf dim=1 factor=5 cyclic // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM #pragma HLS ARRAY_PARTITION variable=buf complete dim=1 // clang-format on } row_ind = 2; Clear_Row_Loop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on buf[0][col] = 0; buf[1][col] = 0; buf[row_ind][col] = _src_mat.read(rd_ind++); } row_ind++; Read_Row2_Loop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on buf[row_ind][col] = _src_mat.read(rd_ind++); } row_ind++; Row_Loop: for (row = 2; row < img_height + 2; row++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on // modify the buffer indices to re use if (row_ind == 4) { tp1 = 0; tp2 = 1; mid = 2; bottom1 = 3; bottom2 = 4; } else if (row_ind == 0) { tp1 = 1; tp2 = 2; mid = 3; bottom1 = 4; bottom2 = 0; } else if (row_ind == 1) { tp1 = 2; tp2 = 3; mid = 4; bottom1 = 0; bottom2 = 1; } else if (row_ind == 2) { tp1 = 3; tp2 = 4; mid = 0; bottom1 = 1; bottom2 = 2; } else if (row_ind == 3) { tp1 = 4; tp2 = 0; mid = 1; bottom1 = 2; bottom2 = 3; } src_buf1[0] = src_buf1[1] = src_buf1[2] = src_buf1[3] = 0; src_buf2[0] = src_buf2[1] = src_buf2[2] = src_buf2[3] = 0; src_buf3[0] = src_buf3[1] = src_buf3[2] = src_buf3[3] = 0; src_buf4[0] = src_buf4[1] = src_buf4[2] = src_buf4[3] = 0; src_buf5[0] = src_buf5[1] = src_buf5[2] = src_buf5[3] = 0; inter_val = 0; ProcessBox5x5( _src_mat, _dst_mat, buf, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, GradientValues, inter_val, img_width, img_height, row_ind, shift, tp1, tp2, mid, bottom1, bottom2, row, rd_ind, wr_ind); if ((NPC == XF_NPPC8) || (NPC == XF_NPPC16)) { for (i = 0; i < 6; i++) { // clang-format off #pragma HLS unroll // clang-format on src_buf1[buf_size + i - (XF_NPIXPERCYCLE(NPC)) + 2] = 0; src_buf2[buf_size + i - (XF_NPIXPERCYCLE(NPC)) + 2] = 0; src_buf3[buf_size + i - (XF_NPIXPERCYCLE(NPC)) + 2] = 0; src_buf4[buf_size + i - (XF_NPIXPERCYCLE(NPC)) + 2] = 0; src_buf5[buf_size + i - (XF_NPIXPERCYCLE(NPC)) + 2] = 0; } GradientValues[0] = xFGradient5x5( src_buf1[0], src_buf1[1], src_buf1[2], src_buf1[3], 0, src_buf2[0], src_buf2[1], src_buf2[2], src_buf2[3], 0, src_buf3[0], src_buf3[1], src_buf3[2], src_buf3[3], 0, src_buf4[0], src_buf4[1], src_buf4[2], src_buf4[3], 0, src_buf5[0], src_buf5[1], src_buf5[2], src_buf5[3], 0); GradientValues[1] = xFGradient5x5( src_buf1[1], src_buf1[2], src_buf1[3], 0, 0, src_buf2[1], src_buf2[2], src_buf2[3], 0, 0, src_buf3[1], src_buf3[2], src_buf3[3], 0, 0, src_buf4[1], src_buf4[2], src_buf4[3], 0, 0, src_buf5[1], src_buf5[2], src_buf5[3], 0, 0); xfPackPixels(&GradientValues[0], inter_val, 0, 2, shift); _dst_mat.write(wr_ind++, inter_val); } else { // clang-format off #pragma HLS ALLOCATION function instances=xFGradient5x5 limit=1 // clang-format on GradientValues[0] = xFGradient5x5( src_buf1[buf_size - 5], src_buf1[buf_size - 4], src_buf1[buf_size - 3], src_buf1[buf_size - 2], 0, src_buf2[buf_size - 5], src_buf2[buf_size - 4], src_buf2[buf_size - 3], src_buf2[buf_size - 2], 0, src_buf3[buf_size - 5], src_buf3[buf_size - 4], src_buf3[buf_size - 3], src_buf3[buf_size - 2], 0, src_buf4[buf_size - 5], src_buf4[buf_size - 4], src_buf4[buf_size - 3], src_buf4[buf_size - 2], 0, src_buf5[buf_size - 5], src_buf5[buf_size - 4], src_buf5[buf_size - 3], src_buf5[buf_size - 2], 0); inter_val((max_loop - 1), (max_loop - step)) = GradientValues[0]; _dst_mat.write(wr_ind++, inter_val); GradientValues[0] = xFGradient5x5( src_buf1[buf_size - 4], src_buf1[buf_size - 3], src_buf1[buf_size - 2], 0, 0, src_buf2[buf_size - 4], src_buf2[buf_size - 3], src_buf2[buf_size - 2], 0, 0, src_buf3[buf_size - 4], src_buf3[buf_size - 3], src_buf3[buf_size - 2], 0, 0, src_buf4[buf_size - 4], src_buf4[buf_size - 3], src_buf4[buf_size - 2], 0, 0, src_buf5[buf_size - 4], src_buf5[buf_size - 3], src_buf5[buf_size - 2], 0, 0); inter_val((max_loop - 1), (max_loop - step)) = GradientValues[0]; _dst_mat.write(wr_ind++, inter_val); } row_ind++; if (row_ind == 5) { row_ind = 0; } } // Row_Loop } // end of xFBoxFilter5x5 template XF_PTNAME(DEPTH_SRC) xFGradient7x7(XF_PTNAME(DEPTH_SRC) * src_buf1, XF_PTNAME(DEPTH_SRC) * src_buf2, XF_PTNAME(DEPTH_SRC) * src_buf3, XF_PTNAME(DEPTH_SRC) * src_buf4, XF_PTNAME(DEPTH_SRC) * src_buf5, XF_PTNAME(DEPTH_SRC) * src_buf6, XF_PTNAME(DEPTH_SRC) * src_buf7) { // clang-format off #pragma HLS INLINE off #pragma HLS PIPELINE II=1 // clang-format on XF_PTNAME(DEPTH_SRC) g_y = 0; ap_int<20> A00 = src_buf1[0] + src_buf1[1] + src_buf1[2]; ap_int<20> A01 = src_buf1[3] + src_buf1[4]; ap_int<20> A02 = src_buf1[5] + src_buf1[6]; ap_int<20> A03 = src_buf2[0] + src_buf2[1] + src_buf2[2]; ap_int<20> A04 = src_buf2[3] + src_buf2[4]; ap_int<20> A05 = src_buf2[5] + src_buf2[6]; ap_int<20> A06 = src_buf3[0] + src_buf3[1] + src_buf3[2]; ap_int<20> A07 = src_buf3[3] + src_buf3[4]; ap_int<20> A08 = src_buf3[5] + src_buf3[6]; ap_int<20> A09 = src_buf4[0] + src_buf4[1] + src_buf4[2]; ap_int<20> A10 = src_buf4[3] + src_buf4[4]; ap_int<20> A11 = src_buf4[5] + src_buf4[6]; ap_int<20> A12 = src_buf5[0] + src_buf5[1] + src_buf5[2]; ap_int<20> A13 = src_buf5[3] + src_buf5[4]; ap_int<20> A14 = src_buf5[5] + src_buf5[6]; ap_int<20> A15 = src_buf6[0] + src_buf6[1] + src_buf6[2]; ap_int<20> A16 = src_buf6[3] + src_buf6[4]; ap_int<20> A17 = src_buf6[5] + src_buf6[6]; ap_int<20> A18 = src_buf7[0] + src_buf7[1] + src_buf7[2]; ap_int<20> A19 = src_buf7[3] + src_buf7[4]; ap_int<20> A20 = src_buf7[5] + src_buf7[6]; ap_int<24> A0 = A00 + A01 + A02; ap_int<24> A1 = A03 + A04 + A05; ap_int<24> A2 = A06 + A07 + A08; ap_int<24> A3 = A09 + A10 + A11; ap_int<24> A4 = A12 + A13 + A14; ap_int<24> A5 = A15 + A16 + A17; ap_int<24> A6 = A18 + A19 + A20; ap_int<26> S00 = A0 + A1 + A2; ap_int<26> S01 = A3 + A4; ap_int<26> S02 = A5 + A6; ap_int<26> S0 = S00 + S01; ap_int<26> S = S0 + S02; ap_int<32> res = ((S * XF_DIV_VAL_7x7) >> 15); g_y = (XF_PTNAME(DEPTH_SRC))res; return g_y; } template void xFComputeMask7x7(XF_PTNAME(DEPTH_SRC) * Gradientvalues, XF_PTNAME(DEPTH_SRC) * src_buf1, XF_PTNAME(DEPTH_SRC) * src_buf2, XF_PTNAME(DEPTH_SRC) * src_buf3, XF_PTNAME(DEPTH_SRC) * src_buf4, XF_PTNAME(DEPTH_SRC) * src_buf5, XF_PTNAME(DEPTH_SRC) * src_buf6, XF_PTNAME(DEPTH_SRC) * src_buf7) { // clang-format off #pragma HLS INLINE // clang-format on for (ap_uint<9> j = 0; j < XF_NPIXPERCYCLE(NPC); j++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=8 max=8 #pragma HLS UNROLL // clang-format on Gradientvalues[j] = xFGradient7x7(&src_buf1[j], &src_buf2[j], &src_buf3[j], &src_buf4[j], &src_buf5[j], &src_buf6[j], &src_buf7[j]); } } template void xFCopyDataBox(XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf6[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf7[XF_NPIXPERCYCLE(NPC) + 6]) { // clang-format off #pragma HLS INLINE // clang-format on ap_uint<5> buf_size = (XF_NPIXPERCYCLE(NPC) + 6); for (ap_uint<4> i = 0; i < 6; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=6 max=6 #pragma HLS unroll // clang-format on src_buf1[i] = src_buf1[buf_size - (6 - i)]; src_buf2[i] = src_buf2[buf_size - (6 - i)]; src_buf3[i] = src_buf3[buf_size - (6 - i)]; src_buf4[i] = src_buf4[buf_size - (6 - i)]; src_buf5[i] = src_buf5[buf_size - (6 - i)]; src_buf6[i] = src_buf6[buf_size - (6 - i)]; src_buf7[i] = src_buf7[buf_size - (6 - i)]; } } template void xFExtractDataBox(XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf6[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf7[XF_NPIXPERCYCLE(NPC) + 6], XF_SNAME(WORDWIDTH_SRC) buf0, XF_SNAME(WORDWIDTH_SRC) buf1, XF_SNAME(WORDWIDTH_SRC) buf2, XF_SNAME(WORDWIDTH_SRC) buf3, XF_SNAME(WORDWIDTH_SRC) buf4, XF_SNAME(WORDWIDTH_SRC) buf5, XF_SNAME(WORDWIDTH_SRC) buf6) { // clang-format off #pragma HLS INLINE // clang-format on xfExtractPixels(&src_buf1[6], buf0, 0); xfExtractPixels(&src_buf2[6], buf1, 0); xfExtractPixels(&src_buf3[6], buf2, 0); xfExtractPixels(&src_buf4[6], buf3, 0); xfExtractPixels(&src_buf5[6], buf4, 0); xfExtractPixels(&src_buf6[6], buf5, 0); xfExtractPixels(&src_buf7[6], buf6, 0); } template void ProcessBox7x7(xf::cv::Mat& _src_mat, xf::cv::Mat& _dst_mat, XF_SNAME(WORDWIDTH_SRC) buf[5][(COLS >> XF_BITSHIFT(NPC))], XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf2[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf3[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf4[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf5[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf6[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf7[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) GradientValues[XF_NPIXPERCYCLE(NPC)], XF_SNAME(WORDWIDTH_DST) & inter_val, uint16_t img_width, uint16_t img_height, ap_uint<13> row_ind, uint16_t& shiftx, ap_uint<5> tp1, ap_uint<5> tp2, ap_uint<5> tp3, ap_uint<5> mid, ap_uint<5> bottom1, ap_uint<5> bottom2, ap_uint<5> bottom3, ap_uint<13> row, int& rd_ind, int& wr_ind) { // clang-format off #pragma HLS INLINE // clang-format on XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2, buf3, buf4, buf5, buf6; ap_uint<8> buf_size = (XF_NPIXPERCYCLE(NPC) + 6); ap_uint<10> max_loop = XF_WORDDEPTH(WORDWIDTH_DST); ap_uint<9> npc = XF_NPIXPERCYCLE(NPC); ap_uint<8> step = XF_PIXELDEPTH(DEPTH); Col_Loop: for (ap_uint<13> col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on if (row < img_height) buf[row_ind][col] = _src_mat.read(rd_ind++); else buf[bottom3][col] = 0; buf0 = buf[tp1][col]; buf1 = buf[tp2][col]; buf2 = buf[tp3][col]; buf3 = buf[mid][col]; buf4 = buf[bottom1][col]; buf5 = buf[bottom2][col]; buf6 = buf[bottom3][col]; if (NPC == XF_NPPC8) { xFExtractDataBox(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7, buf0, buf1, buf2, buf3, buf4, buf5, buf6); } else { src_buf1[6] = buf0; src_buf2[6] = buf1; src_buf3[6] = buf2; src_buf4[6] = buf3; src_buf5[6] = buf4; src_buf6[6] = buf5; src_buf7[6] = buf6; } xFComputeMask7x7(GradientValues, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7); xFCopyDataBox(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7); if (col == 0) { shiftx = 0; inter_val = 0; xfPackPixels(&GradientValues[0], inter_val, 3, (npc - 3), shiftx); } else { if ((NPC == XF_NPPC8)) { xfPackPixels(&GradientValues[0], inter_val, 0, 3, shiftx); _dst_mat.write(wr_ind++, inter_val); shiftx = 0; inter_val = 0; xfPackPixels(&GradientValues[0], inter_val, 3, (npc - 3), shiftx); } else { if (col >= 3) { inter_val((max_loop - 1), (max_loop - step)) = GradientValues[0]; _dst_mat.write(wr_ind++, inter_val); } } } } // Col_Loop } template void RightBorderBox7x7(xf::cv::Mat& _dst_mat, XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf2[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf3[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf4[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf5[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf6[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf7[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) GradientValues[XF_NPIXPERCYCLE(NPC)], XF_SNAME(WORDWIDTH_DST) & inter_val, uint16_t img_width, uint16_t img_height, ap_uint<13> row_ind, uint16_t& shiftx, ap_uint<13> row, int& wr_ind) { // clang-format off #pragma HLS INLINE // clang-format on ap_uint<4> i; ap_uint<8> buf_size = (XF_NPIXPERCYCLE(NPC) + 6); ap_uint<10> max_loop = XF_WORDDEPTH(WORDWIDTH_DST); ap_uint<8> step = XF_PIXELDEPTH(DEPTH); if (row >= 3) { if ((NPC == XF_NPPC8)) { for (i = 0; i < 8; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=8 max=8 #pragma HLS unroll // clang-format on src_buf1[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf2[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf3[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf4[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf5[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf6[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf7[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; } for (i = 0; i < 3; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=3 max=3 #pragma HLS unroll // clang-format on GradientValues[i] = xFGradient7x7( &src_buf1[i], &src_buf2[i], &src_buf3[i], &src_buf4[i], &src_buf5[i], &src_buf6[i], &src_buf7[i]); } xfPackPixels(&GradientValues[0], inter_val, 0, 3, shiftx); _dst_mat.write(wr_ind++, inter_val); shiftx = 0; inter_val = 0; } else { src_buf1[6] = 0; src_buf2[6] = 0; src_buf3[6] = 0; src_buf4[6] = 0; src_buf5[6] = 0; src_buf6[6] = 0; src_buf7[6] = 0; for (i = 0; i < 3; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=3 max=3 #pragma HLS ALLOCATION function instances=xFGradient7x7 limit=1 // clang-format on GradientValues[0] = xFGradient7x7( &src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0], &src_buf6[0], &src_buf7[0]); xFCopyDataBox(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7); inter_val((max_loop - 1), (max_loop - step)) = GradientValues[0]; _dst_mat.write(wr_ind++, inter_val); } } } } template void xFBoxFilter7x7(xf::cv::Mat& _src_mat, xf::cv::Mat& _dst_mat, uint16_t img_height, uint16_t img_width) { ap_uint<13> row_ind, row, col; ap_uint<5> tp1, tp2, tp3, mid, bottom1, bottom2, bottom3, i; int rd_ind = 0, wr_ind = 0; // Gradient output values stored in these buffer XF_PTNAME(DEPTH) GradientValues[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=GradientValues complete dim=1 // clang-format on // Temporary buffers to hold image data from three rows. XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 6], src_buf2[XF_NPIXPERCYCLE(NPC) + 6], src_buf3[XF_NPIXPERCYCLE(NPC) + 6], src_buf4[XF_NPIXPERCYCLE(NPC) + 6], src_buf5[XF_NPIXPERCYCLE(NPC) + 6], src_buf6[XF_NPIXPERCYCLE(NPC) + 6], src_buf7[XF_NPIXPERCYCLE(NPC) + 6]; // clang-format off #pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf4 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf5 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf6 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf7 complete dim=1 // clang-format on XF_SNAME(WORDWIDTH_SRC) tmp_in; XF_SNAME(WORDWIDTH_DST) inter_val = 0; uint16_t shiftx = 0; XF_SNAME(WORDWIDTH_SRC) buf[7][(COLS >> XF_BITSHIFT(NPC))]; if (USE_URAM) { // clang-format off #pragma HLS RESOURCE variable=buf core=RAM_S2P_URAM #pragma HLS array_reshape variable=buf dim=1 factor=7 cyclic // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM #pragma HLS ARRAY_PARTITION variable=buf complete dim=1 // clang-format on } row_ind = 3; Clear_Row_Loop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on buf[0][col] = 0; buf[1][col] = 0; buf[2][col] = 0; buf[row_ind][col] = _src_mat.read(rd_ind++); } row_ind++; Read_Row1_Loop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on buf[row_ind][col] = _src_mat.read(rd_ind++); } row_ind++; Read_Row2_Loop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on buf[row_ind][col] = _src_mat.read(rd_ind++); } row_ind++; Row_Loop: for (row = 3; row < img_height + 3; row++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on // modify the buffer indices to re use if (row_ind == 0) { tp1 = 1; tp2 = 2; tp3 = 3; mid = 4; bottom1 = 5; bottom2 = 6; bottom3 = 0; } else if (row_ind == 1) { tp1 = 2; tp2 = 3; tp3 = 4; mid = 5; bottom1 = 6; bottom2 = 0; bottom3 = 1; } else if (row_ind == 2) { tp1 = 3; tp2 = 4; tp3 = 5; mid = 6; bottom1 = 0; bottom2 = 1; bottom3 = 2; } else if (row_ind == 3) { tp1 = 4; tp2 = 5; tp3 = 6; mid = 0; bottom1 = 1; bottom2 = 2; bottom3 = 3; } else if (row_ind == 4) { tp1 = 5; tp2 = 6; tp3 = 0; mid = 1; bottom1 = 2; bottom2 = 3; bottom3 = 4; } else if (row_ind == 5) { tp1 = 6; tp2 = 0; tp3 = 1; mid = 2; bottom1 = 3; bottom2 = 4; bottom3 = 5; } else if (row_ind == 6) { tp1 = 0; tp2 = 1; tp3 = 2; mid = 3; bottom1 = 4; bottom2 = 5; bottom3 = 6; } for (i = 0; i < 6; i++) { // clang-format off #pragma HLS unroll // clang-format on src_buf1[i] = 0; src_buf2[i] = 0; src_buf3[i] = 0; src_buf4[i] = 0; src_buf5[i] = 0; src_buf6[i] = 0; src_buf7[i] = 0; } inter_val = 0; ProcessBox7x7( _src_mat, _dst_mat, buf, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7, GradientValues, inter_val, img_width, img_height, row_ind, shiftx, tp1, tp2, tp3, mid, bottom1, bottom2, bottom3, row, rd_ind, wr_ind); RightBorderBox7x7( _dst_mat, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7, GradientValues, inter_val, img_width, img_height, row_ind, shiftx, row, wr_ind); row_ind++; if (row_ind == 7) { row_ind = 0; } } // Row_Loop ends here } // end of function xFBoxFilter7x7 template void boxFilter(xf::cv::Mat& _src_mat, xf::cv::Mat& _dst_mat) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert(((FILTER_TYPE == XF_FILTER_3X3) || (FILTER_TYPE == XF_FILTER_5X5) || (FILTER_TYPE == XF_FILTER_7X7)) && ("Filter width should be 3 or 5 or 7.")); assert(BORDER_TYPE == XF_BORDER_CONSTANT && "Only XF_BORDER_CONSTANT is supported"); assert(((_src_mat.rows <= ROWS) && (_src_mat.cols <= COLS)) && "ROWS and COLS should be greater than input image"); #endif uint16_t img_width = _src_mat.cols >> XF_BITSHIFT(NPC); uint16_t img_height = _src_mat.rows; if (FILTER_TYPE == XF_FILTER_3X3) { xFBoxFilter3x3> XF_BITSHIFT(NPC)), USE_URAM>(_src_mat, _dst_mat, img_height, img_width); } else if (FILTER_TYPE == XF_FILTER_5X5) { if (NPC == XF_NPPC8) { xFBoxFilter5x5> XF_BITSHIFT(NPC)), USE_URAM>( _src_mat, _dst_mat, img_height, img_width); } else { xFBoxFilter5x5> XF_BITSHIFT(NPC)), USE_URAM>( _src_mat, _dst_mat, img_height, img_width); } } else if (FILTER_TYPE == XF_FILTER_7X7) { if (NPC == XF_NPPC8) { xFBoxFilter7x7> XF_BITSHIFT(NPC)), USE_URAM>( _src_mat, _dst_mat, img_height, img_width); } else { xFBoxFilter7x7> XF_BITSHIFT(NPC)), USE_URAM>( _src_mat, _dst_mat, img_height, img_width); } } } } // namespace cv } // namespace xf #endif