.. _program_listing_file__tmp_ws_src_vitis_common_include_imgproc_xf_custom_convolution.hpp: Program Listing for File xf_custom_convolution.hpp ================================================== |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/imgproc/xf_custom_convolution.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* * Copyright 2019 Xilinx, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _XF_CUSTOM_CONVOLUTION_HPP_ #define _XF_CUSTOM_CONVOLUTION_HPP_ #include "../common/xf_common.hpp" #include "../common/xf_utility.hpp" #include "hls_stream.h" typedef unsigned char uchar; namespace xf { namespace cv { /**************************************************************************************** * xFApplyCustomFilter: Applies the user defined kernel to the input image. 
*
* For every plane, multiplies the filter_height x filter_width window of _lbuf
* whose left-most column is `ind` by the matching _kernel coefficients, adds
* the products, shifts the sum right by `shift` and saturates it to the
* destination depth.
*
*    _lbuf   -> Buffer containing the input image data; plane k of an element
*               is read from bits [8k+7 : 8k]
*    _kernel -> Kernel provided by the user of type 16S
*    ind     -> Column of _lbuf at which the filter window starts
*    shift   -> Fixed point format of the filter co-efficients for unity
*               gain filter (the accumulated sum is shifted right by this)
*
* Returns the packed per-plane result:
*    XF_8UP / XF_24UP   -> each plane clamped to [0, 255], 8 bits per plane
*    XF_16SP / XF_48SP  -> each plane clamped to [-32768, 32767], 16 bits per plane
*    any other DEPTH_DST -> the result is left at its initial value 0
*
* NOTE(review): the template parameter list is missing after `template` in this
* listing (the body uses DEPTH_DST, filter_height, filter_width, PLANES,
* buf_width and buf_type) - restore it from the original header.
****************************************************************************************/
template
XF_PTNAME(DEPTH_DST)
xFApplyCustomFilter(buf_type _lbuf[][buf_width], short int _kernel[][filter_width], int ind, unsigned char shift) {
    // clang-format off
    #pragma HLS INLINE off
    // clang-format on

    XF_PTNAME(DEPTH_DST) res = 0;
    // Shifted (pre-saturation) result for each plane.
    ap_int32_t tmp_res[PLANES];
    // One pixel * coefficient product per filter tap and plane.
    ap_int24_t conv_val[filter_height][filter_width][PLANES];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=conv_val complete dim=0
    // clang-format on

    ap_int32_t row_sum[filter_height][PLANES], fix_res = 0, tmp_row_sum = 0;
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=row_sum complete dim=1
    // clang-format on

    // Running column index into _lbuf; reset to `ind` at the start of every filter row.
    XF_PTNAME(DEPTH_DST) arr_ind = ind;

// performing kernel operation and storing in the temporary buffer
filterLoopI:
    for (uchar i = 0; i < filter_height; i++) {
        // clang-format off
        #pragma HLS UNROLL
        // clang-format on
        arr_ind = ind;

    filterLoopJ:
        for (uchar j = 0; j < filter_width; j++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on

        planes_loop1:
            for (uchar k = 0; k < PLANES; k++) {
                // clang-format off
                #pragma HLS UNROLL
                // clang-format on
                // 8-bit field of plane k multiplied by the 16-bit coefficient.
                conv_val[i][j][k] = ((_lbuf[i][arr_ind].range((k * 8) + 7, k * 8)) * _kernel[i][j]);
            }
            arr_ind++;
        }
    }

// accumulating the row sum values of the temporary buffer
planes_add_row:
    for (uchar p = 0; p < PLANES; p++) {
        // clang-format off
        #pragma HLS UNROLL
        // clang-format on

    addFilterLoopI:
        for (uchar i = 0; i < filter_height; i++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on
            tmp_row_sum = 0;

        addFilterLoopJ:
            for (uchar j = 0; j < filter_width; j++) {
                // clang-format off
                #pragma HLS UNROLL
                // clang-format on
                tmp_row_sum += conv_val[i][j][p];
            }
            row_sum[i][p] = tmp_row_sum;
        }
    }

// adding the row_sum buffer elements and storing in the result
add_row_col_plane_loop:
    for (uchar p = 0; p < PLANES; p++) {
        // clang-format off
        #pragma HLS UNROLL
        // clang-format on
        fix_res = 0;

    resultFilterLoopI:
        for (uchar i = 0; i < filter_height; i++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on
            fix_res += row_sum[i][p];
        }

        // converting the input type from Q1.shift
        tmp_res[p] = (fix_res >> shift);
    }

    // overflow handling depending upon the output type
    if ((DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP)) {
    planes_loop_out8:
        for (uchar p = 0; p < PLANES; p++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on
            // Saturate to the unsigned 8-bit range before packing plane p.
            if (tmp_res[p] > 255) {
                res.range((p * 8) + 7, p * 8) = 255;
            } else if (tmp_res[p] < 0) {
                res.range((p * 8) + 7, p * 8) = 0;
            } else {
                res.range((p * 8) + 7, p * 8) = tmp_res[p];
            }
        }
    } else if ((DEPTH_DST == XF_16SP) || (DEPTH_DST == XF_48SP)) {
    planes_loop_out16:
        for (uchar p = 0; p < PLANES; p++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on
            // Saturate to the signed 16-bit range before packing plane p.
            int tmp_val = (int)tmp_res[p];
            if (tmp_val > ((1 << (16 - 1)) - 1)) {
                res.range((p * 16) + 15, p * 16) = ((1 << (16 - 1)) - 1);
            } else if (tmp_val < -(1 << (16 - 1))) {
                res.range((p * 16) + 15, p * 16) = -(1 << (16 - 1));
            } else {
                res.range((p * 16) + 15, p * 16) = (short)tmp_val;
            }
        }
    }
    return res;
}

/****************************************************************************************
 * xFComputeCustomFilter : Applies the mask and computes the filter value for NPC
 * number of times.
*
* Calls xFApplyCustomFilter once per pixel of the current word, using window
* start columns 0 .. XF_NPIXPERCYCLE(NPC) - 1 of the line buffer.
*
*    _lbuf       -> Buffer containing the input image data
*    _kernel     -> Kernel provided by the user of type 16S
*    _mask_value -> The output buffer containing output image data; element j
*                   receives the result for window start column j
*    shift       -> Fixed point format of the filter co-efficients for unity
*                   gain filter
*
* NOTE(review): the template parameter list after `template` and the explicit
* template arguments of the xFApplyCustomFilter call are missing in this
* listing - restore them from the original header.
****************************************************************************************/
template
void xFComputeCustomFilter(XF_PTNAME(DEPTH_SRC) _lbuf[][buf_width],
                           short int _kernel[][filter_width],
                           XF_PTNAME(DEPTH_DST) * _mask_value,
                           unsigned char shift) {
    // clang-format off
    #pragma HLS inline
    // clang-format on

// computes the filter operation depending upon the mode of parallelism
computeFilterLoop:
    for (ap_uint<5> j = 0; j < XF_NPIXPERCYCLE(NPC); j++) {
        // clang-format off
        #pragma HLS UNROLL
        // clang-format on
        _mask_value[j] = xFApplyCustomFilter(
            _lbuf, _kernel, j, shift);
    }
}

/****************************************************************************************
 * Convolution_Process : Processes one image row, one source word per iteration.
 *
 * For every column word it
 *   1. stores the next source word in buf[row_ind][col] (zero once
 *      row >= image_height, i.e. below the bottom of the image),
 *   2. gathers filter_height words through the row order held in index[],
 *   3. extracts them into lbuf starting at column (filter_width - 1),
 *   4. computes XF_NPIXPERCYCLE(NPC) filter results into mask_value,
 *   5. packs the results into P0 and, past the left border, writes P0 to _dst,
 *   6. copies the trailing (filter_width - 1) pixels of every lbuf row to its
 *      front for the next word.
 *
 *    buf, lbuf, tmp_buf, mask_value -> Working buffers owned by the caller
 *    P0                 -> Output word being assembled; carried across calls
 *    index              -> Order in which the rows of buf are read
 *    col_factor         -> Columns with col <= col_factor take the left-border path
 *    filter_width_factor-> Passed by value; the local copy may be reduced below
 *    rd_ind, wr_ind     -> Running read/write indices, advanced in place
 *
 * NOTE(review): the template parameter list after `template`, the template
 * arguments of xf::cv::Mat and those of the xFComputeCustomFilter call are
 * missing in this listing - restore them from the original header.
 ****************************************************************************************/
template
void Convolution_Process(xf::cv::Mat& _src,
                         xf::cv::Mat& _dst,
                         XF_SNAME(WORDWIDTH_SRC) buf[filter_height][COLS >> XF_BITSHIFT(NPC)],
                         XF_PTNAME(DEPTH_SRC) lbuf[filter_height][XF_NPIXPERCYCLE(NPC) + filter_width - 1],
                         XF_SNAME(WORDWIDTH_SRC) tmp_buf[filter_height],
                         XF_PTNAME(DEPTH_DST) mask_value[XF_NPIXPERCYCLE(NPC)],
                         short int _filter[][filter_width],
                         uint16_t image_width,
                         uchar row_ind,
                         unsigned char shift,
                         XF_SNAME(WORDWIDTH_DST) & P0,
                         unsigned char index[filter_height],
                         ap_uint<13> col_factor,
                         uchar filter_width_factor,
                         unsigned short image_height,
                         ap_uint<13> row,
                         int& rd_ind,
                         int& wr_ind) {
    // clang-format off
    #pragma HLS INLINE
    // clang-format on

    // Width in bits of one packed output pixel and of one output word.
    uchar step = XF_PIXELDEPTH(DEPTH_DST);
    unsigned short max_loop = XF_WORDDEPTH(WORDWIDTH_DST);

mainColLoop:
    for (ap_uint<13> col = 0; col < (image_width); col++) // Width of the image
    {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
        #pragma HLS PIPELINE II=1
        // clang-format on

        // reading the data from the stream to the input buffer
        if (row < image_height) {
            buf[row_ind][col] = _src.read(rd_ind);
            rd_ind++;
        } else {
            // Rows past the bottom of the image are fed as zeros.
            buf[row_ind][col] = 0;
        }

    // loading the data from the input buffer to the temporary buffer
    fillTempBuffer_1:
        for (uchar l = 0; l < filter_height; l++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on
            tmp_buf[l] = buf[index[l]][col];
        }

    // extracting the pixels from the temporary buffer to the line buffer
    // (xfExtractPixels is defined elsewhere; presumably it unpacks the word's
    // pixels into consecutive lbuf elements - confirm against xf_utility.hpp)
    extractPixelsLoop_1:
        for (uchar l = 0; l < filter_height; l++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on
            xfExtractPixels(&lbuf[l][(filter_width - 1)], tmp_buf[l], 0);
        }

        // computing the mask value
        xFComputeCustomFilter(lbuf, _filter, mask_value, shift);

        // left column border condition: results are only staged in P0, nothing
        // is written to the output yet
        if (col <= col_factor) {
            ap_uint<13> ind = filter_width_factor;
            ap_uint<13> range_step = 0;

            if ((XF_NPIXPERCYCLE(NPC) - filter_width_factor) >= 0) {
            packMaskToTempRes_1:
                for (uchar l = 0; l < (XF_NPIXPERCYCLE(NPC) - FW); l++) {
                    // clang-format off
                    #pragma HLS LOOP_TRIPCOUNT min=F_COUNT max=F_COUNT
                    #pragma HLS UNROLL
                    // clang-format on
                    // Mask values from index filter_width_factor onwards fill P0
                    // from bit 0 upwards.
                    P0.range((range_step + (step - 1)), range_step) = mask_value[ind++];
                    range_step += step;
                }
            } else {
                // Half filter width spans more than one word: consume one word's
                // worth of it and stage nothing for this column.
                filter_width_factor -= XF_NPIXPERCYCLE(NPC);
            }
        }
        // packing the data from the mask value to the temporary result P0 and
        // pushing data into stream
        else {
            // The first FW mask values complete the upper part of the pending word.
            ap_uint<10> max_range_step = max_loop - (filter_width_factor * step);

        packMaskToTempRes_2:
            for (uchar l = 0; l < FW; l++) {
                // clang-format off
                #pragma HLS LOOP_TRIPCOUNT min=FW max=FW
                #pragma HLS UNROLL
                // clang-format on
                P0.range((max_range_step + (step - 1)), (max_range_step)) = mask_value[l];
                max_range_step += step;
            }

            // writing the temporary result into the stream
            _dst.write(wr_ind, P0);
            wr_ind++;

            // The remaining mask values start the next output word.
            ap_uint<13> ind = filter_width_factor;
            ap_uint<13> range_step = 0;

        packMaskToTempRes_3:
            for (ap_uint<13> l = 0; l < (XF_NPIXPERCYCLE(NPC) - FW); l++) {
                // clang-format off
                #pragma HLS LOOP_TRIPCOUNT min=F_COUNT max=F_COUNT
                #pragma HLS UNROLL
                // clang-format on
                P0.range((range_step + (step - 1)), range_step) = mask_value[ind++];
                range_step += step;
            }
        }

    // re-initializing the line buffers: the last (filter_width - 1) pixels of
    // every row move to the front for the next word
    copyEndPixelsI_1:
        for (uchar i = 0; i < filter_height; i++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on

        copyEndPixelsJ_1:
            for (uchar l = 0; l < (filter_width - 1); l++) {
                // clang-format off
                #pragma HLS UNROLL
                // clang-format on
                lbuf[i][l] = lbuf[i][XF_NPIXPERCYCLE(NPC) + l];
            }
        }
    } // end of main column loop*/
}

/************************************************************************************
 * xFCustomConvKernel : Convolutes the input filter over the input image and
 * writes onto the output image.
 *
 * Keeps filter_height rows of source words in a circular buffer. The first
 * (filter_height >> 1) buffer rows are zero-filled (top border), the next
 * (filter_height >> 1) rows are read from _src, and the main loop then handles
 * one image row per iteration through Convolution_Process followed by the
 * right-border outputs for that row.
 *
 *    _src       -> Input image of type 8U
 *    _filter    -> Kernel provided by the user of type 16S
 *    _dst       -> Output image after applying the filter operation, of type
 *                  8U or 16S
 *    shift      -> Fixed point format of the filter co-efficients for unity
 *                  gain filter
 *    img_width  -> Number of words per image row
 *    img_height -> Number of image rows
 *
 * NOTE(review): the template parameter list after `template`, the template
 * arguments of xf::cv::Mat and those of the Convolution_Process and
 * xFApplyCustomFilter calls are missing in this listing - restore them from
 * the original header.
 ************************************************************************************/
template
void xFCustomConvolutionKernel(xf::cv::Mat& _src,
                               short int _filter[][filter_width],
                               xf::cv::Mat& _dst,
                               unsigned char shift,
                               unsigned short img_width,
                               unsigned short img_height) {
    // Width in bits of one packed output pixel and of one output word.
    uchar step = XF_PIXELDEPTH(DEPTH_DST);
    unsigned short max_loop = XF_WORDDEPTH(WORDWIDTH_DST);
    // Number of columns in one row of lbuf.
    uchar buf_size = (XF_NPIXPERCYCLE(NPC) + filter_width - 1);

    // row_ind: row of buf written next; row_ptr: scratch used to build index[].
    uchar row_ind = 0, row_ptr = 0;

    unsigned char index[filter_height];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=index complete dim=1
    // clang-format on

    // Output word under construction.
    XF_SNAME(WORDWIDTH_DST) P0;

    // Circular buffer of source rows.
    XF_SNAME(WORDWIDTH_SRC) buf[filter_height][COLS >> XF_BITSHIFT(NPC)];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=buf complete dim=1
    // clang-format on

    // Pixel window the filter is applied to.
    XF_PTNAME(DEPTH_SRC) lbuf[filter_height][XF_NPIXPERCYCLE(NPC) + filter_width - 1];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=lbuf complete dim=0
    // clang-format on

    XF_SNAME(WORDWIDTH_SRC) tmp_buf[filter_height];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=tmp_buf complete dim=1
    // clang-format on

    XF_PTNAME(DEPTH_DST) mask_value[XF_NPIXPERCYCLE(NPC)];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=mask_value complete dim=1
    // clang-format on

    // Filter results for the right image border of the current row.
    XF_PTNAME(DEPTH_DST) col_border_mask[(filter_width >> 1)];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=col_border_mask complete dim=1
    // clang-format on

    ap_uint<13> col_factor = 0;
    uchar filter_width_factor = (filter_width >> 1);
    int rd_ind = 0, wr_ind = 0;

// setting the column factor depending upon the filter dimensions: counts how
// often XF_NPIXPERCYCLE(NPC) can be removed from the half filter width while
// the remainder still exceeds one word
colFactorLoop:
    for (uchar f = (filter_width >> 1); f > (XF_NPIXPERCYCLE(NPC)); f = (f - XF_NPIXPERCYCLE(NPC))) {
        col_factor++;
    }

// initializing the first (filter_height >> 1) rows to zeros
fillBufZerosI:
    for (uchar i = 0; i < (filter_height >> 1); i++) {
        // clang-format off
        #pragma HLS UNROLL
        // clang-format on

    fillBufZerosJ:
        for (ap_uint<13> j = 0; j < (img_width); j++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=COLS_COUNT max=COLS_COUNT
            #pragma HLS UNROLL
            // clang-format on
            buf[row_ind][j] = 0;
        }
        row_ind++;
    }

// reading the first (filter_height >> 1) rows from the input stream
readTopBorderI:
    for (uchar i = 0; i < (filter_height >> 1); i++) {
        // clang-format off
        #pragma HLS UNROLL
        // clang-format on

    readTopBorderJ:
        for (ap_uint<13> j = 0; j < (img_width); j++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=COLS_COUNT max=COLS_COUNT
            #pragma HLS PIPELINE
            // clang-format on
            buf[row_ind][j] = _src.read(rd_ind);
            rd_ind++;
        }
        row_ind++;
    }

// row loop from (filter_height >> 1) to the end of the image plus the bottom
// border; runs img_height times
mainRowLoop:
    for (ap_uint<13> row = (filter_height >> 1); row < (img_height + ((filter_height >> 1))); row++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        // clang-format on
        row_ptr = row_ind + 1;

    // index calculation: rows of buf in circular order, starting with the row
    // after the one about to be written and ending with row_ind itself
    settingIndex_1:
        for (int l = 0; l < filter_height; l++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on
            if (row_ptr >= filter_height) row_ptr = 0;
            index[l] = row_ptr++;
        }

    // initializing the line buffer to zero (left border columns)
    fillingLineBufferZerosI_1:
        for (uchar i = 0; i < filter_height; i++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on

        fillingLineBufferZerosJ_1:
            for (uchar j = 0; j < (filter_width - 1); j++) {
                // clang-format off
                #pragma HLS UNROLL
                // clang-format on
                lbuf[i][j] = 0;
            }
        }

        // initializing the temporary result value to zero
        P0 = 0;

        Convolution_Process(
            _src, _dst, buf, lbuf, tmp_buf, mask_value, _filter, img_width, row_ind, shift, P0, index, col_factor,
            filter_width_factor, img_height, row, rd_ind, wr_ind);

    // initializing the line buffers to zero (columns past the right image edge);
    // the first (filter_width - 1) columns still hold the row's last pixels
    fillingLineBufferZerosI_2:
        for (uchar i = 0; i < filter_height; i++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on

        fillingLineBufferZerosJ_2:
            for (ap_uint<13> l = (filter_width - 1); l < buf_size; l++) {
                // clang-format off
                #pragma HLS UNROLL
                // clang-format on
                lbuf[i][l] = 0;
            }
        }

        // applying the filter and computing the mask_value for the right border
        if ((filter_width >> 1) > 0) {
        getMaskValue_1:
            for (uchar i = 0; i < (filter_width >> 1); i++) {
                // clang-format off
                #pragma HLS UNROLL
                // clang-format on
                col_border_mask[i] = xFApplyCustomFilter(lbuf, _filter, i, shift);
            }
        }

        // The first FW border results complete the upper part of the pending word.
        int max_range_step = max_loop - (FW * step);

    packMaskToTempRes_4:
        for (uchar l = 0; l < FW; l++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=FW max=FW
            #pragma HLS UNROLL
            // clang-format on
            P0.range((max_range_step + step - 1), (max_range_step)) = col_border_mask[l];
            max_range_step += step;
        }

        // writing the temporary result into the stream
        _dst.write(wr_ind, P0);
        wr_ind++;

    // One extra output word per col_factor.
    // NOTE(review): every iteration packs the same slice
    // col_border_mask[FW .. FW + XF_NPIXPERCYCLE(NPC) - 1]; confirm this is
    // intended (and stays inside the (filter_width >> 1)-element array) when
    // col_factor > 1.
    colFactorLoopBorder:
        for (ap_uint<13> c = 0; c < col_factor; c++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=COL_FACTOR_COUNT max=COL_FACTOR_COUNT
            // clang-format on
            max_range_step = 0;

        widthFactorLoopBorder:
            for (int l = FW; l < (XF_NPIXPERCYCLE(NPC) + FW); l++) {
                P0.range((max_range_step + (step - 1)), (max_range_step)) = col_border_mask[l];
                max_range_step += step;
            }

            _dst.write(wr_ind, P0);
            wr_ind++;
        }

        // incrementing the row_ind for each iteration of row (wraps at filter_height)
        row_ind++;
        if (row_ind == filter_height) {
            row_ind = 0;
        }
    } // end of main row loop
} // end of xFCustomConvKernel

/************************************************************************************
 * xFApplyFilter2D : Applies the filter to one F_HEIGHT x F_WIDTH pixel window.
 *
 * For every plane, filter tap (m, n) is multiplied with window element
 * (F_HEIGHT - 1 - m, F_WIDTH - 1 - n); the sum of products is shifted right by
 * `shift` and saturated into `out`.
 *
 *    _kernel_pixel  -> Pixel window; each plane occupies 8 bits of an element
 *    _kernel_filter -> Kernel of type 16S
 *    out            -> Result; 8 bits per plane clamped to [0, 255] for
 *                      XF_8UP / XF_24UP, 16 bits per plane clamped to
 *                      [-32768, 32767] for XF_16SP / XF_48SP, untouched for
 *                      any other DEPTH_DST
 *    shift          -> Number of bits the accumulated sum is shifted right
 *
 * NOTE(review): the template parameter list after `template` is missing in
 * this listing - restore it from the original header.
 ************************************************************************************/
template
void xFApplyFilter2D(XF_PTNAME(DEPTH_SRC) _kernel_pixel[F_HEIGHT][F_WIDTH],
                     short int _kernel_filter[F_HEIGHT][F_WIDTH],
                     XF_PTNAME(DEPTH_DST) & out,
                     unsigned char shift) {
    // clang-format off
    #pragma HLS INLINE off
    // clang-format on

    // p: bit offset of the current plane inside a source pixel.
    ap_int<32> sum = 0, in_step = 0, out_step = 0, p = 0;
    ap_int<32> temp = 0;
    ap_int<32> tmp_sum = 0;

// NOTE(review): this label is attached to the declaration below rather than to
// the height loop, and bgr_val is never used in this function.
FILTER_LOOP_HEIGHT:
    ap_uint<24> bgr_val;

    // Input planes are always 8 bits wide; output planes are 8 or 16 bits wide.
    if ((DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP)) {
        in_step = 8;
        out_step = 8;
    } else {
        in_step = 8;
        out_step = 16;
    }

    // c: plane index, k: bit offset of the plane inside `out`.
    for (ap_uint<8> c = 0, k = 0; c < PLANES; c++, k += out_step) {
        sum = 0;
        temp = 0;
        tmp_sum = 0;

        for (ap_int<8> m = 0; m < F_HEIGHT; m++) {
        FILTER_LOOP_WIDTH:
            for (ap_int<8> n = 0; n < F_WIDTH; n++) {
                // The window is indexed in reverse relative to the filter taps.
                XF_PTNAME(DEPTH_SRC)
                src_v = _kernel_pixel[F_HEIGHT - m - 1][F_WIDTH - 1 - n];

                short int filter_v = _kernel_filter[m][n];
                temp = src_v.range(p + (in_step - 1), p) * filter_v;
                sum = sum + temp;
            }
        }
        // Advance to the next 8-bit source plane.
        p = p + 8;
        tmp_sum = sum >> shift;

        if ((DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP)) {
            // Saturate to the unsigned 8-bit range.
            if (tmp_sum > ((1 << (8)) - 1)) {
                out.range(k + 7, k) = ((1 << (8)) - 1);
            } else if (tmp_sum < 0) {
                out.range(k + 7, k) = 0;
            } else {
                out.range(k + 7, k) = tmp_sum;
            }
        } else if ((DEPTH_DST == XF_16SP) || (DEPTH_DST == XF_48SP)) {
            // Saturate to the signed 16-bit range.
            if (tmp_sum > ((1 << (16 - 1)) - 1)) {
                out.range(k + 15, k) = ((1 << (16 - 1)) - 1);
            } else if (tmp_sum < -(1 << (16 - 1))) {
                out.range(k + 15, k) = -(1 << (16 - 1));
            } else {
                out.range(k + 15, k) = tmp_sum;
            }
        }
    }
}

/**
 * Maps a coordinate to a valid index.
 *
 * Returns p when 0 <= p < len, otherwise -1. borderType is accepted but not
 * used by this implementation.
 */
static int borderInterpolate(int p, int len, int borderType) {
    // clang-format off
    #pragma HLS INLINE
    // clang-format on
    if (p >= 0 && p < len)
        return p;
    else
        p = -1;
    return p;
}

/************************************************************************************
 * xFFilter2Dkernel : Sliding-window filter that emits one output word per
 * iteration once the window is primed.
 *
 * Iterates over (rows + 2 * K_HEIGHT - 1) x (cols + K_WIDTH - 1) positions.
 * Each iteration shifts the K_HEIGHT x K_WIDTH window one column, loads a new
 * column from the line buffer k_buf (or the fill value 0 outside the image)
 * and, for i >= 2 * K_HEIGHT - 1 and j >= K_WIDTH - 1, applies the filter and
 * writes the result - rows * cols writes in total.
 *
 *    _src_mat       -> Input image
 *    _dst_mat       -> Output image
 *    _filter_kernel -> Kernel of type 16S
 *    shift          -> Number of bits the accumulated sum is shifted right
 *    rows, cols     -> Image size; asserted (outside synthesis) to be >= 8 and
 *                      <= ROWS / COLS
 *
 * NOTE(review): the template parameter list after `template`, the template
 * arguments of xf::cv::Mat and those of the xFApplyFilter2D call are missing
 * in this listing - restore them from the original header.
 ************************************************************************************/
template
static void xFFilter2Dkernel(xf::cv::Mat& _src_mat,
                             xf::cv::Mat& _dst_mat,
                             short int _filter_kernel[K_HEIGHT][K_WIDTH],
                             unsigned char shift,
                             uint16_t rows,
                             uint16_t cols) {
    // Value used for every position outside the image.
    XF_SNAME(WORDWIDTH_SRC) fillvalue = 0;
    // clang-format off
    #pragma HLS INLINE off
    // clang-format on

    // The main processing window
    XF_PTNAME(DEPTH_SRC) src_kernel_win[K_HEIGHT][K_WIDTH];
    // The main line buffer
    XF_SNAME(WORDWIDTH_SRC) k_buf[K_HEIGHT][COLS >> XF_BITSHIFT(NPC)];
    // A small buffer keeping a few pixels from the line
    // buffer, so that we can complete right borders correctly.
    // NOTE(review): written below but never read in this function.
    XF_SNAME(WORDWIDTH_SRC) right_border_buf[K_HEIGHT][K_WIDTH];
    // Temporary storage for reading from the line buffers.
    XF_SNAME(WORDWIDTH_SRC) col_buf[K_HEIGHT];

#ifndef __SYNTHESIS__
    assert(rows >= 8);
    assert(cols >= 8);
    assert(rows <= ROWS);
    assert(cols <= COLS);
#endif

    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=col_buf complete dim=0
    #pragma HLS ARRAY_PARTITION variable=_filter_kernel complete dim=0
    #pragma HLS ARRAY_PARTITION variable=src_kernel_win complete dim=0
    #pragma HLS ARRAY_PARTITION variable=k_buf complete dim=1
    #pragma HLS ARRAY_PARTITION variable=right_border_buf complete dim=0
    // clang-format on

    int heightloop = rows + K_HEIGHT - 1 + K_HEIGHT;
    int widthloop = cols + K_WIDTH - 1; // one pixel overlap, so it should minus one

    /*ap_uint<13> i,j;
    ap_uint<13> anchorx=K_WIDTH/2,anchory=K_HEIGHT/2;
    ap_uint<13> ImagLocx=0,ImagLocy =0;*/

    uint16_t i, j;
    int rd_ind = 0, wr_ind = 0;
    // Half filter size in each direction.
    uint16_t anchorx = K_WIDTH >> 1, anchory = K_HEIGHT >> 1;
    // Image coordinates of the position being loaded; may be negative.
    int16_t ImagLocx = 0, ImagLocy = 0;

ROW_LOOP:
    for (i = 0; i < heightloop; i++) {
    COL_LOOP:
        for (j = 0; j < widthloop; j++) {
            // This DEPENDENCE pragma is necessary because the border mode handling is not
            // affine.
            // clang-format off
            #pragma HLS DEPENDENCE array inter false
            #pragma HLS LOOP_FLATTEN OFF
            #pragma HLS PIPELINE
            // clang-format on

            // fill data x,y are the coordinate in the image, it could be negative.
            // For example (-1,-1) represents the
            // interpolation pixel.
            ImagLocx = j - anchorx;
            ImagLocy = i - K_HEIGHT - anchory;
            // x is the line-buffer column, or -1 when ImagLocx is outside [0, cols).
            int16_t x = borderInterpolate(ImagLocx, cols, 0);

            // column left shift: every window row moves one column towards the
            // higher index, freeing column 0 for the new data
            for (ap_int<8> row = 0; row < K_HEIGHT; row++)
                for (ap_int<8> col = K_WIDTH - 1; col >= 1; col--)
                    src_kernel_win[row][col] = src_kernel_win[row][col - 1];

            for (ap_int<8> buf_row = 0; buf_row < K_HEIGHT; buf_row++) {
// Fetch the column from the line buffer to shift into the window.
#ifndef __SYNTHESIS__
                assert((x < COLS));
#endif
                col_buf[buf_row] = ((x < 0)) ? fillvalue : k_buf[buf_row][x];
            }

            if ((ImagLocy < (-anchory)) || (ImagLocy >= K_HEIGHT - 1 && ImagLocy < rows - 1)) {
                // Advance load and body process
                if (ImagLocx >= 0 && ImagLocx < cols) {
                    XF_SNAME(WORDWIDTH_SRC)
                    Toppixel = col_buf[K_HEIGHT - 1]; // k_buf[k](K_HEIGHT-1,ImagLocx);
                    src_kernel_win[K_HEIGHT - 1][0] = Toppixel;
                    if (ImagLocx >= cols - K_WIDTH) {
                        right_border_buf[0][ImagLocx - (cols - K_WIDTH)] = Toppixel;
                    }
                    // Move every line-buffer entry of this column one row up in
                    // index while copying it into window column 0.
                    for (ap_int<8> buf_row = K_HEIGHT - 1; buf_row >= 1; buf_row--) {
                        XF_SNAME(WORDWIDTH_SRC)
                        temp = col_buf[buf_row - 1]; // k_buf[k](buf_row-1,ImagLocx);
                        src_kernel_win[buf_row - 1][0] = temp;
                        k_buf[buf_row][x] = temp;
                        if (ImagLocx >= cols - K_WIDTH) {
                            right_border_buf[K_HEIGHT - buf_row][ImagLocx - (cols - K_WIDTH)] = temp;
                        }
                    }
                    // The newly read source word becomes row 0 of the line buffer.
                    XF_SNAME(WORDWIDTH_SRC) temp = 0;
                    temp = (_src_mat.read(rd_ind));
                    rd_ind++;
                    k_buf[0][x] = temp;
                } else if (ImagLocx < 0) {
                    // Left of the image: shift in the fill value.
                    for (int buf_row = 0; buf_row < K_HEIGHT; buf_row++) {
                        src_kernel_win[buf_row][0] = fillvalue;
                    }
                } else if (ImagLocx >= cols) {
                    // Right of the image: shift in the fill value.
                    for (int buf_row = 0; buf_row < K_HEIGHT; buf_row++) {
                        src_kernel_win[buf_row][0] = fillvalue;
                    }
                }
            } else if (ImagLocy >= 0) { //   && ImagLocy < K_HEIGHT-1) ||
                //			(ImagLocy >= rows-1      && ImagLocy < heightloop)) {
                // top extend pixel bottom keep the buffer 0 with the data rows-1
                // content.
                // No source read in this branch: the window column is rebuilt
                // from the line-buffer contents, with the fill value for rows
                // outside the image.
                int ref = K_HEIGHT - 1;
                if (ImagLocy >= rows - 1) ref = rows - 1;
                int y = ImagLocy;
                for (int buf_row = 0; buf_row < K_HEIGHT; buf_row++) {
                    int t = borderInterpolate(y, rows, 0);
                    int locy = ref - t;
#ifndef __SYNTHESIS__
                    assert(t < 0 || (locy >= 0 && locy < K_HEIGHT));
#endif
                    if (y >= rows)
                        src_kernel_win[buf_row][0] = fillvalue;
                    else if (y < 0)
                        src_kernel_win[buf_row][0] = fillvalue;
                    else
                        src_kernel_win[buf_row][0] = col_buf[locy];
                    y--;
                }
            }

            // figure out the output image pixel value
            if (i >= (K_HEIGHT + K_HEIGHT - 1) && j >= (K_WIDTH - 1)) {
                XF_PTNAME(DEPTH_DST) temp;
                xFApplyFilter2D(src_kernel_win, _filter_kernel, temp, shift);
                XF_SNAME(WORDWIDTH_DST) temp1 = temp;
                _dst_mat.write(wr_ind, temp1);
                wr_ind++;
            }
        }
    }
}

/************************************************************************************
 * filter2D : Entry point; filters _src_mat with a user supplied kernel and
 * writes the result to _dst_mat.
 *
 *    _src_mat -> Input image; rows/cols are asserted (outside synthesis) to be
 *                within ROWS/COLS
 *    _dst_mat -> Output image
 *    filter   -> FILTER_HEIGHT * FILTER_WIDTH coefficients of type 16S, laid
 *                out row by row (element (i, j) at i * FILTER_WIDTH + j)
 *    _shift   -> Number of bits the accumulated sums are shifted right
 *
 * Dispatches on NPC: XF_NPPC8 uses xFCustomConvolutionKernel, XF_NPPC1 uses
 * xFFilter2Dkernel. For any other NPC neither branch runs and nothing is
 * written to _dst_mat.
 *
 * NOTE(review): the template parameter list after `template`, the template
 * arguments of xf::cv::Mat, the leading template arguments of the
 * xFCustomConvolutionKernel call and all template arguments of the
 * xFFilter2Dkernel call are missing in this listing - restore them from the
 * original header.
 * NOTE(review): the assert message says "greater" while the check accepts
 * sizes up to and including ROWS/COLS.
 ************************************************************************************/
template
void filter2D(xf::cv::Mat& _src_mat,
              xf::cv::Mat& _dst_mat,
              short int filter[FILTER_HEIGHT * FILTER_WIDTH],
              unsigned char _shift) {
    // clang-format off
    #pragma HLS INLINE OFF
    // clang-format on

#ifndef __SYNTHESIS__
    assert(((_src_mat.rows <= ROWS) && (_src_mat.cols <= COLS)) && "ROWS and COLS should be greater than input image");
#endif

    // Width is expressed in words (pixels divided by pixels-per-cycle).
    unsigned short img_width = _src_mat.cols >> XF_BITSHIFT(NPC);
    unsigned short img_height = _src_mat.rows;

    // Local 2-D copy of the flat coefficient array.
    short int lfilter[FILTER_HEIGHT][FILTER_WIDTH];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=lfilter complete dim=0
    // clang-format on
    for (unsigned char i = 0; i < FILTER_HEIGHT; i++) {
        for (unsigned char j = 0; j < FILTER_WIDTH; j++) {
            lfilter[i][j] = filter[i * FILTER_WIDTH + j];
        }
    }

    if (NPC == XF_NPPC8) {
        // The visible trailing template arguments are, in order: words per row,
        // filter height, filter width, pixels per word minus
        // ((FILTER_WIDTH >> 1) % pixels per word), that remainder itself,
        // ((FILTER_WIDTH >> 1) - 1) in words, and the channel count.
        xFCustomConvolutionKernel> XF_BITSHIFT(NPC)), FILTER_HEIGHT, FILTER_WIDTH,
                                  (XF_NPIXPERCYCLE(NPC) - ((FILTER_WIDTH >> 1) % XF_NPIXPERCYCLE(NPC))),
                                  ((FILTER_WIDTH >> 1) % XF_NPIXPERCYCLE(NPC)),
                                  (((FILTER_WIDTH >> 1) - 1) >> XF_BITSHIFT(NPC)), XF_CHANNELS(SRC_T, NPC)>(
            _src_mat, lfilter, _dst_mat, _shift, img_width, img_height);

    } else if (NPC == XF_NPPC1) {
        // Note the argument order: rows first, then columns.
        xFFilter2Dkernel(_src_mat, _dst_mat, lfilter, _shift, img_height, img_width);
    }
}
} // namespace cv
} // namespace xf

#endif // _XF_CUSTOM_CONVOLUTION_HPP_