.. _program_listing_file__tmp_ws_src_vitis_common_include_features_xf_max_suppression.hpp:

Program Listing for File xf_max_suppression.hpp
===============================================

|exhale_lsh| :ref:`Return to documentation for file <file__tmp_ws_src_vitis_common_include_features_xf_max_suppression.hpp>` (``/tmp/ws/src/vitis_common/include/features/xf_max_suppression.hpp``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

..
   NOTE(review): the text this page was recovered from had most
   angle-bracketed spans removed. Affected are the template parameter lists
   after each ``template`` keyword, the arguments of ``xf::cv::Mat``, and the
   explicit template-argument lists at call sites (``xfExtractPixels``,
   ``xfPackPixels``, ``ProcessMax1``, ``ProcessRad2``, ``xFSuppressionRad1``,
   ``xFSuppressionRad2``, ``xFMaxSuppressionRad1``, ``xFMaxSuppressionRad2``).
   Only the two single-parameter heads (``xFFindMaxRad1``, ``xFFindMaxRad2``)
   are restored below, because their signatures fully determine them. Every
   other stripped list is left exactly as found and marked NOTE(review);
   restore those from the original header before compiling this listing.

.. code-block:: cpp

   /*
    * Copyright 2019 Xilinx, Inc.
    *
    * Licensed under the Apache License, Version 2.0 (the "License");
    * you may not use this file except in compliance with the License.
    * You may obtain a copy of the License at
    *
    *     http://www.apache.org/licenses/LICENSE-2.0
    *
    * Unless required by applicable law or agreed to in writing, software
    * distributed under the License is distributed on an "AS IS" BASIS,
    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    * See the License for the specific language governing permissions and
    * limitations under the License.
    */

   #ifndef _XF_MAX_SUPPRESSION_HPP_
   #define _XF_MAX_SUPPRESSION_HPP_

   #ifndef __cplusplus
   #error C++ is needed to include this header
   #endif

   /**
    * Radius-1 local-maximum test.
    *
    * t0..t2, m0..m2 and b0..b2 are three consecutive samples taken from the
    * top, middle and bottom row buffers respectively; m1 is the centre.
    * Only the four edge-adjacent neighbours (t1, m0, m2, b1) are compared;
    * the diagonal samples t0, t2, b0 and b2 are accepted but unused.
    *
    * @return true when m1 is strictly greater than t1, m0, m2 and b1.
    */
   template <typename SRC_T>
   bool xFFindMaxRad1(SRC_T t0, SRC_T t1, SRC_T t2, SRC_T m0, SRC_T m1, SRC_T m2, SRC_T b0, SRC_T b1, SRC_T b2) {
   // clang-format off
   #pragma HLS INLINE off
       // clang-format on
       return (m1 > t1) && (m1 > m0) && (m1 > m2) && (m1 > b1);
   }

   /**
    * Runs xFFindMaxRad1 on every centre position of one processing word.
    *
    * For i in [0, 1 << XF_BITSHIFT(NPC)) the centre is l10_buf[i + 1]; the
    * result is stored in Maxarray[i] as 255 (maximum) or 0.
    *
    * @param Maxarray output flags, one per centre position
    * @param l00_buf  top-row samples, at least (1 << XF_BITSHIFT(NPC)) + 2 long
    * @param l10_buf  middle-row samples, same length
    * @param l20_buf  bottom-row samples, same length
    */
   // NOTE(review): template parameter list missing here (see note at top of page).
   template
   void xFSuppressionRad1(DST_T* Maxarray,
                          XF_PTNAME(IN_DEPTH) * l00_buf,
                          XF_PTNAME(IN_DEPTH) * l10_buf,
                          XF_PTNAME(IN_DEPTH) * l20_buf) {
   // clang-format off
   #pragma HLS INLINE off
   // clang-format on
   Suppression_Loop:
       for (ap_uint<8> i = 0; i < (1 << XF_BITSHIFT(NPC)); i++) {
   // clang-format off
   #pragma HLS UNROLL
           // clang-format on
           bool Max = xFFindMaxRad1(l00_buf[i], l00_buf[i + 1], l00_buf[i + 2],
                                    l10_buf[i], l10_buf[i + 1], l10_buf[i + 2],
                                    l20_buf[i], l20_buf[i + 1], l20_buf[i + 2]);
           Maxarray[i] = Max ? 255 : 0;
       }
   }

   /**
    * Processes one image row for the radius-1 suppression.
    *
    * For each of the img_width words: optionally reads a new input word into
    * buf[row_ind] (when flag is set), fetches the three row words selected by
    * tp/mid/bottom, extracts them into l00_buf/l10_buf/l20_buf, computes the
    * per-pixel flags and packs them into P0. Output lags the input by one
    * pixel: the first flag of the current word completes the previous output
    * word, which is then written, and the remaining (npc - 1) flags start the
    * next word. The last pending word is therefore NOT written here; the
    * caller finishes it.
    *
    * @param _src_mat    input image, read sequentially through read_index
    * @param _dst_mat    output image, written sequentially through write_index
    * @param buf         three-row line buffer
    * @param l00_buf     top-row working samples (first two carry over between words)
    * @param l10_buf     middle-row working samples
    * @param l20_buf     bottom-row working samples
    * @param Array       per-pixel result flags for the current word
    * @param P0          output word being assembled
    * @param img_width   number of words per row
    * @param row_ind     line-buffer row that receives new input when flag is set
    * @param shift       pack position, updated by xfPackPixels
    * @param tp          line-buffer index of the top row
    * @param mid         line-buffer index of the middle row
    * @param bottom      line-buffer index of the bottom row
    * @param flag        true to read a new input row while processing
    * @param read_index  running input index, advanced per word read
    * @param write_index running output index, advanced per word written
    */
   // NOTE(review): template parameter list and xf::cv::Mat arguments missing here.
   template
   void ProcessMax1(xf::cv::Mat& _src_mat,
                    xf::cv::Mat& _dst_mat,
                    XF_SNAME(IN_WW) buf[3][(COLS >> XF_BITSHIFT(NPC))],
                    XF_PTNAME(IN_DEPTH) l00_buf[XF_NPIXPERCYCLE(NPC) + 2],
                    XF_PTNAME(IN_DEPTH) l10_buf[XF_NPIXPERCYCLE(NPC) + 2],
                    XF_PTNAME(IN_DEPTH) l20_buf[XF_NPIXPERCYCLE(NPC) + 2],
                    XF_PTNAME(OUT_DEPTH) Array[XF_NPIXPERCYCLE(NPC)],
                    XF_SNAME(OUT_WW) & P0,
                    uint16_t img_width,
                    ap_uint<13> row_ind,
                    uint16_t& shift,
                    ap_uint<2> tp,
                    ap_uint<2> mid,
                    ap_uint<2> bottom,
                    bool flag,
                    int& read_index,
                    int& write_index) {
   // clang-format off
   #pragma HLS INLINE off
       // clang-format on
       ap_uint<5> nms_bufsize = ((1 << XF_BITSHIFT(NPC)) + 2);
       XF_SNAME(IN_WW) buf0, buf1, buf2;
       uint16_t npc = XF_NPIXPERCYCLE(NPC);

   Col_Loop:
       for (ap_uint<13> col = 0; col < img_width; col++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
           // clang-format on
           if (flag) buf[row_ind][col] = _src_mat.read(read_index++);

           buf0 = buf[tp][col];
           buf1 = buf[mid][col];
           buf2 = buf[bottom][col];

           // Presumably unpacks each word into the working buffer starting at
           // index 2, leaving [0..1] as carry-over -- TODO confirm against
           // xfExtractPixels.
           xfExtractPixels(l00_buf, buf0, 2);
           xfExtractPixels(l10_buf, buf1, 2);
           xfExtractPixels(l20_buf, buf2, 2);

           xFSuppressionRad1(Array, l00_buf, l10_buf, l20_buf);

           if (col == 0) {
               // First word of the row: nothing to flush yet.
               shift = 0;
               P0 = 0;
               xfPackPixels(&Array[0], P0, 1, (npc - 1), shift);
           } else {
               // Array[0] completes the pending word; write it, then start the next.
               xfPackPixels(&Array[0], P0, 0, 1, shift);
               _dst_mat.write(write_index++, (P0));
               shift = 0;
               P0 = 0;
               xfPackPixels(&Array[0], P0, 1, (npc - 1), shift);
           }

           // Carry the last two samples of each row over to the next word.
           l00_buf[0] = l00_buf[nms_bufsize - 2];
           l00_buf[1] = l00_buf[nms_bufsize - 1];
           l10_buf[0] = l10_buf[nms_bufsize - 2];
           l10_buf[1] = l10_buf[nms_bufsize - 1];
           l20_buf[0] = l20_buf[nms_bufsize - 2];
           l20_buf[1] = l20_buf[nms_bufsize - 1];
       } // Col_Loop
   }

   /**
    * Radius-1 maximum suppression over a whole image.
    *
    * Keeps a three-row circular line buffer. Row 0 of the buffer starts as
    * zeros (top border) and the first input row is pre-loaded. Each Row_Loop
    * iteration reads one more input row and emits the flags for the row above
    * it. After the loop the oldest buffer row is cleared (bottom border) and
    * one more pass emits the final output row.
    *
    * @param _src_mat   input image
    * @param _dst_mat   output image; each pixel is 255 (local maximum) or 0
    * @param img_height number of rows to process
    * @param img_width  number of words per row (the caller has already divided
    *                   the pixel width by the pixels-per-word factor)
    */
   // NOTE(review): template parameter list and xf::cv::Mat arguments missing here.
   template
   void xFMaxSuppressionRad1(xf::cv::Mat& _src_mat,
                             xf::cv::Mat& _dst_mat,
                             uint16_t img_height,
                             uint16_t img_width) {
       ap_uint<13> row_ind, row, col;
       ap_uint<2> tp, mid, bottom;
       uint16_t shift = 0;

       XF_PTNAME(OUT_DEPTH) Array[(1 << XF_BITSHIFT(NPC))];
   // clang-format off
   #pragma HLS ARRAY_PARTITION variable=Array complete dim=1
       // clang-format on

       ap_uint<5> nms_bufsize = ((1 << XF_BITSHIFT(NPC)) + 2);
       int read_index = 0, write_index = 0;

       // Temporary buffers to hold image data from three rows.
       XF_PTNAME(IN_DEPTH)
       l00_buf[(1 << XF_BITSHIFT(NPC)) + 2], l10_buf[(1 << XF_BITSHIFT(NPC)) + 2],
           l20_buf[(1 << XF_BITSHIFT(NPC)) + 2];
   // clang-format off
   #pragma HLS ARRAY_PARTITION variable=l00_buf complete dim=1
   #pragma HLS ARRAY_PARTITION variable=l10_buf complete dim=1
   #pragma HLS ARRAY_PARTITION variable=l20_buf complete dim=1
       // clang-format on

       XF_SNAME(OUT_WW) P0;

       // Line buffer to hold the image data
       XF_SNAME(IN_WW) buf[3][(COLS >> XF_BITSHIFT(NPC))];
   // clang-format off
   #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
   #pragma HLS ARRAY_PARTITION variable=buf complete dim=1
       // clang-format on

       row_ind = 1;

   Clear_first_Row:
       for (col = 0; col < img_width; col++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
           // clang-format on
           buf[0][col] = 0;                                 // zero row above the image
           buf[row_ind][col] = _src_mat.read(read_index++); // first image row
       }
       row_ind++;

   Row_Loop:
       for (row = 1; row < img_height; row++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
   // NOTE(review): the next pragma names no directive; kept exactly as found.
   #pragma HLS
           // clang-format on

           // Rotate the roles of the three line-buffer rows; row_ind is the
           // row that receives the new input (bottom).
           if (row_ind == 2) {
               tp = 0;
               mid = 1;
               bottom = 2;
           } else if (row_ind == 0) {
               tp = 1;
               mid = 2;
               bottom = 0;
           } else if (row_ind == 1) {
               tp = 2;
               mid = 0;
               bottom = 1;
           }

           // Zero carry-over samples: left image border.
           l00_buf[0] = l00_buf[1] = 0;
           l10_buf[0] = l10_buf[1] = 0;
           l20_buf[0] = l20_buf[1] = 0;

           P0 = 0;
           ProcessMax1(_src_mat, _dst_mat, buf, l00_buf, l10_buf, l20_buf, Array, P0, img_width, row_ind, shift, tp,
                       mid, bottom, true, read_index, write_index);

           // row starts at 1, so this condition is always true inside the loop.
           if (row) {
               // Last pixel of the row: the sample to its right is the zero border.
               XF_PTNAME(IN_DEPTH) val = (XF_PTNAME(IN_DEPTH))0;
               if ((NPC == XF_NPPC8)) {
                   bool Max = xFFindMaxRad1(l00_buf[nms_bufsize - 2], l00_buf[nms_bufsize - 1], val,
                                            l10_buf[nms_bufsize - 2], l10_buf[nms_bufsize - 1], val,
                                            l20_buf[nms_bufsize - 2], l20_buf[nms_bufsize - 1], val);
                   Array[0] = Max ? 255 : 0;
               } else {
                   bool Max = xFFindMaxRad1(l00_buf[nms_bufsize - 3], l00_buf[nms_bufsize - 2], val,
                                            l10_buf[nms_bufsize - 3], l10_buf[nms_bufsize - 2], val,
                                            l20_buf[nms_bufsize - 3], l20_buf[nms_bufsize - 2], val);
                   Array[0] = Max ? 255 : 0;
               }

               xfPackPixels(&Array[0], P0, 0, 1, shift);
               // P0.range(((8 << XF_BITSHIFT(NPC))-1), ((8 << XF_BITSHIFT(NPC))-8)) = Array[0]; // Get bits from
               // certain range of positions.
               _dst_mat.write(write_index++, (P0));
               shift = 0;
               P0 = 0;
           }

           row_ind++;
           if (row_ind == 3) {
               row_ind = 0;
           }
       } // Row_Loop

       // Final output row: the row below the image is all zeros.
       if (row_ind == 3) {
           row_ind = 0;
       }

       if (row_ind == 2) {
           tp = 0;
           mid = 1;
           bottom = 2;
       } else if (row_ind == 0) {
           tp = 1;
           mid = 2;
           bottom = 0;
       } else if (row_ind == 1) {
           tp = 2;
           mid = 0;
           bottom = 1;
       }

       l00_buf[0] = l00_buf[1] = 0;
       l10_buf[0] = l10_buf[1] = 0;
       l20_buf[0] = l20_buf[1] = 0;

   Clear_Row_Loop:
       for (col = 0; col < img_width; col++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
           // clang-format on
           buf[bottom][col] = 0;
       }

       // flag == false: no further input is read.
       ProcessMax1(_src_mat, _dst_mat, buf, l00_buf, l10_buf, l20_buf, Array, P0, img_width, row_ind, shift, tp, mid,
                   bottom, false, read_index, write_index);

       XF_PTNAME(IN_DEPTH) val = 0;
       if ((NPC == XF_NPPC8)) {
           bool Max = xFFindMaxRad1(l00_buf[nms_bufsize - 2], l00_buf[nms_bufsize - 1], val,
                                    l10_buf[nms_bufsize - 2], l10_buf[nms_bufsize - 1], val,
                                    l20_buf[nms_bufsize - 2], l20_buf[nms_bufsize - 1], val);
           Array[0] = Max ? 255 : 0;
       } else {
           bool Max = xFFindMaxRad1(l00_buf[nms_bufsize - 3], l00_buf[nms_bufsize - 2], val,
                                    l10_buf[nms_bufsize - 3], l10_buf[nms_bufsize - 2], val,
                                    l20_buf[nms_bufsize - 3], l20_buf[nms_bufsize - 2], val);
           Array[0] = Max ? 255 : 0;
       }

       xfPackPixels(&Array[0], P0, 0, 1, shift);
       _dst_mat.write(write_index++, (P0));
       shift = 0;
       P0 = 0;
   } // xFMaxSuppressionRad1

   /**
    * Radius-2 local-maximum test.
    *
    * Parameter names encode <row><column> inside a 5x5 window whose centre is
    * l22. The centre is compared against 12 neighbours:
    *   row 0: l02
    *   row 1: l11 l12 l13
    *   row 2: l20 l21 (l22) l23 l24
    *   row 3: l31 l32 l33
    *   row 4: l42
    *
    * @return true when l22 is strictly greater than every listed neighbour.
    */
   template <typename SRC_T>
   bool xFFindMaxRad2(SRC_T l22,
                      SRC_T l02,
                      SRC_T l11,
                      SRC_T l12,
                      SRC_T l13,
                      SRC_T l20,
                      SRC_T l21,
                      SRC_T l23,
                      SRC_T l24,
                      SRC_T l31,
                      SRC_T l32,
                      SRC_T l33,
                      SRC_T l42) {
   // clang-format off
   #pragma HLS INLINE off
       // clang-format on
       return (l22 > l02) && (l22 > l11) && (l22 > l12) && (l22 > l13) && (l22 > l20) && (l22 > l21) &&
              (l22 > l23) && (l22 > l24) && (l22 > l31) && (l22 > l32) && (l22 > l33) && (l22 > l42);
   }

   /**
    * Runs xFFindMaxRad2 on every centre position of one processing word.
    *
    * For i in [0, 1 << XF_BITSHIFT(NPC)) the centre is l20_buf[i + 2]; the
    * result is stored in Maxarray[i] as 255 (maximum) or 0.
    *
    * @param Maxarray output flags, one per centre position
    * @param l00_buf  samples two rows above the centre row
    * @param l10_buf  samples one row above
    * @param l20_buf  centre-row samples
    * @param l30_buf  samples one row below
    * @param l40_buf  samples two rows below
    */
   // NOTE(review): template parameter list missing here.
   template
   void xFSuppressionRad2(DST_T* Maxarray,
                          XF_PTNAME(IN_DEPTH) * l00_buf,
                          XF_PTNAME(IN_DEPTH) * l10_buf,
                          XF_PTNAME(IN_DEPTH) * l20_buf,
                          XF_PTNAME(IN_DEPTH) * l30_buf,
                          XF_PTNAME(IN_DEPTH) * l40_buf) {
   // clang-format off
   #pragma HLS INLINE off
   // clang-format on
   Suppression_Loop:
       for (ap_uint<5> i = 0; i < (1 << XF_BITSHIFT(NPC)); i++) {
   // clang-format off
   #pragma HLS UNROLL
           // clang-format on
           bool Max = xFFindMaxRad2(l20_buf[i + 2], l00_buf[i + 2],
                                    l10_buf[i + 1], l10_buf[i + 2], l10_buf[i + 3],
                                    l20_buf[i], l20_buf[i + 1], l20_buf[i + 3], l20_buf[i + 4],
                                    l30_buf[i + 1], l30_buf[i + 2], l30_buf[i + 3],
                                    l40_buf[i + 2]);
           Maxarray[i] = Max ? 255 : 0;
       }
   } // xFSuppressionRad2

   /**
    * Processes one image row for the radius-2 suppression.
    *
    * Same scheme as ProcessMax1 with a five-row window and four carry-over
    * samples per row. Output lags the input by two pixels:
    *  - NPC == XF_NPPC8: the first two flags of the current word complete the
    *    pending output word, which is then written; the remaining (npc - 2)
    *    flags start the next word.
    *  - otherwise: one flag is written per word from col >= 2 onwards.
    * The two trailing pixels of the row are left for the caller to finish.
    *
    * @param _src_mat      input image, read sequentially through read_pointer
    * @param _dst_mat      output image, written sequentially through write_pointer
    * @param buf           five-row line buffer
    * @param l00_buf..l40_buf working samples for the five window rows
    * @param Array         per-pixel result flags for the current word
    * @param inter_valx    output word being assembled
    * @param img_width     number of words per row
    * @param row_ind       line-buffer row that receives new input when flag is set
    * @param shift         pack position, updated by xfPackPixels
    * @param tp1,tp2,mid,bottom1,bottom2 line-buffer indices of the five window
    *                      rows, top to bottom
    * @param flag          true to read a new input row while processing
    * @param read_pointer  running input index
    * @param write_pointer running output index
    */
   // NOTE(review): template parameter list and xf::cv::Mat arguments missing here.
   template
   void ProcessRad2(xf::cv::Mat& _src_mat,
                    xf::cv::Mat& _dst_mat,
                    XF_SNAME(IN_WW) buf[5][(COLS >> XF_BITSHIFT(NPC))],
                    XF_PTNAME(IN_DEPTH) l00_buf[XF_NPIXPERCYCLE(NPC) + 4],
                    XF_PTNAME(IN_DEPTH) l10_buf[XF_NPIXPERCYCLE(NPC) + 4],
                    XF_PTNAME(IN_DEPTH) l20_buf[XF_NPIXPERCYCLE(NPC) + 4],
                    XF_PTNAME(IN_DEPTH) l30_buf[XF_NPIXPERCYCLE(NPC) + 4],
                    XF_PTNAME(IN_DEPTH) l40_buf[XF_NPIXPERCYCLE(NPC) + 4],
                    XF_PTNAME(OUT_DEPTH) Array[XF_NPIXPERCYCLE(NPC)],
                    XF_SNAME(OUT_WW) & inter_valx,
                    uint16_t img_width,
                    ap_uint<13> row_ind,
                    uint16_t& shift,
                    ap_uint<4> tp1,
                    ap_uint<4> tp2,
                    ap_uint<4> mid,
                    ap_uint<4> bottom1,
                    ap_uint<4> bottom2,
                    bool flag,
                    int& read_pointer,
                    int& write_pointer) {
   // clang-format off
   #pragma HLS INLINE off
       // clang-format on
       ap_uint<8> nms_bufsize = (1 << XF_BITSHIFT(NPC)) + 4;
       uint16_t npc = XF_NPIXPERCYCLE(NPC);
       XF_SNAME(IN_WW) buf0, buf1, buf2, buf3, buf4;

   Col_Loop:
       for (ap_uint<13> col = 0; col < img_width; col++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
           // clang-format on
           if (flag) buf[row_ind][col] = _src_mat.read(read_pointer++);

           buf0 = buf[tp1][col];
           buf1 = buf[tp2][col];
           buf2 = buf[mid][col];
           buf3 = buf[bottom1][col];
           buf4 = buf[bottom2][col];

           // Presumably unpacks each word into the working buffer starting at
           // index 4, leaving [0..3] as carry-over -- TODO confirm against
           // xfExtractPixels.
           xfExtractPixels(l00_buf, buf0, 4);
           xfExtractPixels(l10_buf, buf1, 4);
           xfExtractPixels(l20_buf, buf2, 4);
           xfExtractPixels(l30_buf, buf3, 4);
           xfExtractPixels(l40_buf, buf4, 4);

           xFSuppressionRad2(Array, l00_buf, l10_buf, l20_buf, l30_buf, l40_buf);

           // Carry the last four samples of each row over to the next word.
           for (ap_uint<4> i = 0; i < 4; i++) {
   // clang-format off
   #pragma HLS unroll
               // clang-format on
               l00_buf[i] = l00_buf[nms_bufsize - (4 - i)];
               l10_buf[i] = l10_buf[nms_bufsize - (4 - i)];
               l20_buf[i] = l20_buf[nms_bufsize - (4 - i)];
               l30_buf[i] = l30_buf[nms_bufsize - (4 - i)];
               l40_buf[i] = l40_buf[nms_bufsize - (4 - i)];
           }

           if (col == 0) {
               // First word of the row: nothing to flush yet.
               shift = 0;
               inter_valx = 0;
               xfPackPixels(&Array[0], inter_valx, 2, (npc - 2), shift);
           } else {
               if (NPC == XF_NPPC8) {
                   // Array[0..1] complete the pending word; write it, then
                   // start the next one.
                   xfPackPixels(&Array[0], inter_valx, 0, 2, shift);
                   _dst_mat.write(write_pointer++, inter_valx);
                   shift = 0;
                   inter_valx = 0;
                   xfPackPixels(&Array[0], inter_valx, 2, (npc - 2), shift);
               } else {
                   if (col >= 2) {
                       inter_valx(7, 0) = Array[0];
                       _dst_mat.write(write_pointer++, (inter_valx));
                   }
               }
           }
       } // Col_Loop
   }

   /**
    * Radius-2 maximum suppression over a whole image.
    *
    * Keeps a five-row circular line buffer. Buffer rows 0 and 1 start as zeros
    * (top border) and the first two input rows are pre-loaded. Each Row_Loop
    * iteration reads one more input row and emits the flags for the row two
    * above it. Border_Row_Loop then emits the last two output rows, clearing
    * one more buffer row (bottom border) per iteration.
    *
    * After every ProcessRad2 pass the two trailing pixels of the row are
    * finished here, with the samples to the right of the image set to zero.
    *
    * @param _src_mat   input image
    * @param _dst_mat   output image; each pixel is 255 (local maximum) or 0
    * @param img_height number of rows to process
    * @param img_width  number of words per row (already divided by the
    *                   pixels-per-word factor by the caller)
    */
   // NOTE(review): template parameter list and xf::cv::Mat arguments missing here.
   template
   void xFMaxSuppressionRad2(xf::cv::Mat& _src_mat,
                             xf::cv::Mat& _dst_mat,
                             uint16_t img_height,
                             uint16_t img_width) {
       ap_uint<13> row_ind, row, col;
       ap_uint<8> tp1, tp2, mid, bottom1, bottom2;
       ap_uint<8> nms_bufsize = (1 << XF_BITSHIFT(NPC)) + 4;
       int read_pointer = 0, write_pointer = 0;

       XF_PTNAME(OUT_DEPTH) Array[(1 << XF_BITSHIFT(NPC))];
   // clang-format off
   #pragma HLS ARRAY_PARTITION variable=Array complete dim=1
       // clang-format on

       // Temporary buffers to hold image data from five rows.
       XF_PTNAME(IN_DEPTH)
       l00_buf[(1 << XF_BITSHIFT(NPC)) + 4], l10_buf[(1 << XF_BITSHIFT(NPC)) + 4],
           l20_buf[(1 << XF_BITSHIFT(NPC)) + 4];
       XF_PTNAME(IN_DEPTH) l30_buf[(1 << XF_BITSHIFT(NPC)) + 4], l40_buf[(1 << XF_BITSHIFT(NPC)) + 4];
   // clang-format off
   #pragma HLS ARRAY_PARTITION variable=l00_buf complete dim=1
   #pragma HLS ARRAY_PARTITION variable=l10_buf complete dim=1
   #pragma HLS ARRAY_PARTITION variable=l20_buf complete dim=1
   #pragma HLS ARRAY_PARTITION variable=l30_buf complete dim=1
   #pragma HLS ARRAY_PARTITION variable=l40_buf complete dim=1
       // clang-format on

       ap_uint<8> i = 0;
       uint16_t shift = 0;
       XF_SNAME(OUT_WW) inter_valx = 0;

       XF_SNAME(IN_WW) buf[5][(COLS >> XF_BITSHIFT(NPC))];
   // clang-format off
   #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
   #pragma HLS ARRAY_PARTITION variable=buf complete dim=1
       // clang-format on

       row_ind = 2;

   Clear_Row_Loop:
       for (col = 0; col < img_width; col++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
           // clang-format on
           buf[0][col] = 0; // two zero rows above the image
           buf[1][col] = 0;
           buf[row_ind][col] = _src_mat.read(read_pointer++); // image row 0
       }
       row_ind++;

   Read_Row1_Loop:
       for (col = 0; col < img_width; col++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
           // clang-format on
           buf[row_ind][col] = _src_mat.read(read_pointer++); // image row 1
       }
       row_ind++;

   Row_Loop:
       for (row = 2; row < img_height; row++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
           // clang-format on

           // modify the buffer indices to re use
           if (row_ind == 4) {
               tp1 = 0;
               tp2 = 1;
               mid = 2;
               bottom1 = 3;
               bottom2 = 4;
           } else if (row_ind == 0) {
               tp1 = 1;
               tp2 = 2;
               mid = 3;
               bottom1 = 4;
               bottom2 = 0;
           } else if (row_ind == 1) {
               tp1 = 2;
               tp2 = 3;
               mid = 4;
               bottom1 = 0;
               bottom2 = 1;
           } else if (row_ind == 2) {
               tp1 = 3;
               tp2 = 4;
               mid = 0;
               bottom1 = 1;
               bottom2 = 2;
           } else if (row_ind == 3) {
               tp1 = 4;
               tp2 = 0;
               mid = 1;
               bottom1 = 2;
               bottom2 = 3;
           }

           // Zero carry-over samples: left image border.
           l00_buf[0] = l00_buf[1] = l00_buf[2] = l00_buf[3] = 0;
           l10_buf[0] = l10_buf[1] = l10_buf[2] = l10_buf[3] = 0;
           l20_buf[0] = l20_buf[1] = l20_buf[2] = l20_buf[3] = 0;
           l30_buf[0] = l30_buf[1] = l30_buf[2] = l30_buf[3] = 0;
           l40_buf[0] = l40_buf[1] = l40_buf[2] = l40_buf[3] = 0;

           inter_valx = 0;
           ProcessRad2(_src_mat, _dst_mat, buf, l00_buf, l10_buf, l20_buf, l30_buf, l40_buf, Array, inter_valx,
                       img_width, row_ind, shift, tp1, tp2, mid, bottom1, bottom2, true, read_pointer,
                       write_pointer);

           // row starts at 2, so this condition is always true inside the loop.
           if (row >= 2) {
               if ((NPC == XF_NPPC8) || (NPC == XF_NPPC16)) {
                   // Samples right of the image are zero.
                   for (i = 4; i < nms_bufsize; i++) {
   // clang-format off
   #pragma HLS unroll
                       // clang-format on
                       l00_buf[i] = 0;
                       l10_buf[i] = 0;
                       l20_buf[i] = 0;
                       l30_buf[i] = 0;
                       l40_buf[i] = 0;
                   }

                   // Second-to-last pixel of the row: centre at index 2.
                   Array[0] = xFFindMaxRad2(l20_buf[2], l00_buf[2],
                                            l10_buf[1], l10_buf[2], l10_buf[3],
                                            l20_buf[0], l20_buf[1], l20_buf[3], l20_buf[4],
                                            l30_buf[1], l30_buf[2], l30_buf[3],
                                            l40_buf[2]);
                   // Last pixel of the row: centre at index 3. Every tap is the
                   // index-2 pattern shifted by one (fixed: the pixel directly
                   // above, l10_buf[3], and directly left, l20_buf[2], were
                   // previously replaced by duplicated neighbours).
                   Array[1] = xFFindMaxRad2(l20_buf[3], l00_buf[3],
                                            l10_buf[2], l10_buf[3], l10_buf[4],
                                            l20_buf[1], l20_buf[2], l20_buf[4], l20_buf[5],
                                            l30_buf[2], l30_buf[3], l30_buf[4],
                                            l40_buf[3]);

                   xfPackPixels(&Array[0], inter_valx, 0, 2, shift);
                   _dst_mat.write(write_pointer++, (inter_valx));
                   shift = 0;
                   inter_valx = 0;
               } else if (NPC == XF_NPPC1) {
                   // Second-to-last pixel: one zero sample to the right.
                   l00_buf[4] = 0;
                   l10_buf[4] = 0;
                   l20_buf[4] = 0;
                   l30_buf[4] = 0;
                   l40_buf[4] = 0;

                   Array[0] = xFFindMaxRad2(l20_buf[2], l00_buf[2],
                                            l10_buf[1], l10_buf[2], l10_buf[3],
                                            l20_buf[0], l20_buf[1], l20_buf[3], l20_buf[4],
                                            l30_buf[1], l30_buf[2], l30_buf[3],
                                            l40_buf[2]);

                   ap_uint<16> step = XF_PIXELDEPTH(OUT_DEPTH);
                   inter_valx(((step << XF_BITSHIFT(NPC)) - 1), ((step << XF_BITSHIFT(NPC)) - step)) = Array[0];
                   _dst_mat.write(write_pointer++, (inter_valx));

               // Slide the window one pixel further for the last pixel.
               lbufLoop3:
                   for (i = 0; i < 4; i++) {
   // clang-format off
   #pragma HLS unroll
                       // clang-format on
                       l00_buf[i] = l00_buf[nms_bufsize - (4 - i)];
                       l10_buf[i] = l10_buf[nms_bufsize - (4 - i)];
                       l20_buf[i] = l20_buf[nms_bufsize - (4 - i)];
                       l30_buf[i] = l30_buf[nms_bufsize - (4 - i)];
                       l40_buf[i] = l40_buf[nms_bufsize - (4 - i)];
                   }
                   l00_buf[3] = 0;
                   l10_buf[3] = 0;
                   l20_buf[3] = 0;
                   l30_buf[3] = 0;
                   l40_buf[3] = 0;

                   Array[0] = xFFindMaxRad2(l20_buf[2], l00_buf[2],
                                            l10_buf[1], l10_buf[2], l10_buf[3],
                                            l20_buf[0], l20_buf[1], l20_buf[3], l20_buf[4],
                                            l30_buf[1], l30_buf[2], l30_buf[3],
                                            l40_buf[2]);

                   inter_valx(((step << XF_BITSHIFT(NPC)) - 1), ((step << XF_BITSHIFT(NPC)) - step)) = Array[0];
                   _dst_mat.write(write_pointer++, (inter_valx));
               }
           }

           row_ind++;
           if (row_ind == 5) {
               row_ind = 0;
           }
       } // Row_Loop ends here

   Border_Row_Loop:
       for (row = 0; row < 2; row++) {
           if (row_ind == 5) {
               row_ind = 0;
           }

           if (row_ind == 4) {
               tp1 = 0;
               tp2 = 1;
               mid = 2;
               bottom1 = 3;
               bottom2 = 4;
           } else if (row_ind == 0) {
               tp1 = 1;
               tp2 = 2;
               mid = 3;
               bottom1 = 4;
               bottom2 = 0;
           } else if (row_ind == 1) {
               tp1 = 2;
               tp2 = 3;
               mid = 4;
               bottom1 = 0;
               bottom2 = 1;
           } else if (row_ind == 2) {
               tp1 = 3;
               tp2 = 4;
               mid = 0;
               bottom1 = 1;
               bottom2 = 2;
           } else if (row_ind == 3) {
               tp1 = 4;
               tp2 = 0;
               mid = 1;
               bottom1 = 2;
               bottom2 = 3;
           }

       // The row entering the window from below lies outside the image.
       Clear_Row_Loop1:
           for (col = 0; col < img_width; col++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
               // clang-format on
               buf[bottom2][col] = 0;
           }

           // Zero carry-over samples: left image border. l10_buf is cleared
           // too (fixed: it was previously skipped here, unlike in Row_Loop).
           l00_buf[0] = l00_buf[1] = l00_buf[2] = l00_buf[3] = 0;
           l10_buf[0] = l10_buf[1] = l10_buf[2] = l10_buf[3] = 0;
           l20_buf[0] = l20_buf[1] = l20_buf[2] = l20_buf[3] = 0;
           l30_buf[0] = l30_buf[1] = l30_buf[2] = l30_buf[3] = 0;
           l40_buf[0] = l40_buf[1] = l40_buf[2] = l40_buf[3] = 0;

           // flag == false: no further input is read.
           ProcessRad2(_src_mat, _dst_mat, buf, l00_buf, l10_buf, l20_buf, l30_buf, l40_buf, Array, inter_valx,
                       img_width, row_ind, shift, tp1, tp2, mid, bottom1, bottom2, false, read_pointer,
                       write_pointer);

           if (NPC == XF_NPPC8 || NPC == XF_NPPC16) {
               // Samples right of the image are zero (fixed: this clear was
               // missing here, so the right-hand taps held stale pixels from
               // the last extracted word).
               for (i = 4; i < nms_bufsize; i++) {
   // clang-format off
   #pragma HLS unroll
                   // clang-format on
                   l00_buf[i] = 0;
                   l10_buf[i] = 0;
                   l20_buf[i] = 0;
                   l30_buf[i] = 0;
                   l40_buf[i] = 0;
               }

               Array[0] = xFFindMaxRad2(l20_buf[2], l00_buf[2],
                                        l10_buf[1], l10_buf[2], l10_buf[3],
                                        l20_buf[0], l20_buf[1], l20_buf[3], l20_buf[4],
                                        l30_buf[1], l30_buf[2], l30_buf[3],
                                        l40_buf[2]);
               // Same tap pattern shifted by one (fixed duplicated indices).
               Array[1] = xFFindMaxRad2(l20_buf[3], l00_buf[3],
                                        l10_buf[2], l10_buf[3], l10_buf[4],
                                        l20_buf[1], l20_buf[2], l20_buf[4], l20_buf[5],
                                        l30_buf[2], l30_buf[3], l30_buf[4],
                                        l40_buf[3]);

               xfPackPixels(&Array[0], inter_valx, 0, 2, shift);
               _dst_mat.write(write_pointer++, (inter_valx));
               shift = 0;
               inter_valx = 0;
           } else if (NPC == XF_NPPC1) {
               l00_buf[4] = 0;
               l10_buf[4] = 0;
               l20_buf[4] = 0;
               l30_buf[4] = 0;
               l40_buf[4] = 0;

               Array[0] = xFFindMaxRad2(l20_buf[2], l00_buf[2],
                                        l10_buf[1], l10_buf[2], l10_buf[3],
                                        l20_buf[0], l20_buf[1], l20_buf[3], l20_buf[4],
                                        l30_buf[1], l30_buf[2], l30_buf[3],
                                        l40_buf[2]);

               ap_uint<8> step = XF_PIXELDEPTH(OUT_DEPTH);
               inter_valx(((step << XF_BITSHIFT(NPC)) - 1), ((step << XF_BITSHIFT(NPC)) - step)) = Array[0];
               _dst_mat.write(write_pointer++, (inter_valx));

           lbufLoop33:
               for (i = 0; i < 4; i++) {
   // clang-format off
   #pragma HLS unroll
                   // clang-format on
                   l00_buf[i] = l00_buf[nms_bufsize - (4 - i)];
                   l10_buf[i] = l10_buf[nms_bufsize - (4 - i)];
                   l20_buf[i] = l20_buf[nms_bufsize - (4 - i)];
                   l30_buf[i] = l30_buf[nms_bufsize - (4 - i)];
                   l40_buf[i] = l40_buf[nms_bufsize - (4 - i)];
               }
               l00_buf[3] = 0;
               l10_buf[3] = 0;
               l20_buf[3] = 0;
               l30_buf[3] = 0;
               l40_buf[3] = 0;

               Array[0] = xFFindMaxRad2(l20_buf[2], l00_buf[2],
                                        l10_buf[1], l10_buf[2], l10_buf[3],
                                        l20_buf[0], l20_buf[1], l20_buf[3], l20_buf[4],
                                        l30_buf[1], l30_buf[2], l30_buf[3],
                                        l40_buf[2]);

               inter_valx(((step << XF_BITSHIFT(NPC)) - 1), ((step << XF_BITSHIFT(NPC)) - step)) = Array[0];
               _dst_mat.write(write_pointer++, (inter_valx));
           }

           // Advance the circular window so the second border iteration
           // centres on the last image row (fixed: without this both
           // iterations processed the same window and the final output row
           // duplicated the one before it). The wrap to 0 happens at the top
           // of the loop.
           row_ind++;
       }
   }

   /*********************************************************************
    * xFMaxSuppression : Calls the Main Function depend on Requirements
    *
    * Checks (outside synthesis) that the radius is XF_NMS_RADIUS_1 or
    * XF_NMS_RADIUS_2 and that the image fits in ROWS x COLS, converts
    * img_width from pixels to words (img_width >> XF_BITSHIFT(NPC)), and
    * dispatches to xFMaxSuppressionRad1 or xFMaxSuppressionRad2.
    *********************************************************************/
   // NOTE(review): template parameter list and xf::cv::Mat arguments missing here.
   template
   void xFMaxSuppression(xf::cv::Mat& _src_mat,
                         xf::cv::Mat& _dst_mat,
                         uint8_t _nms_radius,
                         uint16_t img_height,
                         uint16_t img_width) {
   #ifndef __SYNTHESIS__
       //#pragma HLS STREAM variable=_dst_mat.data depth=1
       assert(((_nms_radius == XF_NMS_RADIUS_1) || (_nms_radius == XF_NMS_RADIUS_2)) && "radius size must be 1, 2");

       assert(((img_height <= ROWS) && (img_width <= COLS)) && "ROWS and COLS should be greater than input image");
   #endif

       img_width = img_width >> XF_BITSHIFT(NPC);

       // NOTE(review): the two calls below are extraction residue, kept as
       // found. Most of each explicit template-argument list was dropped; the
       // surviving tail suggests the final argument was
       // (COLS >> XF_BITSHIFT(NPC)) -- confirm against the original header.
       if (_nms_radius == XF_NMS_RADIUS_1) {
           xFMaxSuppressionRad1> XF_BITSHIFT(NPC))>(_src_mat, _dst_mat, img_height, img_width);
       } else {
           xFMaxSuppressionRad2> XF_BITSHIFT(NPC))>(_src_mat, _dst_mat, img_height, img_width);
       }
   }

   #endif // _XF_MAX_SUPPRESSION_HPP_