// Program listing for file xf_max_suppression.hpp
// (documentation source: /tmp/ws/src/vitis_common/include/features/xf_max_suppression.hpp)
/*
* Copyright 2019 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _XF_MAX_SUPPRESSION_HPP_
#define _XF_MAX_SUPPRESSION_HPP_
#ifndef __cplusplus
#error C++ is needed to include this header
#endif
/**
 * Radius-1 local-maximum test on a 3x3 window.
 *
 * The centre sample m1 is reported as a maximum only when it is strictly
 * greater than its four edge neighbours: t1 (above), b1 (below), m0 (left)
 * and m2 (right). The corner samples t0, t2, b0 and b2 are accepted for
 * interface symmetry but are not compared.
 *
 * @return true when m1 strictly exceeds t1, m0, m2 and b1; false otherwise
 *         (ties are not maxima).
 */
template <typename SRC_T>
bool xFFindMaxRad1(SRC_T t0, SRC_T t1, SRC_T t2, SRC_T m0, SRC_T m1, SRC_T m2, SRC_T b0, SRC_T b1, SRC_T b2) {
    // clang-format off
    #pragma HLS INLINE off
    // clang-format on
    const bool beats_vertical = (m1 > t1) && (m1 > b1);
    const bool beats_horizontal = (m1 > m0) && (m1 > m2);
    return beats_vertical && beats_horizontal;
}
template <int NPC, int IN_DEPTH, typename DST_T>
void xFSuppressionRad1(DST_T* Maxarray,
XF_PTNAME(IN_DEPTH) * l00_buf,
XF_PTNAME(IN_DEPTH) * l10_buf,
XF_PTNAME(IN_DEPTH) * l20_buf) {
// clang-format off
#pragma HLS INLINE off
// clang-format on
Suppression_Loop:
for (ap_uint<8> i = 0; i < (1 << XF_BITSHIFT(NPC)); i++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
bool Max = xFFindMaxRad1(l00_buf[i], l00_buf[i + 1], l00_buf[i + 2], l10_buf[i], l10_buf[i + 1], l10_buf[i + 2],
l20_buf[i], l20_buf[i + 1], l20_buf[i + 2]);
Maxarray[i] = Max ? 255 : 0;
}
}
/**
 * Runs the radius-1 suppression over one row of packed words.
 *
 * For every word column: optionally load a new word from _src_mat into
 * buf[row_ind], fetch the words of the top / middle / bottom rows, extract
 * their pixels behind the two carried-over samples, evaluate the window with
 * xFSuppressionRad1 and pack the results into P0. Array[0] belongs to the
 * previous output word, so from the second column on it completes that word
 * (which is then written) before the remaining results start a new one.
 * The word still pending after the last column is left in P0 / shift for the
 * caller to finish.
 *
 * @param flag        when true a new input word is read per column
 * @param read_index  running read position in _src_mat (advanced here)
 * @param write_index running write position in _dst_mat (advanced here)
 */
template <int SRC_T, int DST_T, int ROWS, int COLS, int IN_DEPTH, int OUT_DEPTH, int NPC, int IN_WW, int OUT_WW, int TC>
void ProcessMax1(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
                 xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_mat,
                 XF_SNAME(IN_WW) buf[3][(COLS >> XF_BITSHIFT(NPC))],
                 XF_PTNAME(IN_DEPTH) l00_buf[XF_NPIXPERCYCLE(NPC) + 2],
                 XF_PTNAME(IN_DEPTH) l10_buf[XF_NPIXPERCYCLE(NPC) + 2],
                 XF_PTNAME(IN_DEPTH) l20_buf[XF_NPIXPERCYCLE(NPC) + 2],
                 XF_PTNAME(OUT_DEPTH) Array[XF_NPIXPERCYCLE(NPC)],
                 XF_SNAME(OUT_WW) & P0,
                 uint16_t img_width,
                 ap_uint<13> row_ind,
                 uint16_t& shift,
                 ap_uint<2> tp,
                 ap_uint<2> mid,
                 ap_uint<2> bottom,
                 bool flag,
                 int& read_index,
                 int& write_index) {
    // clang-format off
    #pragma HLS INLINE off
    // clang-format on
    ap_uint<5> window_len = ((1 << XF_BITSHIFT(NPC)) + 2);
    uint16_t pixels_per_word = XF_NPIXPERCYCLE(NPC);

Col_Loop:
    for (ap_uint<13> col = 0; col < img_width; col++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
        #pragma HLS pipeline
        // clang-format on
        if (flag) {
            buf[row_ind][col] = _src_mat.read(read_index++);
        }

        XF_SNAME(IN_WW) top_word = buf[tp][col];
        XF_SNAME(IN_WW) mid_word = buf[mid][col];
        XF_SNAME(IN_WW) bottom_word = buf[bottom][col];

        xfExtractPixels<NPC, IN_WW, IN_DEPTH>(l00_buf, top_word, 2);
        xfExtractPixels<NPC, IN_WW, IN_DEPTH>(l10_buf, mid_word, 2);
        xfExtractPixels<NPC, IN_WW, IN_DEPTH>(l20_buf, bottom_word, 2);

        xFSuppressionRad1<NPC, IN_DEPTH>(Array, l00_buf, l10_buf, l20_buf);

        // From the second column on, Array[0] completes the previous output word.
        if (col != 0) {
            xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(&Array[0], P0, 0, 1, shift);
            _dst_mat.write(write_index++, (P0));
        }

        // Start a fresh output word with the remaining results.
        shift = 0;
        P0 = 0;
        xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(&Array[0], P0, 1, (pixels_per_word - 1), shift);

        // Carry the last two samples of each row to the front for the next column.
        l00_buf[0] = l00_buf[window_len - 2];
        l00_buf[1] = l00_buf[window_len - 1];
        l10_buf[0] = l10_buf[window_len - 2];
        l10_buf[1] = l10_buf[window_len - 1];
        l20_buf[0] = l20_buf[window_len - 2];
        l20_buf[1] = l20_buf[window_len - 1];
    } // Col_Loop
}
// Radius-1 non-maximum suppression over one image.
//
// SRC_T/DST_T/ROWS/COLS/NPC select the xf::cv::Mat types; IN_DEPTH/OUT_DEPTH and
// IN_WW/OUT_WW feed the XF_PTNAME/XF_SNAME type macros; TC is only used for the
// LOOP_TRIPCOUNT hints.
//
// _src_mat   : input, read sequentially through read_index.
// _dst_mat   : output, written sequentially through write_index.
// img_height : number of image rows.
// img_width  : number of loop iterations per row; the xFMaxSuppression caller in
//              this file passes the pixel width shifted right by XF_BITSHIFT(NPC).
//
// Flow: buf is a 3-row line buffer. Row 0 of buf is zeroed and the first image row
// is loaded into row 1. Row_Loop then reads one new image row per iteration into
// buf[row_ind] while ProcessMax1 produces output for the row selected by `mid`.
// After the loop, the remaining buffer row is zeroed and ProcessMax1 is run once
// more without reading, to flush the last image row. After every ProcessMax1 call
// one more result is computed here for the right edge and the pending word is written.
template <int SRC_T, int DST_T, int ROWS, int COLS, int IN_DEPTH, int OUT_DEPTH, int NPC, int IN_WW, int OUT_WW, int TC>
void xFMaxSuppressionRad1(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_mat,
uint16_t img_height,
uint16_t img_width) {
ap_uint<13> row_ind, row, col;
ap_uint<2> tp, mid, bottom;
uint16_t shift = 0;
// Per-position suppression results (255 / 0) for one word.
XF_PTNAME(OUT_DEPTH) Array[(1 << XF_BITSHIFT(NPC))];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Array complete dim=1
// clang-format on
// Length of each l*_buf window: one word of pixels plus two carried-over samples.
ap_uint<5> nms_bufsize = ((1 << XF_BITSHIFT(NPC)) + 2);
int read_index = 0, write_index = 0;
// Temporary buffers to hold image data from three rows.
XF_PTNAME(IN_DEPTH)
l00_buf[(1 << XF_BITSHIFT(NPC)) + 2], l10_buf[(1 << XF_BITSHIFT(NPC)) + 2], l20_buf[(1 << XF_BITSHIFT(NPC)) + 2];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=l00_buf complete dim=1
#pragma HLS ARRAY_PARTITION variable=l10_buf complete dim=1
#pragma HLS ARRAY_PARTITION variable=l20_buf complete dim=1
// clang-format on
// Output word currently being assembled.
XF_SNAME(OUT_WW) P0;
// Line buffer to hold the image data
XF_SNAME(IN_WW) buf[3][(COLS >> XF_BITSHIFT(NPC))];
// clang-format off
#pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
#pragma HLS ARRAY_PARTITION variable=buf complete dim=1
// clang-format on
// Prime the line buffer: buf[0] is a zero row standing in for the row above the
// image, buf[1] receives the first image row.
row_ind = 1;
Clear_first_Row:
for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
buf[0][col] = 0;
buf[row_ind][col] = _src_mat.read(read_index++);
}
row_ind++;
Row_Loop:
for (row = 1; row < img_height; row++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// NOTE(review): the next pragma carries no directive after `HLS`; it looks like a
// leftover — confirm with the tool flow before removing it.
#pragma HLS
// clang-format on
// row_ind is the buffer row about to be overwritten with the new image row; it
// becomes `bottom`, and the other two rows follow in circular order.
if (row_ind == 2) {
tp = 0;
mid = 1;
bottom = 2;
} else if (row_ind == 0) {
tp = 1;
mid = 2;
bottom = 0;
} else if (row_ind == 1) {
tp = 2;
mid = 0;
bottom = 1;
}
// Zero the two carried-over samples so the left edge of the row sees zeros.
l00_buf[0] = l00_buf[1] = 0;
l10_buf[0] = l10_buf[1] = 0;
l20_buf[0] = l20_buf[1] = 0;
P0 = 0;
// flag == true: ProcessMax1 reads the new row into buf[row_ind] while processing.
ProcessMax1<SRC_T, DST_T, ROWS, COLS, IN_DEPTH, OUT_DEPTH, NPC, IN_WW, OUT_WW, TC>(
_src_mat, _dst_mat, buf, l00_buf, l10_buf, l20_buf, Array, P0, img_width, row_ind, shift, tp, mid, bottom,
true, read_index, write_index);
// row starts at 1 in this loop, so this condition always holds.
if (row) {
// Zero stands in for the sample to the right of the last pixel.
XF_PTNAME(IN_DEPTH) val = (XF_PTNAME(IN_DEPTH))0;
// Evaluate the one result still missing at the end of the row; which window
// slots are used depends on NPC.
if ((NPC == XF_NPPC8)) {
bool Max = xFFindMaxRad1(l00_buf[nms_bufsize - 2], l00_buf[nms_bufsize - 1], val,
l10_buf[nms_bufsize - 2], l10_buf[nms_bufsize - 1], val,
l20_buf[nms_bufsize - 2], l20_buf[nms_bufsize - 1], val);
Array[0] = Max ? 255 : 0;
} else {
bool Max = xFFindMaxRad1(l00_buf[nms_bufsize - 3], l00_buf[nms_bufsize - 2], val,
l10_buf[nms_bufsize - 3], l10_buf[nms_bufsize - 2], val,
l20_buf[nms_bufsize - 3], l20_buf[nms_bufsize - 2], val);
Array[0] = Max ? 255 : 0;
}
// Append that result to the pending word and emit it.
xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(&Array[0], P0, 0, 1, shift);
// P0.range(((8 << XF_BITSHIFT(NPC))-1), ((8 << XF_BITSHIFT(NPC))-8)) = Array[0]; // Get bits
// from
// certain range of positions.
_dst_mat.write(write_index++, (P0));
shift = 0;
P0 = 0;
}
// Advance the circular write row (0 -> 1 -> 2 -> 0).
row_ind++;
if (row_ind == 3) {
row_ind = 0;
}
} // Row_Loop
// Flush pass for the last image row: no further input is read.
if (row_ind == 3) {
row_ind = 0;
}
if (row_ind == 2) {
tp = 0;
mid = 1;
bottom = 2;
} else if (row_ind == 0) {
tp = 1;
mid = 2;
bottom = 0;
} else if (row_ind == 1) {
tp = 2;
mid = 0;
bottom = 1;
}
l00_buf[0] = l00_buf[1] = 0;
l10_buf[0] = l10_buf[1] = 0;
l20_buf[0] = l20_buf[1] = 0;
// Zero the bottom buffer row so the last image row sees zeros below it.
Clear_Row_Loop:
for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
buf[bottom][col] = 0;
}
// flag == false: process the buffered rows without reading from _src_mat.
ProcessMax1<SRC_T, DST_T, ROWS, COLS, IN_DEPTH, OUT_DEPTH, NPC, IN_WW, OUT_WW, TC>(
_src_mat, _dst_mat, buf, l00_buf, l10_buf, l20_buf, Array, P0, img_width, row_ind, shift, tp, mid, bottom,
false, read_index, write_index);
// Same right-edge completion as inside Row_Loop.
XF_PTNAME(IN_DEPTH) val = 0;
if ((NPC == XF_NPPC8)) {
bool Max =
xFFindMaxRad1(l00_buf[nms_bufsize - 2], l00_buf[nms_bufsize - 1], val, l10_buf[nms_bufsize - 2],
l10_buf[nms_bufsize - 1], val, l20_buf[nms_bufsize - 2], l20_buf[nms_bufsize - 1], val);
Array[0] = Max ? 255 : 0;
} else {
bool Max =
xFFindMaxRad1(l00_buf[nms_bufsize - 3], l00_buf[nms_bufsize - 2], val, l10_buf[nms_bufsize - 3],
l10_buf[nms_bufsize - 2], val, l20_buf[nms_bufsize - 3], l20_buf[nms_bufsize - 2], val);
Array[0] = Max ? 255 : 0;
}
xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(&Array[0], P0, 0, 1, shift);
_dst_mat.write(write_index++, (P0));
shift = 0;
P0 = 0;
}
// xFMaxSuppressionRad1
/**
 * Radius-2 local-maximum test on a 13-point diamond neighbourhood.
 *
 * Arguments are named l<row><col> inside a 5x5 window; l22 is the centre.
 * The centre is a maximum only when it is strictly greater than all twelve
 * neighbours: l02 (two rows up), l11/l12/l13 (one row up), l20/l21/l23/l24
 * (same row), l31/l32/l33 (one row down) and l42 (two rows down).
 *
 * @return true when l22 strictly exceeds every neighbour; false otherwise
 *         (ties are not maxima).
 */
template <typename SRC_T>
bool xFFindMaxRad2(SRC_T l22,
                   SRC_T l02,
                   SRC_T l11,
                   SRC_T l12,
                   SRC_T l13,
                   SRC_T l20,
                   SRC_T l21,
                   SRC_T l23,
                   SRC_T l24,
                   SRC_T l31,
                   SRC_T l32,
                   SRC_T l33,
                   SRC_T l42) {
    // clang-format off
    #pragma HLS INLINE off
    // clang-format on
    const bool beats_upper = (l22 > l02) && (l22 > l11) && (l22 > l12) && (l22 > l13);
    const bool beats_same_row = (l22 > l20) && (l22 > l21) && (l22 > l23) && (l22 > l24);
    const bool beats_lower = (l22 > l31) && (l22 > l32) && (l22 > l33) && (l22 > l42);
    return beats_upper && beats_same_row && beats_lower;
}
template <int NPC, int IN_DEPTH, typename DST_T>
void xFSuppressionRad2(DST_T* Maxarray,
XF_PTNAME(IN_DEPTH) * l00_buf,
XF_PTNAME(IN_DEPTH) * l10_buf,
XF_PTNAME(IN_DEPTH) * l20_buf,
XF_PTNAME(IN_DEPTH) * l30_buf,
XF_PTNAME(IN_DEPTH) * l40_buf) {
// clang-format off
#pragma HLS INLINE off
// clang-format on
Suppression_Loop:
for (ap_uint<5> i = 0; i < (1 << XF_BITSHIFT(NPC)); i++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
bool Max = xFFindMaxRad2(l20_buf[i + 2], l00_buf[i + 2], l10_buf[i + 1], l10_buf[i + 2], l10_buf[i + 3],
l20_buf[i], l20_buf[i + 1], l20_buf[i + 3], l20_buf[i + 4], l30_buf[i + 1],
l30_buf[i + 2], l30_buf[i + 3], l40_buf[i + 2]);
Maxarray[i] = Max ? 255 : 0;
}
}
// xFSuppressionRad2
// Runs the radius-2 suppression over one row of packed words.
//
// buf            : 5-row line buffer; tp1/tp2/mid/bottom1/bottom2 select which buffer
//                  rows act as the two rows above, the centre row and the two rows below.
// l00_buf..l40_buf: per-row sample windows; the first four entries hold samples carried
//                  over from the previous column.
// Array          : per-position results produced by xFSuppressionRad2.
// inter_valx     : output word being assembled (left pending after the last column).
// img_width      : number of columns (loop iterations) in the row.
// row_ind        : buffer row that receives newly read input when flag is true.
// shift          : packing position passed through to xfPackPixels.
// flag           : when true, one word is read from _src_mat per column.
// read_pointer / write_pointer: running positions in _src_mat / _dst_mat, advanced here.
template <int SRC_T, int DST_T, int ROWS, int COLS, int IN_DEPTH, int OUT_DEPTH, int NPC, int IN_WW, int OUT_WW, int TC>
void ProcessRad2(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_mat,
XF_SNAME(IN_WW) buf[5][(COLS >> XF_BITSHIFT(NPC))],
XF_PTNAME(IN_DEPTH) l00_buf[XF_NPIXPERCYCLE(NPC) + 4],
XF_PTNAME(IN_DEPTH) l10_buf[XF_NPIXPERCYCLE(NPC) + 4],
XF_PTNAME(IN_DEPTH) l20_buf[XF_NPIXPERCYCLE(NPC) + 4],
XF_PTNAME(IN_DEPTH) l30_buf[XF_NPIXPERCYCLE(NPC) + 4],
XF_PTNAME(IN_DEPTH) l40_buf[XF_NPIXPERCYCLE(NPC) + 4],
XF_PTNAME(OUT_DEPTH) Array[XF_NPIXPERCYCLE(NPC)],
XF_SNAME(OUT_WW) & inter_valx,
uint16_t img_width,
ap_uint<13> row_ind,
uint16_t& shift,
ap_uint<4> tp1,
ap_uint<4> tp2,
ap_uint<4> mid,
ap_uint<4> bottom1,
ap_uint<4> bottom2,
bool flag,
int& read_pointer,
int& write_pointer) {
// clang-format off
#pragma HLS INLINE off
// clang-format on
// Window length: one word of pixels plus four carried-over samples.
ap_uint<8> nms_bufsize = (1 << XF_BITSHIFT(NPC)) + 4;
uint16_t npc = XF_NPIXPERCYCLE(NPC);
XF_SNAME(IN_WW) buf0, buf1, buf2, buf3, buf4;
Col_Loop:
for (ap_uint<13> col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
// Load the next input word into the row being filled, then fetch the five rows.
if (flag) buf[row_ind][col] = _src_mat.read(read_pointer++);
buf0 = buf[tp1][col];
buf1 = buf[tp2][col];
buf2 = buf[mid][col];
buf3 = buf[bottom1][col];
buf4 = buf[bottom2][col];
// Presumably unpacks each word's pixels into the window starting at index 4
// (xfExtractPixels is defined elsewhere — confirm).
xfExtractPixels<NPC, IN_WW, IN_DEPTH>(l00_buf, buf0, 4);
xfExtractPixels<NPC, IN_WW, IN_DEPTH>(l10_buf, buf1, 4);
xfExtractPixels<NPC, IN_WW, IN_DEPTH>(l20_buf, buf2, 4);
xfExtractPixels<NPC, IN_WW, IN_DEPTH>(l30_buf, buf3, 4);
xfExtractPixels<NPC, IN_WW, IN_DEPTH>(l40_buf, buf4, 4);
xFSuppressionRad2<NPC, IN_DEPTH>(Array, l00_buf, l10_buf, l20_buf, l30_buf, l40_buf);
// Carry the last four samples of every window to the front for the next column.
for (ap_uint<4> i = 0; i < 4; i++) {
// clang-format off
#pragma HLS unroll
// clang-format on
l00_buf[i] = l00_buf[nms_bufsize - (4 - i)];
l10_buf[i] = l10_buf[nms_bufsize - (4 - i)];
l20_buf[i] = l20_buf[nms_bufsize - (4 - i)];
l30_buf[i] = l30_buf[nms_bufsize - (4 - i)];
l40_buf[i] = l40_buf[nms_bufsize - (4 - i)];
}
if (col == 0) {
// First column: nothing is written yet; start a new output word from Array[2] on.
shift = 0;
inter_valx = 0;
xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(&Array[0], inter_valx, 2, (npc - 2), shift);
} else {
if (NPC == XF_NPPC8) {
// Array[0..1] complete the previous output word, which is then written; the
// remaining results start the next word.
xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(&Array[0], inter_valx, 0, 2, shift);
_dst_mat.write(write_pointer++, inter_valx);
shift = 0;
inter_valx = 0;
xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(&Array[0], inter_valx, 2, (npc - 2), shift);
} else {
// Other NPC values: from the third column on, write Array[0] in bits 7..0.
if (col >= 2) {
inter_valx(7, 0) = Array[0];
_dst_mat.write(write_pointer++, (inter_valx));
}
}
}
} // Col_Loop
}
/**
 * Radius-2 non-maximum suppression over one image.
 *
 * A five-row circular line buffer (`buf`) is primed with two zero rows and the
 * first two image rows. Row_Loop then reads one new row per iteration into
 * buffer row `row_ind` while ProcessRad2 produces the output for the row
 * selected by `mid`. Border_Row_Loop flushes the last two image rows by
 * feeding zero rows from below instead of reading input. After every
 * ProcessRad2 call the results still pending at the right edge of the row are
 * evaluated here (with zeros beyond the edge) and written out.
 *
 * @param _src_mat   input, read sequentially through read_pointer
 * @param _dst_mat   output, written sequentially through write_pointer
 * @param img_height number of image rows
 * @param img_width  number of loop iterations per row; the xFMaxSuppression
 *                   caller in this file passes the pixel width shifted right
 *                   by XF_BITSHIFT(NPC)
 */
template <int SRC_T, int DST_T, int ROWS, int COLS, int IN_DEPTH, int OUT_DEPTH, int NPC, int IN_WW, int OUT_WW, int TC>
void xFMaxSuppressionRad2(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
                          xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_mat,
                          uint16_t img_height,
                          uint16_t img_width) {
    ap_uint<13> row_ind, row, col;
    ap_uint<8> tp1, tp2, mid, bottom1, bottom2;
    // Window length: one word of pixels plus four carried-over samples.
    ap_uint<8> nms_bufsize = (1 << XF_BITSHIFT(NPC)) + 4;
    int read_pointer = 0, write_pointer = 0;

    // Per-position suppression results for one word.
    XF_PTNAME(OUT_DEPTH) Array[(1 << XF_BITSHIFT(NPC))];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=Array complete dim=1
    // clang-format on

    // Temporary buffers to hold image data from five rows.
    XF_PTNAME(IN_DEPTH)
    l00_buf[(1 << XF_BITSHIFT(NPC)) + 4], l10_buf[(1 << XF_BITSHIFT(NPC)) + 4], l20_buf[(1 << XF_BITSHIFT(NPC)) + 4];
    XF_PTNAME(IN_DEPTH) l30_buf[(1 << XF_BITSHIFT(NPC)) + 4], l40_buf[(1 << XF_BITSHIFT(NPC)) + 4];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=l00_buf complete dim=1
    #pragma HLS ARRAY_PARTITION variable=l10_buf complete dim=1
    #pragma HLS ARRAY_PARTITION variable=l20_buf complete dim=1
    #pragma HLS ARRAY_PARTITION variable=l30_buf complete dim=1
    #pragma HLS ARRAY_PARTITION variable=l40_buf complete dim=1
    // clang-format on

    ap_uint<8> i = 0;
    uint16_t shift = 0;
    // Output word currently being assembled.
    XF_SNAME(OUT_WW) inter_valx = 0;

    // Line buffer holding five image rows.
    XF_SNAME(IN_WW) buf[5][(COLS >> XF_BITSHIFT(NPC))];
    // clang-format off
    #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
    #pragma HLS ARRAY_PARTITION variable=buf complete dim=1
    // clang-format on

    // Prime the buffer: rows 0 and 1 are zero rows standing in for the two rows
    // above the image, row 2 receives the first image row.
    row_ind = 2;
Clear_Row_Loop:
    for (col = 0; col < img_width; col++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
        #pragma HLS pipeline
        // clang-format on
        buf[0][col] = 0;
        buf[1][col] = 0;
        buf[row_ind][col] = _src_mat.read(read_pointer++);
    }
    row_ind++;

    // Second image row goes into buffer row 3.
Read_Row1_Loop:
    for (col = 0; col < img_width; col++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
        #pragma HLS pipeline
        // clang-format on
        buf[row_ind][col] = _src_mat.read(read_pointer++);
    }
    row_ind++;

Row_Loop:
    for (row = 2; row < img_height; row++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        // clang-format on
        // modify the buffer indices to re use: row_ind (the row about to be
        // overwritten) becomes bottom2, the others follow in circular order.
        if (row_ind == 4) {
            tp1 = 0;
            tp2 = 1;
            mid = 2;
            bottom1 = 3;
            bottom2 = 4;
        } else if (row_ind == 0) {
            tp1 = 1;
            tp2 = 2;
            mid = 3;
            bottom1 = 4;
            bottom2 = 0;
        } else if (row_ind == 1) {
            tp1 = 2;
            tp2 = 3;
            mid = 4;
            bottom1 = 0;
            bottom2 = 1;
        } else if (row_ind == 2) {
            tp1 = 3;
            tp2 = 4;
            mid = 0;
            bottom1 = 1;
            bottom2 = 2;
        } else if (row_ind == 3) {
            tp1 = 4;
            tp2 = 0;
            mid = 1;
            bottom1 = 2;
            bottom2 = 3;
        }

        // Zero the carried-over samples so the left edge of the row sees zeros.
        l00_buf[0] = l00_buf[1] = l00_buf[2] = l00_buf[3] = 0;
        l10_buf[0] = l10_buf[1] = l10_buf[2] = l10_buf[3] = 0;
        l20_buf[0] = l20_buf[1] = l20_buf[2] = l20_buf[3] = 0;
        l30_buf[0] = l30_buf[1] = l30_buf[2] = l30_buf[3] = 0;
        l40_buf[0] = l40_buf[1] = l40_buf[2] = l40_buf[3] = 0;
        inter_valx = 0;

        // flag == true: the new image row is read into buf[row_ind] while processing.
        ProcessRad2<SRC_T, DST_T, ROWS, COLS, IN_DEPTH, OUT_DEPTH, NPC, IN_WW, OUT_WW, TC>(
            _src_mat, _dst_mat, buf, l00_buf, l10_buf, l20_buf, l30_buf, l40_buf, Array, inter_valx, img_width, row_ind,
            shift, tp1, tp2, mid, bottom1, bottom2, true, read_pointer, write_pointer);

        // row starts at 2 in this loop, so this condition always holds.
        if (row >= 2) {
            if ((NPC == XF_NPPC8) || (NPC == XF_NPPC16)) {
                // Everything past the four carried-over samples lies beyond the
                // right edge of the row: treat it as zero.
                for (i = 4; i < nms_bufsize; i++) {
                    // clang-format off
                    #pragma HLS unroll
                    // clang-format on
                    l00_buf[i] = 0;
                    l10_buf[i] = 0;
                    l20_buf[i] = 0;
                    l30_buf[i] = 0;
                    l40_buf[i] = 0;
                }
                // Last two results of the row, centred on window slots 2 and 3.
                Array[0] =
                    xFFindMaxRad2(l20_buf[2], l00_buf[2], l10_buf[1], l10_buf[2], l10_buf[3], l20_buf[0], l20_buf[1],
                                  l20_buf[3], l20_buf[4], l30_buf[1], l30_buf[2], l30_buf[3], l40_buf[2]);
                // Same neighbourhood shifted by one slot (matches xFSuppressionRad2
                // for position 1): the pixel above is l10_buf[3] and the pixel to
                // the left is l20_buf[2].
                Array[1] =
                    xFFindMaxRad2(l20_buf[3], l00_buf[3], l10_buf[2], l10_buf[3], l10_buf[4], l20_buf[1], l20_buf[2],
                                  l20_buf[4], l20_buf[5], l30_buf[2], l30_buf[3], l30_buf[4], l40_buf[3]);
                xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(&Array[0], inter_valx, 0, 2, shift);
                _dst_mat.write(write_pointer++, (inter_valx));
                shift = 0;
                inter_valx = 0;
            } else if (NPC == XF_NPPC1) {
                // One pixel per word: two results are still pending. First one,
                // with a zero beyond the right edge.
                l00_buf[4] = 0;
                l10_buf[4] = 0;
                l20_buf[4] = 0;
                l30_buf[4] = 0;
                l40_buf[4] = 0;
                Array[0] =
                    xFFindMaxRad2(l20_buf[2], l00_buf[2], l10_buf[1], l10_buf[2], l10_buf[3], l20_buf[0], l20_buf[1],
                                  l20_buf[3], l20_buf[4], l30_buf[1], l30_buf[2], l30_buf[3], l40_buf[2]);
                ap_uint<16> step = XF_PIXELDEPTH(OUT_DEPTH);
                inter_valx(((step << XF_BITSHIFT(NPC)) - 1), ((step << XF_BITSHIFT(NPC)) - step)) = Array[0];
                _dst_mat.write(write_pointer++, (inter_valx));
            lbufLoop3:
                // Slide the windows by one sample for the final result of the row.
                for (i = 0; i < 4; i++) {
                    // clang-format off
                    #pragma HLS unroll
                    // clang-format on
                    l00_buf[i] = l00_buf[nms_bufsize - (4 - i)];
                    l10_buf[i] = l10_buf[nms_bufsize - (4 - i)];
                    l20_buf[i] = l20_buf[nms_bufsize - (4 - i)];
                    l30_buf[i] = l30_buf[nms_bufsize - (4 - i)];
                    l40_buf[i] = l40_buf[nms_bufsize - (4 - i)];
                }
                l00_buf[3] = 0;
                l10_buf[3] = 0;
                l20_buf[3] = 0;
                l30_buf[3] = 0;
                l40_buf[3] = 0;
                Array[0] =
                    xFFindMaxRad2(l20_buf[2], l00_buf[2], l10_buf[1], l10_buf[2], l10_buf[3], l20_buf[0], l20_buf[1],
                                  l20_buf[3], l20_buf[4], l30_buf[1], l30_buf[2], l30_buf[3], l40_buf[2]);
                inter_valx(((step << XF_BITSHIFT(NPC)) - 1), ((step << XF_BITSHIFT(NPC)) - step)) = Array[0];
                _dst_mat.write(write_pointer++, (inter_valx));
            }
        }

        // Advance the circular write row (0 -> 1 -> ... -> 4 -> 0).
        row_ind++;
        if (row_ind == 5) {
            row_ind = 0;
        }
    } // Row_Loop ends here

    // Flush the last two image rows: no input is read, zero rows enter from below.
Border_Row_Loop:
    for (row = 0; row < 2; row++) {
        if (row_ind == 5) {
            row_ind = 0;
        }
        if (row_ind == 4) {
            tp1 = 0;
            tp2 = 1;
            mid = 2;
            bottom1 = 3;
            bottom2 = 4;
        } else if (row_ind == 0) {
            tp1 = 1;
            tp2 = 2;
            mid = 3;
            bottom1 = 4;
            bottom2 = 0;
        } else if (row_ind == 1) {
            tp1 = 2;
            tp2 = 3;
            mid = 4;
            bottom1 = 0;
            bottom2 = 1;
        } else if (row_ind == 2) {
            tp1 = 3;
            tp2 = 4;
            mid = 0;
            bottom1 = 1;
            bottom2 = 2;
        } else if (row_ind == 3) {
            tp1 = 4;
            tp2 = 0;
            mid = 1;
            bottom1 = 2;
            bottom2 = 3;
        }

        // The oldest buffer row becomes a zero row below the image.
    Clear_Row_Loop1:
        for (col = 0; col < img_width; col++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
            #pragma HLS pipeline
            // clang-format on
            buf[bottom2][col] = 0;
        }

        // Reset the carried-over samples of all five windows, exactly as
        // Row_Loop does; leaving l10_buf out would let the tail of the previous
        // row leak into the left edge of this one.
        l00_buf[0] = l00_buf[1] = l00_buf[2] = l00_buf[3] = 0;
        l10_buf[0] = l10_buf[1] = l10_buf[2] = l10_buf[3] = 0;
        l20_buf[0] = l20_buf[1] = l20_buf[2] = l20_buf[3] = 0;
        l30_buf[0] = l30_buf[1] = l30_buf[2] = l30_buf[3] = 0;
        l40_buf[0] = l40_buf[1] = l40_buf[2] = l40_buf[3] = 0;

        // flag == false: process the buffered rows without reading from _src_mat.
        ProcessRad2<SRC_T, DST_T, ROWS, COLS, IN_DEPTH, OUT_DEPTH, NPC, IN_WW, OUT_WW, TC>(
            _src_mat, _dst_mat, buf, l00_buf, l10_buf, l20_buf, l30_buf, l40_buf, Array, inter_valx, img_width, row_ind,
            shift, tp1, tp2, mid, bottom1, bottom2, false, read_pointer, write_pointer);

        if (NPC == XF_NPPC8 || NPC == XF_NPPC16) {
            // As in Row_Loop: slots past the carried-over samples are beyond the
            // right edge and must read as zero, not as leftovers of the last word.
            for (i = 4; i < nms_bufsize; i++) {
                // clang-format off
                #pragma HLS unroll
                // clang-format on
                l00_buf[i] = 0;
                l10_buf[i] = 0;
                l20_buf[i] = 0;
                l30_buf[i] = 0;
                l40_buf[i] = 0;
            }
            Array[0] = xFFindMaxRad2(l20_buf[2], l00_buf[2], l10_buf[1], l10_buf[2], l10_buf[3], l20_buf[0], l20_buf[1],
                                     l20_buf[3], l20_buf[4], l30_buf[1], l30_buf[2], l30_buf[3], l40_buf[2]);
            // Neighbourhood shifted by one slot; see the matching call in Row_Loop.
            Array[1] = xFFindMaxRad2(l20_buf[3], l00_buf[3], l10_buf[2], l10_buf[3], l10_buf[4], l20_buf[1], l20_buf[2],
                                     l20_buf[4], l20_buf[5], l30_buf[2], l30_buf[3], l30_buf[4], l40_buf[3]);
            xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(&Array[0], inter_valx, 0, 2, shift);
            _dst_mat.write(write_pointer++, (inter_valx));
            shift = 0;
            inter_valx = 0;
        } else if (NPC == XF_NPPC1) {
            l00_buf[4] = 0;
            l10_buf[4] = 0;
            l20_buf[4] = 0;
            l30_buf[4] = 0;
            l40_buf[4] = 0;
            Array[0] = xFFindMaxRad2(l20_buf[2], l00_buf[2], l10_buf[1], l10_buf[2], l10_buf[3], l20_buf[0], l20_buf[1],
                                     l20_buf[3], l20_buf[4], l30_buf[1], l30_buf[2], l30_buf[3], l40_buf[2]);
            ap_uint<8> step = XF_PIXELDEPTH(OUT_DEPTH);
            inter_valx(((step << XF_BITSHIFT(NPC)) - 1), ((step << XF_BITSHIFT(NPC)) - step)) = Array[0];
            _dst_mat.write(write_pointer++, (inter_valx));
        lbufLoop33:
            for (i = 0; i < 4; i++) {
                // clang-format off
                #pragma HLS unroll
                // clang-format on
                l00_buf[i] = l00_buf[nms_bufsize - (4 - i)];
                l10_buf[i] = l10_buf[nms_bufsize - (4 - i)];
                l20_buf[i] = l20_buf[nms_bufsize - (4 - i)];
                l30_buf[i] = l30_buf[nms_bufsize - (4 - i)];
                l40_buf[i] = l40_buf[nms_bufsize - (4 - i)];
            }
            l00_buf[3] = 0;
            l10_buf[3] = 0;
            l20_buf[3] = 0;
            l30_buf[3] = 0;
            l40_buf[3] = 0;
            Array[0] = xFFindMaxRad2(l20_buf[2], l00_buf[2], l10_buf[1], l10_buf[2], l10_buf[3], l20_buf[0], l20_buf[1],
                                     l20_buf[3], l20_buf[4], l30_buf[1], l30_buf[2], l30_buf[3], l40_buf[2]);
            inter_valx(((step << XF_BITSHIFT(NPC)) - 1), ((step << XF_BITSHIFT(NPC)) - step)) = Array[0];
            _dst_mat.write(write_pointer++, (inter_valx));
        }

        // Rotate the window for the second flush pass; without this both passes
        // would select the same centre row and the last image row would never
        // be evaluated. The wrap to 0 happens at the top of the loop.
        row_ind++;
    }
}
/*********************************************************************
 * xFMaxSuppression : entry point for non-maximum suppression.
 *
 * Checks the arguments (outside synthesis only), converts the image
 * width from pixels to packed words (width >> XF_BITSHIFT(NPC)) and
 * dispatches to the radius-1 or radius-2 implementation. Any radius
 * other than XF_NMS_RADIUS_1 selects the radius-2 path.
 *********************************************************************/
template <int SRC_T, int DST_T, int ROWS, int COLS, int IN_DEPTH, int OUT_DEPTH, int NPC, int IN_WW, int OUT_WW>
void xFMaxSuppression(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
                      xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_mat,
                      uint8_t _nms_radius,
                      uint16_t img_height,
                      uint16_t img_width) {
#ifndef __SYNTHESIS__
    //#pragma HLS STREAM variable=_dst_mat.data depth=1
    assert(((_nms_radius == XF_NMS_RADIUS_1) || (_nms_radius == XF_NMS_RADIUS_2)) && "radius size must be 1, 2");
    assert(((img_height <= ROWS) && (img_width <= COLS)) && "ROWS and COLS should be greater than input image");
#endif

    // Number of packed words per image row.
    const uint16_t words_per_row = img_width >> XF_BITSHIFT(NPC);

    if (_nms_radius != XF_NMS_RADIUS_1) {
        xFMaxSuppressionRad2<SRC_T, DST_T, ROWS, COLS, IN_DEPTH, OUT_DEPTH, NPC, IN_WW, OUT_WW,
                             (COLS >> XF_BITSHIFT(NPC))>(_src_mat, _dst_mat, img_height, words_per_row);
    } else {
        xFMaxSuppressionRad1<SRC_T, DST_T, ROWS, COLS, IN_DEPTH, OUT_DEPTH, NPC, IN_WW, OUT_WW,
                             (COLS >> XF_BITSHIFT(NPC))>(_src_mat, _dst_mat, img_height, words_per_row);
    }
}
#endif // _XF_MAX_SUPPRESSION_HPP_