Program Listing for File xf_sobel.hpp
↰ Return to documentation for file (include/imgproc/xf_sobel.hpp)
/*
* Copyright 2019 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _XF_SOBEL_HPP_
#define _XF_SOBEL_HPP_
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
#include "../common/xf_common.hpp"
#include "../common/xf_utility.hpp"
#include "hls_stream.h"
namespace xf {
namespace cv {
/*****************************************************************
* SobelFilter3x3
*****************************************************************
* X-Gradient Computation
*
* -------------
* |-1 0 1|
* |-2 0 2|
* |-1 0 1|
* -------------
*****************************************************************/
/**
 * 3x3 Sobel X-gradient of one pixel neighbourhood (single plane).
 *
 * The arguments are the 3x3 window: t* = top row, m* = middle row,
 * b* = bottom row; index 0/1/2 = left/centre/right column. The centre
 * column (t1, m1, b1) has zero weight and is not read.
 *
 * Returns (t2 + 2*m2 + b2) - (t0 + 2*m0 + b0). When DEPTH_DST is XF_8UP or
 * XF_24UP the value is clamped to [0, 255]; for any other DEPTH_DST the
 * short-int result is converted to the destination type as is.
 */
template <int DEPTH_SRC, int DEPTH_DST>
XF_PTNAME(DEPTH_DST)
xFGradientX3x3(XF_PTNAME(DEPTH_SRC) t0,
               XF_PTNAME(DEPTH_SRC) t1,
               XF_PTNAME(DEPTH_SRC) t2,
               XF_PTNAME(DEPTH_SRC) m0,
               XF_PTNAME(DEPTH_SRC) m1,
               XF_PTNAME(DEPTH_SRC) m2,
               XF_PTNAME(DEPTH_SRC) b0,
               XF_PTNAME(DEPTH_SRC) b1,
               XF_PTNAME(DEPTH_SRC) b2) {
// clang-format off
#pragma HLS INLINE off
// clang-format on
    // Doubled middle-row taps and summed corner taps of the left/right columns.
    const short int left_mid_x2 = ((short int)m0 << 1);
    const short int right_mid_x2 = ((short int)m2 << 1);
    const short int right_corners = (t2 + b2);
    const short int left_corners = (t0 + b0);

    // right column minus left column, accumulated step by step in 16 bits
    short int gradient = right_mid_x2 - left_mid_x2;
    gradient = gradient + right_corners;
    gradient = gradient - left_corners;

    XF_PTNAME(DEPTH_DST) g_x = (XF_PTNAME(DEPTH_DST))gradient;

    // Only the 8-bit unsigned destination depths saturate.
    const bool saturate_u8 = (DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP);
    if (saturate_u8 && (gradient < 0)) {
        g_x = 0;
    } else if (saturate_u8 && (gradient > 255)) {
        g_x = 255;
    }
    return g_x;
}
/**********************************************************************
* Y-Gradient Computation
* -------------
* | 1 2 1|
* | 0 0 0|
* |-1 -2 -1|
* -------------
**********************************************************************/
/**
 * 3x3 Sobel Y-gradient of one pixel neighbourhood (single plane).
 *
 * The arguments are the 3x3 window: t* = top row, m* = middle row,
 * b* = bottom row; index 0/1/2 = left/centre/right column. The middle
 * row (m0, m1, m2) has zero weight and is not read.
 *
 * Returns (b0 + 2*b1 + b2) - (t0 + 2*t1 + t2). When DEPTH_DST is XF_8UP or
 * XF_24UP the value is clamped to [0, 255]; for any other DEPTH_DST the
 * short-int result is converted to the destination type as is.
 */
template <int DEPTH_SRC, int DEPTH_DST>
XF_PTNAME(DEPTH_DST)
xFGradientY3x3(XF_PTNAME(DEPTH_SRC) t0,
               XF_PTNAME(DEPTH_SRC) t1,
               XF_PTNAME(DEPTH_SRC) t2,
               XF_PTNAME(DEPTH_SRC) m0,
               XF_PTNAME(DEPTH_SRC) m1,
               XF_PTNAME(DEPTH_SRC) m2,
               XF_PTNAME(DEPTH_SRC) b0,
               XF_PTNAME(DEPTH_SRC) b1,
               XF_PTNAME(DEPTH_SRC) b2) {
// clang-format off
#pragma HLS INLINE off
// clang-format on
    // Doubled centre taps and summed corner taps of the top/bottom rows.
    const short int top_mid_x2 = ((short int)t1 << 1);
    const short int bottom_mid_x2 = ((short int)b1 << 1);
    const short int bottom_corners = (b0 + b2);
    const short int top_corners = (t0 + t2);

    // bottom row minus top row, accumulated step by step in 16 bits
    short int gradient = bottom_mid_x2 - top_mid_x2;
    gradient = gradient + bottom_corners;
    gradient = gradient - top_corners;

    XF_PTNAME(DEPTH_DST) g_y = (XF_PTNAME(DEPTH_DST))gradient;

    // Only the 8-bit unsigned destination depths saturate.
    const bool saturate_u8 = (DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP);
    if (saturate_u8 && (gradient < 0)) {
        g_y = 0;
    } else if (saturate_u8 && (gradient > 255)) {
        g_y = 255;
    }
    return g_y;
}
/**
 * Computes the 3x3 Sobel X and Y gradients for XF_NPIXPERCYCLE(NPC) pixels.
 *
 * src_buf1 / src_buf2 / src_buf3 are the top / middle / bottom window rows;
 * output pixel j uses entries j, j + 1 and j + 2 of each row. Every pixel is
 * processed plane by plane: 8 input bits per plane, and 16 output bits per
 * plane when DEPTH_DST is XF_48SP or XF_16SP, 8 output bits otherwise.
 * Results are written into GradientvaluesX[j] / GradientvaluesY[j].
 */
template <int PLANES, int NPC, int DEPTH_SRC, int DEPTH_DST>
void xFSobel3x3(XF_PTNAME(DEPTH_DST) * GradientvaluesX,
                XF_PTNAME(DEPTH_DST) * GradientvaluesY,
                XF_PTNAME(DEPTH_SRC) * src_buf1,
                XF_PTNAME(DEPTH_SRC) * src_buf2,
                XF_PTNAME(DEPTH_SRC) * src_buf3) {
// clang-format off
#pragma HLS INLINE off
// clang-format on
    // Bit widths of one plane on the input and output side.
    const int STEP = 8;
    const int STEP_OUT = ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) ? 16 : 8;

Compute_Grad_Loop:
    for (ap_uint<5> j = 0; j < XF_NPIXPERCYCLE(NPC); j++) {
        int out_lsb = 0;
// clang-format off
#pragma HLS UNROLL
// clang-format on
        for (ap_uint<5> plane = 0, in_lsb = 0; plane < PLANES; plane++, in_lsb += STEP) {
            // Slice the current plane out of the nine window entries once.
            XF_PTNAME(DEPTH_SRC) t0 = src_buf1[j].range(in_lsb + STEP - 1, in_lsb);
            XF_PTNAME(DEPTH_SRC) t1 = src_buf1[j + 1].range(in_lsb + STEP - 1, in_lsb);
            XF_PTNAME(DEPTH_SRC) t2 = src_buf1[j + 2].range(in_lsb + STEP - 1, in_lsb);
            XF_PTNAME(DEPTH_SRC) m0 = src_buf2[j].range(in_lsb + STEP - 1, in_lsb);
            XF_PTNAME(DEPTH_SRC) m1 = src_buf2[j + 1].range(in_lsb + STEP - 1, in_lsb);
            XF_PTNAME(DEPTH_SRC) m2 = src_buf2[j + 2].range(in_lsb + STEP - 1, in_lsb);
            XF_PTNAME(DEPTH_SRC) b0 = src_buf3[j].range(in_lsb + STEP - 1, in_lsb);
            XF_PTNAME(DEPTH_SRC) b1 = src_buf3[j + 1].range(in_lsb + STEP - 1, in_lsb);
            XF_PTNAME(DEPTH_SRC) b2 = src_buf3[j + 2].range(in_lsb + STEP - 1, in_lsb);

            GradientvaluesX[j].range(out_lsb + (STEP_OUT - 1), out_lsb) =
                xFGradientX3x3<DEPTH_SRC, DEPTH_DST>(t0, t1, t2, m0, m1, m2, b0, b1, b2);
            GradientvaluesY[j].range(out_lsb + (STEP_OUT - 1), out_lsb) =
                xFGradientY3x3<DEPTH_SRC, DEPTH_DST>(t0, t1, t2, m0, m1, m2, b0, b1, b2);

            out_lsb += STEP_OUT;
        }
    }
}
/**************************************************************************************
* ProcessSobel3x3 : Computes gradients for the column input data
**************************************************************************************/
/**
 * Runs one pass over all column words of a row for the 3x3 Sobel filter.
 *
 * For every column word the loop:
 *   1. refreshes the line buffer: while row < img_height the next word of
 *      _src_mat is stored in buf[row_ind], otherwise buf[bottom] is zeroed;
 *   2. loads the words of line-buffer rows tp / mid / bottom into
 *      src_buf1 / src_buf2 / src_buf3 starting at index 2 (indices 0 and 1
 *      hold the last two entries carried over from the previous word);
 *   3. calls xFSobel3x3 to fill GradientValuesX / GradientValuesY;
 *   4. packs the results into P0 / P1 and, for col != 0, writes one output
 *      word to _gradx_mat / _grady_mat at write_index.
 *
 * read_index, write_index, shift_x, shift_y, P0 and P1 are references and
 * are updated for the caller. The word still being assembled in P0 / P1 when
 * the loop ends is not written here; xFSobelFilter3x3 completes and writes
 * it after this call.
 *
 * NOTE(review): xfExtractPixels / xfPackPixels are defined elsewhere;
 * presumably the pack arguments are (values, packed word, start index,
 * count, shift) - confirm against xf_utility.hpp.
 * NOTE(review): col is ap_uint<13> while img_width is uint16_t; this looks
 * like it assumes img_width < 8192 - confirm against the callers.
 */
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int PLANES,
int DEPTH_SRC,
int DEPTH_DST,
int NPC,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int TC>
void ProcessSobel3x3(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _gradx_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _grady_mat,
XF_SNAME(WORDWIDTH_SRC) buf[3][(COLS >> XF_BITSHIFT(NPC))],
XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 2],
XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 2],
XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 2],
XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)],
XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)],
XF_SNAME(WORDWIDTH_DST) & P0,
XF_SNAME(WORDWIDTH_DST) & P1,
uint16_t img_width,
uint16_t img_height,
ap_uint<13> row_ind,
uint16_t& shift_x,
uint16_t& shift_y,
ap_uint<2> tp,
ap_uint<2> mid,
ap_uint<2> bottom,
ap_uint<13> row,
int& read_index,
int& write_index) {
// clang-format off
#pragma HLS INLINE
// clang-format on
XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2;
uint16_t npc = XF_NPIXPERCYCLE(NPC);
// Size of each src_buf* window: the pixels of one word plus two carried over.
ap_uint<5> buf_size = XF_NPIXPERCYCLE(NPC) + 2;
Col_Loop:
for (ap_uint<13> col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
// Refresh the line buffer: fetch a new word while image rows remain,
// otherwise feed zeros as the row below the image.
if (row < img_height)
buf[row_ind][col] = _src_mat.read(read_index++); // Read data
else
buf[bottom][col] = 0;
// Words of the top, middle and bottom rows of the window at this column.
buf0 = buf[tp][col];
buf1 = buf[mid][col];
buf2 = buf[bottom][col];
// Place the new data behind the two carried-over entries.
if (NPC == XF_NPPC8) {
xfExtractPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&src_buf1[2], buf0, 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&src_buf2[2], buf1, 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&src_buf3[2], buf2, 0);
} else {
src_buf1[2] = buf0;
src_buf2[2] = buf1;
src_buf3[2] = buf2;
}
xFSobel3x3<PLANES, NPC, DEPTH_SRC, DEPTH_DST>(GradientValuesX, GradientValuesY, src_buf1, src_buf2, src_buf3);
if (col == 0) {
// First word of the row: start a fresh output word from gradient index 1
// onwards (npc - 1 values); nothing is written yet.
shift_x = 0;
shift_y = 0;
P0 = 0;
P1 = 0;
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], P0, 1, (npc - 1), shift_x);
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], P1, 1, (npc - 1), shift_y);
} else {
// Gradient index 0 completes the word started in the previous iteration;
// write it out, then start the next word as in the col == 0 case.
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], P0, 0, 1, shift_x);
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], P1, 0, 1, shift_y);
_gradx_mat.write(write_index, P0);
_grady_mat.write(write_index++, P1);
shift_x = 0;
shift_y = 0;
P0 = 0;
P1 = 0;
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], P0, 1, (npc - 1), shift_x);
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], P1, 1, (npc - 1), shift_y);
}
// Carry the last two window entries over to the front for the next word.
src_buf1[0] = src_buf1[buf_size - 2];
src_buf1[1] = src_buf1[buf_size - 1];
src_buf2[0] = src_buf2[buf_size - 2];
src_buf2[1] = src_buf2[buf_size - 1];
src_buf3[0] = src_buf3[buf_size - 2];
src_buf3[1] = src_buf3[buf_size - 1];
} // Col_Loop
}
/**
 * 3x3 Sobel filter over a whole image.
 *
 * Reads _src_mat sequentially (read_index) and writes the X gradient to
 * _dst_matx and the Y gradient to _dst_maty sequentially (write_index).
 * img_width is used as the number of column words iterated per row and
 * img_height as the number of rows.
 *
 * A three-row line buffer (buf) is used as a ring: row_ind is the row being
 * filled, and tp / mid / bottom select the top, middle and bottom rows of
 * the current window. Borders are handled with zeros: buf[0] is cleared
 * before the first row, ProcessSobel3x3 zeroes the bottom row once the image
 * rows are exhausted, the first two window entries are cleared at the start
 * of each row, and the right neighbour of the last column is passed as 0.
 *
 * USE_URAM selects the storage pragmas applied to the line buffer.
 */
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int PLANES,
int DEPTH_SRC,
int DEPTH_DST,
int NPC,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int TC,
bool USE_URAM>
void xFSobelFilter3x3(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_matx,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_maty,
uint16_t img_height,
uint16_t img_width) {
ap_uint<13> row_ind;
ap_uint<2> tp, mid, bottom;
// Size of each src_buf* window: the pixels of one word plus two carried over.
ap_uint<8> buf_size = XF_NPIXPERCYCLE(NPC) + 2;
uint16_t shift_x = 0, shift_y = 0;
ap_uint<13> row, col;
int read_index = 0, write_index = 0;
XF_PTNAME(DEPTH_DST)
GradientValuesX[XF_NPIXPERCYCLE(NPC)]; // X-Gradient result buffer
XF_PTNAME(DEPTH_DST)
GradientValuesY[XF_NPIXPERCYCLE(NPC)]; // Y-Gradient result buffer
// clang-format off
#pragma HLS ARRAY_PARTITION variable=GradientValuesX complete dim=1
#pragma HLS ARRAY_PARTITION variable=GradientValuesY complete dim=1
// clang-format on
XF_PTNAME(DEPTH_SRC)
src_buf1[XF_NPIXPERCYCLE(NPC) + 2],
src_buf2[XF_NPIXPERCYCLE(NPC) + 2], // Temporary buffers to hold input data for processing
src_buf3[XF_NPIXPERCYCLE(NPC) + 2];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1
// clang-format on
XF_SNAME(WORDWIDTH_DST) P0, P1; // Output data is packed
// Line buffer to hold image data
XF_SNAME(WORDWIDTH_SRC) buf[3][(COLS >> XF_BITSHIFT(NPC))]; // Line buffer
if (USE_URAM) {
// clang-format off
#pragma HLS array reshape variable=buf dim=1 factor=3 cyclic
#pragma HLS RESOURCE variable=buf core=RAM_S2P_URAM
// clang-format on
} else {
// clang-format off
#pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
#pragma HLS ARRAY_PARTITION variable=buf complete dim=1
// clang-format on
}
// Prologue: line-buffer row 0 is the zero row above the image, row 1
// receives the first image row.
row_ind = 1;
Clear_Row_Loop:
for (col = 0; col < img_width; col++) // Top row border care
{
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
buf[0][col] = 0;
buf[row_ind][col] = _src_mat.read(read_index++); // Read data
}
row_ind++;
Row_Loop: // Process complete image
for (row = 1; row < img_height + 1; row++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
// Map the ring position to window rows: the row being filled (row_ind) is
// the bottom row, the two rows before it are mid and top. row_ind only
// takes the values 0..2 here (it wraps at 3 at the end of the loop body).
if (row_ind == 2) // Select the line-buffer rows for this image row
{
tp = 0;
mid = 1;
bottom = 2;
} else if (row_ind == 0) {
tp = 1;
mid = 2;
bottom = 0;
} else if (row_ind == 1) {
tp = 2;
mid = 0;
bottom = 1;
}
// Left border: the two carried-over window entries start as zero.
src_buf1[0] = src_buf1[1] = 0;
src_buf2[0] = src_buf2[1] = 0;
src_buf3[0] = src_buf3[1] = 0;
/* Process complete row */
P0 = P1 = 0;
ProcessSobel3x3<SRC_T, DST_T, ROWS, COLS, PLANES, DEPTH_SRC, DEPTH_DST, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, TC>(
_src_mat, _dst_matx, _dst_maty, buf, src_buf1, src_buf2, src_buf3, GradientValuesX, GradientValuesY, P0, P1,
img_width, img_height, row_ind, shift_x, shift_y, tp, mid, bottom, row, read_index, write_index);
/* Last column border care for the XF_NPPC8 case: the window is the last
 * two entries of each src_buf* plus a zero right neighbour. */
if ((NPC == XF_NPPC8)) {
// Compute gradient at last column
int STEP, STEP_OUT, p = 0;
if ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) {
STEP_OUT = 16;
STEP = 8;
} else {
STEP_OUT = 8;
STEP = 8;
}
for (ap_uint<5> c = 0, k = 0; c < PLANES; c++, k += STEP) {
GradientValuesX[0].range(p + (STEP_OUT - 1), p) = xFGradientX3x3<DEPTH_SRC, DEPTH_DST>(
src_buf1[buf_size - 2].range(k + STEP - 1, k), src_buf1[buf_size - 1].range(k + STEP - 1, k), 0,
src_buf2[buf_size - 2].range(k + STEP - 1, k), src_buf2[buf_size - 1].range(k + STEP - 1, k), 0,
src_buf3[buf_size - 2].range(k + STEP - 1, k), src_buf3[buf_size - 1].range(k + STEP - 1, k), 0);
GradientValuesY[0].range(p + (STEP_OUT - 1), p) = xFGradientY3x3<DEPTH_SRC, DEPTH_DST>(
src_buf1[buf_size - 2].range(k + STEP - 1, k), src_buf1[buf_size - 1].range(k + STEP - 1, k), 0,
src_buf2[buf_size - 2].range(k + STEP - 1, k), src_buf2[buf_size - 1].range(k + STEP - 1, k), 0,
src_buf3[buf_size - 2].range(k + STEP - 1, k), src_buf3[buf_size - 1].range(k + STEP - 1, k), 0);
p += STEP_OUT;
}
} else /* Last column border care for every other NPC value: uses entries
        * buf_size - 3 and buf_size - 2 plus a zero right neighbour.
        * NOTE(review): these are entries 0 and 1 (the carried-over pair)
        * only when buf_size is 3, i.e. presumably one pixel per cycle -
        * confirm which NPC values are supported. */
{
int STEP, STEP_OUT, q = 0;
if ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) {
STEP_OUT = 16;
STEP = 8;
} else {
STEP_OUT = 8;
STEP = 8;
}
for (ap_uint<7> i = 0, k = 0; i < PLANES; i++, k += STEP) {
GradientValuesX[0].range(q + (STEP_OUT - 1), q) = xFGradientX3x3<DEPTH_SRC, DEPTH_DST>(
src_buf1[buf_size - 3].range(k + STEP - 1, k), src_buf1[buf_size - 2].range(k + STEP - 1, k), 0,
src_buf2[buf_size - 3].range(k + STEP - 1, k), src_buf2[buf_size - 2].range(k + STEP - 1, k), 0,
src_buf3[buf_size - 3].range(k + STEP - 1, k), src_buf3[buf_size - 2].range(k + STEP - 1, k), 0);
GradientValuesY[0].range(q + (STEP_OUT - 1), q) = xFGradientY3x3<DEPTH_SRC, DEPTH_DST>(
src_buf1[buf_size - 3].range(k + STEP - 1, k), src_buf1[buf_size - 2].range(k + STEP - 1, k), 0,
src_buf2[buf_size - 3].range(k + STEP - 1, k), src_buf2[buf_size - 2].range(k + STEP - 1, k), 0,
src_buf3[buf_size - 3].range(k + STEP - 1, k), src_buf3[buf_size - 2].range(k + STEP - 1, k), 0);
q += STEP_OUT;
}
}
// The border gradient completes the word left pending by ProcessSobel3x3;
// write it and reset the packing state for the next row.
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], P0, 0, 1, shift_x);
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], P1, 0, 1, shift_y);
_dst_matx.write(write_index, P0);
_dst_maty.write(write_index++, P1);
shift_x = 0;
shift_y = 0;
P0 = 0;
P1 = 0;
// Advance the ring position of the three-row line buffer.
row_ind++;
if (row_ind == 3) {
row_ind = 0;
}
} // Row_Loop
}
// xFSobelFilter3x3
/*****************************************************************
* SobelFilter5x5
*****************************************************************/
/**
 * 5x5 Sobel X-gradient of one pixel, computed plane by plane.
 *
 * src_buf1..src_buf5 point at the first of five consecutive entries of the
 * five window rows (top to bottom). The weights applied are
 *
 *      | -1  -2   0   2   1 |
 *      | -4  -8   0   8   4 |
 *      | -6 -12   0  12   6 |
 *      | -4  -8   0   8   4 |
 *      | -1  -2   0   2   1 |
 *
 * Each plane is 8 bits wide on the input side. When DEPTH_DST is XF_8UP or
 * XF_24UP the plane result is clamped to [0, 255]. Plane results are packed
 * into the return value, 16 bits apart when DEPTH_DST is XF_48SP or XF_16SP
 * and 8 bits apart otherwise.
 */
template <int PLANES, int DEPTH_SRC, int DEPTH_DST>
XF_PTNAME(DEPTH_DST)
xFGradientX5x5(XF_PTNAME(DEPTH_SRC) * src_buf1,
               XF_PTNAME(DEPTH_SRC) * src_buf2,
               XF_PTNAME(DEPTH_SRC) * src_buf3,
               XF_PTNAME(DEPTH_SRC) * src_buf4,
               XF_PTNAME(DEPTH_SRC) * src_buf5) {
// clang-format off
#pragma HLS INLINE off
// clang-format on
    XF_PTNAME(DEPTH_DST) packed = 0;
    // Bit widths of one plane on the input and output side.
    const int STEP = 8;
    const int STEP_OUT = ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) ? 16 : 8;
    const bool saturate_u8 = (DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP);
    int out_lsb = 0;

    for (int plane = 0, lsb = 0; plane < PLANES; plane++, lsb += STEP) {
        const int msb = lsb + STEP - 1;

        // Rows 1 and 5 (weights -1 -2 0 2 1).
        short int outer_col1_x2 =
            (short int)(((short int)src_buf1[1].range(msb, lsb) + (short int)src_buf5[1].range(msb, lsb)) << 1);
        short int outer_edge_diff =
            (short int)((short int)src_buf1[4].range(msb, lsb) + (short int)src_buf5[4].range(msb, lsb)) -
            ((short int)src_buf1[0].range(msb, lsb) + (short int)src_buf5[0].range(msb, lsb));
        short int outer_col3_x2 =
            (short int)(((short int)src_buf1[3].range(msb, lsb) + (short int)src_buf5[3].range(msb, lsb)) << 1);

        // Rows 2 and 4 (weights -4 -8 0 8 4).
        short int inner_col0_x4 =
            (short int)(((short int)src_buf2[0].range(msb, lsb) + (short int)src_buf4[0].range(msb, lsb)) << 2);
        short int inner_col1_x8 =
            (short int)((short int)src_buf2[1].range(msb, lsb) + (short int)src_buf4[1].range(msb, lsb)) << 3;
        short int inner_col3_x8 =
            (short int)((short int)src_buf2[3].range(msb, lsb) + (short int)src_buf4[3].range(msb, lsb)) << 3;
        short int inner_col4_x4 =
            (short int)((short int)src_buf2[4].range(msb, lsb) + (short int)src_buf4[4].range(msb, lsb)) << 2;

        // Row 3 (weights -6 -12 0 12 6).
        short int mid_col0_x6 = (short int)src_buf3[0].range(msb, lsb) * 6;
        short int mid_col1_x12 = (short int)src_buf3[1].range(msb, lsb) * 12;
        short int mid_col3_x12 = (short int)src_buf3[3].range(msb, lsb) * 12;
        short int mid_col4_x6 = (short int)src_buf3[4].range(msb, lsb) * 6;

        // Partial sums of the subtracted (left) taps ...
        short int sub_a = outer_col1_x2 + inner_col0_x4;
        short int sub_b = inner_col1_x8 + mid_col0_x6 + mid_col1_x12;
        // ... and of the added (right) taps plus the signed outer-edge term.
        short int add_a = outer_col3_x2 + inner_col3_x8;
        short int add_b = inner_col4_x4 + mid_col3_x12;
        short int add_c = mid_col4_x6 + outer_edge_diff;

        short int add_total = add_a + add_b + add_c;
        short int sub_total = sub_a + sub_b;
        short int grad = add_total - sub_total;

        XF_PTNAME(DEPTH_DST) g_x = (XF_PTNAME(DEPTH_DST))grad;
        if (saturate_u8 && (grad < 0)) {
            g_x = 0;
        } else if (saturate_u8 && (grad > 255)) {
            g_x = 255;
        }

        packed.range(out_lsb + (STEP_OUT - 1), out_lsb) = g_x;
        out_lsb += STEP_OUT;
    }
    return packed;
}
/****************************************************************
* Sobel Filter Y-Gradient used is 5x5
*
* --- ---- ---- ---- ---
* | -1 | -4 | -6 | -4 | -1 |
* --- ---- ---- ---- ---
* | -2 | -8 | -12 | -8 | -2 |
* --- ---- ---- ---- ---
* | 0 | 0 | 0 | 0 | 0 |
* --- ---- ---- ---- --- ---
* | 2 | 8 | 12 | 8 | 2 |
* --- ---- ---- ---- --- ---
* | 1 | 4 | 6 | 4 | 1 |
* --- ---- ---- ---- --- ---
******************************************************************/
/**
 * 5x5 Sobel Y-gradient of one pixel, computed plane by plane.
 *
 * src_buf1..src_buf5 point at the first of five consecutive entries of the
 * five window rows (top to bottom). Row weights are (1, 4, 6, 4, 1) scaled
 * by -1, -2, 0, +2, +1 from the top row to the bottom row; the middle row
 * (src_buf3) has zero weight and is not read.
 *
 * Each plane is 8 bits wide on the input side. When DEPTH_DST is XF_8UP or
 * XF_24UP the plane result is clamped to [0, 255]. Plane results are packed
 * into the return value, 16 bits apart when DEPTH_DST is XF_48SP or XF_16SP
 * and 8 bits apart otherwise.
 */
template <int PLANES, int DEPTH_SRC, int DEPTH_DST>
XF_PTNAME(DEPTH_DST)
xFGradientY5x5(XF_PTNAME(DEPTH_SRC) * src_buf1,
               XF_PTNAME(DEPTH_SRC) * src_buf2,
               XF_PTNAME(DEPTH_SRC) * src_buf3,
               XF_PTNAME(DEPTH_SRC) * src_buf4,
               XF_PTNAME(DEPTH_SRC) * src_buf5) {
// clang-format off
#pragma HLS INLINE off
// clang-format on
    XF_PTNAME(DEPTH_DST) packed = 0;
    // Bit widths of one plane on the input and output side.
    const int STEP = 8;
    const int STEP_OUT = ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) ? 16 : 8;
    const bool saturate_u8 = (DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP);
    int out_lsb = 0;

    for (int plane = 0, lsb = 0; plane < PLANES; plane++, lsb += STEP) {
        const int msb = lsb + STEP - 1;

        // Outer rows (1 and 5): corner taps x1, columns 1/3 x4.
        short int corner_diff =
            ((short int)src_buf5[0].range(msb, lsb) + (short int)src_buf5[4].range(msb, lsb)) -
            ((short int)src_buf1[0].range(msb, lsb) + (short int)src_buf1[4].range(msb, lsb));
        short int top_c13_x4 =
            (short int)(((short int)src_buf1[1].range(msb, lsb) + (short int)src_buf1[3].range(msb, lsb)) << 2);
        short int bot_c13_x4 =
            (short int)(((short int)src_buf5[1].range(msb, lsb) + (short int)src_buf5[3].range(msb, lsb)) << 2);

        // Inner rows (2 and 4): edge taps x2, columns 1/3 x8.
        short int top2_c04_x2 =
            (short int)(((short int)src_buf2[0].range(msb, lsb) + (short int)src_buf2[4].range(msb, lsb)) << 1);
        short int bot2_c04_x2 =
            (short int)(((short int)src_buf4[0].range(msb, lsb) + (short int)src_buf4[4].range(msb, lsb)) << 1);
        short int top2_c13_x8 =
            (short int)(((short int)src_buf2[1].range(msb, lsb) + (short int)src_buf2[3].range(msb, lsb)) << 3);
        short int bot2_c13_x8 =
            (short int)(((short int)src_buf4[1].range(msb, lsb) + (short int)src_buf4[3].range(msb, lsb)) << 3);

        // Centre column: x6 on the outer rows, x12 on the inner rows.
        short int top_c2_x6 = (short int)(src_buf1[2].range(msb, lsb) * 6);
        short int top2_c2_x12 = (short int)(src_buf2[2].range(msb, lsb) * 12);
        short int bot2_c2_x12 = (short int)(src_buf4[2].range(msb, lsb) * 12);
        short int bot_c2_x6 = (short int)(src_buf5[2].range(msb, lsb) * 6);

        // Partial sums of the subtracted (upper) taps ...
        short int sub_a = top_c13_x4 + top2_c04_x2 + top2_c13_x8;
        short int sub_b = top_c2_x6 + top2_c2_x12;
        // ... and of the added (lower) taps plus the signed corner term.
        short int add_a = bot_c13_x4 + bot2_c04_x2;
        short int add_b = bot2_c13_x8 + bot2_c2_x12;
        short int add_c = bot_c2_x6 + corner_diff;

        short int add_total = add_a + add_b + add_c;
        short int sub_total = sub_a + sub_b;
        short int grad = add_total - sub_total;

        XF_PTNAME(DEPTH_DST) g_y = (XF_PTNAME(DEPTH_DST))grad;
        if (saturate_u8 && (grad < 0)) {
            g_y = 0;
        } else if (saturate_u8 && (grad > 255)) {
            g_y = 255;
        }

        packed.range(out_lsb + (STEP_OUT - 1), out_lsb) = g_y;
        out_lsb += STEP_OUT;
    }
    return packed;
}
/**
 * Computes the 5x5 Sobel X and Y gradients for XF_NPIXPERCYCLE(NPC) pixels.
 *
 * src_buf1..src_buf5 are the five window rows (top to bottom); output pixel
 * j uses the five entries starting at index j of every row. Results are
 * stored in GradientvaluesX[j] and GradientvaluesY[j].
 */
template <int NPC, int PLANES, int DEPTH_SRC, int DEPTH_DST>
void xFSobel5x5(XF_PTNAME(DEPTH_DST) * GradientvaluesX,
                XF_PTNAME(DEPTH_DST) * GradientvaluesY,
                XF_PTNAME(DEPTH_SRC) * src_buf1,
                XF_PTNAME(DEPTH_SRC) * src_buf2,
                XF_PTNAME(DEPTH_SRC) * src_buf3,
                XF_PTNAME(DEPTH_SRC) * src_buf4,
                XF_PTNAME(DEPTH_SRC) * src_buf5) {
// clang-format off
#pragma HLS INLINE off
// clang-format on
Compute_Grad_Loop:
    for (ap_uint<5> j = 0; j < XF_NPIXPERCYCLE(NPC); j++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=8 max=8
#pragma HLS UNROLL
// clang-format on
        // Start of the 5-entry window of each row for output pixel j.
        XF_PTNAME(DEPTH_SRC)* row1 = &src_buf1[j];
        XF_PTNAME(DEPTH_SRC)* row2 = &src_buf2[j];
        XF_PTNAME(DEPTH_SRC)* row3 = &src_buf3[j];
        XF_PTNAME(DEPTH_SRC)* row4 = &src_buf4[j];
        XF_PTNAME(DEPTH_SRC)* row5 = &src_buf5[j];

        GradientvaluesX[j] = xFGradientX5x5<PLANES, DEPTH_SRC, DEPTH_DST>(row1, row2, row3, row4, row5);
        GradientvaluesY[j] = xFGradientY5x5<PLANES, DEPTH_SRC, DEPTH_DST>(row1, row2, row3, row4, row5);
    }
}
/**************************************************************************************
* ProcessSobel5x5 : Computes gradients for the column input data
**************************************************************************************/
/**
 * Runs one pass over all column words of a row for the 5x5 Sobel filter.
 *
 * For every column word the loop:
 *   1. refreshes the line buffer: while row < img_height the next word of
 *      _src_mat is stored in buf[row_ind], otherwise buf[bottom2] is zeroed;
 *   2. loads the words of line-buffer rows tp1 / tp2 / mid / bottom1 /
 *      bottom2 into src_buf1..src_buf5 starting at index 4 (indices 0..3
 *      hold the last four entries carried over from the previous word);
 *   3. calls xFSobel5x5 to fill GradientValuesX / GradientValuesY;
 *   4. moves the last four window entries to the front for the next word;
 *   5. packs the results into inter_valx / inter_valy and writes output
 *      words to _dst_matx / _dst_maty at write_index (from col 1 on for
 *      XF_NPPC8, from col 2 on otherwise).
 *
 * read_index, write_index, shift_x, shift_y, inter_valx and inter_valy are
 * references and are updated for the caller. The outputs still pending when
 * the loop ends are produced by xFSobelFilter5x5 after this call.
 *
 * NOTE(review): xfExtractPixels / xfPackPixels are defined elsewhere;
 * presumably the pack arguments are (values, packed word, start index,
 * count, shift) - confirm against xf_utility.hpp. In the non-XF_NPPC8 path
 * the col == 0 pack call receives a count of npc - 2; verify how the helper
 * treats a non-positive count.
 * NOTE(review): col is ap_uint<13> while img_width is uint16_t; this looks
 * like it assumes img_width < 8192 - confirm against the callers.
 */
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int PLANES,
int DEPTH_SRC,
int DEPTH_DST,
int NPC,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int TC>
void ProcessSobel5x5(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_matx,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_maty,
XF_SNAME(WORDWIDTH_SRC) buf[5][(COLS >> XF_BITSHIFT(NPC))],
XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 4],
XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 4],
XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 4],
XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC) + 4],
XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC) + 4],
XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)],
XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)],
XF_SNAME(WORDWIDTH_DST) & inter_valx,
XF_SNAME(WORDWIDTH_DST) & inter_valy,
uint16_t img_width,
uint16_t img_height,
ap_uint<13> row_ind,
uint16_t& shift_x,
uint16_t& shift_y,
ap_uint<4> tp1,
ap_uint<4> tp2,
ap_uint<4> mid,
ap_uint<4> bottom1,
ap_uint<4> bottom2,
ap_uint<13> row,
int& read_index,
int& write_index) {
// clang-format off
#pragma HLS INLINE
// clang-format on
XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2, buf3, buf4;
// Size of each src_buf* window: the pixels of one word plus four carried over.
ap_uint<8> buf_size = XF_NPIXPERCYCLE(NPC) + 4;
uint16_t npc = XF_NPIXPERCYCLE(NPC);
// Bit position limits used to place a value at the top of the output word.
ap_uint<8> max_loop = XF_WORDDEPTH(WORDWIDTH_DST);
ap_uint<8> step = XF_PIXELDEPTH(DEPTH_DST);
Col_Loop:
for (ap_uint<13> col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
// Refresh the line buffer: fetch a new word while image rows remain,
// otherwise feed zeros as the row below the image.
if (row < img_height)
buf[row_ind][col] = _src_mat.read(read_index++);
else
buf[bottom2][col] = 0;
// Words of the five window rows at this column, top to bottom.
buf0 = buf[tp1][col];
buf1 = buf[tp2][col];
buf2 = buf[mid][col];
buf3 = buf[bottom1][col];
buf4 = buf[bottom2][col];
// Place the new data behind the four carried-over entries.
if (NPC == XF_NPPC8) {
xfExtractPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&src_buf1[4], buf0, 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&src_buf2[4], buf1, 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&src_buf3[4], buf2, 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&src_buf4[4], buf3, 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&src_buf5[4], buf4, 0);
} else {
src_buf1[4] = buf0;
src_buf2[4] = buf1;
src_buf3[4] = buf2;
src_buf4[4] = buf3;
src_buf5[4] = buf4;
}
xFSobel5x5<NPC, PLANES, DEPTH_SRC, DEPTH_DST>(GradientValuesX, GradientValuesY, src_buf1, src_buf2, src_buf3,
src_buf4, src_buf5);
// Carry the last four window entries over to the front for the next word.
for (ap_uint<4> i = 0; i < 4; i++) {
// clang-format off
#pragma HLS unroll
// clang-format on
src_buf1[i] = src_buf1[buf_size - (4 - i)];
src_buf2[i] = src_buf2[buf_size - (4 - i)];
src_buf3[i] = src_buf3[buf_size - (4 - i)];
src_buf4[i] = src_buf4[buf_size - (4 - i)];
src_buf5[i] = src_buf5[buf_size - (4 - i)];
}
if (col == 0) {
// First word of the row: start a fresh output word from gradient index 2
// onwards (npc - 2 values); nothing is written yet.
shift_x = 0, shift_y = 0;
inter_valx = 0;
inter_valy = 0;
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], inter_valx, 2, (npc - 2), shift_x);
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], inter_valy, 2, (npc - 2), shift_y);
} else {
if ((NPC == XF_NPPC8)) {
// Gradient indices 0 and 1 complete the word started in the previous
// iteration; write it out, then start the next word.
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], inter_valx, 0, 2, shift_x);
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], inter_valy, 0, 2, shift_y);
_dst_matx.write(write_index, inter_valx);
_dst_maty.write(write_index++, inter_valy);
shift_x = 0;
shift_y = 0;
inter_valx = 0;
inter_valy = 0;
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], inter_valx, 2, (npc - 2), shift_x);
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], inter_valy, 2, (npc - 2), shift_y);
} else {
// Other NPC values: outputs start once two columns have been consumed;
// the gradient is placed in the top `step` bits of the output word.
if (col >= 2) {
inter_valx((max_loop - 1), (max_loop - step)) = GradientValuesX[0];
inter_valy((max_loop - 1), (max_loop - step)) = GradientValuesY[0];
_dst_matx.write(write_index, inter_valx);
_dst_maty.write(write_index++, inter_valy);
}
}
}
} // Col_Loop
}
/**
 * 5x5 Sobel filter over a whole image.
 *
 * Reads _src_mat sequentially (read_index) and writes the X gradient to
 * _dst_matx and the Y gradient to _dst_maty sequentially (write_index).
 * img_width is used as the number of column words iterated per row and
 * img_height as the number of rows.
 *
 * A five-row line buffer (buf) is used as a ring: row_ind is the row being
 * filled, and tp1 / tp2 / mid / bottom1 / bottom2 select the five window
 * rows from top to bottom. Borders are handled with zeros: buf[0] and buf[1]
 * are cleared before the first row, ProcessSobel5x5 zeroes the bottom row
 * once the image rows are exhausted, the first four window entries are
 * cleared at the start of each row, and the entries to the right of the last
 * column are zeroed before the final outputs of a row are computed.
 *
 * USE_URAM selects the storage pragmas applied to the line buffer.
 *
 * NOTE(review): the function-scope locals `i`, `buf0`..`buf4` and `tmp_in`
 * appear to be unused here (the loops below declare their own `i`).
 */
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int PLANES,
int DEPTH_SRC,
int DEPTH_DST,
int NPC,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int TC,
bool USE_URAM>
void xFSobelFilter5x5(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_matx,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_maty,
uint16_t img_height,
uint16_t img_width) {
ap_uint<13> row_ind;
ap_uint<13> row, col;
ap_uint<4> tp1, tp2, mid, bottom1, bottom2;
ap_uint<5> i;
// Size of each src_buf* window: the pixels of one word plus four carried over.
ap_uint<8> buf_size = XF_NPIXPERCYCLE(NPC) + 4;
// Bit position limits used to place a value at the top of the output word.
ap_uint<9> step = XF_PIXELDEPTH(DEPTH_DST);
ap_uint<9> max_loop = XF_WORDDEPTH(WORDWIDTH_DST);
uint16_t shift_x = 0, shift_y = 0;
int read_index = 0, write_index = 0;
XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)];
XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=GradientValuesX complete dim=1
#pragma HLS ARRAY_PARTITION variable=GradientValuesY complete dim=1
// clang-format on
XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2, buf3, buf4;
// Temporary buffers to hold image data from five rows
XF_PTNAME(DEPTH_SRC)
src_buf1[XF_NPIXPERCYCLE(NPC) + 4], src_buf2[XF_NPIXPERCYCLE(NPC) + 4], src_buf3[XF_NPIXPERCYCLE(NPC) + 4],
src_buf4[XF_NPIXPERCYCLE(NPC) + 4], src_buf5[XF_NPIXPERCYCLE(NPC) + 4];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf4 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf5 complete dim=1
// clang-format on
XF_SNAME(WORDWIDTH_SRC) tmp_in;
XF_SNAME(WORDWIDTH_DST) inter_valx = 0, inter_valy = 0;
// Line buffer holding five rows of image data
XF_SNAME(WORDWIDTH_SRC) buf[5][(COLS >> XF_BITSHIFT(NPC))];
if (USE_URAM) {
// clang-format off
#pragma HLS RESOURCE variable=buf core=RAM_S2P_URAM
#pragma HLS array reshape variable=buf dim=1 factor=5 cyclic
// clang-format on
} else {
// clang-format off
#pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
#pragma HLS ARRAY_PARTITION variable=buf complete dim=1
// clang-format on
}
// Prologue: line-buffer rows 0 and 1 are the zero rows above the image,
// row 2 receives the first image row.
row_ind = 2;
Clear_Row_Loop:
for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
buf[0][col] = 0;
buf[1][col] = 0;
buf[row_ind][col] = _src_mat.read(read_index++);
}
row_ind++;
// Line-buffer row 3 receives the second image row.
Read_Row2_Loop:
for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
buf[row_ind][col] = _src_mat.read(read_index++);
}
row_ind++;
Row_Loop:
for (row = 2; row < img_height + 2; row++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
// Map the ring position to window rows: the row being filled (row_ind) is
// bottom2 and the four rows before it follow in ring order. row_ind only
// takes the values 0..4 here (it wraps at 5 at the end of the loop body).
if (row_ind == 4) {
tp1 = 0;
tp2 = 1;
mid = 2;
bottom1 = 3;
bottom2 = 4;
} else if (row_ind == 0) {
tp1 = 1;
tp2 = 2;
mid = 3;
bottom1 = 4;
bottom2 = 0;
} else if (row_ind == 1) {
tp1 = 2;
tp2 = 3;
mid = 4;
bottom1 = 0;
bottom2 = 1;
} else if (row_ind == 2) {
tp1 = 3;
tp2 = 4;
mid = 0;
bottom1 = 1;
bottom2 = 2;
} else if (row_ind == 3) {
tp1 = 4;
tp2 = 0;
mid = 1;
bottom1 = 2;
bottom2 = 3;
}
// Left border: the four carried-over window entries start as zero.
src_buf1[0] = src_buf1[1] = src_buf1[2] = src_buf1[3] = 0;
src_buf2[0] = src_buf2[1] = src_buf2[2] = src_buf2[3] = 0;
src_buf3[0] = src_buf3[1] = src_buf3[2] = src_buf3[3] = 0;
src_buf4[0] = src_buf4[1] = src_buf4[2] = src_buf4[3] = 0;
src_buf5[0] = src_buf5[1] = src_buf5[2] = src_buf5[3] = 0;
inter_valx = inter_valy = 0;
ProcessSobel5x5<SRC_T, DST_T, ROWS, COLS, PLANES, DEPTH_SRC, DEPTH_DST, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, TC>(
_src_mat, _dst_matx, _dst_maty, buf, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, GradientValuesX,
GradientValuesY, inter_valx, inter_valy, img_width, img_height, row_ind, shift_x, shift_y, tp1, tp2, mid,
bottom1, bottom2, row, read_index, write_index);
if ((NPC == XF_NPPC8) || (NPC == XF_NPPC16)) {
// Right border, multi-pixel case: entries 0..3 hold the last four pixels of
// the row (carried over by ProcessSobel5x5); everything from index 4 on is
// zeroed, then the two remaining gradients complete the pending word.
for (ap_uint<6> i = 4; i < (XF_NPIXPERCYCLE(NPC) + 4); i++) {
src_buf1[i] = 0;
src_buf2[i] = 0;
src_buf3[i] = 0;
src_buf4[i] = 0;
src_buf5[i] = 0;
}
GradientValuesX[0] = xFGradientX5x5<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[0], &src_buf2[0], &src_buf3[0],
&src_buf4[0], &src_buf5[0]);
GradientValuesX[1] = xFGradientX5x5<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[1], &src_buf2[1], &src_buf3[1],
&src_buf4[1], &src_buf5[1]);
GradientValuesY[0] = xFGradientY5x5<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[0], &src_buf2[0], &src_buf3[0],
&src_buf4[0], &src_buf5[0]);
GradientValuesY[1] = xFGradientY5x5<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[1], &src_buf2[1], &src_buf3[1],
&src_buf4[1], &src_buf5[1]);
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], inter_valx, 0, 2, shift_x);
xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], inter_valy, 0, 2, shift_y);
_dst_matx.write(write_index, inter_valx);
_dst_maty.write(write_index++, inter_valy);
} else {
// Right border, remaining NPC values: two more outputs are produced, each
// with the last window entry zeroed; the gradient helpers are limited to
// one hardware instance each.
// clang-format off
#pragma HLS ALLOCATION function instances=xFGradientX5x5<PLANES, DEPTH_SRC, DEPTH_DST> limit=1
#pragma HLS ALLOCATION function instances=xFGradientY5x5<PLANES, DEPTH_SRC, DEPTH_DST> limit=1
// clang-format on
// First border output.
src_buf1[buf_size - 1] = 0;
src_buf2[buf_size - 1] = 0;
src_buf3[buf_size - 1] = 0;
src_buf4[buf_size - 1] = 0;
src_buf5[buf_size - 1] = 0;
GradientValuesX[0] = xFGradientX5x5<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[0], &src_buf2[0], &src_buf3[0],
&src_buf4[0], &src_buf5[0]);
GradientValuesY[0] = xFGradientY5x5<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[0], &src_buf2[0], &src_buf3[0],
&src_buf4[0], &src_buf5[0]);
inter_valx((max_loop - 1), (max_loop - step)) = GradientValuesX[0];
inter_valy((max_loop - 1), (max_loop - step)) = GradientValuesY[0];
_dst_matx.write(write_index, inter_valx);
_dst_maty.write(write_index++, inter_valy);
// Move the last four window entries to the front (a one-entry shift when
// buf_size is 5), then produce the second border output the same way.
for (ap_uint<4> i = 0; i < 4; i++) {
// clang-format off
#pragma HLS unroll
// clang-format on
src_buf1[i] = src_buf1[buf_size - (4 - i)];
src_buf2[i] = src_buf2[buf_size - (4 - i)];
src_buf3[i] = src_buf3[buf_size - (4 - i)];
src_buf4[i] = src_buf4[buf_size - (4 - i)];
src_buf5[i] = src_buf5[buf_size - (4 - i)];
}
src_buf1[buf_size - 1] = 0;
src_buf2[buf_size - 1] = 0;
src_buf3[buf_size - 1] = 0;
src_buf4[buf_size - 1] = 0;
src_buf5[buf_size - 1] = 0;
GradientValuesX[0] = xFGradientX5x5<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[0], &src_buf2[0], &src_buf3[0],
&src_buf4[0], &src_buf5[0]);
GradientValuesY[0] = xFGradientY5x5<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[0], &src_buf2[0], &src_buf3[0],
&src_buf4[0], &src_buf5[0]);
inter_valx((max_loop - 1), (max_loop - step)) = GradientValuesX[0];
inter_valy((max_loop - 1), (max_loop - step)) = GradientValuesY[0];
_dst_matx.write(write_index, inter_valx);
_dst_maty.write(write_index++, inter_valy);
}
// Advance the ring position of the five-row line buffer.
row_ind++;
if (row_ind == 5) {
row_ind = 0;
}
} // Row_Loop
}
// xFSobelFilter5x5
/*******************************************************************************
* SobelFilter7x7
*******************************************************************************
* SobelFilter X-Gradient used is 7X7
*
* --- ---- ---- ---- --- ---- --- ----
* | -1 | -4 | -5 | 0 | 5 | 4 | 1 |
* --- ---- ---- ---- --- ---- --- ----
* | -6 | -24 | -30 | 0 | 30 | 24 | 6 |
* --- ---- ---- ---- --- ---- --- ----
* | -15 | -60 | -75 | 0 | 75 | 60 | 15 |
* --- ---- ---- ---- --- ---- --- ----
* | -20 | -80 | -100 | 0 | 100 | 80 | 20 |
* --- ---- ---- ---- --- ---- --- ----
* | -15 | -60 | -75 | 0 | 75 | 60 | 15 |
* --- ---- ---- ---- --- ---- --- ----
* | -6 | -24 | -30 | 0 | 30 | 24 | 6 |
* --- ---- ---- ---- --- ---- --- ----
* | -1 | -4 | -5 | 0 | 5 | 4 | 1 |
* --- ---- ---- ---- --- ---- --- ----
******************************************************************************/
/**
 * Computes the 7x7 Sobel X-gradient for a single output pixel.
 *
 * Each src_bufN pointer is read at offsets [0]..[6] (seven neighbouring
 * samples of one window row). src_buf1..src_buf7 supply the seven kernel rows,
 * src_buf4 being the centre row. Offset [3] is never read because the centre
 * column of the X kernel is zero.
 *
 * Every sample is processed as PLANES channels of 8 bits (STEP is 8 on every
 * path). The multiplications by the kernel weights are built from shifts and
 * adds. The per-channel result is saturated to [0, 255] for XF_8UP/XF_24UP
 * and to [-32768, 32767] for XF_16SP/XF_48SP, then written into a STEP_OUT-bit
 * slice of the return value. For XF_32SP no saturation is applied.
 *
 * @return the packed per-channel gradient values.
 */
template <int PLANES, int DEPTH_SRC, int DEPTH_DST>
XF_PTNAME(DEPTH_DST)
xFGradientX7x7(XF_PTNAME(DEPTH_SRC) * src_buf1,
XF_PTNAME(DEPTH_SRC) * src_buf2,
XF_PTNAME(DEPTH_SRC) * src_buf3,
XF_PTNAME(DEPTH_SRC) * src_buf4,
XF_PTNAME(DEPTH_SRC) * src_buf5,
XF_PTNAME(DEPTH_SRC) * src_buf6,
XF_PTNAME(DEPTH_SRC) * src_buf7) {
// clang-format off
#pragma HLS INLINE off
#pragma HLS PIPELINE II=1
// clang-format on
XF_PTNAME(DEPTH_DST) g_x = 0;
XF_PTNAME(DEPTH_DST) val = 0;
// STEP is the bit width of one input channel, STEP_OUT the bit width of one
// output channel; p is the running bit offset into the packed output.
int STEP, STEP_OUT, p = 0;
if ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) {
STEP_OUT = 16;
STEP = 8;
} else if ((DEPTH_DST == XF_32SP)) {
STEP_OUT = 32;
STEP = 8;
} else {
STEP = 8;
STEP_OUT = 8;
}
// One iteration per channel; k is the bit offset of the channel in the input.
for (int i = 0, k = 0; i < PLANES; i++, k += STEP) {
int Res = 0;
// Rows 1 and 7 (weights 1, 4, 5). M00 is already a signed right-minus-left
// difference. In the final sum M01..M08 (left columns) are subtracted, while
// A00..A07 (right columns) and M00, M09..M11 are added.
ap_int<20> M00 = (ap_int<20>)(((ap_int<20>)src_buf1[6].range(k + STEP - 1, k) +
(ap_int<20>)src_buf7[6].range(k + STEP - 1, k)) -
((ap_int<20>)src_buf1[0].range(k + STEP - 1, k) +
(ap_int<20>)src_buf7[0].range(k + STEP - 1, k)));
ap_int<20> M01 = (ap_int<20>)(((ap_int<20>)src_buf1[1].range(k + STEP - 1, k) +
(ap_int<20>)src_buf7[1].range(k + STEP - 1, k))
<< 2);
ap_int<20> A00 = (ap_int<20>)(((ap_int<20>)src_buf1[5].range(k + STEP - 1, k) +
(ap_int<20>)src_buf7[5].range(k + STEP - 1, k))
<< 2);
ap_int<20> M02 =
(ap_int<20>)(((ap_int<20>)src_buf1[2].range(k + STEP - 1, k) +
(ap_int<20>)src_buf7[2].range(k + STEP - 1, k))
<< 2) +
(ap_int<20>)((ap_int<20>)src_buf1[2].range(k + STEP - 1, k) +
(ap_int<20>)src_buf7[2].range(k + STEP - 1, k)); //(src_buf1[2] + src_buf7[2]) * 5;
ap_int<20> A01 = (ap_int<20>)(((ap_int<20>)src_buf1[4].range(k + STEP - 1, k) +
(ap_int<20>)src_buf7[4].range(k + STEP - 1, k))
<< 2) +
(ap_int<20>)src_buf1[4].range(k + STEP - 1, k) +
(ap_int<20>)src_buf7[4].range(k + STEP - 1,
k); //(src_buf1[4] + src_buf7[4]) * 5;
// Rows 2 and 6 (weights 6, 24, 30).
ap_int<20> M03 = (ap_int<20>)(((ap_int<20>)src_buf2[0].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[0].range(k + STEP - 1, k))
<< 2) +
(ap_int<20>)(((ap_int<20>)src_buf2[0].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[0].range(k + STEP - 1, k))
<< 1); //(src_buf2[0] + src_buf6[0]) * 6;
ap_int<20> A02 = (ap_int<20>)(((ap_int<20>)src_buf2[6].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[6].range(k + STEP - 1, k))
<< 2) +
(ap_int<20>)(((ap_int<20>)src_buf2[6].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[6].range(k + STEP - 1, k))
<< 1); //(src_buf2[6] + src_buf6[6]) * 6;
ap_int<20> M04 = (ap_int<20>)(((ap_int<20>)src_buf2[1].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[1].range(k + STEP - 1, k))
<< 4) +
(ap_int<20>)(((ap_int<20>)src_buf2[1].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[1].range(k + STEP - 1, k))
<< 3); //(src_buf2[1] + src_buf6[1]) * 24;
ap_int<20> A03 = (ap_int<20>)(((ap_int<20>)src_buf2[5].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[5].range(k + STEP - 1, k))
<< 4) +
(ap_int<20>)(((ap_int<20>)src_buf2[5].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[5].range(k + STEP - 1, k))
<< 3); //(src_buf2[5] + src_buf6[5]) * 24;
ap_int<20> M05 = (ap_int<20>)(((ap_int<20>)src_buf2[2].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[2].range(k + STEP - 1, k))
<< 5) -
(ap_int<20>)(((ap_int<20>)src_buf2[2].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[2].range(k + STEP - 1, k))
<< 1); //(src_buf2[2] + src_buf6[2]) * 30;
ap_int<20> A04 = (ap_int<20>)(((ap_int<20>)src_buf2[4].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[4].range(k + STEP - 1, k))
<< 5) -
(ap_int<20>)(((ap_int<20>)src_buf2[4].range(k + STEP - 1, k) +
(ap_int<20>)src_buf6[4].range(k + STEP - 1, k))
<< 1); //(src_buf2[4] + src_buf6[4]) * 30;
// Rows 3 and 5 (weights 15, 60, 75).
ap_int<20> M06 =
(ap_int<20>)(((ap_int<20>)src_buf3[0].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[0].range(k + STEP - 1, k))
<< 4) -
(ap_int<20>)((ap_int<20>)src_buf3[0].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[0].range(k + STEP - 1, k)); //(src_buf3[0] + src_buf5[0]) * 15;
ap_int<20> A05 =
(ap_int<20>)(((ap_int<20>)src_buf3[6].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[6].range(k + STEP - 1, k))
<< 4) -
(ap_int<20>)((ap_int<20>)src_buf3[6].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[6].range(k + STEP - 1, k)); //(src_buf3[6] + src_buf5[6]) * 15;
ap_int<20> M07 = (ap_int<20>)(((ap_int<20>)src_buf3[1].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[1].range(k + STEP - 1, k))
<< 6) -
(ap_int<20>)(((ap_int<20>)src_buf3[1].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[1].range(k + STEP - 1, k))
<< 2); //(src_buf3[1] + src_buf5[1]) * 60;
ap_int<20> A06 = (ap_int<20>)(((ap_int<20>)src_buf3[5].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[5].range(k + STEP - 1, k))
<< 6) -
(ap_int<20>)(((ap_int<20>)src_buf3[5].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[5].range(k + STEP - 1, k))
<< 2); //(src_buf3[5] + src_buf5[5]) * 60;
// Note for M08/A07: in the third term '+' binds tighter than '<<', so the sum
// of the two samples is what gets shifted left by 1 (64 + 8 + 2 + 1 = 75).
ap_int<20> M08 =
(ap_int<20>)(((ap_int<20>)src_buf3[2].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[2].range(k + STEP - 1, k))
<< 6) +
(ap_int<20>)(((ap_int<20>)src_buf3[2].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[2].range(k + STEP - 1, k))
<< 3) +
(ap_int<20>)((ap_int<20>)src_buf3[2].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[2].range(k + STEP - 1, k)
<< 1) +
(ap_int<20>)src_buf3[2].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[2].range(k + STEP - 1,
k); //(src_buf3[2] + src_buf5[2]) * 75;
ap_int<20> A07 =
(ap_int<20>)(((ap_int<20>)src_buf3[4].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[4].range(k + STEP - 1, k))
<< 6) +
(ap_int<20>)(((ap_int<20>)src_buf3[4].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[4].range(k + STEP - 1, k))
<< 3) +
(ap_int<20>)((ap_int<20>)src_buf3[4].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[4].range(k + STEP - 1, k)
<< 1) +
(ap_int<20>)src_buf3[4].range(k + STEP - 1, k) +
(ap_int<20>)src_buf5[4].range(k + STEP - 1,
k); //(src_buf3[4] + src_buf5[4]) * 75;
// Centre row 4 (weights 20, 80, 100), computed directly as signed
// right-minus-left differences.
ap_int<20> M09 = (ap_int<20>)(((ap_int<20>)src_buf4[6].range(k + STEP - 1, k) -
(ap_int<20>)src_buf4[0].range(k + STEP - 1, k))
<< 4) +
(ap_int<20>)(((ap_int<20>)src_buf4[6].range(k + STEP - 1, k) -
(ap_int<20>)src_buf4[0].range(k + STEP - 1, k))
<< 2); //(src_buf4[6] - src_buf4[0]) * 20;
ap_int<20> M10 = (ap_int<20>)(((ap_int<20>)src_buf4[5].range(k + STEP - 1, k) -
(ap_int<20>)src_buf4[1].range(k + STEP - 1, k))
<< 6) +
(ap_int<20>)(((ap_int<20>)src_buf4[5].range(k + STEP - 1, k) -
(ap_int<20>)src_buf4[1].range(k + STEP - 1, k))
<< 4); //(src_buf4[5] - src_buf4[1]) * 80;
ap_int<20> M11 =
(ap_int<20>)(((ap_int<20>)src_buf4[4].range(k + STEP - 1, k) -
(ap_int<20>)src_buf4[2].range(k + STEP - 1, k))
<< 6) +
(ap_int<20>)(((ap_int<20>)src_buf4[4].range(k + STEP - 1, k) -
(ap_int<20>)src_buf4[2].range(k + STEP - 1, k))
<< 5) +
(ap_int<20>)((ap_int<20>)src_buf4[4].range(k + STEP - 1, k) - (ap_int<20>)src_buf4[2].range(k + STEP - 1, k)
<< 2); //(src_buf4[4] - src_buf4[2]) * 100;
// Adder tree: FS* collect the terms to subtract, FA* the terms to add.
ap_int<20> FS00 = M01 + M02 + M03;
ap_int<20> FS01 = M04 + M05;
ap_int<20> FS02 = M06 + M07 + M08;
ap_int<20> FA00 = A00 + A01;
ap_int<20> FA01 = A02 + A03;
ap_int<20> FA02 = A04 + A05;
ap_int<20> FA03 = A06 + A07;
ap_int<20> FA04 = M09 + M10 + M11;
ap_int<20> FS0 = FS00 + FS01 + FS02;
ap_int<20> FA0 = M00 + FA00 + FA01;
ap_int<20> FA1 = FA02 + FA03 + FA04;
Res = (FA0 + FA1) - (FS0);
g_x = (XF_PTNAME(DEPTH_DST))Res;
// Saturate to the output channel range: unsigned 8-bit or signed 16-bit.
if ((DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP)) {
if (Res < 0)
g_x = 0;
else if (Res > 255)
g_x = 255;
}
if ((DEPTH_DST == XF_16SP) || (DEPTH_DST == XF_48SP)) {
if (Res > 32767)
g_x = 32767;
else if (Res < -32768)
g_x = -32768;
}
// Store this channel's result in its STEP_OUT-bit slice of the output.
val.range(p + (STEP_OUT - 1), p) = g_x;
p += STEP_OUT;
}
return val;
}
/********************************************************************
* SobelFilter Y-Gradient used is 7X7
*
* --- ---- ---- ---- --- ---- --- ----
* | -1 | -6 | -15 | -20 | -15 | -6 | -1 |
* --- ---- ---- ---- --- ---- --- ----
* | -4 | -24 | -60 | -80 | -60 |-24 | -4 |
* --- ---- ---- ---- --- ---- --- ----
* | -5 | -30 | -75 |-100 | -75 |-30 | -5 |
* --- ---- ---- ---- --- ---- --- ----
* | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
* --- ---- ---- ---- --- ---- --- ----
* | 5 | 30 | 75 | 100 | 75 | 30 | 5 |
* --- ---- ---- ---- --- ---- --- ----
* | 4 | 24 | 60 | 80 | 60 | 24 | 4 |
* --- ---- ---- ---- --- ---- --- ----
* | 1 | 6 | 15 | 20 | 15 | 6 | 1 |
* --- ---- ---- ---- --- ---- --- ----
******************************************************************/
/**
 * Computes the 7x7 Sobel Y-gradient for a single output pixel.
 *
 * Each src_bufN pointer is read at offsets [0]..[6] (seven neighbouring
 * samples of one window row). src_buf1..src_buf3 are the rows weighted
 * negatively and src_buf5..src_buf7 the rows weighted positively. src_buf4
 * (the centre row, all-zero weights) is accepted but never read.
 *
 * Every sample is processed as PLANES channels of 8 bits (STEP is 8 on every
 * path). The multiplications by the kernel weights are built from shifts and
 * adds. The per-channel result is saturated to [0, 255] for XF_8UP/XF_24UP
 * and to [-32768, 32767] for XF_16SP/XF_48SP, then written into a STEP_OUT-bit
 * slice of the return value. For XF_32SP no saturation is applied.
 *
 * @return the packed per-channel gradient values.
 */
template <int PLANES, int DEPTH_SRC, int DEPTH_DST>
XF_PTNAME(DEPTH_DST)
xFGradientY7x7(XF_PTNAME(DEPTH_SRC) * src_buf1,
XF_PTNAME(DEPTH_SRC) * src_buf2,
XF_PTNAME(DEPTH_SRC) * src_buf3,
XF_PTNAME(DEPTH_SRC) * src_buf4,
XF_PTNAME(DEPTH_SRC) * src_buf5,
XF_PTNAME(DEPTH_SRC) * src_buf6,
XF_PTNAME(DEPTH_SRC) * src_buf7) {
// clang-format off
#pragma HLS INLINE off
#pragma HLS PIPELINE II=1
// clang-format on
XF_PTNAME(DEPTH_DST) g_y = 0, val = 0;
// STEP is the bit width of one input channel, STEP_OUT the bit width of one
// output channel; p is the running bit offset into the packed output.
int STEP, STEP_OUT, p = 0;
if ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) {
STEP_OUT = 16;
STEP = 8;
} else if ((DEPTH_DST == XF_32SP)) {
STEP_OUT = 32;
STEP = 8;
} else {
STEP = 8;
STEP_OUT = 8;
}
// One iteration per channel; k is the bit offset of the channel in the input.
for (int i = 0, k = 0; i < PLANES; i++, k += STEP) {
int Res = 0;
// Rows 1 and 7 (weights 1, 6, 15). M00 is already a signed bottom-minus-top
// difference. In the final sum M01..M08 (rows 1..3) are subtracted, while
// A00..A07 (rows 5..7) and M00, M09..M11 are added.
ap_int<20> M00 = (src_buf7[0].range(k + STEP - 1, k) + src_buf7[6].range(k + STEP - 1, k)) -
(src_buf1[0].range(k + STEP - 1, k) + src_buf1[6].range(k + STEP - 1, k));
ap_int<20> M01 = ((ap_int<20>)(src_buf1[1].range(k + STEP - 1, k) + src_buf1[5].range(k + STEP - 1, k)) << 2) +
((ap_int<20>)(src_buf1[1].range(k + STEP - 1, k) + src_buf1[5].range(k + STEP - 1, k))
<< 1); //(src_buf1[1] + src_buf1[5]) * 6;
ap_int<20> A00 = ((ap_int<20>)(src_buf7[1].range(k + STEP - 1, k) + src_buf7[5].range(k + STEP - 1, k)) << 2) +
((ap_int<20>)(src_buf7[1].range(k + STEP - 1, k) + src_buf7[5].range(k + STEP - 1, k))
<< 1); //(src_buf7[1] + src_buf7[5]) * 6;
ap_int<20> M02 = ((ap_int<20>)(src_buf1[2].range(k + STEP - 1, k) + src_buf1[4].range(k + STEP - 1, k)) << 4) -
(src_buf1[2].range(k + STEP - 1, k) +
src_buf1[4].range(k + STEP - 1,
k)); // (src_buf1[2] + src_buf1[4]) * 15;
ap_int<20> A01 = ((ap_int<20>)(src_buf7[2].range(k + STEP - 1, k) + src_buf7[4].range(k + STEP - 1, k)) << 4) -
(src_buf7[2].range(k + STEP - 1, k) +
src_buf7[4].range(k + STEP - 1,
k)); //(src_buf7[2] + src_buf7[4]) * 15;
// Rows 2 and 6 (weights 4, 24, 60). M03 and A02 are the weight-4 terms.
ap_int<20> M03 = (ap_int<20>)(src_buf2[0].range(k + STEP - 1, k) + src_buf2[6].range(k + STEP - 1, k)) << 2;
ap_int<20> A02 = (ap_int<20>)(src_buf6[0].range(k + STEP - 1, k) + src_buf6[6].range(k + STEP - 1, k)) << 2;
ap_int<20> M04 = ((ap_int<20>)(src_buf2[1].range(k + STEP - 1, k) + src_buf2[5].range(k + STEP - 1, k)) << 4) +
((ap_int<20>)(src_buf2[1].range(k + STEP - 1, k) + src_buf2[5].range(k + STEP - 1, k))
<< 3); //(src_buf2[1] + src_buf2[5]) * 24;
ap_int<20> A03 = ((ap_int<20>)(src_buf6[1].range(k + STEP - 1, k) + src_buf6[5].range(k + STEP - 1, k)) << 4) +
((ap_int<20>)(src_buf6[1].range(k + STEP - 1, k) + src_buf6[5].range(k + STEP - 1, k))
<< 3); //(src_buf6[1] + src_buf6[5]) * 24;
ap_int<20> M05 = ((ap_int<20>)(src_buf2[2].range(k + STEP - 1, k) + src_buf2[4].range(k + STEP - 1, k)) << 6) -
((ap_int<20>)(src_buf2[2].range(k + STEP - 1, k) + src_buf2[4].range(k + STEP - 1, k))
<< 2); //(src_buf2[2] + src_buf2[4]) * 60;
ap_int<20> A04 = ((ap_int<20>)(src_buf6[2].range(k + STEP - 1, k) + src_buf6[4].range(k + STEP - 1, k)) << 6) -
((ap_int<20>)(src_buf6[2].range(k + STEP - 1, k) + src_buf6[4].range(k + STEP - 1, k))
<< 2); //(src_buf6[2] + src_buf6[4]) * 60;
// Rows 3 and 5 (weights 5, 30, 75).
ap_int<20> M06 = ((ap_int<20>)(src_buf3[0].range(k + STEP - 1, k) + src_buf3[6].range(k + STEP - 1, k)) << 2) +
(src_buf3[0].range(k + STEP - 1, k) +
src_buf3[6].range(k + STEP - 1, k)); //(src_buf3[0] + src_buf3[6]) * 5;
ap_int<20> A05 = ((ap_int<20>)(src_buf5[0].range(k + STEP - 1, k) + src_buf5[6].range(k + STEP - 1, k)) << 2) +
(src_buf5[0].range(k + STEP - 1, k) +
src_buf5[6].range(k + STEP - 1, k)); //(src_buf5[0] + src_buf5[6]) * 5;
ap_int<20> M07 = ((ap_int<20>)(src_buf3[1].range(k + STEP - 1, k) + src_buf3[5].range(k + STEP - 1, k)) << 5) -
((ap_int<20>)(src_buf3[1].range(k + STEP - 1, k) + src_buf3[5].range(k + STEP - 1, k))
<< 1); //(src_buf3[1] + src_buf3[5]) * 30;
ap_int<20> A06 = ((ap_int<20>)(src_buf5[1].range(k + STEP - 1, k) + src_buf5[5].range(k + STEP - 1, k)) << 5) -
((ap_int<20>)(src_buf5[1].range(k + STEP - 1, k) + src_buf5[5].range(k + STEP - 1, k))
<< 1); //(src_buf5[1] + src_buf5[5]) * 30;
ap_int<20> M08 = ((ap_int<20>)(src_buf3[2].range(k + STEP - 1, k) + src_buf3[4].range(k + STEP - 1, k)) << 6) +
((ap_int<20>)(src_buf3[2].range(k + STEP - 1, k) + src_buf3[4].range(k + STEP - 1, k)) << 3) +
((ap_int<20>)(src_buf3[2].range(k + STEP - 1, k) + src_buf3[4].range(k + STEP - 1, k)) << 1) +
(src_buf3[2].range(k + STEP - 1, k) +
src_buf3[4].range(k + STEP - 1,
k)); //(src_buf3[2] + src_buf3[4]) * 75;
ap_int<20> A07 = ((ap_int<20>)(src_buf5[2].range(k + STEP - 1, k) + src_buf5[4].range(k + STEP - 1, k)) << 6) +
((ap_int<20>)(src_buf5[2].range(k + STEP - 1, k) + src_buf5[4].range(k + STEP - 1, k)) << 3) +
((ap_int<20>)(src_buf5[2].range(k + STEP - 1, k) + src_buf5[4].range(k + STEP - 1, k)) << 1) +
(src_buf5[2].range(k + STEP - 1, k) +
src_buf5[4].range(k + STEP - 1,
k)); //(src_buf5[2] + src_buf5[4]) * 75;
// Centre column [3] (weights 20, 80, 100), computed directly as signed
// bottom-minus-top differences.
ap_int<20> M09 = ((ap_int<20>)(src_buf7[3].range(k + STEP - 1, k) - src_buf1[3].range(k + STEP - 1, k)) << 4) +
((ap_int<20>)(src_buf7[3].range(k + STEP - 1, k) - src_buf1[3].range(k + STEP - 1, k))
<< 2); //(src_buf7[3] - src_buf1[3]) * 20;
ap_int<20> M10 = ((ap_int<20>)(src_buf6[3].range(k + STEP - 1, k) - src_buf2[3].range(k + STEP - 1, k)) << 6) +
((ap_int<20>)(src_buf6[3].range(k + STEP - 1, k) - src_buf2[3].range(k + STEP - 1, k))
<< 4); //(src_buf6[3] - src_buf2[3]) * 80;
ap_int<20> M11 = ((ap_int<20>)(src_buf5[3].range(k + STEP - 1, k) - src_buf3[3].range(k + STEP - 1, k)) << 6) +
((ap_int<20>)(src_buf5[3].range(k + STEP - 1, k) - src_buf3[3].range(k + STEP - 1, k)) << 5) +
((ap_int<20>)(src_buf5[3].range(k + STEP - 1, k) - src_buf3[3].range(k + STEP - 1, k))
<< 2); //(src_buf5[3] - src_buf3[3]) * 100;
// Adder tree: FS* collect the terms to subtract, FA* the terms to add.
ap_int<20> FS00 = M01 + M02 + M03;
ap_int<20> FS01 = M04 + M05;
ap_int<20> FS02 = M06 + M07 + M08;
ap_int<20> FA00 = A00 + A01;
ap_int<20> FA01 = A02 + A03;
ap_int<20> FA02 = A04 + A05;
ap_int<20> FA03 = A06 + A07;
ap_int<20> FA04 = M09 + M10 + M11;
ap_int<20> FS0 = FS00 + FS01 + FS02;
ap_int<20> FA0 = M00 + FA00 + FA01;
ap_int<20> FA1 = FA02 + FA03 + FA04;
Res = (FA0 + FA1) - (FS0);
g_y = (XF_PTNAME(DEPTH_DST))Res;
// Saturate to the output channel range: unsigned 8-bit or signed 16-bit.
if ((DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP)) {
if (Res < 0)
g_y = 0;
else if (Res > 255)
g_y = 255;
}
if ((DEPTH_DST == XF_16SP) || (DEPTH_DST == XF_48SP)) {
if (Res > 32767)
g_y = 32767;
else if (Res < -32768)
g_y = -32768;
}
// g_y = (XF_PTNAME(DEPTH_DST))Res;
// Store this channel's result in its STEP_OUT-bit slice of the output.
val.range(p + (STEP_OUT - 1), p) = (XF_PTNAME(DEPTH_DST))g_y;
p += STEP_OUT;
}
return val;
}
/**
 * Evaluates both 7x7 Sobel gradients for every pixel lane handled per cycle.
 *
 * Lane `n` uses the 7-sample window starting at offset `n` of each of the
 * seven row buffers and stores its results in GradientvaluesX[n] and
 * GradientvaluesY[n]. The lane loop is fully unrolled.
 */
template <int NPC, int PLANES, int DEPTH_SRC, int DEPTH_DST>
void xFSobel7x7(XF_PTNAME(DEPTH_DST) * GradientvaluesX,
                XF_PTNAME(DEPTH_DST) * GradientvaluesY,
                XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 6],
                XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 6],
                XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 6],
                XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC) + 6],
                XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC) + 6],
                XF_PTNAME(DEPTH_SRC) src_buf6[XF_NPIXPERCYCLE(NPC) + 6],
                XF_PTNAME(DEPTH_SRC) src_buf7[XF_NPIXPERCYCLE(NPC) + 6]) {
// clang-format off
#pragma HLS INLINE
    // clang-format on
    for (ap_uint<9> lane = 0; lane < XF_NPIXPERCYCLE(NPC); lane++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=8 max=8
#pragma HLS UNROLL
        // clang-format on
        // Both kernels read the same seven-row window anchored at this lane.
        XF_PTNAME(DEPTH_DST)
        lane_gx = xFGradientX7x7<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[lane], &src_buf2[lane], &src_buf3[lane],
                                                               &src_buf4[lane], &src_buf5[lane], &src_buf6[lane],
                                                               &src_buf7[lane]);
        XF_PTNAME(DEPTH_DST)
        lane_gy = xFGradientY7x7<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[lane], &src_buf2[lane], &src_buf3[lane],
                                                               &src_buf4[lane], &src_buf5[lane], &src_buf6[lane],
                                                               &src_buf7[lane]);
        GradientvaluesX[lane] = lane_gx;
        GradientvaluesY[lane] = lane_gy;
    }
}
/**************************************************************************************
* ProcessSobel7x7 : Computes gradients for the column input data
**************************************************************************************/
/**
 * Processes one image row for the 7x7 Sobel filter.
 *
 * For every column word the function:
 *   1. refreshes the line buffer: while `row < img_height` a new word is read
 *      from `_src_mat` into `buf[row_ind]`; past the last image row the word in
 *      `buf[bottom3]` is zeroed instead (bottom border padding);
 *   2. fetches the seven line-buffer rows selected by tp1, tp2, tp3, mid,
 *      bottom1, bottom2 and bottom3 and feeds them to src_buf1..src_buf7 in
 *      that order;
 *   3. computes the X and Y gradients with xFSobel7x7 and shifts the window
 *      buffers with xfCopyData;
 *   4. packs and writes the gradients to `_gradx_mat` / `_grady_mat`.
 *
 * Output is delayed relative to input: for XF_NPPC8 nothing is written at
 * col == 0 and each later column completes and writes the previously started
 * word; for the single-pixel path writes begin at col == 3. The remaining
 * pixels of the row are emitted by RightBorder7x7.
 *
 * @param buf                 7-row line buffer, indexed [row][column word].
 * @param src_buf1..src_buf7  sliding window buffers, one per kernel row.
 * @param GradientValuesX/Y   per-lane gradient scratch arrays.
 * @param inter_valx/y        output words under construction (in/out).
 * @param img_width           number of column words to process.
 * @param img_height          number of image rows.
 * @param row_ind             line-buffer row receiving the newly read data.
 * @param shiftx/shifty       packing offsets passed to xfPackPixels (in/out).
 * @param row                 current row counter of the caller's row loop.
 * @param read_index          next read position in `_src_mat` (in/out).
 * @param write_index         next write position in the outputs (in/out).
 */
template <int SRC_T,
          int DST_T,
          int ROWS,
          int COLS,
          int PLANES,
          int DEPTH_SRC,
          int DEPTH_DST,
          int NPC,
          int WORDWIDTH_SRC,
          int WORDWIDTH_DST,
          int TC>
void ProcessSobel7x7(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
                     xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _gradx_mat,
                     xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _grady_mat,
                     XF_SNAME(WORDWIDTH_SRC) buf[7][(COLS >> XF_BITSHIFT(NPC))],
                     XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 6],
                     XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 6],
                     XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 6],
                     XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC) + 6],
                     XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC) + 6],
                     XF_PTNAME(DEPTH_SRC) src_buf6[XF_NPIXPERCYCLE(NPC) + 6],
                     XF_PTNAME(DEPTH_SRC) src_buf7[XF_NPIXPERCYCLE(NPC) + 6],
                     XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)],
                     XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)],
                     XF_SNAME(WORDWIDTH_DST) & inter_valx,
                     XF_SNAME(WORDWIDTH_DST) & inter_valy,
                     uint16_t img_width,
                     uint16_t img_height,
                     ap_uint<13> row_ind,
                     uint16_t& shiftx,
                     uint16_t& shifty,
                     ap_uint<4> tp1,
                     ap_uint<4> tp2,
                     ap_uint<4> tp3,
                     ap_uint<4> mid,
                     ap_uint<4> bottom1,
                     ap_uint<4> bottom2,
                     ap_uint<4> bottom3,
                     ap_uint<13> row,
                     int& read_index,
                     int& write_index) {
// clang-format off
#pragma HLS INLINE
    // clang-format on
    XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2, buf3, buf4, buf5, buf6;
    uint16_t npc = XF_NPIXPERCYCLE(NPC);
    ap_uint<10> max_loop = XF_WORDDEPTH(WORDWIDTH_DST);

Col_Loop:
    for (ap_uint<13> col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
        // clang-format on
        // Refresh the line buffer: real data while rows remain, zeros below
        // the image (constant-border padding).
        if (row < img_height)
            buf[row_ind][col] = _src_mat.read(read_index++);
        else
            buf[bottom3][col] = 0;

        // Fetch the seven window rows, topmost first.
        buf0 = buf[tp1][col];
        buf1 = buf[tp2][col];
        buf2 = buf[tp3][col];
        buf3 = buf[mid][col];
        buf4 = buf[bottom1][col];
        buf5 = buf[bottom2][col];
        buf6 = buf[bottom3][col];

        // NOTE: a leftover debug statement that printed "hello" at
        // (row 26, col 15) used to live here; it has been removed.

        if (NPC == XF_NPPC8) {
            // Unpack the words into the window buffers (helper defined elsewhere).
            xfExtractData<NPC, WORDWIDTH_SRC, DEPTH_SRC>(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6,
                                                         src_buf7, buf0, buf1, buf2, buf3, buf4, buf5, buf6);
        } else {
            // Single-pixel mode: the new sample enters at the right edge of the window.
            src_buf1[6] = buf0;
            src_buf2[6] = buf1;
            src_buf3[6] = buf2;
            src_buf4[6] = buf3;
            src_buf5[6] = buf4;
            src_buf6[6] = buf5;
            src_buf7[6] = buf6;
        }

        xFSobel7x7<NPC, PLANES, DEPTH_SRC, DEPTH_DST>(GradientValuesX, GradientValuesY, src_buf1, src_buf2, src_buf3,
                                                      src_buf4, src_buf5, src_buf6, src_buf7);
        // Advance the sliding window for the next column.
        xfCopyData<NPC, DEPTH_SRC>(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7);

        if (col == 0) {
            // First column: start a fresh output word; nothing is written yet.
            shiftx = 0;
            shifty = 0;
            inter_valx = 0;
            inter_valy = 0;
            xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], inter_valx, 3, (npc - 3), shiftx);
            xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], inter_valy, 3, (npc - 3), shifty);
        } else {
            if ((NPC == XF_NPPC8)) {
                // Complete the pending word with lanes 0..2, write it, then
                // start the next word with the remaining lanes.
                xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], inter_valx, 0, 3, shiftx);
                xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], inter_valy, 0, 3, shifty);
                _gradx_mat.write(write_index, inter_valx);
                _grady_mat.write(write_index++, inter_valy);
                shiftx = 0;
                shifty = 0;
                inter_valx = 0;
                inter_valy = 0;
                xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], inter_valx, 3, (npc - 3), shiftx);
                xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], inter_valy, 3, (npc - 3), shifty);
            } else {
                // Single-pixel mode: output starts once three columns have been consumed.
                if (col >= 3) {
                    inter_valx((max_loop - 1), (max_loop - XF_PIXELDEPTH(DEPTH_DST))) = GradientValuesX[0];
                    inter_valy((max_loop - 1), (max_loop - XF_PIXELDEPTH(DEPTH_DST))) = GradientValuesY[0];
                    _gradx_mat.write(write_index, inter_valx);
                    _grady_mat.write(write_index++, inter_valy);
                }
            }
        }
    } // Col_Loop
}
/**
 * Emits the last three output pixels of a row, which the column loop could
 * not produce because their 7x7 windows extend past the right image edge.
 *
 * The missing samples are supplied as zeros. In the 8-pixel-per-cycle mode the
 * tail of every window buffer is cleared, three gradients are computed, packed
 * into the pending output word and written once. In the single-pixel mode the
 * window is advanced three times with a zero entering on the right, and one
 * word is written per step.
 *
 * `_src_mat` and `read_index` are accepted for signature symmetry and are not
 * used here.
 */
template <int SRC_T,
          int DST_T,
          int ROWS,
          int COLS,
          int PLANES,
          int DEPTH_SRC,
          int DEPTH_DST,
          int NPC,
          int WORDWIDTH_SRC,
          int WORDWIDTH_DST,
          int TC>
void RightBorder7x7(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
                    xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _gradx_mat,
                    xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _grady_mat,
                    XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 6],
                    XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 6],
                    XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 6],
                    XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC) + 6],
                    XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC) + 6],
                    XF_PTNAME(DEPTH_SRC) src_buf6[XF_NPIXPERCYCLE(NPC) + 6],
                    XF_PTNAME(DEPTH_SRC) src_buf7[XF_NPIXPERCYCLE(NPC) + 6],
                    XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)],
                    XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)],
                    XF_SNAME(WORDWIDTH_DST) & inter_valx,
                    XF_SNAME(WORDWIDTH_DST) & inter_valy,
                    uint16_t& shiftx,
                    uint16_t& shifty,
                    int& read_index,
                    int& write_index) {
    //#pragma HLS INLINE off
    ap_uint<10> word_bits = XF_WORDDEPTH(WORDWIDTH_DST);

    if ((NPC != XF_NPPC8)) {
        // Single-pixel mode: a zero enters at the right edge of every window row.
        src_buf1[6] = 0;
        src_buf2[6] = 0;
        src_buf3[6] = 0;
        src_buf4[6] = 0;
        src_buf5[6] = 0;
        src_buf6[6] = 0;
        src_buf7[6] = 0;

        for (ap_uint<5> step = 0; step < 3; step++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=3
#pragma HLS ALLOCATION function instances=xFGradientX7x7<PLANES, DEPTH_SRC, DEPTH_DST> limit=1
#pragma HLS ALLOCATION function instances=xFGradientY7x7<PLANES, DEPTH_SRC, DEPTH_DST> limit=1
            // clang-format on
            // Gradients are taken from the current window before it is shifted.
            XF_PTNAME(DEPTH_DST)
            edge_gx = xFGradientX7x7<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[0], &src_buf2[0], &src_buf3[0],
                                                                   &src_buf4[0], &src_buf5[0], &src_buf6[0],
                                                                   &src_buf7[0]);
            XF_PTNAME(DEPTH_DST)
            edge_gy = xFGradientY7x7<PLANES, DEPTH_SRC, DEPTH_DST>(&src_buf1[0], &src_buf2[0], &src_buf3[0],
                                                                   &src_buf4[0], &src_buf5[0], &src_buf6[0],
                                                                   &src_buf7[0]);
            xfCopyData<NPC, DEPTH_SRC>(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7);

            inter_valx((word_bits - 1), (word_bits - XF_PIXELDEPTH(DEPTH_DST))) = edge_gx;
            inter_valy((word_bits - 1), (word_bits - XF_PIXELDEPTH(DEPTH_DST))) = edge_gy;
            _gradx_mat.write(write_index, inter_valx);
            _grady_mat.write(write_index, inter_valy);
            write_index++;
        }
    } else {
        // Eight-pixel mode: clear entries 6..13 of every window row, i.e. the
        // part that would have held the next (non-existent) input word.
        for (ap_uint<4> pad = 0; pad < 8; pad++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=8 max=8
#pragma HLS unroll
            // clang-format on
            src_buf1[pad + 6] = 0;
            src_buf2[pad + 6] = 0;
            src_buf3[pad + 6] = 0;
            src_buf4[pad + 6] = 0;
            src_buf5[pad + 6] = 0;
            src_buf6[pad + 6] = 0;
            src_buf7[pad + 6] = 0;
        }

        // Only the first three lanes are still missing from the pending word.
        for (ap_uint<4> lane = 0; lane < 3; lane++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=3
#pragma HLS unroll
            // clang-format on
            GradientValuesX[lane] = xFGradientX7x7<PLANES, DEPTH_SRC, DEPTH_DST>(
                &src_buf1[lane], &src_buf2[lane], &src_buf3[lane], &src_buf4[lane], &src_buf5[lane], &src_buf6[lane],
                &src_buf7[lane]);
            GradientValuesY[lane] = xFGradientY7x7<PLANES, DEPTH_SRC, DEPTH_DST>(
                &src_buf1[lane], &src_buf2[lane], &src_buf3[lane], &src_buf4[lane], &src_buf5[lane], &src_buf6[lane],
                &src_buf7[lane]);
        }

        xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesX[0], inter_valx, 0, 3, shiftx);
        xfPackPixels<NPC, WORDWIDTH_DST, DEPTH_DST>(&GradientValuesY[0], inter_valy, 0, 3, shifty);
        _gradx_mat.write(write_index, inter_valx);
        _grady_mat.write(write_index, inter_valy);
        write_index++;

        // Reset the packing state for the next row.
        inter_valx = 0;
        inter_valy = 0;
        shiftx = 0;
        shifty = 0;
    }
}
/**
 * 7x7 Sobel filter kernel: reads `_src_mat` row by row and writes the X and Y
 * gradient images to `_gradx_mat` and `_grady_mat`.
 *
 * A 7-row line buffer (`buf`) is used as a circular buffer. Rows 0..2 are
 * zeroed to form the top border, and the first three image rows are loaded
 * into rows 3..5 before the main loop starts. The main loop then runs
 * img_height times (row = 3 .. img_height + 2); on each pass `row_ind` names
 * the buffer row that receives new data and the tp1..bottom3 indices name the
 * seven rows of the window from top to bottom.
 *
 * NOTE(review): the three priming loops read three full rows unconditionally;
 * presumably callers guarantee img_height >= 3 - confirm.
 *
 * @param img_height number of image rows.
 * @param img_width  number of column words per row (used as the bound when
 *                   indexing the second dimension of `buf`).
 */
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int PLANES,
int DEPTH_SRC,
int DEPTH_DST,
int NPC,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int TC,
bool USE_URAM>
void xFSobelFilter7x7(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _gradx_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _grady_mat,
uint16_t img_height,
uint16_t img_width) {
ap_uint<13> row_ind, row, col;
ap_uint<4> tp1, tp2, tp3, mid, bottom1, bottom2, bottom3;
ap_uint<5> i;
int read_index = 0, write_index = 0;
// Gradient output values stored in these buffer
XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)];
XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)];
if (NPC > 1) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=GradientValuesX complete dim=1
#pragma HLS ARRAY_PARTITION variable=GradientValuesY complete dim=1
// clang-format on
}
// Temporary window buffers holding image data from the seven kernel rows.
XF_PTNAME(DEPTH_SRC)
src_buf1[XF_NPIXPERCYCLE(NPC) + 6], src_buf2[XF_NPIXPERCYCLE(NPC) + 6], src_buf3[XF_NPIXPERCYCLE(NPC) + 6],
src_buf4[XF_NPIXPERCYCLE(NPC) + 6], src_buf5[XF_NPIXPERCYCLE(NPC) + 6];
XF_PTNAME(DEPTH_SRC)
src_buf6[XF_NPIXPERCYCLE(NPC) + 6], src_buf7[XF_NPIXPERCYCLE(NPC) + 6];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf4 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf5 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf6 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf7 complete dim=1
// clang-format on
// Output words under construction and their packing offsets.
XF_SNAME(WORDWIDTH_DST) inter_valx = 0, inter_valy = 0;
uint16_t shiftx = 0, shifty = 0;
// Seven-row line buffer; the storage resource is chosen by USE_URAM.
XF_SNAME(WORDWIDTH_SRC) buf[7][(COLS >> XF_BITSHIFT(NPC))];
if (USE_URAM) {
// clang-format off
#pragma HLS RESOURCE variable=buf core=RAM_S2P_URAM
#pragma HLS array reshape variable=buf dim=1 factor=7 cyclic
// clang-format on
} else {
// clang-format off
#pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
#pragma HLS ARRAY_PARTITION variable=buf complete dim=1
// clang-format on
}
// Prime the line buffer: rows 0..2 are the zero top border, row 3 receives
// the first image row.
row_ind = 3;
Clear_Row_Loop:
for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
buf[0][col] = 0;
buf[1][col] = 0;
buf[2][col] = 0;
buf[row_ind][col] = _src_mat.read(read_index++);
}
row_ind++;
// Second image row goes into buffer row 4.
Read_Row1_Loop:
for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
buf[row_ind][col] = _src_mat.read(read_index++);
}
row_ind++;
// Third image row goes into buffer row 5; row_ind is 6 when the main loop starts.
Read_Row2_Loop:
for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
buf[row_ind][col] = _src_mat.read(read_index++);
}
row_ind++;
Row_Loop:
for (row = 3; row < img_height + 3; row++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
// modify the buffer indices to re use
// In every case bottom3 equals row_ind (the row being filled) and
// tp1..bottom2 are the six rows that follow it cyclically, oldest first.
if (row_ind == 0) {
tp1 = 1;
tp2 = 2;
tp3 = 3;
mid = 4;
bottom1 = 5;
bottom2 = 6;
bottom3 = 0;
} else if (row_ind == 1) {
tp1 = 2;
tp2 = 3;
tp3 = 4;
mid = 5;
bottom1 = 6;
bottom2 = 0;
bottom3 = 1;
} else if (row_ind == 2) {
tp1 = 3;
tp2 = 4;
tp3 = 5;
mid = 6;
bottom1 = 0;
bottom2 = 1;
bottom3 = 2;
} else if (row_ind == 3) {
tp1 = 4;
tp2 = 5;
tp3 = 6;
mid = 0;
bottom1 = 1;
bottom2 = 2;
bottom3 = 3;
} else if (row_ind == 4) {
tp1 = 5;
tp2 = 6;
tp3 = 0;
mid = 1;
bottom1 = 2;
bottom2 = 3;
bottom3 = 4;
} else if (row_ind == 5) {
tp1 = 6;
tp2 = 0;
tp3 = 1;
mid = 2;
bottom1 = 3;
bottom2 = 4;
bottom3 = 5;
} else if (row_ind == 6) {
tp1 = 0;
tp2 = 1;
tp3 = 2;
mid = 3;
bottom1 = 4;
bottom2 = 5;
bottom3 = 6;
}
// Clear the first six entries of every window buffer at the start of a
// row so the left border is seen as zeros.
for (i = 0; i < 6; i++) {
// clang-format off
#pragma HLS unroll
// clang-format on
src_buf1[i] = 0;
src_buf2[i] = 0;
src_buf3[i] = 0;
src_buf4[i] = 0;
src_buf5[i] = 0;
src_buf6[i] = 0;
src_buf7[i] = 0;
}
inter_valx = inter_valy = 0;
/*********** Process complete row
* **********/
ProcessSobel7x7<SRC_T, DST_T, ROWS, COLS, PLANES, DEPTH_SRC, DEPTH_DST, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, TC>(
_src_mat, _gradx_mat, _grady_mat, buf, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7,
GradientValuesX, GradientValuesY, inter_valx, inter_valy, img_width, img_height, row_ind, shiftx, shifty,
tp1, tp2, tp3, mid, bottom1, bottom2, bottom3, row, read_index, write_index);
// Flush the pixels at the right edge of the row.
RightBorder7x7<SRC_T, DST_T, ROWS, COLS, PLANES, DEPTH_SRC, DEPTH_DST, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, TC>(
_src_mat, _gradx_mat, _grady_mat, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7,
GradientValuesX, GradientValuesY, inter_valx, inter_valy, shiftx, shifty, read_index, write_index);
// Advance the circular line-buffer write position (wraps after row 6).
row_ind++;
if (row_ind == 7) {
row_ind = 0;
}
} // Row_Loop ends here
}
// xFSobelFilter7x7
/**
 * Public entry point of the Sobel filter.
 *
 * Validates the template configuration (simulation builds only) and forwards
 * to the 3x3, 5x5 or 7x7 kernel selected by FILTER_TYPE. The kernels receive
 * the image height in rows and the image width in words, i.e. the column
 * count shifted right by XF_BITSHIFT(NPC).
 *
 * @param _src_mat  input image.
 * @param _dst_matx output X-gradient image.
 * @param _dst_maty output Y-gradient image.
 */
template <int BORDER_TYPE,
          int FILTER_TYPE,
          int SRC_T,
          int DST_T,
          int ROWS,
          int COLS,
          int NPC = 1,
          bool USE_URAM = false>
void Sobel(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
           xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_matx,
           xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_maty) {
// clang-format off
#pragma HLS INLINE OFF
    // clang-format on
    uint16_t rows_in_image = _src_mat.rows;
    uint16_t words_per_row = _src_mat.cols >> XF_BITSHIFT(NPC);

#ifndef __SYNTHESIS__
    assert(((FILTER_TYPE == XF_FILTER_3X3) || (FILTER_TYPE == XF_FILTER_5X5) || (FILTER_TYPE == XF_FILTER_7X7)) &&
           " Filter width must be XF_FILTER_3X3, XF_FILTER_5X5 or XF_FILTER_7X7 ");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && "NPC must be XF_NPPC1 or XF_NPPC8");
    assert((BORDER_TYPE == XF_BORDER_CONSTANT) && "Border type must be XF_BORDER_CONSTANT ");
    assert(((_src_mat.rows <= ROWS) && (_src_mat.cols <= COLS)) && "ROWS and COLS should be greater than input image");
#endif

    // Dispatch on the compile-time filter size; unsupported sizes do nothing.
    switch (FILTER_TYPE) {
        case XF_FILTER_3X3:
            xFSobelFilter3x3<SRC_T, DST_T, ROWS, COLS, XF_CHANNELS(SRC_T, NPC), XF_DEPTH(SRC_T, NPC),
                             XF_DEPTH(DST_T, NPC), NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
                             (COLS >> XF_BITSHIFT(NPC)), USE_URAM>(_src_mat, _dst_matx, _dst_maty, rows_in_image,
                                                                   words_per_row);
            break;
        case XF_FILTER_5X5:
            xFSobelFilter5x5<SRC_T, DST_T, ROWS, COLS, XF_CHANNELS(SRC_T, NPC), XF_DEPTH(SRC_T, NPC),
                             XF_DEPTH(DST_T, NPC), NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
                             (COLS >> XF_BITSHIFT(NPC)), USE_URAM>(_src_mat, _dst_matx, _dst_maty, rows_in_image,
                                                                   words_per_row);
            break;
        case XF_FILTER_7X7:
            xFSobelFilter7x7<SRC_T, DST_T, ROWS, COLS, XF_CHANNELS(SRC_T, NPC), XF_DEPTH(SRC_T, NPC),
                             XF_DEPTH(DST_T, NPC), NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
                             (COLS >> XF_BITSHIFT(NPC)), USE_URAM>(_src_mat, _dst_matx, _dst_maty, rows_in_image,
                                                                   words_per_row);
            break;
        default:
            break;
    }
}
} // namespace cv
} // namespace xf
// xFSobelFilter
#endif // _XF_SOBEL_HPP_