.. _program_listing_file__tmp_ws_src_vitis_common_include_imgproc_xf_sobel.hpp:

Program Listing for File xf_sobel.hpp
=====================================

|exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/imgproc/xf_sobel.hpp``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   /*
    * Copyright 2019 Xilinx, Inc.
    *
    * Licensed under the Apache License, Version 2.0 (the "License");
    * you may not use this file except in compliance with the License.
    * You may obtain a copy of the License at
    *
    * http://www.apache.org/licenses/LICENSE-2.0
    *
    * Unless required by applicable law or agreed to in writing, software
    * distributed under the License is distributed on an "AS IS" BASIS,
    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    * See the License for the specific language governing permissions and
    * limitations under the License.
    */

   #ifndef _XF_SOBEL_HPP_
   #define _XF_SOBEL_HPP_

   typedef unsigned short uint16_t;
   typedef unsigned int uint32_t;

   #include "../common/xf_common.hpp"
   #include "../common/xf_utility.hpp"
   #include "hls_stream.h"

   namespace xf {
   namespace cv {

   /*
    * NOTE(review): in this listing every `template` keyword is followed
    * directly by the declaration, and calls such as xfExtractPixels(...),
    * xfPackPixels(...) and the xf::cv::Mat parameter types carry no template
    * arguments. The names DEPTH_SRC, DEPTH_DST, PLANES, NPC, COLS, ROWS, TC,
    * WORDWIDTH_SRC, WORDWIDTH_DST and USE_URAM are used as compile-time values
    * in the bodies, so the angle-bracket lists appear to have been stripped
    * when the page was generated -- TODO confirm against the original header
    * before compiling anything from this listing.
    */

   /*****************************************************************
    *                       SobelFilter3x3
    *****************************************************************
    *  X-Gradient Computation
    *
    *  -------------
    *  |-1   0   1|
    *  |-2   0   2|
    *  |-1   0   1|
    *  -------------
    *
    * t0..t2 / m0..m2 / b0..b2 are the top / middle / bottom rows of the
    * 3x3 neighbourhood, left to right. The result is
    *     2*m2 - 2*m0 + (t2 + b2) - (t0 + b0)
    * accumulated in a short int. The centre column (t1, m1, b1) is not used.
    * When DEPTH_DST is XF_8UP or XF_24UP the result is clamped to [0, 255];
    * for every other DEPTH_DST the short value is simply cast to the
    * destination pixel type.
    *****************************************************************/
   template XF_PTNAME(DEPTH_DST) xFGradientX3x3(XF_PTNAME(DEPTH_SRC) t0,
                                                 XF_PTNAME(DEPTH_SRC) t1,
                                                 XF_PTNAME(DEPTH_SRC) t2,
                                                 XF_PTNAME(DEPTH_SRC) m0,
                                                 XF_PTNAME(DEPTH_SRC) m1,
                                                 XF_PTNAME(DEPTH_SRC) m2,
                                                 XF_PTNAME(DEPTH_SRC) b0,
                                                 XF_PTNAME(DEPTH_SRC) b1,
                                                 XF_PTNAME(DEPTH_SRC) b2) {
   // clang-format off
   #pragma HLS INLINE off
       // clang-format on
       XF_PTNAME(DEPTH_DST) g_x = 0; // ap_uint<8> g_x = 0;

       // Weighted (x2) middle-row taps: left (subtracted) and right (added).
       short int M00 = ((short int)m0 << 1);
       short int M01 = ((short int)m2 << 1);
       // Unweighted corner taps: right column (added), left column (subtracted).
       short int A00 = (t2 + b2);
       short int S00 = (t0 + b0);

       short int out_pix;
       out_pix = M01 - M00;
       out_pix = out_pix + A00;
       out_pix = out_pix - S00;

       g_x = (XF_PTNAME(DEPTH_DST))out_pix;

       // Unsigned 8-bit-per-channel outputs saturate instead of wrapping.
       if ((DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP)) {
           if (out_pix < 0) {
               g_x = 0;
           } else if (out_pix > 255) {
               g_x = 255;
           }
       }
       return g_x;
   }

   /**********************************************************************
    *  Y-Gradient Computation
    *  -------------
    *  |-1  -2  -1|
    *  | 0   0   0|
    *  | 1   2   1|
    *  -------------
    *
    * The matrix above reflects what the code computes:
    *     2*b1 - 2*t1 + (b0 + b2) - (t0 + t2)
    * i.e. the bottom row is added and the top row is subtracted. The middle
    * row (m0, m1, m2) is not used. Clamping to [0, 255] is applied only when
    * DEPTH_DST is XF_8UP or XF_24UP.
    **********************************************************************/
   template XF_PTNAME(DEPTH_DST) xFGradientY3x3(XF_PTNAME(DEPTH_SRC) t0,
                                                 XF_PTNAME(DEPTH_SRC) t1,
                                                 XF_PTNAME(DEPTH_SRC) t2,
                                                 XF_PTNAME(DEPTH_SRC) m0,
                                                 XF_PTNAME(DEPTH_SRC) m1,
                                                 XF_PTNAME(DEPTH_SRC) m2,
                                                 XF_PTNAME(DEPTH_SRC) b0,
                                                 XF_PTNAME(DEPTH_SRC) b1,
                                                 XF_PTNAME(DEPTH_SRC) b2) {
   // clang-format off
   #pragma HLS INLINE off
       // clang-format on
       XF_PTNAME(DEPTH_DST) g_y = 0;

       // Weighted (x2) centre-column taps: top (subtracted) and bottom (added).
       short int M00 = ((short int)t1 << 1);
       short int M01 = ((short int)b1 << 1);
       // Unweighted corner taps: bottom row (added), top row (subtracted).
       short int A00 = (b0 + b2);
       short int S00 = (t0 + t2);

       short int out_pix;
       out_pix = M01 - M00;
       out_pix = out_pix + A00;
       out_pix = out_pix - S00;

       g_y = (XF_PTNAME(DEPTH_DST))out_pix;

       // Unsigned 8-bit-per-channel outputs saturate instead of wrapping.
       if ((DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP)) {
           if (out_pix < 0) {
               g_y = 0;
           } else if (out_pix > 255) {
               g_y = 255;
           }
       }
       return g_y;
   }

   /**
    * Computes the 3x3 X and Y gradients for every pixel processed in one cycle.
    *
    * For each j in [0, XF_NPIXPERCYCLE(NPC)) and each of the PLANES channels,
    * an 8-bit slice (STEP = 8) is taken from src_buf1/2/3 at positions j, j+1
    * and j+2, passed to xFGradientX3x3 / xFGradientY3x3, and the result is
    * written into a STEP_OUT-bit slice of GradientvaluesX[j] / GradientvaluesY[j].
    * STEP_OUT is 16 when DEPTH_DST is XF_48SP or XF_16SP, otherwise 8.
    *
    * src_buf1, src_buf2 and src_buf3 are read up to index
    * XF_NPIXPERCYCLE(NPC) + 1, so they must hold at least that many elements.
    */
   template void xFSobel3x3(XF_PTNAME(DEPTH_DST) * GradientvaluesX,
                            XF_PTNAME(DEPTH_DST) * GradientvaluesY,
                            XF_PTNAME(DEPTH_SRC) * src_buf1,
                            XF_PTNAME(DEPTH_SRC) * src_buf2,
                            XF_PTNAME(DEPTH_SRC) * src_buf3) {
   // clang-format off
   #pragma HLS INLINE off
       // clang-format on
       // STEP: bits per input channel; STEP_OUT: bits per output channel.
       int STEP, STEP_OUT;
       if ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) {
           STEP_OUT = 16;
           STEP = 8;
       } else {
           STEP_OUT = 8;
           STEP = 8;
       }
   Compute_Grad_Loop:
       for (ap_uint<5> j = 0; j < XF_NPIXPERCYCLE(NPC); j++) {
           int p = 0; // bit offset of the current channel in the output pixel
   // clang-format off
   #pragma HLS UNROLL
           // clang-format on
           // k is the bit offset of the current channel in the input pixel.
           for (ap_uint<5> c = 0, k = 0; c < PLANES; c++, k += STEP) {
               GradientvaluesX[j].range(p + (STEP_OUT - 1), p) = xFGradientX3x3(
                   src_buf1[j].range(k + STEP - 1, k), src_buf1[j + 1].range(k + STEP - 1, k),
                   src_buf1[j + 2].range(k + STEP - 1, k), src_buf2[j].range(k + STEP - 1, k),
                   src_buf2[j + 1].range(k + STEP - 1, k), src_buf2[j + 2].range(k + STEP - 1, k),
                   src_buf3[j].range(k + STEP - 1, k), src_buf3[j + 1].range(k + STEP - 1, k),
                   src_buf3[j + 2].range(k + STEP - 1, k));
               GradientvaluesY[j].range(p + (STEP_OUT - 1), p) = xFGradientY3x3(
                   src_buf1[j].range(k + STEP - 1, k), src_buf1[j + 1].range(k + STEP - 1, k),
                   src_buf1[j + 2].range(k + STEP - 1, k), src_buf2[j].range(k + STEP - 1, k),
                   src_buf2[j + 1].range(k + STEP - 1, k), src_buf2[j + 2].range(k + STEP - 1, k),
                   src_buf3[j].range(k + STEP - 1, k), src_buf3[j + 1].range(k + STEP - 1, k),
                   src_buf3[j + 2].range(k + STEP - 1, k));
               p += STEP_OUT;
           }
       }
   }

   /**************************************************************************************
    * ProcessSobel3x3 : Computes gradients for the column input data
    *
    * Processes one image row, one input word per Col_Loop iteration:
    *   1. While row < img_height, the next word is read from _src_mat into
    *      buf[row_ind][col]; past the last image row, buf[bottom][col] is
    *      zeroed instead.
    *   2. The words of the tp / mid / bottom line-buffer rows are loaded and
    *      placed at index 2 onwards of src_buf1 / src_buf2 / src_buf3
    *      (unpacked with xfExtractPixels when NPC == XF_NPPC8, assigned
    *      directly otherwise).
    *   3. xFSobel3x3 fills GradientValuesX / GradientValuesY.
    *   4. Results are packed into P0 / P1 and, from the second column on,
    *      written to _gradx_mat / _grady_mat at write_index.
    *   5. The last two window entries are copied to indices 0 and 1 so that
    *      they become the left neighbours for the next column.
    *
    * read_index, write_index, shift_x, shift_y, P0 and P1 are updated through
    * references and carry state back to the caller.
    **************************************************************************************/
   template void ProcessSobel3x3(xf::cv::Mat& _src_mat,
                                 xf::cv::Mat& _gradx_mat,
                                 xf::cv::Mat& _grady_mat,
                                 XF_SNAME(WORDWIDTH_SRC) buf[3][(COLS >> XF_BITSHIFT(NPC))],
                                 XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 2],
                                 XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 2],
                                 XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 2],
                                 XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)],
                                 XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)],
                                 XF_SNAME(WORDWIDTH_DST) & P0,
                                 XF_SNAME(WORDWIDTH_DST) & P1,
                                 uint16_t img_width,
                                 uint16_t img_height,
                                 ap_uint<13> row_ind,
                                 uint16_t& shift_x,
                                 uint16_t& shift_y,
                                 ap_uint<2> tp,
                                 ap_uint<2> mid,
                                 ap_uint<2> bottom,
                                 ap_uint<13> row,
                                 int& read_index,
                                 int& write_index) {
   // clang-format off
   #pragma HLS INLINE
       // clang-format on
       XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2;
       uint16_t npc = XF_NPIXPERCYCLE(NPC);
       ap_uint<5> buf_size = XF_NPIXPERCYCLE(NPC) + 2;

   Col_Loop:
       for (ap_uint<13> col = 0; col < img_width; col++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
           // clang-format on
           if (row < img_height)
               buf[row_ind][col] = _src_mat.read(read_index++); // Read data
           else
               buf[bottom][col] = 0; // below the image: feed zeros as the bottom row

           buf0 = buf[tp][col];
           buf1 = buf[mid][col];
           buf2 = buf[bottom][col];

           // Indices 0 and 1 of each window hold the previous column's trailing
           // pixels; the new data goes in from index 2.
           if (NPC == XF_NPPC8) {
               xfExtractPixels(&src_buf1[2], buf0, 0);
               xfExtractPixels(&src_buf2[2], buf1, 0);
               xfExtractPixels(&src_buf3[2], buf2, 0);
           } else {
               src_buf1[2] = buf0;
               src_buf2[2] = buf1;
               src_buf3[2] = buf2;
           }

           xFSobel3x3(GradientValuesX, GradientValuesY, src_buf1, src_buf2, src_buf3);

           // NOTE(review): xfPackPixels is defined outside this file; its 3rd and
           // 4th arguments look like (start index, element count) -- confirm.
           if (col == 0) {
               // First column: nothing is written yet, only the output word is started.
               shift_x = 0;
               shift_y = 0;
               P0 = 0;
               P1 = 0;
               xfPackPixels(&GradientValuesX[0], P0, 1, (npc - 1), shift_x);
               xfPackPixels(&GradientValuesY[0], P1, 1, (npc - 1), shift_y);
           } else {
               // Complete the pending output word with element 0, write it out,
               // then start the next word with the remaining elements.
               xfPackPixels(&GradientValuesX[0], P0, 0, 1, shift_x);
               xfPackPixels(&GradientValuesY[0], P1, 0, 1, shift_y);

               _gradx_mat.write(write_index, P0);
               _grady_mat.write(write_index++, P1);

               shift_x = 0;
               shift_y = 0;
               P0 = 0;
               P1 = 0;
               xfPackPixels(&GradientValuesX[0], P0, 1, (npc - 1), shift_x);
               xfPackPixels(&GradientValuesY[0], P1, 1, (npc - 1), shift_y);
           }

           // Slide the window: keep the last two pixels as the next left context.
           src_buf1[0] = src_buf1[buf_size - 2];
           src_buf1[1] = src_buf1[buf_size - 1];
           src_buf2[0] = src_buf2[buf_size - 2];
           src_buf2[1] = src_buf2[buf_size - 1];
           src_buf3[0] = src_buf3[buf_size - 2];
           src_buf3[1] = src_buf3[buf_size - 1];
       } // Col_Loop
   }

   /**
    * Top-level 3x3 Sobel kernel: reads _src_mat sequentially and writes the X
    * and Y gradient images to _dst_matx and _dst_maty.
    *
    * A three-row line buffer `buf` is used as a ring: row 0 is zero-filled and
    * row 1 is loaded with the first image row before Row_Loop starts. Each
    * Row_Loop iteration (row = 1 .. img_height) selects which physical rows
    * act as top / middle / bottom from row_ind, clears the left context of the
    * pixel windows, runs ProcessSobel3x3 over the row, and then emits one more
    * output word for the last column using 0 as the right-hand neighbour.
    *
    * img_width is used as the Col_Loop bound and as the index range of the
    * line buffer, whose second dimension is COLS >> XF_BITSHIFT(NPC).
    */
   template void xFSobelFilter3x3(xf::cv::Mat& _src_mat,
                                  xf::cv::Mat& _dst_matx,
                                  xf::cv::Mat& _dst_maty,
                                  uint16_t img_height,
                                  uint16_t img_width) {
       ap_uint<13> row_ind;
       ap_uint<2> tp, mid, bottom;
       ap_uint<8> buf_size = XF_NPIXPERCYCLE(NPC) + 2;
       uint16_t shift_x = 0, shift_y = 0;
       ap_uint<13> row, col;
       int read_index = 0, write_index = 0;

       XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)]; // X-Gradient result buffer
       XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)]; // Y-Gradient result buffer
   // clang-format off
   #pragma HLS ARRAY_PARTITION variable=GradientValuesX complete dim=1
   #pragma HLS ARRAY_PARTITION variable=GradientValuesY complete dim=1
       // clang-format on

       XF_PTNAME(DEPTH_SRC)
       src_buf1[XF_NPIXPERCYCLE(NPC) + 2], src_buf2[XF_NPIXPERCYCLE(NPC) + 2],
           // Temporary buffers to hold input data for processing
           src_buf3[XF_NPIXPERCYCLE(NPC) + 2];
   // clang-format off
   #pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1
   #pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1
   #pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1
       // clang-format on

       XF_SNAME(WORDWIDTH_DST) P0, P1; // Output data is packed

       // Line buffer to hold image data
       XF_SNAME(WORDWIDTH_SRC) buf[3][(COLS >> XF_BITSHIFT(NPC))]; // Line buffer
       if (USE_URAM) {
   // clang-format off
   #pragma HLS array reshape variable=buf dim=1 factor=3 cyclic
   #pragma HLS RESOURCE variable=buf core=RAM_S2P_URAM
           // clang-format on
       } else {
   // clang-format off
   #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
   #pragma HLS ARRAY_PARTITION variable=buf complete dim=1
           // clang-format on
       }

       row_ind = 1;

   Clear_Row_Loop:
       for (col = 0; col < img_width; col++) // Top row border care
       {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
           // clang-format on
           buf[0][col] = 0;                                 // zero row above the image
           buf[row_ind][col] = _src_mat.read(read_index++); // Read data
       }
       row_ind++;

   Row_Loop: // Process complete image
       for (row = 1; row < img_height + 1; row++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
           // clang-format on
           if (row_ind == 2) // Map the ring-buffer rows to top / middle / bottom
           {
               tp = 0;
               mid = 1;
               bottom = 2;
           } else if (row_ind == 0) {
               tp = 1;
               mid = 2;
               bottom = 0;
           } else if (row_ind == 1) {
               tp = 2;
               mid = 0;
               bottom = 1;
           }

           // Left border: the two pixels left of column 0 are zero.
           src_buf1[0] = src_buf1[1] = 0;
           src_buf2[0] = src_buf2[1] = 0;
           src_buf3[0] = src_buf3[1] = 0;

           /***********        Process complete row            **********/
           P0 = P1 = 0;
           ProcessSobel3x3(
               _src_mat, _dst_matx, _dst_maty, buf, src_buf1, src_buf2, src_buf3, GradientValuesX, GradientValuesY, P0,
               P1, img_width, img_height, row_ind, shift_x, shift_y, tp, mid, bottom, row, read_index, write_index);

           /*            Last column border care for RO & PO Case            */
           if ((NPC == XF_NPPC8)) {
               // Compute gradient at last column
               int STEP, STEP_OUT, p = 0;
               if ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) {
                   STEP_OUT = 16;
                   STEP = 8;
               } else {
                   STEP_OUT = 8;
                   STEP = 8;
               }

               // The right-hand neighbour column is outside the image, hence the 0s.
               for (ap_uint<5> c = 0, k = 0; c < PLANES; c++, k += STEP) {
                   GradientValuesX[0].range(p + (STEP_OUT - 1), p) = xFGradientX3x3(
                       src_buf1[buf_size - 2].range(k + STEP - 1, k), src_buf1[buf_size - 1].range(k + STEP - 1, k), 0,
                       src_buf2[buf_size - 2].range(k + STEP - 1, k), src_buf2[buf_size - 1].range(k + STEP - 1, k), 0,
                       src_buf3[buf_size - 2].range(k + STEP - 1, k), src_buf3[buf_size - 1].range(k + STEP - 1, k), 0);

                   GradientValuesY[0].range(p + (STEP_OUT - 1), p) = xFGradientY3x3(
                       src_buf1[buf_size - 2].range(k + STEP - 1, k), src_buf1[buf_size - 1].range(k + STEP - 1, k), 0,
                       src_buf2[buf_size - 2].range(k + STEP - 1, k), src_buf2[buf_size - 1].range(k + STEP - 1, k), 0,
                       src_buf3[buf_size - 2].range(k + STEP - 1, k), src_buf3[buf_size - 1].range(k + STEP - 1, k), 0);
                   p += STEP_OUT;
               }
           } else /*            Last column border care for NO Case            */
           {
               int STEP, STEP_OUT, q = 0;
               if ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) {
                   STEP_OUT = 16;
                   STEP = 8;
               } else {
                   STEP_OUT = 8;
                   STEP = 8;
               }

               // Uses the window entries that ProcessSobel3x3 shifted to the front.
               for (ap_uint<7> i = 0, k = 0; i < PLANES; i++, k += STEP) {
                   GradientValuesX[0].range(q + (STEP_OUT - 1), q) = xFGradientX3x3(
                       src_buf1[buf_size - 3].range(k + STEP - 1, k), src_buf1[buf_size - 2].range(k + STEP - 1, k), 0,
                       src_buf2[buf_size - 3].range(k + STEP - 1, k), src_buf2[buf_size - 2].range(k + STEP - 1, k), 0,
                       src_buf3[buf_size - 3].range(k + STEP - 1, k), src_buf3[buf_size - 2].range(k + STEP - 1, k), 0);

                   GradientValuesY[0].range(q + (STEP_OUT - 1), q) = xFGradientY3x3(
                       src_buf1[buf_size - 3].range(k + STEP - 1, k), src_buf1[buf_size - 2].range(k + STEP - 1, k), 0,
                       src_buf2[buf_size - 3].range(k + STEP - 1, k), src_buf2[buf_size - 2].range(k + STEP - 1, k), 0,
                       src_buf3[buf_size - 3].range(k + STEP - 1, k), src_buf3[buf_size - 2].range(k + STEP - 1, k), 0);
                   q += STEP_OUT;
               }
           }

           // Complete and flush the final output word of this row.
           xfPackPixels(&GradientValuesX[0], P0, 0, 1, shift_x);
           xfPackPixels(&GradientValuesY[0], P1, 0, 1, shift_y);

           _dst_matx.write(write_index, P0);
           _dst_maty.write(write_index++, P1);

           shift_x = 0;
           shift_y = 0;
           P0 = 0;
           P1 = 0;

           // Advance the ring-buffer write row (modulo 3).
           row_ind++;
           if (row_ind == 3) {
               row_ind = 0;
           }
       } // Row_Loop
   }
   // xFSobelFilter3x3

   /*****************************************************************
    *                       SobelFilter5x5
    *****************************************************************
    * X-gradient over a 5x5 neighbourhood. src_buf1..src_buf5 point at the
    * leftmost pixel of the five window rows (top to bottom); indices 0..4 are
    * the five columns. As computed by the code, the weights are
    *
    *   | -1  -2   0   2   1 |   (src_buf1)
    *   | -4  -8   0   8   4 |   (src_buf2)
    *   | -6 -12   0  12   6 |   (src_buf3)
    *   | -4  -8   0   8   4 |   (src_buf4)
    *   | -1  -2   0   2   1 |   (src_buf5)
    *
    * The centre column (index 2) is not read. The sum is formed per 8-bit
    * channel in short int arithmetic, clamped to [0, 255] only for XF_8UP /
    * XF_24UP destinations, and packed into a STEP_OUT-bit slice of the
    * returned pixel (16 bits for XF_48SP / XF_16SP, else 8).
    *****************************************************************/
   template XF_PTNAME(DEPTH_DST) xFGradientX5x5(XF_PTNAME(DEPTH_SRC) * src_buf1,
                                                 XF_PTNAME(DEPTH_SRC) * src_buf2,
                                                 XF_PTNAME(DEPTH_SRC) * src_buf3,
                                                 XF_PTNAME(DEPTH_SRC) * src_buf4,
                                                 XF_PTNAME(DEPTH_SRC) * src_buf5) {
   // clang-format off
   #pragma HLS INLINE off
       // clang-format on
       XF_PTNAME(DEPTH_DST) g_x = 0, out_val = 0;
       int STEP, STEP_OUT, p = 0;
       if ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) {
           STEP_OUT = 16;
           STEP = 8;
       } else {
           STEP_OUT = 8;
           STEP = 8;
       }

       // Naming below: M0x terms feeding S0x are subtracted, A0x terms are added;
       // M01 is already a signed difference and is added via A2.
       for (int i = 0, k = 0; i < PLANES; i++, k += STEP) {
           // Rows 1 and 5 (weight 1): columns 1 (x2, subtracted), 4 - 0 (x1), 3 (x2, added).
           short int M00 = (short int)(((short int)src_buf1[1].range(k + STEP - 1, k) +
                                        (short int)src_buf5[1].range(k + STEP - 1, k))
                                       << 1);
           short int M01 = (short int)((short int)src_buf1[4].range(k + STEP - 1, k) +
                                       (short int)src_buf5[4].range(k + STEP - 1, k)) -
                           ((short int)src_buf1[0].range(k + STEP - 1, k) +
                            (short int)src_buf5[0].range(k + STEP - 1, k));
           short int A00 = (short int)(((short int)src_buf1[3].range(k + STEP - 1, k) +
                                        (short int)src_buf5[3].range(k + STEP - 1, k))
                                       << 1);
           // Rows 2 and 4 (weight 4): x4 on the outer columns, x8 on the inner ones.
           short int M02 = (short int)(((short int)src_buf2[0].range(k + STEP - 1, k) +
                                        (short int)src_buf4[0].range(k + STEP - 1, k))
                                       << 2);
           short int M03 = (short int)((short int)src_buf2[1].range(k + STEP - 1, k) +
                                       (short int)src_buf4[1].range(k + STEP - 1, k))
                           << 3;
           short int A01 = (short int)((short int)src_buf2[3].range(k + STEP - 1, k) +
                                       (short int)src_buf4[3].range(k + STEP - 1, k))
                           << 3;
           short int A02 = (short int)((short int)src_buf2[4].range(k + STEP - 1, k) +
                                       (short int)src_buf4[4].range(k + STEP - 1, k))
                           << 2;
           // Row 3 (weight 6): x6 on the outer columns, x12 on the inner ones.
           short int M04 = (short int)src_buf3[0].range(k + STEP - 1, k) * 6;
           short int M05 = (short int)src_buf3[1].range(k + STEP - 1, k) * 12;
           short int A03 = (short int)src_buf3[3].range(k + STEP - 1, k) * 12;
           short int A04 = (short int)src_buf3[4].range(k + STEP - 1, k) * 6;

           short int S00 = M00 + M02;
           short int S01 = M03 + M04 + M05;
           short int A0 = A00 + A01;
           short int A1 = A02 + A03;
           short int A2 = A04 + M01;
           short int FA = A0 + A1 + A2; // everything that is added
           short int FS = S00 + S01;    // everything that is subtracted
           short int out_x = FA - FS;

           g_x = (XF_PTNAME(DEPTH_DST))out_x;

           if ((DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP)) {
               if (out_x < 0)
                   g_x = 0;
               else if (out_x > 255)
                   g_x = 255;
           }
           out_val.range(p + (STEP_OUT - 1), p) = g_x;
           p += STEP_OUT;
       }
       return out_val;
   }

   /****************************************************************
    * Sobel Filter Y-Gradient used is 5x5
    *
    *       --- ---- ---- ---- ---
    *      | -1 |  -4 |  -6 | -4 | -1 |
    *       --- ---- ---- ---- ---
    *      | -2 |  -8 | -12 | -8 | -2 |
    *       --- ---- ---- ---- ---
    *      |  0 |   0 |   0 |  0 |  0 |
    *       --- ---- ---- ---- --- ---
    *      |  2 |   8 |  12 |  8 |  2 |
    *       --- ---- ---- ---- --- ---
    *      |  1 |   4 |   6 |  4 |  1 |
    *       --- ---- ---- ---- --- ---
    *
    * src_buf1..src_buf5 point at the leftmost pixel of the five window rows
    * (top to bottom). The middle row (src_buf3) is not read. The sum is formed
    * per 8-bit channel in short int arithmetic, clamped to [0, 255] only for
    * XF_8UP / XF_24UP destinations, and packed into a STEP_OUT-bit slice of
    * the returned pixel (16 bits for XF_48SP / XF_16SP, else 8).
    ******************************************************************/
   template XF_PTNAME(DEPTH_DST) xFGradientY5x5(XF_PTNAME(DEPTH_SRC) * src_buf1,
                                                 XF_PTNAME(DEPTH_SRC) * src_buf2,
                                                 XF_PTNAME(DEPTH_SRC) * src_buf3,
                                                 XF_PTNAME(DEPTH_SRC) * src_buf4,
                                                 XF_PTNAME(DEPTH_SRC) * src_buf5) {
   // clang-format off
   #pragma HLS INLINE off
       // clang-format on
       XF_PTNAME(DEPTH_DST) g_y = 0, out_val = 0;
       int STEP, STEP_OUT, p = 0;
       if ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) {
           STEP_OUT = 16;
           STEP = 8;
       } else {
           STEP_OUT = 8;
           STEP = 8;
       }

       // Naming below: M0x terms feeding S0x are subtracted, A0x terms are added;
       // M00 is already a signed difference and is added via A2.
       for (int i = 0, k = 0; i < PLANES; i++, k += STEP) {
           // Corner taps (weight 1): bottom row minus top row.
           short int M00 = ((short int)src_buf5[0].range(k + STEP - 1, k) +
                            (short int)src_buf5[4].range(k + STEP - 1, k)) -
                           ((short int)src_buf1[0].range(k + STEP - 1, k) +
                            (short int)src_buf1[4].range(k + STEP - 1, k));
           // Columns 1 and 3 of the outer rows (weight 4).
           short int M01 = (short int)(((short int)src_buf1[1].range(k + STEP - 1, k) +
                                        (short int)src_buf1[3].range(k + STEP - 1, k))
                                       << 2);
           short int A00 = (short int)(((short int)src_buf5[1].range(k + STEP - 1, k) +
                                        (short int)src_buf5[3].range(k + STEP - 1, k))
                                       << 2);
           // Columns 0 and 4 of the inner rows (weight 2).
           short int M02 = (short int)(((short int)src_buf2[0].range(k + STEP - 1, k) +
                                        (short int)src_buf2[4].range(k + STEP - 1, k))
                                       << 1);
           short int A01 = (short int)(((short int)src_buf4[0].range(k + STEP - 1, k) +
                                        (short int)src_buf4[4].range(k + STEP - 1, k))
                                       << 1);
           // Columns 1 and 3 of the inner rows (weight 8).
           short int M03 = (short int)(((short int)src_buf2[1].range(k + STEP - 1, k) +
                                        (short int)src_buf2[3].range(k + STEP - 1, k))
                                       << 3);
           short int A02 = (short int)(((short int)src_buf4[1].range(k + STEP - 1, k) +
                                        (short int)src_buf4[3].range(k + STEP - 1, k))
                                       << 3);
           // Centre column (index 2): weights 6 (outer rows) and 12 (inner rows).
           short int M04 = (short int)(src_buf1[2].range(k + STEP - 1, k) * 6);
           short int M05 = (short int)(src_buf2[2].range(k + STEP - 1, k) * 12);
           short int A03 = (short int)(src_buf4[2].range(k + STEP - 1, k) * 12);
           short int A04 = (short int)(src_buf5[2].range(k + STEP - 1, k) * 6);

           short int S00 = M01 + M02 + M03;
           short int S01 = M04 + M05;
           short int A0 = A00 + A01;
           short int A1 = A02 + A03;
           short int A2 = A04 + M00;
           short int FA = A0 + A1 + A2; // everything that is added
           short int FS = S00 + S01;    // everything that is subtracted
           short int out_y = FA - FS;

           g_y = (XF_PTNAME(DEPTH_DST))out_y;

           if ((DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP)) {
               if (out_y < 0)
                   g_y = 0;
               else if (out_y > 255)
                   g_y = 255;
           }
           out_val.range(p + (STEP_OUT - 1), p) = g_y;
           p += STEP_OUT;
       }
       return out_val;
   }

   /**
    * Computes the 5x5 X and Y gradients for every pixel processed in one cycle.
    *
    * For each j in [0, XF_NPIXPERCYCLE(NPC)) the five row windows starting at
    * offset j are handed to xFGradientX5x5 / xFGradientY5x5, which read
    * elements j .. j+4 of each row buffer; the results are stored in
    * GradientvaluesX[j] / GradientvaluesY[j].
    */
   template void xFSobel5x5(XF_PTNAME(DEPTH_DST) * GradientvaluesX,
                            XF_PTNAME(DEPTH_DST) * GradientvaluesY,
                            XF_PTNAME(DEPTH_SRC) * src_buf1,
                            XF_PTNAME(DEPTH_SRC) * src_buf2,
                            XF_PTNAME(DEPTH_SRC) * src_buf3,
                            XF_PTNAME(DEPTH_SRC) * src_buf4,
                            XF_PTNAME(DEPTH_SRC) * src_buf5) {
   // clang-format off
   #pragma HLS INLINE off
   // clang-format on
   Compute_Grad_Loop:
       for (ap_uint<5> j = 0; j < XF_NPIXPERCYCLE(NPC); j++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=8 max=8
   #pragma HLS UNROLL
           // clang-format on
           GradientvaluesX[j] = xFGradientX5x5(&src_buf1[j], &src_buf2[j], &src_buf3[j], &src_buf4[j], &src_buf5[j]);
           GradientvaluesY[j] = xFGradientY5x5(&src_buf1[j], &src_buf2[j], &src_buf3[j], &src_buf4[j], &src_buf5[j]);
       }
   }

   /**************************************************************************************
    * ProcessSobel5x5 : Computes gradients for the column input data
    *
    * Processes one image row, one input word per Col_Loop iteration:
    *   1. While row < img_height, the next word is read from _src_mat into
    *      buf[row_ind][col]; past the last image row, buf[bottom2][col] is
    *      zeroed instead.
    *   2. The words of the tp1 / tp2 / mid / bottom1 / bottom2 line-buffer rows
    *      are placed at index 4 onwards of src_buf1..src_buf5 (unpacked with
    *      xfExtractPixels when NPC == XF_NPPC8, assigned directly otherwise).
    *   3. xFSobel5x5 fills GradientValuesX / GradientValuesY.
    *   4. The last four window entries are copied to indices 0..3 as the left
    *      context for the next column.
    *   5. Results are packed into inter_valx / inter_valy and written to
    *      _dst_matx / _dst_maty: from the second column on when
    *      NPC == XF_NPPC8, and from the third column on (col >= 2) otherwise.
    *
    * read_index, write_index, shift_x, shift_y, inter_valx and inter_valy are
    * updated through references and carry state back to the caller.
    **************************************************************************************/
   template void ProcessSobel5x5(xf::cv::Mat& _src_mat,
                                 xf::cv::Mat& _dst_matx,
                                 xf::cv::Mat& _dst_maty,
                                 XF_SNAME(WORDWIDTH_SRC) buf[5][(COLS >> XF_BITSHIFT(NPC))],
                                 XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 4],
                                 XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 4],
                                 XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 4],
                                 XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC) + 4],
                                 XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC) + 4],
                                 XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)],
                                 XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)],
                                 XF_SNAME(WORDWIDTH_DST) & inter_valx,
                                 XF_SNAME(WORDWIDTH_DST) & inter_valy,
                                 uint16_t img_width,
                                 uint16_t img_height,
                                 ap_uint<13> row_ind,
                                 uint16_t& shift_x,
                                 uint16_t& shift_y,
                                 ap_uint<4> tp1,
                                 ap_uint<4> tp2,
                                 ap_uint<4> mid,
                                 ap_uint<4> bottom1,
                                 ap_uint<4> bottom2,
                                 ap_uint<13> row,
                                 int& read_index,
                                 int& write_index) {
   // clang-format off
   #pragma HLS INLINE
       // clang-format on
       XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2, buf3, buf4;
       ap_uint<8> buf_size = XF_NPIXPERCYCLE(NPC) + 4;
       uint16_t npc = XF_NPIXPERCYCLE(NPC);
       // Bit width of the output word and of one output pixel; used below to
       // address the top `step` bits of inter_valx / inter_valy.
       ap_uint<8> max_loop = XF_WORDDEPTH(WORDWIDTH_DST);
       ap_uint<8> step = XF_PIXELDEPTH(DEPTH_DST);

   Col_Loop:
       for (ap_uint<13> col = 0; col < img_width; col++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
           // clang-format on
           if (row < img_height)
               buf[row_ind][col] = _src_mat.read(read_index++);
           else
               buf[bottom2][col] = 0; // below the image: feed zeros as the last row

           buf0 = buf[tp1][col];
           buf1 = buf[tp2][col];
           buf2 = buf[mid][col];
           buf3 = buf[bottom1][col];
           buf4 = buf[bottom2][col];

           // Indices 0..3 of each window hold the previous column's trailing
           // pixels; the new data goes in from index 4.
           if (NPC == XF_NPPC8) {
               xfExtractPixels(&src_buf1[4], buf0, 0);
               xfExtractPixels(&src_buf2[4], buf1, 0);
               xfExtractPixels(&src_buf3[4], buf2, 0);
               xfExtractPixels(&src_buf4[4], buf3, 0);
               xfExtractPixels(&src_buf5[4], buf4, 0);
           } else {
               src_buf1[4] = buf0;
               src_buf2[4] = buf1;
               src_buf3[4] = buf2;
               src_buf4[4] = buf3;
               src_buf5[4] = buf4;
           }

           xFSobel5x5(GradientValuesX, GradientValuesY, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5);

           // Slide the window: keep the last four pixels as the next left context.
           for (ap_uint<4> i = 0; i < 4; i++) {
   // clang-format off
   #pragma HLS unroll
               // clang-format on
               src_buf1[i] = src_buf1[buf_size - (4 - i)];
               src_buf2[i] = src_buf2[buf_size - (4 - i)];
               src_buf3[i] = src_buf3[buf_size - (4 - i)];
               src_buf4[i] = src_buf4[buf_size - (4 - i)];
               src_buf5[i] = src_buf5[buf_size - (4 - i)];
           }

           // NOTE(review): xfPackPixels is defined outside this file; its 3rd and
           // 4th arguments look like (start index, element count) -- confirm.
           if (col == 0) {
               // First column: nothing is written yet, only the output word is started.
               shift_x = 0, shift_y = 0;
               inter_valx = 0;
               inter_valy = 0;

               xfPackPixels(&GradientValuesX[0], inter_valx, 2, (npc - 2), shift_x);
               xfPackPixels(&GradientValuesY[0], inter_valy, 2, (npc - 2), shift_y);
           } else {
               if ((NPC == XF_NPPC8)) {
                   // Complete the pending word with elements 0..1, write it out,
                   // then start the next word with the remaining elements.
                   xfPackPixels(&GradientValuesX[0], inter_valx, 0, 2, shift_x);
                   xfPackPixels(&GradientValuesY[0], inter_valy, 0, 2, shift_y);

                   _dst_matx.write(write_index, inter_valx);
                   _dst_maty.write(write_index++, inter_valy);

                   shift_x = 0;
                   shift_y = 0;
                   inter_valx = 0;
                   inter_valy = 0;
                   xfPackPixels(&GradientValuesX[0], inter_valx, 2, (npc - 2), shift_x);
                   xfPackPixels(&GradientValuesY[0], inter_valy, 2, (npc - 2), shift_y);
               } else {
                   // Single-pixel path: output starts once two columns of
                   // look-ahead have been consumed.
                   if (col >= 2) {
                       inter_valx((max_loop - 1), (max_loop - step)) = GradientValuesX[0];
                       inter_valy((max_loop - 1), (max_loop - step)) = GradientValuesY[0];
                       _dst_matx.write(write_index, inter_valx);
                       _dst_maty.write(write_index++, inter_valy);
                   }
               }
           }
       } // Col_Loop
   }

   /**
    * Top-level 5x5 Sobel kernel: reads _src_mat sequentially and writes the X
    * and Y gradient images to _dst_matx and _dst_maty.
    *
    * A five-row line buffer `buf` is used as a ring: rows 0 and 1 are
    * zero-filled, row 2 is loaded with the first image row (Clear_Row_Loop)
    * and row 3 with the second (Read_Row2_Loop) before Row_Loop starts. Each
    * Row_Loop iteration (row = 2 .. img_height + 1) selects which physical
    * rows act as tp1 / tp2 / mid / bottom1 / bottom2 from row_ind, clears the
    * left context of the pixel windows, runs ProcessSobel5x5 over the row, and
    * then emits the trailing outputs for the right border with zeros standing
    * in for the pixels beyond the last column.
    *
    * img_width is used as the column-loop bound and as the index range of the
    * line buffer, whose second dimension is COLS >> XF_BITSHIFT(NPC).
    */
   template void xFSobelFilter5x5(xf::cv::Mat& _src_mat,
                                  xf::cv::Mat& _dst_matx,
                                  xf::cv::Mat& _dst_maty,
                                  uint16_t img_height,
                                  uint16_t img_width) {
       ap_uint<13> row_ind;
       ap_uint<13> row, col;
       ap_uint<4> tp1, tp2, mid, bottom1, bottom2;
       ap_uint<5> i;
       ap_uint<8> buf_size = XF_NPIXPERCYCLE(NPC) + 4;
       ap_uint<9> step = XF_PIXELDEPTH(DEPTH_DST);
       ap_uint<9> max_loop = XF_WORDDEPTH(WORDWIDTH_DST);
       uint16_t shift_x = 0, shift_y = 0;
       int read_index = 0, write_index = 0;

       XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)];
       XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)];
   // clang-format off
   #pragma HLS ARRAY_PARTITION variable=GradientValuesX complete dim=1
   #pragma HLS ARRAY_PARTITION variable=GradientValuesY complete dim=1
       // clang-format on

       XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2, buf3, buf4;
       // Temporary buffers to hold image data from five rows
       XF_PTNAME(DEPTH_SRC)
       src_buf1[XF_NPIXPERCYCLE(NPC) + 4], src_buf2[XF_NPIXPERCYCLE(NPC) + 4], src_buf3[XF_NPIXPERCYCLE(NPC) + 4],
           src_buf4[XF_NPIXPERCYCLE(NPC) + 4], src_buf5[XF_NPIXPERCYCLE(NPC) + 4];
   // clang-format off
   #pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1
   #pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1
   #pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1
   #pragma HLS ARRAY_PARTITION variable=src_buf4 complete dim=1
   #pragma HLS ARRAY_PARTITION variable=src_buf5 complete dim=1
       // clang-format on

       XF_SNAME(WORDWIDTH_SRC) tmp_in;
       XF_SNAME(WORDWIDTH_DST) inter_valx = 0, inter_valy = 0;

       // Temporary buffer to hold image data from five rows
       XF_SNAME(WORDWIDTH_SRC) buf[5][(COLS >> XF_BITSHIFT(NPC))];
       if (USE_URAM) {
   // clang-format off
   #pragma HLS RESOURCE variable=buf core=RAM_S2P_URAM
   #pragma HLS array reshape variable=buf dim=1 factor=5 cyclic
           // clang-format on
       } else {
   // clang-format off
   #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
   #pragma HLS ARRAY_PARTITION variable=buf complete dim=1
           // clang-format on
       }

       row_ind = 2;

   Clear_Row_Loop:
       for (col = 0; col < img_width; col++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
           // clang-format on
           // Two zero rows above the image, then the first image row.
           buf[0][col] = 0;
           buf[1][col] = 0;
           buf[row_ind][col] = _src_mat.read(read_index++);
       }
       row_ind++;

   Read_Row2_Loop:
       for (col = 0; col < img_width; col++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
   #pragma HLS pipeline
           // clang-format on
           // Second image row.
           buf[row_ind][col] = _src_mat.read(read_index++);
       }
       row_ind++;

   Row_Loop:
       for (row = 2; row < img_height + 2; row++) {
   // clang-format off
   #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
           // clang-format on
           // modify the buffer indices to re use
           // (row_ind is the ring-buffer row that receives the incoming data and
           // therefore acts as bottom2; the other four follow in ring order)
           if (row_ind == 4) {
               tp1 = 0;
               tp2 = 1;
               mid = 2;
               bottom1 = 3;
               bottom2 = 4;
           } else if (row_ind == 0) {
               tp1 = 1;
               tp2 = 2;
               mid = 3;
               bottom1 = 4;
               bottom2 = 0;
           } else if (row_ind == 1) {
               tp1 = 2;
               tp2 = 3;
               mid = 4;
               bottom1 = 0;
               bottom2 = 1;
           } else if (row_ind == 2) {
               tp1 = 3;
               tp2 = 4;
               mid = 0;
               bottom1 = 1;
               bottom2 = 2;
           } else if (row_ind == 3) {
               tp1 = 4;
               tp2 = 0;
               mid = 1;
               bottom1 = 2;
               bottom2 = 3;
           }

           // Left border: the four context pixels of every row window are zero.
           src_buf1[0] = src_buf1[1] = src_buf1[2] = src_buf1[3] = 0;
           src_buf2[0] = src_buf2[1] = src_buf2[2] = src_buf2[3] = 0;
           src_buf3[0] = src_buf3[1] = src_buf3[2] = src_buf3[3] = 0;
           src_buf4[0] = src_buf4[1] = src_buf4[2] = src_buf4[3] = 0;
           src_buf5[0] = src_buf5[1] = src_buf5[2] = src_buf5[3] = 0;

           inter_valx = inter_valy = 0;

           ProcessSobel5x5(
               _src_mat, _dst_matx, _dst_maty, buf, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, GradientValuesX,
               GradientValuesY, inter_valx, inter_valy, img_width, img_height, row_ind, shift_x, shift_y, tp1, tp2, mid,
               bottom1, bottom2, row, read_index, write_index);

           if ((NPC == XF_NPPC8) || (NPC == XF_NPPC16)) {
               // Right border, multi-pixel path: everything beyond the carried-over
               // context is zeroed, then the two trailing gradients are computed
               // at window offsets 0 and 1 and flushed as one output word.
               for (ap_uint<6> i = 4; i < (XF_NPIXPERCYCLE(NPC) + 4); i++) {
                   src_buf1[i] = 0;
                   src_buf2[i] = 0;
                   src_buf3[i] = 0;
                   src_buf4[i] = 0;
                   src_buf5[i] = 0;
               }

               GradientValuesX[0] =
                   xFGradientX5x5(&src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0]);
               GradientValuesX[1] =
                   xFGradientX5x5(&src_buf1[1], &src_buf2[1], &src_buf3[1], &src_buf4[1], &src_buf5[1]);

               GradientValuesY[0] =
                   xFGradientY5x5(&src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0]);
               GradientValuesY[1] =
                   xFGradientY5x5(&src_buf1[1], &src_buf2[1], &src_buf3[1], &src_buf4[1], &src_buf5[1]);

               xfPackPixels(&GradientValuesX[0], inter_valx, 0, 2, shift_x);
               xfPackPixels(&GradientValuesY[0], inter_valy, 0, 2, shift_y);

               _dst_matx.write(write_index, inter_valx);
               _dst_maty.write(write_index++, inter_valy);
           } else {
   // clang-format off
   #pragma HLS ALLOCATION function instances=xFGradientX5x5 limit=1
   #pragma HLS ALLOCATION function instances=xFGradientY5x5 limit=1
               // clang-format on
               // Right border, single-pixel path: two outputs are still owed for
               // this row. First one: zero the newest window slot and compute.
               src_buf1[buf_size - 1] = 0;
               src_buf2[buf_size - 1] = 0;
               src_buf3[buf_size - 1] = 0;
               src_buf4[buf_size - 1] = 0;
               src_buf5[buf_size - 1] = 0;

               GradientValuesX[0] =
                   xFGradientX5x5(&src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0]);
               GradientValuesY[0] =
                   xFGradientY5x5(&src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0]);

               inter_valx((max_loop - 1), (max_loop - step)) = GradientValuesX[0];
               inter_valy((max_loop - 1), (max_loop - step)) = GradientValuesY[0];

               _dst_matx.write(write_index, inter_valx);
               _dst_maty.write(write_index++, inter_valy);

               // Second one: slide the window by one, zero the newest slot again
               // and compute.
               for (ap_uint<4> i = 0; i < 4; i++) {
   // clang-format off
   #pragma HLS unroll
                   // clang-format on
                   src_buf1[i] = src_buf1[buf_size - (4 - i)];
                   src_buf2[i] = src_buf2[buf_size - (4 - i)];
                   src_buf3[i] = src_buf3[buf_size - (4 - i)];
                   src_buf4[i] = src_buf4[buf_size - (4 - i)];
                   src_buf5[i] = src_buf5[buf_size - (4 - i)];
               }
               src_buf1[buf_size - 1] = 0;
               src_buf2[buf_size - 1] = 0;
               src_buf3[buf_size - 1] = 0;
               src_buf4[buf_size - 1] = 0;
               src_buf5[buf_size - 1] = 0;

               GradientValuesX[0] =
                   xFGradientX5x5(&src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0]);
               GradientValuesY[0] =
                   xFGradientY5x5(&src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0]);

               inter_valx((max_loop - 1), (max_loop - step)) = GradientValuesX[0];
               inter_valy((max_loop - 1), (max_loop - step)) = GradientValuesY[0];

               _dst_matx.write(write_index, inter_valx);
               _dst_maty.write(write_index++, inter_valy);
           }

           // Advance the ring-buffer write row (modulo 5).
           row_ind++;
           if (row_ind == 5) {
               row_ind = 0;
           }
       } // Row_Loop
   }
   // xFSobelFilter5x5

   /*******************************************************************************
    *                       SobelFilter7x7
    *******************************************************************************
    *  SobelFilter X-Gradient used is 7X7
    *
    *       --- ---- ---- ---- --- ---- --- ----
* | -1 | -4 | -5 | 0 | 5 | 4 | 1 | * --- ---- ---- ---- --- ---- --- ---- * | -6 | -24 | -30 | 0 | 30 | 24 | 6 | * --- ---- ---- ---- --- ---- --- ---- * | -15 | -60 | -75 | 0 | 75 | 60 | 15 | * --- ---- ---- ---- --- ---- --- ---- * | -20 | -80 | -100 | 0 | 100 | 80 | 20 | * --- ---- ---- ---- --- ---- --- ---- * | -15 | -60 | -75 | 0 | 75 | 60 | 15 | * --- ---- ---- ---- --- ---- --- ---- * | -6 | -24 | -30 | 0 | 30 | 24 | 6 | * --- ---- ---- ---- --- ---- --- ---- * | -1 | -4 | -5 | 0 | 5 | 4 | 1 | * --- ---- ---- ---- --- ---- --- ---- ******************************************************************************/ template XF_PTNAME(DEPTH_DST) xFGradientX7x7(XF_PTNAME(DEPTH_SRC) * src_buf1, XF_PTNAME(DEPTH_SRC) * src_buf2, XF_PTNAME(DEPTH_SRC) * src_buf3, XF_PTNAME(DEPTH_SRC) * src_buf4, XF_PTNAME(DEPTH_SRC) * src_buf5, XF_PTNAME(DEPTH_SRC) * src_buf6, XF_PTNAME(DEPTH_SRC) * src_buf7) { // clang-format off #pragma HLS INLINE off #pragma HLS PIPELINE II=1 // clang-format on XF_PTNAME(DEPTH_DST) g_x = 0; XF_PTNAME(DEPTH_DST) val = 0; int STEP, STEP_OUT, p = 0; if ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) { STEP_OUT = 16; STEP = 8; } else if ((DEPTH_DST == XF_32SP)) { STEP_OUT = 32; STEP = 8; } else { STEP = 8; STEP_OUT = 8; } for (int i = 0, k = 0; i < PLANES; i++, k += STEP) { int Res = 0; ap_int<20> M00 = (ap_int<20>)(((ap_int<20>)src_buf1[6].range(k + STEP - 1, k) + (ap_int<20>)src_buf7[6].range(k + STEP - 1, k)) - ((ap_int<20>)src_buf1[0].range(k + STEP - 1, k) + (ap_int<20>)src_buf7[0].range(k + STEP - 1, k))); ap_int<20> M01 = (ap_int<20>)(((ap_int<20>)src_buf1[1].range(k + STEP - 1, k) + (ap_int<20>)src_buf7[1].range(k + STEP - 1, k)) << 2); ap_int<20> A00 = (ap_int<20>)(((ap_int<20>)src_buf1[5].range(k + STEP - 1, k) + (ap_int<20>)src_buf7[5].range(k + STEP - 1, k)) << 2); ap_int<20> M02 = (ap_int<20>)(((ap_int<20>)src_buf1[2].range(k + STEP - 1, k) + (ap_int<20>)src_buf7[2].range(k + STEP - 1, k)) << 2) + 
(ap_int<20>)((ap_int<20>)src_buf1[2].range(k + STEP - 1, k) + (ap_int<20>)src_buf7[2].range(k + STEP - 1, k)); //(src_buf1[2] + src_buf7[2]) * 5; ap_int<20> A01 = (ap_int<20>)(((ap_int<20>)src_buf1[4].range(k + STEP - 1, k) + (ap_int<20>)src_buf7[4].range(k + STEP - 1, k)) << 2) + (ap_int<20>)src_buf1[4].range(k + STEP - 1, k) + (ap_int<20>)src_buf7[4].range(k + STEP - 1, k); //(src_buf1[4] + src_buf7[4]) * 5; ap_int<20> M03 = (ap_int<20>)(((ap_int<20>)src_buf2[0].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[0].range(k + STEP - 1, k)) << 2) + (ap_int<20>)(((ap_int<20>)src_buf2[0].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[0].range(k + STEP - 1, k)) << 1); //(src_buf2[0] + src_buf6[0]) * 6; ap_int<20> A02 = (ap_int<20>)(((ap_int<20>)src_buf2[6].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[6].range(k + STEP - 1, k)) << 2) + (ap_int<20>)(((ap_int<20>)src_buf2[6].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[6].range(k + STEP - 1, k)) << 1); //(src_buf2[6] + src_buf6[6]) * 6; ap_int<20> M04 = (ap_int<20>)(((ap_int<20>)src_buf2[1].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[1].range(k + STEP - 1, k)) << 4) + (ap_int<20>)(((ap_int<20>)src_buf2[1].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[1].range(k + STEP - 1, k)) << 3); //(src_buf2[1] + src_buf6[1]) * 24; ap_int<20> A03 = (ap_int<20>)(((ap_int<20>)src_buf2[5].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[5].range(k + STEP - 1, k)) << 4) + (ap_int<20>)(((ap_int<20>)src_buf2[5].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[5].range(k + STEP - 1, k)) << 3); //(src_buf2[5] + src_buf6[5]) * 24; ap_int<20> M05 = (ap_int<20>)(((ap_int<20>)src_buf2[2].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[2].range(k + STEP - 1, k)) << 5) - (ap_int<20>)(((ap_int<20>)src_buf2[2].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[2].range(k + STEP - 1, k)) << 1); //(src_buf2[2] + src_buf6[2]) * 30; ap_int<20> A04 = (ap_int<20>)(((ap_int<20>)src_buf2[4].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[4].range(k + STEP - 1, k)) << 5) - 
(ap_int<20>)(((ap_int<20>)src_buf2[4].range(k + STEP - 1, k) + (ap_int<20>)src_buf6[4].range(k + STEP - 1, k)) << 1); //(src_buf2[4] + src_buf6[4]) * 30; ap_int<20> M06 = (ap_int<20>)(((ap_int<20>)src_buf3[0].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[0].range(k + STEP - 1, k)) << 4) - (ap_int<20>)((ap_int<20>)src_buf3[0].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[0].range(k + STEP - 1, k)); //(src_buf3[0] + src_buf5[0]) * 15; ap_int<20> A05 = (ap_int<20>)(((ap_int<20>)src_buf3[6].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[6].range(k + STEP - 1, k)) << 4) - (ap_int<20>)((ap_int<20>)src_buf3[6].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[6].range(k + STEP - 1, k)); //(src_buf3[6] + src_buf5[6]) * 15; ap_int<20> M07 = (ap_int<20>)(((ap_int<20>)src_buf3[1].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[1].range(k + STEP - 1, k)) << 6) - (ap_int<20>)(((ap_int<20>)src_buf3[1].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[1].range(k + STEP - 1, k)) << 2); //(src_buf3[1] + src_buf5[1]) * 60; ap_int<20> A06 = (ap_int<20>)(((ap_int<20>)src_buf3[5].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[5].range(k + STEP - 1, k)) << 6) - (ap_int<20>)(((ap_int<20>)src_buf3[5].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[5].range(k + STEP - 1, k)) << 2); //(src_buf3[5] + src_buf5[5]) * 60; ap_int<20> M08 = (ap_int<20>)(((ap_int<20>)src_buf3[2].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[2].range(k + STEP - 1, k)) << 6) + (ap_int<20>)(((ap_int<20>)src_buf3[2].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[2].range(k + STEP - 1, k)) << 3) + (ap_int<20>)((ap_int<20>)src_buf3[2].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[2].range(k + STEP - 1, k) << 1) + (ap_int<20>)src_buf3[2].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[2].range(k + STEP - 1, k); //(src_buf3[2] + src_buf5[2]) * 75; ap_int<20> A07 = (ap_int<20>)(((ap_int<20>)src_buf3[4].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[4].range(k + STEP - 1, k)) << 6) + (ap_int<20>)(((ap_int<20>)src_buf3[4].range(k + STEP - 1, 
k) + (ap_int<20>)src_buf5[4].range(k + STEP - 1, k)) << 3) + (ap_int<20>)((ap_int<20>)src_buf3[4].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[4].range(k + STEP - 1, k) << 1) + (ap_int<20>)src_buf3[4].range(k + STEP - 1, k) + (ap_int<20>)src_buf5[4].range(k + STEP - 1, k); //(src_buf3[4] + src_buf5[4]) * 75; ap_int<20> M09 = (ap_int<20>)(((ap_int<20>)src_buf4[6].range(k + STEP - 1, k) - (ap_int<20>)src_buf4[0].range(k + STEP - 1, k)) << 4) + (ap_int<20>)(((ap_int<20>)src_buf4[6].range(k + STEP - 1, k) - (ap_int<20>)src_buf4[0].range(k + STEP - 1, k)) << 2); //(src_buf4[6] - src_buf4[0]) * 20; ap_int<20> M10 = (ap_int<20>)(((ap_int<20>)src_buf4[5].range(k + STEP - 1, k) - (ap_int<20>)src_buf4[1].range(k + STEP - 1, k)) << 6) + (ap_int<20>)(((ap_int<20>)src_buf4[5].range(k + STEP - 1, k) - (ap_int<20>)src_buf4[1].range(k + STEP - 1, k)) << 4); //(src_buf4[5] - src_buf4[1]) * 80; ap_int<20> M11 = (ap_int<20>)(((ap_int<20>)src_buf4[4].range(k + STEP - 1, k) - (ap_int<20>)src_buf4[2].range(k + STEP - 1, k)) << 6) + (ap_int<20>)(((ap_int<20>)src_buf4[4].range(k + STEP - 1, k) - (ap_int<20>)src_buf4[2].range(k + STEP - 1, k)) << 5) + (ap_int<20>)((ap_int<20>)src_buf4[4].range(k + STEP - 1, k) - (ap_int<20>)src_buf4[2].range(k + STEP - 1, k) << 2); //(src_buf4[4] - src_buf4[2]) * 100; ap_int<20> FS00 = M01 + M02 + M03; ap_int<20> FS01 = M04 + M05; ap_int<20> FS02 = M06 + M07 + M08; ap_int<20> FA00 = A00 + A01; ap_int<20> FA01 = A02 + A03; ap_int<20> FA02 = A04 + A05; ap_int<20> FA03 = A06 + A07; ap_int<20> FA04 = M09 + M10 + M11; ap_int<20> FS0 = FS00 + FS01 + FS02; ap_int<20> FA0 = M00 + FA00 + FA01; ap_int<20> FA1 = FA02 + FA03 + FA04; Res = (FA0 + FA1) - (FS0); g_x = (XF_PTNAME(DEPTH_DST))Res; if ((DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP)) { if (Res < 0) g_x = 0; else if (Res > 255) g_x = 255; } if ((DEPTH_DST == XF_16SP) || (DEPTH_DST == XF_48SP)) { if (Res > 32767) g_x = 32767; else if (Res < -32768) g_x = -32768; } val.range(p + (STEP_OUT - 1), p) = g_x; p += 
STEP_OUT;
    }
    return val;
}

/********************************************************************
 * SobelFilter Y-Gradient used is 7X7
 *
 *   ---- ----- ----- ------ ----- ----- ----
 *  | -1 |  -6 | -15 |  -20 | -15 |  -6 | -1 |
 *   ---- ----- ----- ------ ----- ----- ----
 *  | -4 | -24 | -60 |  -80 | -60 | -24 | -4 |
 *   ---- ----- ----- ------ ----- ----- ----
 *  | -5 | -30 | -75 | -100 | -75 | -30 | -5 |
 *   ---- ----- ----- ------ ----- ----- ----
 *  |  0 |   0 |   0 |    0 |   0 |   0 |  0 |
 *   ---- ----- ----- ------ ----- ----- ----
 *  |  5 |  30 |  75 |  100 |  75 |  30 |  5 |
 *   ---- ----- ----- ------ ----- ----- ----
 *  |  4 |  24 |  60 |   80 |  60 |  24 |  4 |
 *   ---- ----- ----- ------ ----- ----- ----
 *  |  1 |   6 |  15 |   20 |  15 |   6 |  1 |
 *   ---- ----- ----- ------ ----- ----- ----
 *
 * xFGradientY7x7 evaluates the kernel above for one output pixel.
 *
 * src_buf1 .. src_buf7 : one pointer per kernel row (src_buf1 feeds the
 *                        first/negative row, src_buf7 the last/positive row).
 *                        Elements [0]..[6] of each are read; src_buf4 is
 *                        never read because the middle kernel row is zero.
 * return               : the packed result. For every plane the response is
 *                        written into a STEP_OUT-bit field of the return
 *                        value (16 bits for XF_48SP/XF_16SP, 32 bits for
 *                        XF_32SP, otherwise 8 bits). Source planes are read
 *                        as 8-bit fields.
 *
 * The response is clamped to [0, 255] for XF_8UP/XF_24UP and to
 * [-32768, 32767] for XF_16SP/XF_48SP; other depths are stored unclamped.
 *
 * Every coefficient is applied with shifts and adds/subtracts on the sum
 * (or difference) of the two pixels that share that coefficient.
 *
 * NOTE(review): the template parameter list after `template` is missing in
 * this listing (presumably lost during extraction). DEPTH_SRC, DEPTH_DST and
 * PLANES are used as compile-time parameters - restore it from the original
 * header.
 ******************************************************************/
template XF_PTNAME(DEPTH_DST) xFGradientY7x7(XF_PTNAME(DEPTH_SRC) * src_buf1,
                                             XF_PTNAME(DEPTH_SRC) * src_buf2,
                                             XF_PTNAME(DEPTH_SRC) * src_buf3,
                                             XF_PTNAME(DEPTH_SRC) * src_buf4,
                                             XF_PTNAME(DEPTH_SRC) * src_buf5,
                                             XF_PTNAME(DEPTH_SRC) * src_buf6,
                                             XF_PTNAME(DEPTH_SRC) * src_buf7) {
// clang-format off
#pragma HLS INLINE off
#pragma HLS PIPELINE II=1
    // clang-format on
    XF_PTNAME(DEPTH_DST) g_y = 0, val = 0;
    // STEP: bits per source plane; STEP_OUT: bits per destination plane;
    // p: bit offset of the current plane inside `val`.
    int STEP, STEP_OUT, p = 0;
    if ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) {
        STEP_OUT = 16;
        STEP = 8;
    } else if ((DEPTH_DST == XF_32SP)) {
        STEP_OUT = 32;
        STEP = 8;
    } else {
        STEP = 8;
        STEP_OUT = 8;
    }
    // One iteration per plane; k is the bit offset of the plane in the source pixels.
    for (int i = 0, k = 0; i < PLANES; i++, k += STEP) {
        int Res = 0;
        // Naming: Mxx/Axx hold partial products. The Axx terms and M00, M09..M11
        // are added to the result, M01..M08 are subtracted (see FS0/FA0/FA1 below).

        // Corner taps, coefficient 1: (row7 corners) - (row1 corners)
        ap_int<20> M00 = (src_buf7[0].range(k + STEP - 1, k) + src_buf7[6].range(k + STEP - 1, k)) -
                         (src_buf1[0].range(k + STEP - 1, k) + src_buf1[6].range(k + STEP - 1, k));
        // (src_buf1[1] + src_buf1[5]) * 6
        ap_int<20> M01 = ((ap_int<20>)(src_buf1[1].range(k + STEP - 1, k) + src_buf1[5].range(k + STEP - 1, k)) << 2) +
                         ((ap_int<20>)(src_buf1[1].range(k + STEP - 1, k) + src_buf1[5].range(k + STEP - 1, k)) << 1);
        // (src_buf7[1] + src_buf7[5]) * 6
        ap_int<20> A00 = ((ap_int<20>)(src_buf7[1].range(k + STEP - 1, k) + src_buf7[5].range(k + STEP - 1, k)) << 2) +
                         ((ap_int<20>)(src_buf7[1].range(k + STEP - 1, k) + src_buf7[5].range(k + STEP - 1, k)) << 1);
        // (src_buf1[2] + src_buf1[4]) * 15
        ap_int<20> M02 = ((ap_int<20>)(src_buf1[2].range(k + STEP - 1, k) + src_buf1[4].range(k + STEP - 1, k)) << 4) -
                         (src_buf1[2].range(k + STEP - 1, k) + src_buf1[4].range(k + STEP - 1, k));
        // (src_buf7[2] + src_buf7[4]) * 15
        ap_int<20> A01 = ((ap_int<20>)(src_buf7[2].range(k + STEP - 1, k) + src_buf7[4].range(k + STEP - 1, k)) << 4) -
                         (src_buf7[2].range(k + STEP - 1, k) + src_buf7[4].range(k + STEP - 1, k));
        // (src_buf2[0] + src_buf2[6]) * 4
        ap_int<20> M03 = (ap_int<20>)(src_buf2[0].range(k + STEP - 1, k) + src_buf2[6].range(k + STEP - 1, k)) << 2;
        // (src_buf6[0] + src_buf6[6]) * 4
        ap_int<20> A02 = (ap_int<20>)(src_buf6[0].range(k + STEP - 1, k) + src_buf6[6].range(k + STEP - 1, k)) << 2;
        // (src_buf2[1] + src_buf2[5]) * 24
        ap_int<20> M04 = ((ap_int<20>)(src_buf2[1].range(k + STEP - 1, k) + src_buf2[5].range(k + STEP - 1, k)) << 4) +
                         ((ap_int<20>)(src_buf2[1].range(k + STEP - 1, k) + src_buf2[5].range(k + STEP - 1, k)) << 3);
        // (src_buf6[1] + src_buf6[5]) * 24
        ap_int<20> A03 = ((ap_int<20>)(src_buf6[1].range(k + STEP - 1, k) + src_buf6[5].range(k + STEP - 1, k)) << 4) +
                         ((ap_int<20>)(src_buf6[1].range(k + STEP - 1, k) + src_buf6[5].range(k + STEP - 1, k)) << 3);
        // (src_buf2[2] + src_buf2[4]) * 60
        ap_int<20> M05 = ((ap_int<20>)(src_buf2[2].range(k + STEP - 1, k) + src_buf2[4].range(k + STEP - 1, k)) << 6) -
                         ((ap_int<20>)(src_buf2[2].range(k + STEP - 1, k) + src_buf2[4].range(k + STEP - 1, k)) << 2);
        // (src_buf6[2] + src_buf6[4]) * 60
        ap_int<20> A04 = ((ap_int<20>)(src_buf6[2].range(k + STEP - 1, k) + src_buf6[4].range(k + STEP - 1, k)) << 6) -
                         ((ap_int<20>)(src_buf6[2].range(k + STEP - 1, k) + src_buf6[4].range(k + STEP - 1, k)) << 2);
        // (src_buf3[0] + src_buf3[6]) * 5
        ap_int<20> M06 = ((ap_int<20>)(src_buf3[0].range(k + STEP - 1, k) + src_buf3[6].range(k + STEP - 1, k)) << 2) +
                         (src_buf3[0].range(k + STEP - 1, k) + src_buf3[6].range(k + STEP - 1, k));
        // (src_buf5[0] + src_buf5[6]) * 5
        ap_int<20> A05 = ((ap_int<20>)(src_buf5[0].range(k + STEP - 1, k) + src_buf5[6].range(k + STEP - 1, k)) << 2) +
                         (src_buf5[0].range(k + STEP - 1, k) + src_buf5[6].range(k + STEP - 1, k));
        // (src_buf3[1] + src_buf3[5]) * 30
        ap_int<20> M07 = ((ap_int<20>)(src_buf3[1].range(k + STEP - 1, k) + src_buf3[5].range(k + STEP - 1, k)) << 5) -
                         ((ap_int<20>)(src_buf3[1].range(k + STEP - 1, k) + src_buf3[5].range(k + STEP - 1, k)) << 1);
        // (src_buf5[1] + src_buf5[5]) * 30
        ap_int<20> A06 = ((ap_int<20>)(src_buf5[1].range(k + STEP - 1, k) + src_buf5[5].range(k + STEP - 1, k)) << 5) -
                         ((ap_int<20>)(src_buf5[1].range(k + STEP - 1, k) + src_buf5[5].range(k + STEP - 1, k)) << 1);
        // (src_buf3[2] + src_buf3[4]) * 75
        ap_int<20> M08 = ((ap_int<20>)(src_buf3[2].range(k + STEP - 1, k) + src_buf3[4].range(k + STEP - 1, k)) << 6) +
                         ((ap_int<20>)(src_buf3[2].range(k + STEP - 1, k) + src_buf3[4].range(k + STEP - 1, k)) << 3) +
                         ((ap_int<20>)(src_buf3[2].range(k + STEP - 1, k) + src_buf3[4].range(k + STEP - 1, k)) << 1) +
                         (src_buf3[2].range(k + STEP - 1, k) + src_buf3[4].range(k + STEP - 1, k));
        // (src_buf5[2] + src_buf5[4]) * 75
        ap_int<20> A07 = ((ap_int<20>)(src_buf5[2].range(k + STEP - 1, k) + src_buf5[4].range(k + STEP - 1, k)) << 6) +
                         ((ap_int<20>)(src_buf5[2].range(k + STEP - 1, k) + src_buf5[4].range(k + STEP - 1, k)) << 3) +
                         ((ap_int<20>)(src_buf5[2].range(k + STEP - 1, k) + src_buf5[4].range(k + STEP - 1, k)) << 1) +
                         (src_buf5[2].range(k + STEP - 1, k) + src_buf5[4].range(k + STEP - 1, k));
        // (src_buf7[3] - src_buf1[3]) * 20
        ap_int<20> M09 = ((ap_int<20>)(src_buf7[3].range(k + STEP - 1, k) - src_buf1[3].range(k + STEP - 1, k)) << 4) +
                         ((ap_int<20>)(src_buf7[3].range(k + STEP - 1, k) - src_buf1[3].range(k + STEP - 1, k)) << 2);
        // (src_buf6[3] - src_buf2[3]) * 80
        ap_int<20> M10 = ((ap_int<20>)(src_buf6[3].range(k + STEP - 1, k) - src_buf2[3].range(k + STEP - 1, k)) << 6) +
                         ((ap_int<20>)(src_buf6[3].range(k + STEP - 1, k) - src_buf2[3].range(k + STEP - 1, k)) << 4);
        // (src_buf5[3] - src_buf3[3]) * 100
        ap_int<20> M11 = ((ap_int<20>)(src_buf5[3].range(k + STEP - 1, k) - src_buf3[3].range(k + STEP - 1, k)) << 6) +
                         ((ap_int<20>)(src_buf5[3].range(k + STEP - 1, k) - src_buf3[3].range(k + STEP - 1, k)) << 5) +
                         ((ap_int<20>)(src_buf5[3].range(k + STEP - 1, k) - src_buf3[3].range(k + STEP - 1, k)) << 2);
        // Partial sums: FS* collect the terms that are subtracted (rows 1..3),
        // FA* collect the terms that are added (rows 5..7 and the signed M00, M09..M11).
        ap_int<20> FS00 = M01 + M02 + M03;
        ap_int<20> FS01 = M04 + M05;
        ap_int<20> FS02 = M06 + M07 + M08;
        ap_int<20> FA00 = A00 + A01;
        ap_int<20> FA01 = A02 + A03;
        ap_int<20> FA02 = A04 + A05;
        ap_int<20> FA03 = A06 + A07;
        ap_int<20> FA04 = M09 + M10 + M11;
        ap_int<20> FS0 = FS00 + FS01 + FS02;
        ap_int<20> FA0 = M00 + FA00 + FA01;
        ap_int<20> FA1 = FA02 + FA03 + FA04;
        Res = (FA0 + FA1) - (FS0);
        g_y = (XF_PTNAME(DEPTH_DST))Res;
        // Clamp to the unsigned 8-bit range for 8-bit destination depths.
        if ((DEPTH_DST == XF_8UP) || (DEPTH_DST == XF_24UP)) {
            if (Res < 0)
                g_y = 0;
            else if (Res > 255)
                g_y = 255;
        }
        // Clamp to the signed 16-bit range for 16-bit destination depths.
        if ((DEPTH_DST == XF_16SP) || (DEPTH_DST == XF_48SP)) {
            if (Res > 32767)
                g_y = 32767;
            else if (Res < -32768)
                g_y = -32768;
        }
        // g_y = (XF_PTNAME(DEPTH_DST))Res;
        // Store this plane's result in its STEP_OUT-bit field of the output word.
        val.range(p + (STEP_OUT - 1), p) = (XF_PTNAME(DEPTH_DST))g_y;
        p += STEP_OUT;
    }
    return val;
}

// xFSobel7x7: for each of the XF_NPIXPERCYCLE(NPC) pixel positions j, computes the
// X and Y 7x7 gradients from the window starting at element j of the seven row
// buffers and stores them in GradientvaluesX[j] / GradientvaluesY[j].
// NOTE(review): the template parameter list and the explicit template arguments of
// the xFGradientX7x7 / xFGradientY7x7 calls are missing in this listing - restore
// them from the original header.
template void xFSobel7x7(XF_PTNAME(DEPTH_DST) * GradientvaluesX,
                         XF_PTNAME(DEPTH_DST) * GradientvaluesY,
                         XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 6],
                         XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 6],
                         XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 6],
                         XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC) + 6],
                         XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC) + 6],
                         XF_PTNAME(DEPTH_SRC) src_buf6[XF_NPIXPERCYCLE(NPC) + 6],
                         XF_PTNAME(DEPTH_SRC) src_buf7[XF_NPIXPERCYCLE(NPC) + 6]) {
// clang-format off
#pragma HLS INLINE
    // clang-format on
    for (ap_uint<9> j = 0; j < XF_NPIXPERCYCLE(NPC); j++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=8 max=8
#pragma HLS UNROLL
        // clang-format on
        GradientvaluesX[j] = xFGradientX7x7(
            &src_buf1[j], &src_buf2[j], &src_buf3[j], &src_buf4[j], &src_buf5[j], &src_buf6[j], &src_buf7[j]);
        GradientvaluesY[j] =
xFGradientY7x7( &src_buf1[j], &src_buf2[j], &src_buf3[j], &src_buf4[j], &src_buf5[j], &src_buf6[j], &src_buf7[j]); } } /************************************************************************************** * ProcessSobel7x7 : Computes gradients for the column input data **************************************************************************************/ template void ProcessSobel7x7(xf::cv::Mat& _src_mat, xf::cv::Mat& _gradx_mat, xf::cv::Mat& _grady_mat, XF_SNAME(WORDWIDTH_SRC) buf[7][(COLS >> XF_BITSHIFT(NPC))], XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf6[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf7[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)], XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)], XF_SNAME(WORDWIDTH_DST) & inter_valx, XF_SNAME(WORDWIDTH_DST) & inter_valy, uint16_t img_width, uint16_t img_height, ap_uint<13> row_ind, uint16_t& shiftx, uint16_t& shifty, ap_uint<4> tp1, ap_uint<4> tp2, ap_uint<4> tp3, ap_uint<4> mid, ap_uint<4> bottom1, ap_uint<4> bottom2, ap_uint<4> bottom3, ap_uint<13> row, int& read_index, int& write_index) { // clang-format off #pragma HLS INLINE // clang-format on XF_SNAME(WORDWIDTH_SRC) buf0, buf1, buf2, buf3, buf4, buf5, buf6; uint16_t npc = XF_NPIXPERCYCLE(NPC); ap_uint<10> max_loop = XF_WORDDEPTH(WORDWIDTH_DST); Col_Loop: for (ap_uint<13> col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on if (row < img_height) buf[row_ind][col] = _src_mat.read(read_index++); else buf[bottom3][col] = 0; buf0 = buf[tp1][col]; buf1 = buf[tp2][col]; buf2 = buf[tp3][col]; buf3 = buf[mid][col]; buf4 = buf[bottom1][col]; buf5 = 
buf[bottom2][col]; buf6 = buf[bottom3][col]; if (row == 26 && col == 15) printf("hello"); if (NPC == XF_NPPC8) { xfExtractData(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7, buf0, buf1, buf2, buf3, buf4, buf5, buf6); } else { src_buf1[6] = buf0; src_buf2[6] = buf1; src_buf3[6] = buf2; src_buf4[6] = buf3; src_buf5[6] = buf4; src_buf6[6] = buf5; src_buf7[6] = buf6; } xFSobel7x7(GradientValuesX, GradientValuesY, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7); xfCopyData(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7); if (col == 0) { shiftx = 0; shifty = 0; inter_valx = 0; inter_valy = 0; xfPackPixels(&GradientValuesX[0], inter_valx, 3, (npc - 3), shiftx); xfPackPixels(&GradientValuesY[0], inter_valy, 3, (npc - 3), shifty); } else { if ((NPC == XF_NPPC8)) { xfPackPixels(&GradientValuesX[0], inter_valx, 0, 3, shiftx); xfPackPixels(&GradientValuesY[0], inter_valy, 0, 3, shifty); _gradx_mat.write(write_index, inter_valx); _grady_mat.write(write_index++, inter_valy); shiftx = 0; shifty = 0; inter_valx = 0; inter_valy = 0; xfPackPixels(&GradientValuesX[0], inter_valx, 3, (npc - 3), shiftx); xfPackPixels(&GradientValuesY[0], inter_valy, 3, (npc - 3), shifty); } else { if (col >= 3) { inter_valx((max_loop - 1), (max_loop - XF_PIXELDEPTH(DEPTH_DST))) = GradientValuesX[0]; inter_valy((max_loop - 1), (max_loop - XF_PIXELDEPTH(DEPTH_DST))) = GradientValuesY[0]; _gradx_mat.write(write_index, inter_valx); _grady_mat.write(write_index++, inter_valy); } } } } // Col_Loop } template void RightBorder7x7(xf::cv::Mat& _src_mat, xf::cv::Mat& _gradx_mat, xf::cv::Mat& _grady_mat, XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH_SRC) src_buf6[XF_NPIXPERCYCLE(NPC) + 6], 
                             XF_PTNAME(DEPTH_SRC) src_buf7[XF_NPIXPERCYCLE(NPC) + 6],
                             XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)],
                             XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)],
                             XF_SNAME(WORDWIDTH_DST) & inter_valx,
                             XF_SNAME(WORDWIDTH_DST) & inter_valy,
                             uint16_t& shiftx,
                             uint16_t& shifty,
                             int& read_index,
                             int& write_index) {
    // Right-border handling: zero-fills the incoming part of the row windows,
    // computes the remaining three gradient pairs and writes them out.
    //#pragma HLS INLINE off
    ap_uint<4> i = 0;
    ap_uint<5> buf_size = (XF_NPIXPERCYCLE(NPC) + 6);
    ap_uint<10> max_loop = XF_WORDDEPTH(WORDWIDTH_DST);
    if ((NPC == XF_NPPC8)) {
        // Zero window elements [6 .. 13] of every row buffer (buf_size - NPC == 6).
        for (i = 0; i < 8; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=8 max=8
#pragma HLS unroll
            // clang-format on
            src_buf1[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0;
            src_buf2[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0;
            src_buf3[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0;
            src_buf4[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0;
            src_buf5[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0;
            src_buf6[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0;
            src_buf7[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0;
        }
        // Only the first three window positions still produce output pixels.
        for (i = 0; i < 3; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=3
#pragma HLS unroll
            // clang-format on
            GradientValuesX[i] = xFGradientX7x7(
                &src_buf1[i], &src_buf2[i], &src_buf3[i], &src_buf4[i], &src_buf5[i], &src_buf6[i], &src_buf7[i]);
            GradientValuesY[i] = xFGradientY7x7(
                &src_buf1[i], &src_buf2[i], &src_buf3[i], &src_buf4[i], &src_buf5[i], &src_buf6[i], &src_buf7[i]);
        }
        // Complete the pending output word, write it, and reset the packing state.
        xfPackPixels(&GradientValuesX[0], inter_valx, 0, 3, shiftx);
        xfPackPixels(&GradientValuesY[0], inter_valy, 0, 3, shifty);
        _gradx_mat.write(write_index, inter_valx);
        _grady_mat.write(write_index++, inter_valy);
        shiftx = 0;
        shifty = 0;
        inter_valx = 0;
        inter_valy = 0;
    } else {
        // Single pixel per word: feed a zero into the last window position.
        src_buf1[6] = 0;
        src_buf2[6] = 0;
        src_buf3[6] = 0;
        src_buf4[6] = 0;
        src_buf5[6] = 0;
        src_buf6[6] = 0;
        src_buf7[6] = 0;
        // Emit the last three pixels of the row, one per iteration.
        for (ap_uint<5> k = 0; k < 3; k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=3
#pragma HLS ALLOCATION function instances=xFGradientX7x7 limit=1
#pragma HLS ALLOCATION function instances=xFGradientY7x7 limit=1
            // clang-format on
            XF_PTNAME(DEPTH_DST)
            x1 = xFGradientX7x7(&src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0], &src_buf6[0],
                                &src_buf7[0]);
            XF_PTNAME(DEPTH_DST)
            y1 = xFGradientY7x7(&src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0], &src_buf6[0],
                                &src_buf7[0]);
            xfCopyData(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7);
            inter_valx((max_loop - 1), (max_loop - XF_PIXELDEPTH(DEPTH_DST))) = x1; // GradientValuesX[0];
            inter_valy((max_loop - 1), (max_loop - XF_PIXELDEPTH(DEPTH_DST))) = y1; // GradientValuesY[0];
            _gradx_mat.write(write_index, inter_valx);
            _grady_mat.write(write_index++, inter_valy);
        }
    }
}

/**
 * xFSobelFilter7x7 : 7x7 Sobel over a whole image, streaming through a 7-row line buffer.
 *
 * _src_mat   : source image, read sequentially word by word.
 * _gradx_mat : destination for the X gradient, written sequentially.
 * _grady_mat : destination for the Y gradient, written sequentially.
 * img_height : number of rows to process.
 * img_width  : number of packed words per row (loop bound of every column loop).
 *
 * Flow:
 *   - Line-buffer rows 0..2 are cleared (zero rows above the image) while the
 *     first three source rows are read into line-buffer rows 3, 4 and 5.
 *   - Row_Loop then runs img_height times (row = 3 .. img_height + 2). Each
 *     iteration derives the seven line-buffer row indices from `row_ind`
 *     (bottom3 == row_ind is the row being filled, tp1 the oldest), clears the
 *     first six elements of every row window, and calls ProcessSobel7x7 followed
 *     by RightBorder7x7. `row_ind` wraps from 6 back to 0.
 *
 * NOTE(review): the template parameter list, the xf::cv::Mat template arguments and
 * the explicit template arguments of the ProcessSobel7x7 / RightBorder7x7 calls are
 * missing in this listing (presumably lost during extraction) - restore them from
 * the original header.
 */
template void xFSobelFilter7x7(xf::cv::Mat& _src_mat,
                               xf::cv::Mat& _gradx_mat,
                               xf::cv::Mat& _grady_mat,
                               uint16_t img_height,
                               uint16_t img_width) {
    ap_uint<13> row_ind, row, col;
    ap_uint<4> tp1, tp2, tp3, mid, bottom1, bottom2, bottom3;
    ap_uint<5> i;
    int read_index = 0, write_index = 0;
    // Gradient output values are stored in these buffers
    XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)];
    XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)];
    if (NPC > 1) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=GradientValuesX complete dim=1
#pragma HLS ARRAY_PARTITION variable=GradientValuesY complete dim=1
        // clang-format on
    }
    // Temporary buffers to hold image data from seven rows.
    XF_PTNAME(DEPTH_SRC)
    src_buf1[XF_NPIXPERCYCLE(NPC) + 6], src_buf2[XF_NPIXPERCYCLE(NPC) + 6], src_buf3[XF_NPIXPERCYCLE(NPC) + 6],
        src_buf4[XF_NPIXPERCYCLE(NPC) + 6], src_buf5[XF_NPIXPERCYCLE(NPC) + 6];
    XF_PTNAME(DEPTH_SRC) src_buf6[XF_NPIXPERCYCLE(NPC) + 6], src_buf7[XF_NPIXPERCYCLE(NPC) + 6];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf4 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf5 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf6 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf7 complete dim=1
    // clang-format on
    XF_SNAME(WORDWIDTH_DST) inter_valx = 0, inter_valy = 0;
    uint16_t shiftx = 0, shifty = 0;
    // Seven-row line buffer; the storage resource is selected by USE_URAM.
    XF_SNAME(WORDWIDTH_SRC) buf[7][(COLS >> XF_BITSHIFT(NPC))];
    if (USE_URAM) {
// clang-format off
#pragma HLS RESOURCE variable=buf core=RAM_S2P_URAM
#pragma HLS array reshape variable=buf dim=1 factor=7 cyclic
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
#pragma HLS ARRAY_PARTITION variable=buf complete dim=1
        // clang-format on
    }
    row_ind = 3;
// Zero line-buffer rows 0..2 and load the first source row into row 3.
Clear_Row_Loop:
    for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
        // clang-format on
        buf[0][col] = 0;
        buf[1][col] = 0;
        buf[2][col] = 0;
        buf[row_ind][col] = _src_mat.read(read_index++);
    }
    row_ind++;
// Load the second source row into line-buffer row 4.
Read_Row1_Loop:
    for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
        // clang-format on
        buf[row_ind][col] = _src_mat.read(read_index++);
    }
    row_ind++;
// Load the third source row into line-buffer row 5.
Read_Row2_Loop:
    for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
        // clang-format on
        buf[row_ind][col] = _src_mat.read(read_index++);
    }
    row_ind++;
Row_Loop:
    for (row = 3; row < img_height + 3; row++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        // clang-format on
        // modify the buffer indices to re use
        // (tp1 = row_ind + 1, ..., bottom3 = row_ind, all modulo 7)
        if (row_ind == 0) {
            tp1 = 1;
            tp2 = 2;
            tp3 = 3;
            mid = 4;
            bottom1 = 5;
            bottom2 = 6;
            bottom3 = 0;
        } else if (row_ind == 1) {
            tp1 = 2;
            tp2 = 3;
            tp3 = 4;
            mid = 5;
            bottom1 = 6;
            bottom2 = 0;
            bottom3 = 1;
        } else if (row_ind == 2) {
            tp1 = 3;
            tp2 = 4;
            tp3 = 5;
            mid = 6;
            bottom1 = 0;
            bottom2 = 1;
            bottom3 = 2;
        } else if (row_ind == 3) {
            tp1 = 4;
            tp2 = 5;
            tp3 = 6;
            mid = 0;
            bottom1 = 1;
            bottom2 = 2;
            bottom3 = 3;
        } else if (row_ind == 4) {
            tp1 = 5;
            tp2 = 6;
            tp3 = 0;
            mid = 1;
            bottom1 = 2;
            bottom2 = 3;
            bottom3 = 4;
        } else if (row_ind == 5) {
            tp1 = 6;
            tp2 = 0;
            tp3 = 1;
            mid = 2;
            bottom1 = 3;
            bottom2 = 4;
            bottom3 = 5;
        } else if (row_ind == 6) {
            tp1 = 0;
            tp2 = 1;
            tp3 = 2;
            mid = 3;
            bottom1 = 4;
            bottom2 = 5;
            bottom3 = 6;
        }
        // Clear the first six window elements of every row at the start of each row.
        for (i = 0; i < 6; i++) {
// clang-format off
#pragma HLS unroll
            // clang-format on
            src_buf1[i] = 0;
            src_buf2[i] = 0;
            src_buf3[i] = 0;
            src_buf4[i] = 0;
            src_buf5[i] = 0;
            src_buf6[i] = 0;
            src_buf7[i] = 0;
        }
        inter_valx = inter_valy = 0;
        /*********** Process complete row * **********/
        ProcessSobel7x7(
            _src_mat, _gradx_mat, _grady_mat, buf, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6,
            src_buf7, GradientValuesX, GradientValuesY, inter_valx, inter_valy, img_width, img_height, row_ind,
            shiftx, shifty, tp1, tp2, tp3, mid, bottom1, bottom2, bottom3, row, read_index, write_index);
        RightBorder7x7(
            _src_mat, _gradx_mat, _grady_mat, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7,
            GradientValuesX, GradientValuesY, inter_valx, inter_valy, shiftx, shifty, read_index, write_index);
        // Advance the circular line-buffer write position.
        row_ind++;
        if (row_ind == 7) {
            row_ind = 0;
        }
    } // Row_Loop ends here
} // xFSobelFilter7x7

/**
 * Sobel : top-level entry point; dispatches to the 3x3, 5x5 or 7x7 implementation
 * according to FILTER_TYPE.
 *
 * _src_mat  : input image.
 * _dst_matx : output X gradient.
 * _dst_maty : output Y gradient.
 *
 * The width passed to the implementations is `_src_mat.cols >> XF_BITSHIFT(NPC)`,
 * the height is `_src_mat.rows`. Outside synthesis, assertions check that
 * FILTER_TYPE is one of XF_FILTER_3X3 / 5X5 / 7X7, NPC is XF_NPPC1 or XF_NPPC8,
 * BORDER_TYPE is XF_BORDER_CONSTANT and the image fits within ROWS x COLS.
 *
 * NOTE(review): the template parameter list, the xf::cv::Mat template arguments and
 * most of the template argument lists of the three xFSobelFilter* calls are missing
 * in this listing (only the tail `> XF_BITSHIFT(NPC)), USE_URAM>` survived the
 * extraction) - restore them from the original header.
 */
template void Sobel(xf::cv::Mat& _src_mat, xf::cv::Mat& _dst_matx, xf::cv::Mat& _dst_maty) {
// clang-format off
#pragma HLS INLINE OFF
    // clang-format on
    uint16_t width = _src_mat.cols >> XF_BITSHIFT(NPC);
    uint16_t height = _src_mat.rows;
#ifndef __SYNTHESIS__
    assert(((FILTER_TYPE == XF_FILTER_3X3) || (FILTER_TYPE == XF_FILTER_5X5) || (FILTER_TYPE == XF_FILTER_7X7)) &&
           " Filter width must be XF_FILTER_3X3, XF_FILTER_5X5 or XF_FILTER_7X7 ");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && "NPC must be XF_NPPC1 or XF_NPPC8");
    assert((BORDER_TYPE == XF_BORDER_CONSTANT) && "Border type must be XF_BORDER_CONSTANT ");
    assert(((_src_mat.rows <= ROWS) && (_src_mat.cols <= COLS)) &&
           "ROWS and COLS should be greater than input image");
#endif
    if (FILTER_TYPE == XF_FILTER_3X3) {
        xFSobelFilter3x3> XF_BITSHIFT(NPC)), USE_URAM>(
            _src_mat, _dst_matx, _dst_maty, height, width);
    } else if (FILTER_TYPE == XF_FILTER_5X5) {
        xFSobelFilter5x5> XF_BITSHIFT(NPC)), USE_URAM>(
            _src_mat, _dst_matx, _dst_maty, height, width);
    } else if (FILTER_TYPE == XF_FILTER_7X7) {
        xFSobelFilter7x7> XF_BITSHIFT(NPC)), USE_URAM>(
            _src_mat, _dst_matx, _dst_maty, height, width);
    }
}
} // namespace cv
} // namespace xf
// xFSobelFilter
#endif // _XF_SOBEL_HPP_