.. _program_listing_file__tmp_ws_src_vitis_common_include_imgproc_xf_gaussian_filter.hpp: Program Listing for File xf_gaussian_filter.hpp =============================================== |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/imgproc/xf_gaussian_filter.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* * Copyright 2019 Xilinx, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _XF_GAUSSIAN_HPP_ #define _XF_GAUSSIAN_HPP_ #ifndef __cplusplus #error C++ is needed to include this header #endif #include "hls_stream.h" #include "../common/xf_common.hpp" #include "../common/xf_utility.hpp" namespace xf { namespace cv { static void weightsghcalculation3x3(float sigma, unsigned char* weights) { // clang-format off #pragma HLS INLINE // clang-format on float cf[3]; float sum = 0; if (sigma <= 0) { sigma = 0.8; } int n = 3; float scale2X = -(1 / ((sigma * sigma) * 2)); for (int i = 0; i < n; i++) { float x = i - ((n - 1) >> 1); float t = expf(scale2X * x * x); cf[i] = (float)t; sum += cf[i]; } float err = 0.0f; sum = 1. / sum; for (int i = 0; i < n; i++) { cf[i] = (float)(cf[i] * sum); weights[i] = ((cf[i] * 256) + err + 0.5); err = ((cf[i] * 256)) - weights[i]; } } static void weightsghcalculation5x5(float sigma, unsigned char weights[5]) { // clang-format off #pragma HLS INLINE // clang-format on float cf[5]; float sum = 0; if (sigma <= 0) { sigma = 1.1f; } int n = 5; float scale2X = -(1 / ((sigma * sigma) * 2)); for (int i = 0; i < n; i++) { float x = i - ((n - 1) >> 1); float t = expf(scale2X * x * x); cf[i] = (float)t; sum += cf[i]; } float err = 0.0f; sum = 1. / sum; for (int i = 0; i < n; i++) { cf[i] = (float)(cf[i] * sum); weights[i] = ((float)(cf[i] * 256) + err + 0.5); err = ((cf[i] * 256)) - weights[i]; } } static void weightsghcalculation7x7(float sigma, unsigned char weights[7]) { // clang-format off #pragma HLS INLINE // clang-format on float kval[7][7]; float cf[7]; float sum = 0; if (sigma <= 0) { sigma = 1.4f; } int n = 7; float scale2X = -(1 / ((sigma * sigma) * 2)); for (int i = 0; i < n; i++) { float x = i - ((n - 1) >> 1); float t = expf(scale2X * x * x); cf[i] = (float)t; sum += cf[i]; } sum = 1. / sum; for (int i = 0; i < n; i++) { cf[i] = (float)(cf[i] * sum); weights[i] = (unsigned char)(((float)cf[i] * 256) + 0.5); } } template XF_PTNAME(DEPTH) xFapplygaussian3x3(XF_PTNAME(DEPTH) D1, XF_PTNAME(DEPTH) D2, XF_PTNAME(DEPTH) D3, XF_PTNAME(DEPTH) D4, XF_PTNAME(DEPTH) D5, XF_PTNAME(DEPTH) D6, XF_PTNAME(DEPTH) D7, XF_PTNAME(DEPTH) D8, XF_PTNAME(DEPTH) D9, unsigned char* weights) { // clang-format off #pragma HLS INLINE OFF // clang-format on XF_PTNAME(DEPTH) out_pix = 0; unsigned int sum = 0; ap_uint<18> sum1, sum2, sum3; sum2 = (D4 + D6) * weights[0] + D5 * weights[1]; ap_uint<15> sumvalue0 = D1 + D3 + D7 + D9; ap_uint<15> sumvalue1 = D2 + D8; unsigned int value1 = sumvalue0 * weights[0] + sumvalue1 * weights[1]; sum = (value1)*weights[0] + sum2 * weights[1]; unsigned char val = (sum + 32768) >> 16; out_pix = (XF_PTNAME(DEPTH))val; return out_pix; } template XF_PTNAME(DEPTH) xfapplygaussian5x5(XF_PTNAME(DEPTH) * src_buf1, XF_PTNAME(DEPTH) * src_buf2, XF_PTNAME(DEPTH) * src_buf3, XF_PTNAME(DEPTH) * src_buf4, XF_PTNAME(DEPTH) * src_buf5, unsigned char weights[5]) { // clang-format off #pragma HLS INLINE OFF // clang-format on unsigned int sum = 0.0, sumval = 0; unsigned char value = 0; XF_PTNAME(DEPTH) out_pix = 0; ap_uint<10> tmp[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; for (int i = 0, k = 0; i < PLANES; i++, k += 8) { tmp[0] = src_buf1[0].range(k + 7, k) + src_buf1[4].range(k + 7, k); tmp[1] = src_buf2[0].range(k + 7, k) + src_buf2[4].range(k + 7, k); tmp[2] = src_buf3[0].range(k + 7, k) + src_buf3[4].range(k + 7, k); tmp[3] = src_buf4[0].range(k + 7, k) + src_buf4[4].range(k + 7, k); tmp[4] = src_buf5[0].range(k + 7, k) + src_buf5[4].range(k + 7, k); tmp[5] = src_buf1[1].range(k + 7, k) + src_buf1[3].range(k + 7, k); tmp[6] = src_buf2[1].range(k + 7, k) + src_buf2[3].range(k + 7, k); tmp[7] = src_buf3[1].range(k + 7, k) + src_buf3[3].range(k + 7, k); tmp[8] = src_buf4[1].range(k + 7, k) + src_buf4[3].range(k + 7, k); tmp[9] = src_buf5[1].range(k + 7, k) + src_buf5[3].range(k + 7, k); ap_uint<24> tmp_sum[5] = {0, 0, 0, 0, 0}; tmp_sum[0] = (ap_uint<24>)(tmp[0] + tmp[4]) * weights[0] + (ap_uint<24>)(tmp[5] + tmp[9]) * weights[1] + (ap_uint<24>)(src_buf1[2].range(k + 7, k) + src_buf5[2].range(k + 7, k)) * weights[2]; tmp_sum[1] = (ap_uint<24>)(tmp[1] + tmp[3]) * weights[0] + (ap_uint<24>)(tmp[6] + tmp[8]) * weights[1] + (ap_uint<24>)(src_buf2[2].range(k + 7, k) + src_buf4[2].range(k + 7, k)) * weights[2]; tmp_sum[2] = (ap_uint<24>)tmp[2] * weights[0] + (ap_uint<24>)tmp[7] * weights[1] + (ap_uint<24>)src_buf3[2].range(k + 7, k) * weights[2]; sumval = (unsigned int)tmp_sum[0] * weights[0] + tmp_sum[1] * weights[1] + tmp_sum[2] * weights[2]; unsigned short val = ((sumval + 32768) >> 16); if (val >= 255) { value = 255; } else { value = val; } out_pix.range(k + 7, k) = (XF_PTNAME(DEPTH))value; } return out_pix; } template XF_PTNAME(DEPTH) xfapplygaussian7x7(XF_PTNAME(DEPTH) * src_buf1, XF_PTNAME(DEPTH) * src_buf2, XF_PTNAME(DEPTH) * src_buf3, XF_PTNAME(DEPTH) * src_buf4, XF_PTNAME(DEPTH) * src_buf5, XF_PTNAME(DEPTH) * src_buf6, XF_PTNAME(DEPTH) * src_buf7, unsigned char weights[7]) { // clang-format off #pragma HLS INLINE OFF // clang-format on XF_PTNAME(DEPTH) out_pix = 0; unsigned long long int sum_val = 0; unsigned short val = 0; for (int c = 0, k = 0; c < PLANES; c++, k += 8) { unsigned long long int sum_value = 0; unsigned int sum = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0, sum5 = 0.0, sum6 = 0.0; sum3 = (unsigned int)(src_buf4[0].range(k + 7, k) + src_buf4[6].range(k + 7, k)) * weights[0] + (src_buf4[1].range(k + 7, k) + src_buf4[5].range(k + 7, k)) * weights[1] + (src_buf4[2].range(k + 7, k) + src_buf4[4].range(k + 7, k)) * weights[2] + src_buf4[3].range(k + 7, k) * weights[3]; sum = (unsigned int)(src_buf1[0].range(k + 7, k) + src_buf1[6].range(k + 7, k) + src_buf7[0].range(k + 7, k) + src_buf7[6].range(k + 7, k)) * weights[0] + (src_buf1[1].range(k + 7, k) + src_buf1[5].range(k + 7, k) + src_buf7[1].range(k + 7, k) + src_buf7[5].range(k + 7, k)) * weights[1] + (src_buf1[2].range(k + 7, k) + src_buf1[4].range(k + 7, k) + src_buf7[2].range(k + 7, k) + src_buf7[4].range(k + 7, k)) * weights[2] + (src_buf1[3].range(k + 7, k) + src_buf7[3].range(k + 7, k)) * weights[3]; sum1 = (unsigned int)(src_buf2[0].range(k + 7, k) + src_buf2[6].range(k + 7, k) + src_buf6[0].range(k + 7, k) + src_buf6[6].range(k + 7, k)) * weights[0] + (src_buf2[1].range(k + 7, k) + src_buf2[5].range(k + 7, k) + src_buf6[1].range(k + 7, k) + src_buf6[5].range(k + 7, k)) * weights[1] + (src_buf2[2].range(k + 7, k) + src_buf2[4].range(k + 7, k) + src_buf6[2].range(k + 7, k) + src_buf6[4].range(k + 7, k)) * weights[2] + (src_buf2[3].range(k + 7, k) + src_buf6[3].range(k + 7, k)) * weights[3]; sum2 = (unsigned int)(src_buf3[0].range(k + 7, k) + src_buf3[6].range(k + 7, k) + src_buf5[0].range(k + 7, k) + src_buf5[6].range(k + 7, k)) * weights[0] + (src_buf3[1].range(k + 7, k) + src_buf3[5].range(k + 7, k) + src_buf5[1].range(k + 7, k) + src_buf5[5].range(k + 7, k)) * weights[1] + (src_buf3[2].range(k + 7, k) + src_buf3[4].range(k + 7, k) + src_buf5[2].range(k + 7, k) + src_buf5[4].range(k + 7, k)) * weights[2] + (src_buf3[3].range(k + 7, k) + src_buf5[3].range(k + 7, k)) * weights[3]; sum_value = (sum)*weights[0] + (sum1)*weights[1] + (sum2)*weights[2] + (sum3)*weights[3]; unsigned short val = ((sum_value + 32768) >> 16); unsigned char value; if (val >= 255) { value = 255; } else { value = val; } out_pix.range(k + 7, k) = (uchar_t)value; } return out_pix; } template void auGaussian3x3(XF_PTNAME(DEPTH) * OutputValues, XF_PTNAME(DEPTH) * src_buf1, XF_PTNAME(DEPTH) * src_buf2, XF_PTNAME(DEPTH) * src_buf3, unsigned char weights[3]) { // clang-format off #pragma HLS INLINE // clang-format on int p = 0; ap_uint<24> val; Compute_Grad_Loop: for (ap_uint<5> j = 0; j < (XF_NPIXPERCYCLE(NPC)); j++) { // clang-format off #pragma HLS UNROLL // clang-format on for (ap_uint<5> c = 0, k = 0; c < PLANES; c++, k += 8) { // clang-format off #pragma HLS UNROLL // clang-format on val.range(k + 7, k) = xFapplygaussian3x3( src_buf1[j].range(k + 7, k), src_buf1[j + 1].range(k + 7, k), src_buf1[j + 2].range(k + 7, k), src_buf2[j].range(k + 7, k), src_buf2[j + 1].range(k + 7, k), src_buf2[j + 2].range(k + 7, k), src_buf3[j].range(k + 7, k), src_buf3[j + 1].range(k + 7, k), src_buf3[j + 2].range(k + 7, k), weights); } OutputValues[p] = val; p++; } } template void ProcessGaussian3x3(xf::cv::Mat& _src_mat, xf::cv::Mat& _out_mat, XF_SNAME(WORDWIDTH) buf[3][(COLS >> XF_BITSHIFT(NPC))], XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 2], XF_PTNAME(DEPTH) src_buf2[XF_NPIXPERCYCLE(NPC) + 2], XF_PTNAME(DEPTH) src_buf3[XF_NPIXPERCYCLE(NPC) + 2], XF_PTNAME(DEPTH) OutputValues[XF_NPIXPERCYCLE(NPC)], XF_SNAME(WORDWIDTH) & P0, uint16_t img_width, uint16_t img_height, uint16_t& shift_x, ap_uint<2> tp, ap_uint<2> mid, ap_uint<2> bottom, ap_uint<13> row, unsigned char* weights, int& read_index, int& write_index) { // clang-format off #pragma HLS INLINE // clang-format on XF_SNAME(WORDWIDTH) buf0, buf1, buf2; uint16_t npc = XF_NPIXPERCYCLE(NPC); ap_uint<5> buf_size = XF_NPIXPERCYCLE(NPC) + 2; Col_Loop: for (ap_uint<13> col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on if (row < img_height) buf[bottom][col] = _src_mat.read(read_index++); // Read data else buf[bottom][col] = 0; buf0 = buf[tp][col]; buf1 = buf[mid][col]; buf2 = buf[bottom][col]; if (NPC == XF_NPPC8) { xfExtractPixels(&src_buf1[2], buf0, 0); xfExtractPixels(&src_buf2[2], buf1, 0); xfExtractPixels(&src_buf3[2], buf2, 0); } else { src_buf1[2] = buf0; src_buf2[2] = buf1; src_buf3[2] = buf2; } auGaussian3x3(OutputValues, src_buf1, src_buf2, src_buf3, weights); if (col == 0) { shift_x = 0; P0 = 0; if (NPC == XF_NPPC8) { xfPackPixels(&OutputValues[0], P0, 1, (npc - 1), shift_x); } else { P0 = OutputValues[0]; } } else { if (NPC == XF_NPPC8) { xfPackPixels(&OutputValues[0], P0, 0, 1, shift_x); } else { P0 = OutputValues[0]; } _out_mat.write(write_index++, P0); shift_x = 0; P0 = 0; if (NPC == XF_NPPC8) { xfPackPixels(&OutputValues[0], P0, 1, (npc - 1), shift_x); } else { P0 = OutputValues[0]; } } src_buf1[0] = src_buf1[buf_size - 2]; src_buf1[1] = src_buf1[buf_size - 1]; src_buf2[0] = src_buf2[buf_size - 2]; src_buf2[1] = src_buf2[buf_size - 1]; src_buf3[0] = src_buf3[buf_size - 2]; src_buf3[1] = src_buf3[buf_size - 1]; } // Col_Loop } template void xfGaussianFilter3x3(xf::cv::Mat& _src_mat, xf::cv::Mat& _out_mat, uint16_t img_height, uint16_t img_width, unsigned char* weights) { ap_uint<13> row_ind; ap_uint<2> tp, mid, bottom; ap_uint<8> buf_size = XF_NPIXPERCYCLE(NPC) + 2; uint16_t shift_x = 0; ap_uint<13> row, col; int read_index = 0, write_index = 0; XF_PTNAME(DEPTH) OutputValues[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=OutputValues complete dim=1 // clang-format on XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 2] = {0}, src_buf2[XF_NPIXPERCYCLE(NPC) + 2] = {0}, src_buf3[XF_NPIXPERCYCLE(NPC) + 2] = {0}; // clang-format off #pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1 // clang-format on XF_SNAME(WORDWIDTH) P0; XF_SNAME(WORDWIDTH) buf[3][(COLS >> XF_BITSHIFT(NPC))]; // clang-format off #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM #pragma HLS ARRAY_PARTITION variable=buf complete dim=1 // clang-format on row_ind = 1; Clear_Row_Loop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on //#pragma HLS LOOP_FLATTEN off buf[0][col] = 0; buf[row_ind][col] = _src_mat.read(read_index++); // data[read_index++]; } row_ind++; Row_Loop: for (row = 1; row < img_height + 1; row++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on if (row_ind == 2) { tp = 0; mid = 1; bottom = 2; } else if (row_ind == 0) { tp = 1; mid = 2; bottom = 0; } else if (row_ind == 1) { tp = 2; mid = 0; bottom = 1; } src_buf1[0] = src_buf1[1] = 0; src_buf2[0] = src_buf2[1] = 0; src_buf3[0] = src_buf3[1] = 0; P0 = 0; ProcessGaussian3x3( _src_mat, _out_mat, buf, src_buf1, src_buf2, src_buf3, OutputValues, P0, img_width, img_height, shift_x, tp, mid, bottom, row, weights, read_index, write_index); if ((NPC == XF_NPPC8) || (NPC == XF_NPPC16)) { OutputValues[0] = xFapplygaussian3x3(src_buf1[buf_size - 2], src_buf1[buf_size - 1], 0, src_buf2[buf_size - 2], src_buf2[buf_size - 1], 0, src_buf3[buf_size - 2], src_buf3[buf_size - 1], 0, weights); } else { ap_uint<24> out_val1; for (int i = 0, k = 0; i < PLANES; i++, k += 8) { ap_uint<8> srcbuf10 = src_buf1[buf_size - 3].range(k + 7, k); ap_uint<8> srcbuf11 = src_buf1[buf_size - 2].range(k + 7, k); ap_uint<8> srcbuf20 = src_buf2[buf_size - 3].range(k + 7, k); ap_uint<8> srcbuf21 = src_buf2[buf_size - 2].range(k + 7, k); ap_uint<8> srcbuf30 = src_buf3[buf_size - 3].range(k + 7, k); ap_uint<8> srcbuf31 = src_buf3[buf_size - 2].range(k + 7, k); out_val1.range(k + 7, k) = xFapplygaussian3x3(srcbuf10, srcbuf11, 0, srcbuf20, srcbuf21, 0, srcbuf30, srcbuf31, 0, weights); } OutputValues[0] = out_val1; } if (NPC == XF_NPPC8) { xfPackPixels(&OutputValues[0], P0, 0, 1, shift_x); } else { P0 = OutputValues[0]; } _out_mat.write(write_index++, P0); // data[write_index++] = (P0); shift_x = 0; P0 = 0; row_ind++; if (row_ind == 3) { row_ind = 0; } } // Row_Loop } template void xFGaussian5x5(XF_PTNAME(DEPTH) * OutputValues, XF_PTNAME(DEPTH) * src_buf1, XF_PTNAME(DEPTH) * src_buf2, XF_PTNAME(DEPTH) * src_buf3, XF_PTNAME(DEPTH) * src_buf4, XF_PTNAME(DEPTH) * src_buf5, unsigned char weights[5]) { // clang-format off #pragma HLS INLINE // clang-format on XF_PTNAME(DEPTH) val = 0, p = 0; Compute_Grad_Loop: for (ap_uint<5> j = 0; j < XF_NPIXPERCYCLE(NPC); j++) { OutputValues[j] = xfapplygaussian5x5(&src_buf1[j], &src_buf2[j], &src_buf3[j], &src_buf4[j], &src_buf5[j], weights); } } template void ProcessGaussian5x5(xf::cv::Mat& _src_mat, xf::cv::Mat& _out_mat, XF_SNAME(WORDWIDTH) buf[5][(COLS >> XF_BITSHIFT(NPC))], XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 4], XF_PTNAME(DEPTH) src_buf2[XF_NPIXPERCYCLE(NPC) + 4], XF_PTNAME(DEPTH) src_buf3[XF_NPIXPERCYCLE(NPC) + 4], XF_PTNAME(DEPTH) src_buf4[XF_NPIXPERCYCLE(NPC) + 4], XF_PTNAME(DEPTH) src_buf5[XF_NPIXPERCYCLE(NPC) + 4], XF_PTNAME(DEPTH) OutputValues[XF_NPIXPERCYCLE(NPC)], XF_SNAME(WORDWIDTH) & inter_valx, uint16_t img_width, uint16_t img_height, ap_uint<13> row_ind, uint16_t& shift_x, ap_uint<4> tp1, ap_uint<4> tp2, ap_uint<4> mid, ap_uint<4> bottom1, ap_uint<4> bottom2, ap_uint<13> row, unsigned char weights[5], int& read_index, int& write_index) { // clang-format off #pragma HLS INLINE // clang-format on XF_SNAME(WORDWIDTH) buf0, buf1, buf2, buf3, buf4; ap_uint<8> buf_size = XF_NPIXPERCYCLE(NPC) + 4; uint16_t npc = XF_NPIXPERCYCLE(NPC); ap_uint<8> max_loop = XF_WORDDEPTH(WORDWIDTH); ap_uint<8> step = XF_PIXELDEPTH(DEPTH); Col_Loop: for (ap_uint<13> col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on if (row < img_height) buf[row_ind][col] = _src_mat.read(read_index++); //.data[read_index++]; else buf[bottom2][col] = 0; buf0 = buf[tp1][col]; buf1 = buf[tp2][col]; buf2 = buf[mid][col]; buf3 = buf[bottom1][col]; buf4 = buf[bottom2][col]; if (NPC == XF_NPPC8) { xfExtractPixels(&src_buf1[4], buf0, 0); xfExtractPixels(&src_buf2[4], buf1, 0); xfExtractPixels(&src_buf3[4], buf2, 0); xfExtractPixels(&src_buf4[4], buf3, 0); xfExtractPixels(&src_buf5[4], buf4, 0); } else { src_buf1[4] = buf0; src_buf2[4] = buf1; src_buf3[4] = buf2; src_buf4[4] = buf3; src_buf5[4] = buf4; } xFGaussian5x5(OutputValues, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, weights); for (ap_uint<4> i = 0; i < 4; i++) { // clang-format off #pragma HLS unroll // clang-format on src_buf1[i] = src_buf1[buf_size - (4 - i)]; src_buf2[i] = src_buf2[buf_size - (4 - i)]; src_buf3[i] = src_buf3[buf_size - (4 - i)]; src_buf4[i] = src_buf4[buf_size - (4 - i)]; src_buf5[i] = src_buf5[buf_size - (4 - i)]; } if (col == 0) { shift_x = 0; inter_valx = 0; xfPackPixels(&OutputValues[0], inter_valx, 2, (npc - 2), shift_x); } else { if (NPC == XF_NPPC8) { xfPackPixels(&OutputValues[0], inter_valx, 0, 2, shift_x); _out_mat.write(write_index++, inter_valx); shift_x = 0; inter_valx = 0; xfPackPixels(&OutputValues[0], inter_valx, 2, (npc - 2), shift_x); } else { if (col >= 2) { if (PLANES == 1) { inter_valx((max_loop - 1), (max_loop - 8)) = OutputValues[0]; _out_mat.write(write_index++, inter_valx); } else { // _out_mat.data[write_index++] = //(OutputValues[0]); _out_mat.write(write_index++, OutputValues[0]); } } } } } // Col_Loop } template void xFGaussianFilter5x5(xf::cv::Mat& _src_mat, xf::cv::Mat& _out_mat, uint16_t img_height, uint16_t img_width, unsigned char weights[5]) { ap_uint<13> row_ind; ap_uint<13> row, col; ap_uint<4> tp1, tp2, mid, bottom1, bottom2; ap_uint<5> i; ap_uint<5> buf_size = XF_NPIXPERCYCLE(NPC) + 4; ap_uint<9> step = XF_PIXELDEPTH(DEPTH); ap_uint<9> max_loop = XF_WORDDEPTH(WORDWIDTH); uint16_t shift_x = 0; ap_uint<8> npc = XF_NPIXPERCYCLE(NPC); int read_index = 0, write_index = 0; XF_PTNAME(DEPTH) OutputValues[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=OutputValues complete dim=1 // clang-format on XF_SNAME(WORDWIDTH) buf0, buf1, buf2, buf3, buf4; XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 4], src_buf2[XF_NPIXPERCYCLE(NPC) + 4], src_buf3[XF_NPIXPERCYCLE(NPC) + 4], src_buf4[XF_NPIXPERCYCLE(NPC) + 4], src_buf5[XF_NPIXPERCYCLE(NPC) + 4]; // clang-format off #pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf4 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf5 complete dim=1 // clang-format on XF_SNAME(WORDWIDTH) tmp_in; XF_SNAME(WORDWIDTH) inter_valx = 0; XF_SNAME(WORDWIDTH) buf[5][(COLS >> XF_BITSHIFT(NPC))]; // clang-format off #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM #pragma HLS ARRAY_PARTITION variable=buf complete dim=1 // clang-format on row_ind = 2; Clear_Row_Loop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on buf[0][col] = 0; buf[1][col] = 0; buf[row_ind][col] = _src_mat.read(read_index++); //.data[read_index++]; } row_ind++; Read_Row2_Loop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on buf[row_ind][col] = _src_mat.read(read_index++); //_src_mat.data[read_index++]; } row_ind++; Row_Loop: for (row = 2; row < img_height + 2; row++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on // modify the buffer indices to re use if (row_ind == 4) { tp1 = 0; tp2 = 1; mid = 2; bottom1 = 3; bottom2 = 4; } else if (row_ind == 0) { tp1 = 1; tp2 = 2; mid = 3; bottom1 = 4; bottom2 = 0; } else if (row_ind == 1) { tp1 = 2; tp2 = 3; mid = 4; bottom1 = 0; bottom2 = 1; } else if (row_ind == 2) { tp1 = 3; tp2 = 4; mid = 0; bottom1 = 1; bottom2 = 2; } else if (row_ind == 3) { tp1 = 4; tp2 = 0; mid = 1; bottom1 = 2; bottom2 = 3; } src_buf1[0] = src_buf1[1] = src_buf1[2] = src_buf1[3] = 0; src_buf2[0] = src_buf2[1] = src_buf2[2] = src_buf2[3] = 0; src_buf3[0] = src_buf3[1] = src_buf3[2] = src_buf3[3] = 0; src_buf4[0] = src_buf4[1] = src_buf4[2] = src_buf4[3] = 0; src_buf5[0] = src_buf5[1] = src_buf5[2] = src_buf5[3] = 0; inter_valx = 0; ProcessGaussian5x5( _src_mat, _out_mat, buf, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, OutputValues, inter_valx, img_width, img_height, row_ind, shift_x, tp1, tp2, mid, bottom1, bottom2, row, weights, read_index, write_index); if ((NPC == XF_NPPC8) || (NPC == XF_NPPC16)) { for (ap_uint<6> i = 4; i < (XF_NPIXPERCYCLE(NPC) + 4); i++) { src_buf1[i] = 0; src_buf2[i] = 0; src_buf3[i] = 0; src_buf4[i] = 0; src_buf5[i] = 0; } OutputValues[0] = xfapplygaussian5x5( &src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0], weights); OutputValues[1] = xfapplygaussian5x5( &src_buf1[1], &src_buf2[1], &src_buf3[1], &src_buf4[1], &src_buf5[1], weights); xfPackPixels(&OutputValues[0], inter_valx, 0, 2, shift_x); //_out_mat.data[write_index++] = (inter_valx); _out_mat.write(write_index++, inter_valx); } else { // clang-format off #pragma HLS ALLOCATION function instances=xfapplygaussian5x5 limit=1 // clang-format on src_buf1[buf_size - 1] = 0; src_buf2[buf_size - 1] = 0; src_buf3[buf_size - 1] = 0; src_buf4[buf_size - 1] = 0; src_buf5[buf_size - 1] = 0; OutputValues[0] = xfapplygaussian5x5( &src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0], weights); inter_valx((max_loop - 1), (max_loop - step)) = OutputValues[0]; _out_mat.write(write_index++, inter_valx); for (ap_uint<4> i = 0; i < 4; i++) { // clang-format off #pragma HLS unroll // clang-format on src_buf1[i] = src_buf1[buf_size - (4 - i)]; src_buf2[i] = src_buf2[buf_size - (4 - i)]; src_buf3[i] = src_buf3[buf_size - (4 - i)]; src_buf4[i] = src_buf4[buf_size - (4 - i)]; src_buf5[i] = src_buf5[buf_size - (4 - i)]; } OutputValues[0] = xfapplygaussian5x5( &src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0], weights); inter_valx((max_loop - 1), (max_loop - step)) = OutputValues[0]; _out_mat.write(write_index++, inter_valx); } row_ind++; if (row_ind == 5) { row_ind = 0; } } // Row_Loop } template void xFGaussian7x7(XF_PTNAME(DEPTH) * OutputValues, XF_PTNAME(DEPTH) * src_buf1, XF_PTNAME(DEPTH) * src_buf2, XF_PTNAME(DEPTH) * src_buf3, XF_PTNAME(DEPTH) * src_buf4, XF_PTNAME(DEPTH) * src_buf5, XF_PTNAME(DEPTH) * src_buf6, XF_PTNAME(DEPTH) * src_buf7, unsigned char weights[7]) { // clang-format off #pragma HLS INLINE // clang-format on for (ap_uint<9> j = 0; j < XF_NPIXPERCYCLE(NPC); j++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=8 max=8 #pragma HLS UNROLL // clang-format on // for(ap_uint<8> c=0,k=0;c(&src_buf1[j], &src_buf2[j], &src_buf3[j], &src_buf4[j], &src_buf5[j], &src_buf6[j], &src_buf7[j], weights); // } } } template void ProcessGaussian7x7(xf::cv::Mat& _src_mat, xf::cv::Mat& _out_mat, XF_SNAME(WORDWIDTH) buf[7][(COLS >> XF_BITSHIFT(NPC))], XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf2[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf3[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf4[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf5[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf6[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) src_buf7[XF_NPIXPERCYCLE(NPC) + 6], XF_PTNAME(DEPTH) OutputValues[XF_NPIXPERCYCLE(NPC)], XF_SNAME(WORDWIDTH) & inter_valx, uint16_t img_width, uint16_t img_height, uint16_t& shiftx, ap_uint<4> tp1, ap_uint<4> tp2, ap_uint<4> tp3, ap_uint<4> mid, ap_uint<4> bottom1, ap_uint<4> bottom2, ap_uint<4> bottom3, ap_uint<13> row_index, unsigned char weights[7], int& read_index, int& write_index) { // clang-format off #pragma HLS INLINE // clang-format on XF_SNAME(WORDWIDTH) buf0, buf1, buf2, buf3, buf4, buf5, buf6; uint16_t npc = XF_NPIXPERCYCLE(NPC); ap_uint<10> max_loop = XF_WORDDEPTH(WORDWIDTH); Col_Loop: for (ap_uint<13> col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on if (row_index < img_height) buf[bottom3][col] = _src_mat.read(read_index++); //_src_mat.data[read_index++]; else buf[bottom3][col] = 0; buf0 = buf[tp1][col]; buf1 = buf[tp2][col]; buf2 = buf[tp3][col]; buf3 = buf[mid][col]; buf4 = buf[bottom1][col]; buf5 = buf[bottom2][col]; buf6 = buf[bottom3][col]; if (NPC == XF_NPPC8) { xfExtractData(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7, buf0, buf1, buf2, buf3, buf4, buf5, buf6); } else { src_buf1[6] = buf0; src_buf2[6] = buf1; src_buf3[6] = buf2; src_buf4[6] = buf3; src_buf5[6] = buf4; src_buf6[6] = buf5; src_buf7[6] = buf6; } xFGaussian7x7(OutputValues, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7, weights); xfCopyData(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7); if (col == 0) { shiftx = 0; inter_valx = 0; xfPackPixels(&OutputValues[0], inter_valx, 3, (npc - 3), shiftx); } else { if ((NPC == XF_NPPC8) || (NPC == XF_NPPC16)) { xfPackPixels(&OutputValues[0], inter_valx, 0, 3, shiftx); // _out_mat.data[write_index++] = (inter_valx); _out_mat.write(write_index++, inter_valx); shiftx = 0; inter_valx = 0; xfPackPixels(&OutputValues[0], inter_valx, 3, (npc - 3), shiftx); } else { if (col >= 3) { if (PLANES == 1) { inter_valx((max_loop - 1), (max_loop - 8)) = OutputValues[0]; //_out_mat.data[write_index++] = (inter_valx); _out_mat.write(write_index++, inter_valx); } else { // _out_mat.data[write_index++] = //(OutputValues[0]); _out_mat.write(write_index++, OutputValues[0]); } } } } } // Col_Loop } template void xFGaussianFilter7x7(xf::cv::Mat& _src_mat, xf::cv::Mat& _out_mat, uint16_t img_height, uint16_t img_width, unsigned char weights[7]) { ap_uint<13> row_ind, row, col; ap_uint<4> tp1, tp2, tp3, mid, bottom1, bottom2, bottom3; ap_uint<5> i; ap_uint<8> buf_size = (XF_NPIXPERCYCLE(NPC) + 6); ap_uint<10> max_loop = XF_WORDDEPTH(WORDWIDTH); int read_index = 0, write_index = 0; XF_PTNAME(DEPTH) OutputValues[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=OutputValues complete dim=1 // clang-format on // Temporary buffers to hold image data from three rows. XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 6], src_buf2[XF_NPIXPERCYCLE(NPC) + 6], src_buf3[XF_NPIXPERCYCLE(NPC) + 6], src_buf4[XF_NPIXPERCYCLE(NPC) + 6], src_buf5[XF_NPIXPERCYCLE(NPC) + 6]; XF_PTNAME(DEPTH) src_buf6[XF_NPIXPERCYCLE(NPC) + 6], src_buf7[XF_NPIXPERCYCLE(NPC) + 6]; // clang-format off #pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf4 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf5 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf6 complete dim=1 #pragma HLS ARRAY_PARTITION variable=src_buf7 complete dim=1 // clang-format on XF_SNAME(WORDWIDTH) inter_valx = 0; uint16_t shiftx = 0; XF_SNAME(WORDWIDTH) buf[7][(COLS >> XF_BITSHIFT(NPC))]; // clang-format off #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM #pragma HLS ARRAY_PARTITION variable=buf complete dim=1 // clang-format on row_ind = 3; Clear_Row_Loop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on buf[0][col] = 0; buf[1][col] = 0; buf[2][col] = 0; buf[row_ind][col] = _src_mat.read(read_index++); // data[read_index++]; } row_ind++; Read_Row1_Loop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on buf[row_ind][col] = _src_mat.read(read_index++); //_src_mat.data[read_index++]; } row_ind++; Read_Row2_Loop: for (col = 0; col < img_width; col++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on buf[row_ind][col] = _src_mat.read(read_index++); //_src_mat.data[read_index++]; } row_ind++; Row_Loop: for (row = 3; row < img_height + 3; row++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on // modify the buffer indices to re use if (row_ind == 0) { tp1 = 1; tp2 = 2; tp3 = 3; mid = 4; bottom1 = 5; bottom2 = 6; bottom3 = 0; } else if (row_ind == 1) { tp1 = 2; tp2 = 3; tp3 = 4; mid = 5; bottom1 = 6; bottom2 = 0; bottom3 = 1; } else if (row_ind == 2) { tp1 = 3; tp2 = 4; tp3 = 5; mid = 6; bottom1 = 0; bottom2 = 1; bottom3 = 2; } else if (row_ind == 3) { tp1 = 4; tp2 = 5; tp3 = 6; mid = 0; bottom1 = 1; bottom2 = 2; bottom3 = 3; } else if (row_ind == 4) { tp1 = 5; tp2 = 6; tp3 = 0; mid = 1; bottom1 = 2; bottom2 = 3; bottom3 = 4; } else if (row_ind == 5) { tp1 = 6; tp2 = 0; tp3 = 1; mid = 2; bottom1 = 3; bottom2 = 4; bottom3 = 5; } else if (row_ind == 6) { tp1 = 0; tp2 = 1; tp3 = 2; mid = 3; bottom1 = 4; bottom2 = 5; bottom3 = 6; } for (i = 0; i < 6; i++) { // clang-format off #pragma HLS unroll // clang-format on src_buf1[i] = 0; src_buf2[i] = 0; src_buf3[i] = 0; src_buf4[i] = 0; src_buf5[i] = 0; src_buf6[i] = 0; src_buf7[i] = 0; } inter_valx = 0; ProcessGaussian7x7( _src_mat, _out_mat, buf, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7, OutputValues, inter_valx, img_width, img_height, shiftx, tp1, tp2, tp3, mid, bottom1, bottom2, bottom3, row, weights, read_index, write_index); if ((NPC == XF_NPPC8) || (NPC == XF_NPPC16)) { for (i = 0; i < 8; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=8 max=8 #pragma HLS unroll // clang-format on src_buf1[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf2[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf3[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf4[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf5[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf6[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; src_buf7[buf_size + i - (XF_NPIXPERCYCLE(NPC))] = 0; } for (i = 0; i < 3; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=3 max=3 #pragma HLS unroll // clang-format on OutputValues[i] = xfapplygaussian7x7(&src_buf1[i], &src_buf2[i], &src_buf3[i], &src_buf4[i], &src_buf5[i], &src_buf6[i], &src_buf7[i], weights); } xfPackPixels(&OutputValues[0], inter_valx, 0, 3, shiftx); //_out_mat.data[write_index++] = (inter_valx); _out_mat.write(write_index++, inter_valx); shiftx = 0; inter_valx = 0; } else { src_buf1[6] = 0; src_buf2[6] = 0; src_buf3[6] = 0; src_buf4[6] = 0; src_buf5[6] = 0; src_buf6[6] = 0; src_buf7[6] = 0; for (i = 0; i < 3; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=3 max=3 #pragma HLS unroll #pragma HLS ALLOCATION function instances=xfapplygaussian7x7 limit=1 // clang-format on OutputValues[0] = xfapplygaussian7x7(&src_buf1[0], &src_buf2[0], &src_buf3[0], &src_buf4[0], &src_buf5[0], &src_buf6[0], &src_buf7[0], weights); xfCopyData(src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, src_buf6, src_buf7); if (PLANES == 1) { inter_valx((max_loop - 1), (max_loop - 8)) = OutputValues[0]; // _out_mat.data[write_index++] = (inter_valx); _out_mat.write(write_index++, inter_valx); } else { // _out_mat.data[write_index++] = ( OutputValues[0]); _out_mat.write(write_index++, OutputValues[0]); } } } row_ind++; if (row_ind == 7) { row_ind = 0; } } // Row_Loop ends here } /* template void xFGaussianFilter(hls::stream< XF_SNAME(WORDWIDTH)> &_src, hls::stream< XF_SNAME(WORDWIDTH) > &_dst, int _filter_width, int _border_type, uint16_t imgheight, uint16_t imgwidth, float sigma) { }*/ template void GaussianBlur(xf::cv::Mat& _src, xf::cv::Mat& _dst, float sigma) { // clang-format off #pragma HLS inline off // clang-format on int imgwidth = _src.cols >> XF_BITSHIFT(NPC); if (FILTER_SIZE == XF_FILTER_3X3) { unsigned char weights[3]; // clang-format off #pragma HLS ARRAY_PARTITION variable=weights complete dim=1 // clang-format on weightsghcalculation3x3(sigma, weights); xfGaussianFilter3x3> XF_BITSHIFT(NPC))>(_src, _dst, _src.rows, imgwidth, weights); } else if (FILTER_SIZE == XF_FILTER_5X5) { unsigned char weights[5]; // clang-format off #pragma HLS ARRAY_PARTITION variable=weights complete dim=1 // clang-format on weightsghcalculation5x5(sigma, weights); xFGaussianFilter5x5> XF_BITSHIFT(NPC)), false>(_src, _dst, _src.rows, imgwidth, weights); } else if (FILTER_SIZE == XF_FILTER_7X7) { unsigned char weights[7]; // clang-format off #pragma HLS ARRAY_PARTITION variable=weights complete dim=1 // clang-format on weightsghcalculation7x7(sigma, weights); xFGaussianFilter7x7> XF_BITSHIFT(NPC))>(_src, _dst, _src.rows, imgwidth, weights); } } } // namespace cv } // namespace xf #endif //_XF_GAUSSIAN_HPP_