Program Listing for File xf_averagegaussianmask.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/imgproc/xf_averagegaussianmask.hpp
)
/*
* Copyright 2019 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _XF_AVERAGEGAUSSIANMASK_HPP_
#define _XF_AVERAGEGAUSSIANMASK_HPP_
/*****************************************************************
* GaussianFilter3x3
*****************************************************************
* -------------
* |1 2 1|
* |2 4 2|
* |1 2 1|
* -------------
*****************************************************************/
template <int DEPTH_SRC>
XF_PTNAME(DEPTH_SRC)
xFGaussianFixed3x3(XF_PTNAME(DEPTH_SRC) t0,
XF_PTNAME(DEPTH_SRC) t1,
XF_PTNAME(DEPTH_SRC) t2,
XF_PTNAME(DEPTH_SRC) m0,
XF_PTNAME(DEPTH_SRC) m1,
XF_PTNAME(DEPTH_SRC) m2,
XF_PTNAME(DEPTH_SRC) b0,
XF_PTNAME(DEPTH_SRC) b1,
XF_PTNAME(DEPTH_SRC) b2) {
// clang-format off
#pragma HLS INLINE
// clang-format on
XF_PTNAME(DEPTH_SRC) g_x = 0;
uint16_t A00 = (uint16_t)t0 + t2;
uint16_t A01 = (uint16_t)b0 + b2;
uint16_t A02 = (uint16_t)t1 + m0;
uint16_t A03 = (uint16_t)m2 + b1;
uint16_t A0 = (uint16_t)A00 + A01;
uint16_t A1 = ((uint16_t)(A02 + A03)) << 1;
uint16_t A2 = (((uint16_t)m1) << 2);
g_x = ((A0 + A1 + A2) >> 4);
return g_x;
}
template <int NPC, int DEPTH_SRC>
void xFAverageGaussian3x3(XF_PTNAME(DEPTH_SRC) * Maskvalues,
XF_PTNAME(DEPTH_SRC) * src_buf1,
XF_PTNAME(DEPTH_SRC) * src_buf2,
XF_PTNAME(DEPTH_SRC) * src_buf3) {
// clang-format off
#pragma HLS INLINE
// clang-format on
Compute_Grad_Loop:
for (int j = 0; j < (1 << XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
Maskvalues[j] =
xFGaussianFixed3x3<DEPTH_SRC>(src_buf1[j], src_buf1[j + 1], src_buf1[j + 2], src_buf2[j], src_buf2[j + 1],
src_buf2[j + 2], src_buf3[j], src_buf3[j + 1], src_buf3[j + 2]);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int DEPTH, int NPC, int WORDWIDTH, int TC>
void ProcessAverageGaussian3x3(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _out_mat,
XF_SNAME(WORDWIDTH) buf[3][(COLS >> XF_BITSHIFT(NPC))],
XF_PTNAME(DEPTH) src_buf1[XF_NPIXPERCYCLE(NPC) + 2],
XF_PTNAME(DEPTH) src_buf2[XF_NPIXPERCYCLE(NPC) + 2],
XF_PTNAME(DEPTH) src_buf3[XF_NPIXPERCYCLE(NPC) + 2],
XF_PTNAME(DEPTH) OutputValues[XF_NPIXPERCYCLE(NPC)],
XF_SNAME(WORDWIDTH) & P0,
uint16_t img_width,
uint16_t img_height,
uint16_t& shift_x,
ap_uint<2> tp,
ap_uint<2> mid,
ap_uint<2> bottom,
ap_uint<13> row,
int& read_index,
int& write_index) {
// clang-format off
#pragma HLS INLINE
// clang-format on
XF_SNAME(WORDWIDTH) buf0, buf1, buf2;
ap_uint<5> npc = XF_NPIXPERCYCLE(NPC);
ap_uint<5> buf_size = XF_NPIXPERCYCLE(NPC) + 2;
Col_Loop:
for (ap_uint<13> col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
if (row < img_height)
buf[bottom][col] = _src_mat.read(read_index++); // Read data
else
buf[bottom][col] = 0;
buf0 = buf[tp][col];
buf1 = buf[mid][col];
buf2 = buf[bottom][col];
xfExtractPixels<NPC, WORDWIDTH, DEPTH>(&src_buf1[2], buf0, 0);
xfExtractPixels<NPC, WORDWIDTH, DEPTH>(&src_buf2[2], buf1, 0);
xfExtractPixels<NPC, WORDWIDTH, DEPTH>(&src_buf3[2], buf2, 0);
xFAverageGaussian3x3<NPC, DEPTH>(OutputValues, src_buf1, src_buf2, src_buf3);
if (col == 0) {
shift_x = 0;
P0 = 0;
xfPackPixels<NPC, WORDWIDTH, DEPTH>(&OutputValues[0], P0, 1, (npc - 1), shift_x);
} else {
xfPackPixels<NPC, WORDWIDTH, DEPTH>(&OutputValues[0], P0, 0, 1, shift_x);
_out_mat.write(write_index++, P0);
shift_x = 0;
P0 = 0;
xfPackPixels<NPC, WORDWIDTH, DEPTH>(&OutputValues[0], P0, 1, (npc - 1), shift_x);
}
src_buf1[0] = src_buf1[buf_size - 2];
src_buf1[1] = src_buf1[buf_size - 1];
src_buf2[0] = src_buf2[buf_size - 2];
src_buf2[1] = src_buf2[buf_size - 1];
src_buf3[0] = src_buf3[buf_size - 2];
src_buf3[1] = src_buf3[buf_size - 1];
} // Col_Loop
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int DEPTH_SRC, int NPC, int WORDWIDTH_SRC, int TC>
void xFAverageGaussianMask3x3(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _out_mat,
uint16_t img_height,
uint16_t img_width) {
img_width = img_width >> XF_BITSHIFT(NPC);
ap_uint<13> row_ind;
ap_uint<2> tp, mid, bottom;
ap_uint<5> buf_size = XF_NPIXPERCYCLE(NPC) + 2;
uint16_t shift_x = 0;
ap_uint<13> row, col;
int in_index_new = 0, out_index = 0;
XF_PTNAME(DEPTH_SRC) OutputValues[XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=OutputValues complete dim=1
// clang-format on
XF_PTNAME(DEPTH_SRC)
src_buf1[XF_NPIXPERCYCLE(NPC) + 2], src_buf2[XF_NPIXPERCYCLE(NPC) + 2], src_buf3[XF_NPIXPERCYCLE(NPC) + 2];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf3 complete dim=1
// clang-format on
XF_SNAME(WORDWIDTH_SRC) P0;
XF_SNAME(WORDWIDTH_SRC) buf[3][(COLS >> XF_BITSHIFT(NPC))];
// clang-format off
#pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
#pragma HLS ARRAY_PARTITION variable=buf complete dim=1
// clang-format on
row_ind = 1;
Clear_Row_Loop:
for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
buf[0][col] = 0;
buf[row_ind][col] = _src_mat.read(in_index_new++);
}
row_ind++;
Row_Loop:
for (row = 1; row < img_height + 1; row++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
if (row_ind == 2) {
tp = 0;
mid = 1;
bottom = 2;
} else if (row_ind == 0) {
tp = 1;
mid = 2;
bottom = 0;
} else if (row_ind == 1) {
tp = 2;
mid = 0;
bottom = 1;
}
src_buf1[0] = src_buf1[1] = 0;
src_buf2[0] = src_buf2[1] = 0;
src_buf3[0] = src_buf3[1] = 0;
P0 = 0;
ProcessAverageGaussian3x3<SRC_T, DST_T, ROWS, COLS, DEPTH_SRC, NPC, WORDWIDTH_SRC, TC>(
_src_mat, _out_mat, buf, src_buf1, src_buf2, src_buf3, OutputValues, P0, img_width, img_height, shift_x, tp,
mid, bottom, row, in_index_new, out_index);
if ((NPC == XF_NPPC8) || (NPC == XF_NPPC16)) {
OutputValues[0] = xFGaussianFixed3x3<DEPTH_SRC>(src_buf1[buf_size - 2], src_buf1[buf_size - 1], 0,
src_buf2[buf_size - 2], src_buf2[buf_size - 1], 0,
src_buf3[buf_size - 2], src_buf3[buf_size - 1], 0);
} else {
OutputValues[0] = xFGaussianFixed3x3<DEPTH_SRC>(src_buf1[buf_size - 3], src_buf1[buf_size - 2], 0,
src_buf2[buf_size - 3], src_buf2[buf_size - 2], 0,
src_buf3[buf_size - 3], src_buf3[buf_size - 2], 0);
}
xfPackPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&OutputValues[0], P0, 0, 1, shift_x);
_out_mat.write(out_index++, P0);
shift_x = 0;
P0 = 0;
row_ind++;
if (row_ind == 3) {
row_ind = 0;
}
} // Row_Loop
}
// xFAverageGaussianMask3x3
#endif