Program Listing for File xf_hog_descriptor_gradients.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/imgproc/xf_hog_descriptor_gradients.hpp)
/*
* Copyright 2019 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _XF_HOG_DESCRIPTOR_GRADIENTS_HPP_
#define _XF_HOG_DESCRIPTOR_GRADIENTS_HPP_
// Index (0, 1 or 2) of the largest of three values a, b, c.
// Ties resolve towards the later argument: a == b selects index 1, and
// max(a, b) == c selects index 2.
// Every argument is wrapped in parentheses so that expression arguments
// (e.g. `x & y`) keep their meaning; note that arguments are still evaluated
// more than once, so they must be free of side effects.
#define MAX_MAG_OF_3_IDX(a, b, c) ((((a) > (b)) ? (a) : (b)) > (c) ? (((a) > (b)) ? 0 : 1) : 2)
/*****************************************************************
 * Gradient computation
 *****************************************************************
 * X-gradient mask (horizontal neighbours):
 *
 *      -------------
 *      | -1  0  1  |
 *      -------------
 *
 * Y-gradient mask (vertical neighbours):
 *
 *      ------
 *      | -1 |
 *      |  0 |
 *      |  1 |
 *      ------
 *
 * xFHOGgradientXY evaluates either mask from its two outer taps:
 * n1 is the tap weighted -1 and n2 the tap weighted +1, so the
 * result is n2 - n1 converted to the destination pixel type.
 * The NOC template argument is not referenced by the body.
 *****************************************************************/
template <int DEPTH_SRC, int DEPTH_DST, int NOC>
XF_PTNAME(DEPTH_DST)
xFHOGgradientXY(XF_PTNAME(DEPTH_SRC) n1, XF_PTNAME(DEPTH_SRC) n2) {
// clang-format off
#pragma HLS INLINE
    // clang-format on
    // difference of the two outer taps, stored in the destination type
    const XF_PTNAME(DEPTH_DST) diff = n2 - n1;
    return diff;
}
/**********************************************************************
 * xFHOGgradientCompute : Applies the [-1 0 1] masks to a three-row
 *                        window and stores one x/y gradient per pixel.
 *
 * GradientvaluesX/Y : outputs, one entry per pixel processed per cycle
 * src_buf0          : window row used as the -1 tap of the y mask
 * src_buf1          : window row used for both taps of the x mask
 * src_buf2          : window row used as the +1 tap of the y mask
 *                     (all three are indexed [plane][column])
 *
 * For output pixel j the x taps are columns j and j + 2 of src_buf1 and
 * the y taps are column j + 1 of src_buf0 and src_buf2.
 *
 * When NOC == XF_GRAY only plane NOC - 1 is used. Otherwise planes
 * NOC - 3, NOC - 2 and NOC - 1 are evaluated and the gradient pair of
 * the plane with the largest squared magnitude (gx*gx + gy*gy) is
 * written out; MAX_MAG_OF_3_IDX decides ties.
 **********************************************************************/
template <int NPC, int DEPTH_SRC, int DEPTH_DST, int NOC, typename filter_type, int filter_width>
void xFHOGgradientCompute(XF_PTNAME(DEPTH_DST) * GradientvaluesX,
                          XF_PTNAME(DEPTH_DST) * GradientvaluesY,
                          filter_type src_buf0[][filter_width],
                          filter_type src_buf1[][filter_width],
                          filter_type src_buf2[][filter_width]) {
// clang-format off
#pragma HLS INLINE OFF
    // clang-format on
    if (NOC == XF_GRAY) {
    Compute_Grad_Loop_Gray:
        for (uchar_t px = 0; px < XF_NPIXPERCYCLE(NPC); px++) {
// clang-format off
#pragma HLS UNROLL
            // clang-format on
            // horizontal difference on the middle row
            GradientvaluesX[px] =
                xFHOGgradientXY<DEPTH_SRC, DEPTH_DST, NOC>(src_buf1[NOC - 1][px], src_buf1[NOC - 1][px + 2]);
            // vertical difference between the outer rows
            GradientvaluesY[px] =
                xFHOGgradientXY<DEPTH_SRC, DEPTH_DST, NOC>(src_buf0[NOC - 1][px + 1], src_buf2[NOC - 1][px + 1]);
        }
    } else {
        // per-plane gradients of the pixel currently being processed
        XF_PTNAME(DEPTH_DST) plane_gx[NOC], plane_gy[NOC];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=plane_gx complete dim=1
#pragma HLS ARRAY_PARTITION variable=plane_gy complete dim=1
    // clang-format on
    Compute_Grad_Loop_rgb:
        for (uchar_t px = 0; px < XF_NPIXPERCYCLE(NPC); px++) {
// clang-format off
#pragma HLS UNROLL
            // clang-format on
            // plane NOC - 3
            plane_gx[NOC - 3] =
                xFHOGgradientXY<DEPTH_SRC, DEPTH_DST, NOC>(src_buf1[NOC - 3][px], src_buf1[NOC - 3][px + 2]);
            plane_gy[NOC - 3] =
                xFHOGgradientXY<DEPTH_SRC, DEPTH_DST, NOC>(src_buf0[NOC - 3][px + 1], src_buf2[NOC - 3][px + 1]);

            // plane NOC - 2
            plane_gx[NOC - 2] =
                xFHOGgradientXY<DEPTH_SRC, DEPTH_DST, NOC>(src_buf1[NOC - 2][px], src_buf1[NOC - 2][px + 2]);
            plane_gy[NOC - 2] =
                xFHOGgradientXY<DEPTH_SRC, DEPTH_DST, NOC>(src_buf0[NOC - 2][px + 1], src_buf2[NOC - 2][px + 1]);

            // plane NOC - 1
            plane_gx[NOC - 1] =
                xFHOGgradientXY<DEPTH_SRC, DEPTH_DST, NOC>(src_buf1[NOC - 1][px], src_buf1[NOC - 1][px + 2]);
            plane_gy[NOC - 1] =
                xFHOGgradientXY<DEPTH_SRC, DEPTH_DST, NOC>(src_buf0[NOC - 1][px + 1], src_buf2[NOC - 1][px + 1]);

            // squared gradient magnitude of each plane
            int energy_r = ((plane_gx[NOC - 3] * plane_gx[NOC - 3]) + (plane_gy[NOC - 3] * plane_gy[NOC - 3]));
            int energy_g = ((plane_gx[NOC - 2] * plane_gx[NOC - 2]) + (plane_gy[NOC - 2] * plane_gy[NOC - 2]));
            int energy_b = ((plane_gx[NOC - 1] * plane_gx[NOC - 1]) + (plane_gy[NOC - 1] * plane_gy[NOC - 1]));

            // the plane with the largest magnitude supplies both outputs
            const int winner = MAX_MAG_OF_3_IDX(energy_r, energy_g, energy_b);
            GradientvaluesX[px] = plane_gx[winner];
            GradientvaluesY[px] = plane_gy[winner];
        }
    }
}
/**************************************************************************************
* xFHOGcomputeColGrad : Computes HoG gradients for the column input data
*
* Runs img_width iterations over one image row. In each iteration it
*   1. (flag == true) reads one word per plane from _src_strm and stores it in
*      row row_ind of the line buffer,
*   2. unpacks the words of rows tp / mid and the current word into
*      src_buf0 / src_buf1 / src_buf2 starting at slot 2,
*   3. computes the gradients with xFHOGgradientCompute,
*   4. packs them into P0 / P1 and writes the word completed in this iteration.
*
* _src_strm        : one input stream per plane; only read when flag is true
* _gradx_strm      : output stream of packed x-gradient words
* _grady_strm      : output stream of packed y-gradient words
* buf              : line buffer, indexed [plane][row][col]
* src_buf0/1/2     : processing windows; slots 0 and 1 carry the last two entries
*                    of the previous iteration, slots 2.. are refilled here
* GradientValuesX/Y: scratch arrays that receive the per-pixel gradients
* P0, P1           : output words under construction. On return they hold a word
*                    whose top `step` bits have not yet been filled; the kernel
*                    in this file completes and writes that word itself.
* img_width        : trip count of Col_Loop; also the range of the col index
*                    used on buf
* row_ind          : line-buffer row that receives the newly read word
* tp, mid          : line-buffer rows unpacked into src_buf0 and src_buf1
* bottom           : not referenced in this function
* flag             : true  -> read the stream and store the word in buf
*                    false -> use an all-zero word for src_buf2
**************************************************************************************/
template <int ROWS,
int COLS,
int DEPTH_SRC,
int DEPTH_DST,
int NPC,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int NOS_SRC,
int TC,
int PIX_COUNT>
void xFHOGcomputeColGrad(hls::stream<XF_SNAME(WORDWIDTH_SRC)> _src_strm[NOS_SRC],
hls::stream<XF_SNAME(WORDWIDTH_DST)>& _gradx_strm,
hls::stream<XF_SNAME(WORDWIDTH_DST)>& _grady_strm,
XF_SNAME(WORDWIDTH_SRC) buf[NOS_SRC][3][(COLS >> XF_BITSHIFT(NPC))],
XF_PTNAME(DEPTH_SRC) src_buf0[NOS_SRC][XF_NPIXPERCYCLE(NPC) + 2],
XF_PTNAME(DEPTH_SRC) src_buf1[NOS_SRC][XF_NPIXPERCYCLE(NPC) + 2],
XF_PTNAME(DEPTH_SRC) src_buf2[NOS_SRC][XF_NPIXPERCYCLE(NPC) + 2],
XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)],
XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)],
XF_SNAME(WORDWIDTH_DST) & P0,
XF_SNAME(WORDWIDTH_DST) & P1,
uint16_t img_width,
ap_uint<13> row_ind,
ap_uint<2> tp,
ap_uint<2> mid,
ap_uint<2> bottom,
bool flag) {
// number of bits each gradient value occupies inside P0 / P1
uchar_t step = XF_PIXELDEPTH(DEPTH_DST);
// used below as the total bit width of P0 / P1
uint16_t max_loop = XF_WORDDEPTH(WORDWIDTH_DST);
// number of entries in each src_bufN[p] window
uchar_t buf_size = XF_NPIXPERCYCLE(NPC) + 2;
uint16_t col = 0, i = 0, j = 0;
ap_uint<3> p;
// clang-format off
#pragma HLS INLINE off
// clang-format on
// column loop up to the end of the row
Col_Loop:
for (col = 0; col < (img_width); col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS PIPELINE
// clang-format on
// reading the data from the stream
Plane_Loop3:
for (p = 0; p < NOS_SRC; p++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
// stays zero when flag is false, so src_buf2 is then filled from a zero word
XF_SNAME(WORDWIDTH_SRC) in_data = 0;
if (flag) {
in_data = _src_strm[p].read();
buf[p][row_ind][col] = in_data;
}
// extracting the data from the input buffer to the process buffer
// (presumably xfExtractPixels unpacks the word into consecutive entries
// starting at the given pointer, i.e. slot 2 onwards - confirm against its definition)
xfExtractPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&src_buf0[p][2], buf[p][tp][col], 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&src_buf1[p][2], buf[p][mid][col], 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, DEPTH_SRC>(&src_buf2[p][2], in_data, 0);
}
// function to compute the gradients
xFHOGgradientCompute<NPC, DEPTH_SRC, DEPTH_DST, NOS_SRC>(GradientValuesX, GradientValuesY, src_buf0, src_buf1,
src_buf2);
if (col == 0) {
// first word of the row: there is no pending output word yet, so gradient 0 is
// not used and nothing is written; gradients 1.. start a new word from bit 0
j = 1;
data_pack_loop1:
for (i = 0; i < (max_loop - step); i = i + step) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=PIX_COUNT max=PIX_COUNT
#pragma HLS UNROLL
// clang-format on
P0.range(i + (step - 1), i) = GradientValuesX[j];
P1.range(i + (step - 1), i) = GradientValuesY[j++];
}
} else {
// gradient 0 of this iteration fills the top `step` bits of the word started
// in the previous iteration, which is then complete and written out
P0.range((max_loop - 1), (max_loop - step)) = GradientValuesX[0];
P1.range((max_loop - 1), (max_loop - step)) = GradientValuesY[0];
_gradx_strm.write(P0);
_grady_strm.write(P1);
// gradients 1.. start the next output word from bit 0
j = 1;
data_pack_loop2:
for (i = 0; i < (max_loop - step); i = i + step) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=PIX_COUNT max=PIX_COUNT
#pragma HLS UNROLL
// clang-format on
P0.range(i + (step - 1), i) = GradientValuesX[j];
P1.range(i + (step - 1), i) = GradientValuesY[j++];
}
}
// copy the last two pixel data to the next iteration
Plane_Loop4:
for (p = 0; p < NOS_SRC; p++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
src_buf0[p][0] = src_buf0[p][buf_size - 2];
src_buf0[p][1] = src_buf0[p][buf_size - 1];
src_buf1[p][0] = src_buf1[p][buf_size - 2];
src_buf1[p][1] = src_buf1[p][buf_size - 1];
src_buf2[p][0] = src_buf2[p][buf_size - 2];
src_buf2[p][1] = src_buf2[p][buf_size - 1];
}
} // Col_Loop
}
/**************************************************************************
* xFHOGgradientsKernel : Streams an image through a three-row circular line
* buffer and emits packed x / y gradient words.
*
* The first row is read into the line buffer together with an all-zero row
* above it. Every following row that is read produces the gradients of the
* row read just before it (the "mid" row). After the last row has been read,
* one more pass with an all-zero row below produces the gradients of the
* last row. Left and right neighbours outside the row are taken as zero.
*
* _src_strm   : one input stream per plane
* _gradx_strm : output stream of packed x-gradient words
* _grady_strm : output stream of packed y-gradient words
* _height     : number of rows to process
* _width      : per-row trip count of the column loops; also the range of
*               the column index used on the line buffer
**************************************************************************/
template <int ROWS,
int COLS,
int DEPTH_SRC,
int DEPTH_DST,
int NPC,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int NOS_SRC,
int TC,
int PIX_COUNT,
bool USE_URAM>
void xFHOGgradientsKernel(hls::stream<XF_SNAME(WORDWIDTH_SRC)> _src_strm[NOS_SRC],
hls::stream<XF_SNAME(WORDWIDTH_DST)>& _gradx_strm,
hls::stream<XF_SNAME(WORDWIDTH_DST)>& _grady_strm,
uint16_t _height,
uint16_t _width) {
// row_index for circular buffer organization
uint16_t row_ind;
ap_uint<3> p;
ap_uint<2> tp, mid, bottom;
// number of bits each gradient value occupies inside P0 / P1
uchar_t step = XF_PIXELDEPTH(DEPTH_DST);
// used below as the total bit width of P0 / P1
uint16_t max_loop = XF_WORDDEPTH(WORDWIDTH_DST);
// NOTE(review): buf_size, j and i are declared but not referenced in this function
uchar_t buf_size = XF_NPIXPERCYCLE(NPC) + 2;
uint16_t col, j, row, i;
// output gradient buffers; gradient-x and gradient-y
XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)];
XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=GradientValuesX complete dim=1
#pragma HLS ARRAY_PARTITION variable=GradientValuesY complete dim=1
// clang-format on
// temporary buffer to hold the input data for computation
XF_PTNAME(DEPTH_SRC)
src_buf0[NOS_SRC][XF_NPIXPERCYCLE(NPC) + 2], src_buf1[NOS_SRC][XF_NPIXPERCYCLE(NPC) + 2],
src_buf2[NOS_SRC][XF_NPIXPERCYCLE(NPC) + 2];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=src_buf0 complete dim=0
#pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=0
#pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=0
// clang-format on
// used to temporarily hold the output data before pushing into the stream
XF_SNAME(WORDWIDTH_DST) P0, P1;
// Line buffer to hold image data
XF_SNAME(WORDWIDTH_SRC) buf[NOS_SRC][3][(COLS >> XF_BITSHIFT(NPC))];
// USE_URAM selects which storage directives are applied to the line buffer
if (USE_URAM) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=buf complete dim=1
#pragma HLS RESOURCE variable=buf core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=buf cyclic factor=3 dim=2
// clang-format on
} else {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=buf complete dim=1
#pragma HLS ARRAY_PARTITION variable=buf complete dim=2
// clang-format on
}
row_ind = 1;
// reading the complete first line to the input buffer
// (line-buffer row 0 is zeroed at the same time; it acts as the row above the image)
Clear_Row_Read_Buf_Loop:
for (col = 0; col < (_width); col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS PIPELINE
// clang-format on
Plane_Loop1:
for (p = 0; p < NOS_SRC; p++) {
buf[p][0][col] = 0;
buf[p][row_ind][col] = _src_strm[p].read(); // Read data
}
}
row_ind++;
// process loop up to the end of the image
// (starts at row 1 because row 0 has already been read above)
Row_Loop:
for (row = 1; row < (_height); row++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
// updating the row index for the circular buffer organization:
// bottom is the row written in this iteration (== row_ind), mid the row
// written one iteration earlier and tp the row written before that
if (row_ind == 2) {
tp = 0;
mid = 1;
bottom = 2;
} else if (row_ind == 0) {
tp = 1;
mid = 2;
bottom = 0;
} else if (row_ind == 1) {
tp = 2;
mid = 0;
bottom = 1;
}
// padding the left border with zero
Plane_Loop2:
for (p = 0; p < NOS_SRC; p++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
src_buf0[p][0] = src_buf0[p][1] = 0;
src_buf1[p][0] = src_buf1[p][1] = 0;
src_buf2[p][0] = src_buf2[p][1] = 0;
}
P0 = P1 = 0;
// compute the gradient for the data in the Source buffer
xFHOGcomputeColGrad<ROWS, COLS, DEPTH_SRC, DEPTH_DST, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, NOS_SRC, TC,
PIX_COUNT>(_src_strm, _gradx_strm, _grady_strm, buf, src_buf0, src_buf1, src_buf2,
GradientValuesX, GradientValuesY, P0, P1, _width, row_ind, tp, mid, bottom,
true);
// NOTE(review): row starts at 1 in this loop, so this condition is always true
if (row) {
// zero the slot that follows the two carried pixels (right border) so that
// the last gradient of the row can be computed
Plane_Loop4:
for (p = 0; p < NOS_SRC; p++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
src_buf0[p][2] = 0;
src_buf1[p][2] = 0;
src_buf2[p][2] = 0;
}
xFHOGgradientCompute<NPC, DEPTH_SRC, DEPTH_DST, NOS_SRC>(GradientValuesX, GradientValuesY, src_buf0,
src_buf1, src_buf2);
// gradient 0 completes the word left pending by xFHOGcomputeColGrad
P0.range((max_loop - 1), (max_loop - step)) = GradientValuesX[0];
P1.range((max_loop - 1), (max_loop - step)) = GradientValuesY[0];
_gradx_strm.write(P0);
_grady_strm.write(P1);
}
// advance the circular write position (0 -> 1 -> 2 -> 0)
row_ind++;
if (row_ind == 3) {
row_ind = 0;
}
} // Row_Loop
// compute indexes for the input buffer
// (row_ind is already wrapped inside Row_Loop, and is 2 if the loop never ran,
// so the following check does not change it)
if (row_ind == 3) {
row_ind = 0;
}
if (row_ind == 2) {
tp = 0;
mid = 1;
bottom = 2;
} else if (row_ind == 0) {
tp = 1;
mid = 2;
bottom = 0;
} else if (row_ind == 1) {
tp = 2;
mid = 0;
bottom = 1;
}
// padding the left border with zero for the final pass
Plane_Loop6:
for (p = 0; p < NOS_SRC; p++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
src_buf0[p][0] = src_buf0[p][1] = 0;
src_buf1[p][0] = src_buf1[p][1] = 0;
src_buf2[p][0] = src_buf2[p][1] = 0;
}
// zero the line-buffer row below the last image row
// (the call below passes flag == false, in which case xFHOGcomputeColGrad feeds
// src_buf2 from a zero word and does not read this row)
Clear_Row_Loop1:
for (col = 0; col < (_width); col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS PIPELINE
// clang-format on
Plane_Loop7:
for (p = 0; p < NOS_SRC; p++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
buf[p][bottom][col] = 0;
}
}
// compute the gradient for the data in the Source buffer
// (final pass: no stream read, produces the gradients of the last image row)
xFHOGcomputeColGrad<ROWS, COLS, DEPTH_SRC, DEPTH_DST, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, NOS_SRC, TC, PIX_COUNT>(
_src_strm, _gradx_strm, _grady_strm, buf, src_buf0, src_buf1, src_buf2, GradientValuesX, GradientValuesY, P0,
P1, _width, row_ind, tp, mid, bottom, false);
// zero the slot that follows the two carried pixels (right border)
Plane_Loop5:
for (p = 0; p < NOS_SRC; p++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
src_buf0[p][2] = 0;
src_buf1[p][2] = 0;
src_buf2[p][2] = 0;
}
xFHOGgradientCompute<NPC, DEPTH_SRC, DEPTH_DST, NOS_SRC>(GradientValuesX, GradientValuesY, src_buf0, src_buf1,
src_buf2);
// gradient 0 completes the last pending word of the image
P0.range((max_loop - 1), (max_loop - step)) = GradientValuesX[0];
P1.range((max_loop - 1), (max_loop - step)) = GradientValuesY[0];
_gradx_strm.write(P0);
_grady_strm.write(P1);
}
// xFHOGgradientsKernel
/**************************************************************************
 * xFHOGgradients : Wrapper function which calls the kernel function
 *                  depending upon the configurations.
 *
 * In non-synthesis builds the template configuration and the border type
 * are checked with assert() before the kernel is started:
 *   DEPTH_SRC      must be XF_8UP
 *   DEPTH_DST      must be XF_9SP
 *   NPC            must be XF_NPPC1 or XF_NPPC8
 *   WORDWIDTH_SRC  must be XF_8UW or XF_64UW
 *   WORDWIDTH_DST  must be XF_9UW or XF_72UW
 *   NOS_SRC        must be XF_GRAY or XF_RGB
 *   _border_type   must be XF_BORDER_CONSTANT
 *
 * _src         : one input stream per plane
 * _gradx       : output stream of packed x-gradient words
 * _grady       : output stream of packed y-gradient words
 * _border_type : only validated here; it is not forwarded to the kernel
 * _height      : forwarded to the kernel as the number of rows
 * _width       : forwarded to the kernel as the per-row loop count
 *                (presumably expressed in words, since the kernel indexes
 *                a buffer of COLS >> XF_BITSHIFT(NPC) entries with it -
 *                confirm against the caller)
 *
 * The kernel's TC and PIX_COUNT arguments are derived here as
 * COLS >> XF_BITSHIFT(NPC) and XF_NPIXPERCYCLE(NPC).
 **************************************************************************/
template <int ROWS,
          int COLS,
          int DEPTH_SRC,
          int DEPTH_DST,
          int NPC,
          int WORDWIDTH_SRC,
          int WORDWIDTH_DST,
          int NOS_SRC,
          bool USE_URAM>
void xFHOGgradients(hls::stream<XF_SNAME(WORDWIDTH_SRC)> _src[NOS_SRC],
                    hls::stream<XF_SNAME(WORDWIDTH_DST)>& _gradx,
                    hls::stream<XF_SNAME(WORDWIDTH_DST)>& _grady,
                    int _border_type,
                    uint16_t _height,
                    uint16_t _width) {
#ifndef __SYNTHESIS__
    assert(((DEPTH_SRC == XF_8UP)) && " Input image must be of type XF_8UP");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && "NPC must be XF_NPPC1 or XF_NPPC8");
    assert(((WORDWIDTH_SRC == XF_8UW) || (WORDWIDTH_SRC == XF_64UW)) && "WORDWIDTH must be XF_8UW or XF_64UW");
    // this check is on the destination depth, so the message names the output image
    assert(((DEPTH_DST == XF_9SP)) && " Output image must be of type XF_9SP");
    assert(((WORDWIDTH_DST == XF_9UW) || (WORDWIDTH_DST == XF_72UW)) && "WORDWIDTH must be XF_9UW or XF_72UW");
    assert((_border_type == XF_BORDER_CONSTANT) && "Border type must be XF_BORDER_CONSTANT ");
    assert(((NOS_SRC == XF_GRAY) || (NOS_SRC == XF_RGB)) && "input_image_type must be either XF_GRAY or XF_RGB");
#endif
    xFHOGgradientsKernel<ROWS, COLS, DEPTH_SRC, DEPTH_DST, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, NOS_SRC,
                         (COLS >> XF_BITSHIFT(NPC)), (XF_NPIXPERCYCLE(NPC)), USE_URAM>(_src, _gradx, _grady, _height,
                                                                                       _width);
}
// xFHOGgradients
#endif // _XF_HOG_DESCRIPTOR_GRADIENTS_HPP_