Program Listing for File xf_harris_utils.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/features/xf_harris_utils.hpp)
/*
* Copyright 2019 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _XF_HARRIS_UTILS_H_
#define _XF_HARRIS_UTILS_H_
#ifndef __cplusplus
#error C++ is needed to use this file!
#endif
/**
 * Fan one input matrix out into two output matrices.
 *
 * Every word read from _src_mat is written, at the same linear index,
 * to both _dst1_mat and _dst2_mat.
 *
 * Template parameters:
 *   SRC_T, COLS  - forwarded to the xf::cv::Mat types.
 *   ROWS         - also used as the trip-count hint of the row loop.
 *   DEPTH        - not referenced in the body.
 *   NPC          - img_width is right-shifted by XF_BITSHIFT(NPC).
 *   WORDWIDTH    - selects the word type XF_SNAME(WORDWIDTH).
 *   TC           - trip-count hint of the column loop.
 *
 * Parameters:
 *   _src_mat     - matrix that is read.
 *   _dst1_mat    - first matrix that is written.
 *   _dst2_mat    - second matrix that is written.
 *   img_height   - number of row iterations.
 *   img_width    - width before the XF_BITSHIFT(NPC) right shift.
 */
template <int SRC_T, int ROWS, int COLS, int DEPTH, int NPC, int WORDWIDTH, int TC>
void xFDuplicate(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
                 xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _dst1_mat,
                 xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _dst2_mat,
                 uint16_t img_height,
                 uint16_t img_width) {
    // Column iterations per row. Presumably the number of packed words per
    // row (XF_BITSHIFT(NPC) looks like log2 of pixels-per-word) - confirm.
    img_width = img_width >> XF_BITSHIFT(NPC);

Row_Loop:
    for (ap_uint<13> row = 0; row < img_height; row++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        #pragma HLS LOOP_FLATTEN off
        // clang-format on
    Col_Loop:
        for (ap_uint<13> col = 0; col < img_width; col++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
            #pragma HLS pipeline
            // clang-format on

            // One read feeds both writes, all at the same linear index.
            XF_SNAME(WORDWIDTH) word = _src_mat.read(row * img_width + col);
            _dst1_mat.write(row * img_width + col, word);
            _dst2_mat.write(row * img_width + col, word);
        }
    }
}
/**
 * Element-wise square of a matrix with a right shift applied to the result.
 *
 * For every word of _src_mat the individual values are extracted with
 * xfExtractPixels, squared, shifted right by `scale`, re-packed with
 * xfPackPixels and written to _dst_mat at the same linear index.
 *
 * When filter_width equals XF_FILTER_7X7 each value is first shifted right
 * by 9 (into an int16_t) before its magnitude is squared; otherwise the
 * magnitude of the unmodified value is squared.
 *
 * Template parameters:
 *   SRC_T, DST_T, COLS  - forwarded to the xf::cv::Mat types.
 *   ROWS                - also the trip-count hint of the row loop.
 *   IN_DEPTH, OUT_DEPTH - select the per-value types XF_PTNAME(...).
 *   NPC                 - sizes the lane buffers (1 << XF_BITSHIFT(NPC)).
 *   IN_WW, OUT_WW       - select the word types XF_SNAME(...).
 *   TC                  - trip-count hint of the column loop.
 *   SCALE_T             - type of the shift amount.
 *
 * Parameters:
 *   _src_mat     - matrix that is read.
 *   _dst_mat     - matrix that is written.
 *   scale        - right shift applied to every squared value.
 *   filter_width - compared against XF_FILTER_7X7 to pick the pre-shift path.
 *   img_height   - number of row iterations.
 *   img_width    - width before the XF_BITSHIFT(NPC) right shift.
 */
template <int SRC_T,
          int DST_T,
          int ROWS,
          int COLS,
          int IN_DEPTH,
          int OUT_DEPTH,
          int NPC,
          int IN_WW,
          int OUT_WW,
          int TC,
          typename SCALE_T>
void xFSquare(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
              xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_mat,
              SCALE_T scale,
              uint8_t filter_width,
              uint16_t img_height,
              uint16_t img_width) {
    img_width = img_width >> XF_BITSHIFT(NPC);

    uint16_t npc = XF_NPIXPERCYCLE(NPC);
    uint16_t shift = 0;

    XF_SNAME(IN_WW) in_word;
    XF_SNAME(OUT_WW) out_word;

    // Per-lane scratch buffers, fully partitioned for the unrolled lane loop.
    XF_PTNAME(IN_DEPTH) src_buf[(1 << XF_BITSHIFT(NPC))];
    XF_PTNAME(OUT_DEPTH) dst_buf[(1 << XF_BITSHIFT(NPC))];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=src_buf complete dim=1
    #pragma HLS ARRAY_PARTITION variable=dst_buf complete dim=1
    // clang-format on

Row_Loop:
    for (ap_uint<13> row = 0; row < img_height; row++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        #pragma HLS LOOP_FLATTEN off
        // clang-format on
    Col_Loop:
        for (ap_uint<13> col = 0; col < img_width; col++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
            #pragma HLS pipeline
            // clang-format on
            in_word = _src_mat.read(row * img_width + col);
            xfExtractPixels<NPC, IN_WW, IN_DEPTH>(src_buf, in_word, 0);

        Square_Loop:
            for (ap_uint<9> k = 0; k < (1 << XF_BITSHIFT(NPC)); k++) {
                // clang-format off
                #pragma HLS UNROLL
                // clang-format on
                if (filter_width != XF_FILTER_7X7) {
                    // Magnitude of the raw value, then square and scale.
                    XF_PTNAME(IN_DEPTH) magnitude;
                    if (src_buf[k] < 0)
                        magnitude = -(src_buf[k]);
                    else
                        magnitude = src_buf[k];
                    dst_buf[k] = (magnitude * magnitude) >> scale;
                } else {
                    // 7x7 path: drop 9 low bits before taking the magnitude.
                    int16_t reduced = src_buf[k] >> 9;
                    uint16_t magnitude = (reduced < 0) ? -(reduced) : reduced;
                    dst_buf[k] = ((magnitude * magnitude) >> scale);
                }
            }

            out_word = 0;
            xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(dst_buf, out_word, 0, npc, shift);
            // shift is reset after every pack; presumably xfPackPixels
            // advances it through a reference parameter - confirm.
            shift = 0;
            _dst_mat.write(row * img_width + col, out_word);
        } // Col_Loop
    }     // Row_Loop
}
// xFSquare
/**
 * Element-wise product of two matrices with a right shift applied.
 *
 * For every linear index one word is read from each source matrix, the
 * values are extracted with xfExtractPixels, multiplied lane by lane,
 * shifted right by `scale`, re-packed with xfPackPixels and written to
 * _dst_mat at the same index.
 *
 * When filter_width equals XF_FILTER_7X7 both operands are first shifted
 * right by 9 (into int16_t) before being multiplied.
 *
 * Template parameters:
 *   SRC_T, DST_T, COLS  - forwarded to the xf::cv::Mat types.
 *   ROWS                - also the trip-count hint of the row loop.
 *   IN_DEPTH, OUT_DEPTH - select the per-value types XF_PTNAME(...).
 *   NPC                 - sizes the lane buffers (1 << XF_BITSHIFT(NPC)).
 *   IN_WW, OUT_WW       - select the word types XF_SNAME(...).
 *   TC                  - trip-count hint of the column loop.
 *   SCALE_T             - type of the shift amount.
 *
 * Parameters:
 *   _src_mat1    - first operand matrix.
 *   _src_mat2    - second operand matrix.
 *   _dst_mat     - matrix that is written.
 *   scale        - right shift applied to every product.
 *   filter_width - compared against XF_FILTER_7X7 to pick the pre-shift path.
 *   img_height   - number of row iterations.
 *   img_width    - width before the XF_BITSHIFT(NPC) right shift.
 */
template <int SRC_T,
          int DST_T,
          int ROWS,
          int COLS,
          int IN_DEPTH,
          int OUT_DEPTH,
          int NPC,
          int IN_WW,
          int OUT_WW,
          int TC,
          typename SCALE_T>
void xFMultiply(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat1,
                xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat2,
                xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_mat,
                SCALE_T scale,
                uint8_t filter_width,
                uint16_t img_height,
                uint16_t img_width) {
    img_width = img_width >> XF_BITSHIFT(NPC);

    XF_SNAME(IN_WW) word_a, word_b;
    XF_SNAME(OUT_WW) out_word;

    // Per-lane scratch buffers, fully partitioned for the unrolled lane loop.
    XF_PTNAME(IN_DEPTH) src_buf1[(1 << XF_BITSHIFT(NPC))], src_buf2[(1 << XF_BITSHIFT(NPC))];
    XF_PTNAME(OUT_DEPTH) dst_buf[(1 << XF_BITSHIFT(NPC))];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=src_buf1 complete dim=1
    #pragma HLS ARRAY_PARTITION variable=src_buf2 complete dim=1
    #pragma HLS ARRAY_PARTITION variable=dst_buf complete dim=1
    // clang-format on

    uint16_t shift = 0;
    uint16_t npc = XF_NPIXPERCYCLE(NPC);

Row_Loop:
    for (ap_uint<13> row = 0; row < img_height; row++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        #pragma HLS LOOP_FLATTEN off
        // clang-format on
    Col_Loop:
        for (ap_uint<13> col = 0; col < img_width; col++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
            #pragma HLS pipeline
            // clang-format on

            // Fetch one word from each operand and split both into lanes.
            word_a = _src_mat1.read(row * img_width + col);
            word_b = _src_mat2.read(row * img_width + col);
            xfExtractPixels<NPC, IN_WW, IN_DEPTH>(src_buf1, word_a, 0);
            xfExtractPixels<NPC, IN_WW, IN_DEPTH>(src_buf2, word_b, 0);

            for (ap_uint<9> k = 0; k < (1 << XF_BITSHIFT(NPC)); k++) {
                // clang-format off
                #pragma HLS UNROLL
                // clang-format on
                XF_PTNAME(IN_DEPTH) lhs = src_buf1[k];
                XF_PTNAME(IN_DEPTH) rhs = src_buf2[k];

                // TODO (carried over from the original author):
                // "I included only for filter 3x3".
                if (filter_width != XF_FILTER_7X7) {
                    dst_buf[k] = (lhs * rhs) >> scale;
                } else {
                    // 7x7 path: drop 9 low bits of each operand first.
                    int16_t lhs_reduced = lhs >> 9;
                    int16_t rhs_reduced = rhs >> 9;
                    dst_buf[k] = (lhs_reduced * rhs_reduced) >> scale;
                }
            }

            out_word = 0;
            xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(dst_buf, out_word, 0, npc, shift);
            // shift is reset after every pack; presumably xfPackPixels
            // advances it through a reference parameter - confirm.
            shift = 0;
            _dst_mat.write(row * img_width + col, (out_word));
        } // Col_Loop
    }     // Row_Loop
}
// xFMultiply
/**
 * Zero out every 32-bit field of a matrix that does not exceed a threshold.
 *
 * Each word read from _src_mat is split into 32-bit fields. A field is kept
 * when it is greater than `threshold` (compared as an int against a value of
 * type XF_PTNAME(DEPTH)) and replaced by 0 otherwise. The fields are then
 * OR-ed back together, each shifted by its lane number times
 * XF_PIXELDEPTH(XF_32UP), and the word is written to _dst_mat at the same
 * linear index.
 *
 * Template parameters:
 *   SRC_T, COLS - forwarded to the xf::cv::Mat types.
 *   ROWS        - also the trip-count hint of the row loop.
 *   DEPTH       - selects the type the threshold is converted to.
 *   NPC         - number of 32-bit fields handled per word
 *                 (1 << XF_BITSHIFT(NPC) on unpack, XF_NPIXPERCYCLE(NPC) on pack).
 *   WORDWIDTH   - selects the word type XF_SNAME(WORDWIDTH).
 *   TC          - trip-count hint of the column loop.
 *
 * Parameters:
 *   _src_mat   - matrix that is read.
 *   _dst_mat   - matrix that is written.
 *   threshold  - values must be strictly greater than this to be kept.
 *   img_height - number of row iterations.
 *   img_width  - width before the XF_BITSHIFT(NPC) right shift.
 */
template <int SRC_T, int ROWS, int COLS, int DEPTH, int NPC, int WORDWIDTH, int TC>
void xFThreshold(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
                 xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _dst_mat,
                 uint16_t threshold,
                 uint16_t img_height,
                 uint16_t img_width) {
    img_width = img_width >> XF_BITSHIFT(NPC);

    XF_SNAME(WORDWIDTH) in_word;
    XF_PTNAME(DEPTH) thresh = threshold;
    int res[(1 << XF_BITSHIFT(NPC))];
    ap_uint<8> STEP = XF_PIXELDEPTH(XF_32UP);

Row_Loop:
    for (uint16_t row = 0; row < img_height; row++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        #pragma HLS LOOP_FLATTEN off
        // clang-format on
    Col_Loop:
        for (uint16_t col = 0; col < img_width; col++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=TC max=TC
            #pragma HLS pipeline
            // clang-format on
            in_word = _src_mat.read(row * img_width + col);

            // Unpack: walk the word in 32-bit steps, thresholding each field.
            ap_uint<9> lane = 0;
        Threshold_Loop:
            for (ap_uint<9> bit = 0; bit < (32 << XF_BITSHIFT(NPC)); bit += 32) {
                // clang-format off
                #pragma HLS unroll
                // clang-format on
                int field = in_word.range(bit + 31, bit);
                res[lane++] = (uint32_t)((field > thresh) ? field : 0);
            }

            // Re-pack: OR every surviving field back into its lane position.
            uint16_t npc = XF_NPIXPERCYCLE(NPC);
            uint16_t shift = 0;
            XF_SNAME(WORDWIDTH) out_word = 0;
            for (ap_uint<9> idx = 0; idx < npc; idx++) {
                // clang-format off
                #pragma HLS unroll
                // clang-format on
                uint32_t tmp = res[idx];
                out_word = out_word | (((XF_SNAME(WORDWIDTH))tmp) << (shift * STEP));
                shift++;
            }

            _dst_mat.write(row * img_width + col, out_word);
        } // Col_Loop
    }     // Row_Loop
}
// xFThreshold
/**
 * Per-value score computed from three input matrices:
 *
 *     score = (val1 * val2 - val3 * val3) - ((trace * trace * thresold) >> 16)
 *     with trace = val1 + val2
 *
 * where val1, val2 and val3 are the values at the same lane of the words
 * read from _src1_mat, _src2_mat and _src3_mat. The results of all lanes of
 * a word are packed with xfPackPixels and written to _dst_mat at the same
 * linear index.
 *
 * _filter_type selects the scaling: for XF_FILTER_7X7 the inputs are used
 * as-is (shift by 0) and the final score is shifted right by 8; for any
 * other value the inputs are shifted right by 2 and the score is stored
 * unshifted.
 *
 * Template parameters:
 *   SRC_T, DST_T, COLS  - forwarded to the xf::cv::Mat types.
 *   ROWS                - also the trip-count hint of the row loop.
 *   IN_DEPTH, OUT_DEPTH - select the per-value types XF_PTNAME(...);
 *                         XF_PIXELDEPTH(IN_DEPTH) is the bit stride per lane.
 *   NPC                 - sizes dst_buf and the number of lanes per word.
 *   IN_WW, OUT_WW       - select the word types XF_SNAME(...).
 *   TC                  - trip-count hint of the column loop.
 *
 * Parameters:
 *   _src1_mat, _src2_mat - supply val1 and val2 (multiplied and summed).
 *   _src3_mat            - supplies val3 (squared and subtracted).
 *   _dst_mat             - matrix that is written.
 *   img_height           - number of row iterations.
 *   img_width            - width before the XF_BITSHIFT(NPC) right shift.
 *   thresold             - factor applied to trace^2 before the >> 16
 *                          (name spelled as in the signature).
 *   _filter_type         - compared against XF_FILTER_7X7, see above.
 */
template <int SRC_T, int DST_T, int ROWS, int COLS, int IN_DEPTH, int OUT_DEPTH, int NPC, int IN_WW, int OUT_WW, int TC>
void xFComputeScore(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src1_mat,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src2_mat,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src3_mat,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst_mat,
uint16_t img_height,
uint16_t img_width,
uint16_t thresold,
uint8_t _filter_type) {
// Column iterations per row after the NPC shift.
img_width = img_width >> XF_BITSHIFT(NPC);
XF_SNAME(IN_WW) tmp_src1, tmp_src2, tmp_src3;
XF_PTNAME(OUT_DEPTH) dst_buf[(1 << XF_BITSHIFT(NPC))];
XF_SNAME(OUT_WW) tmp_dst;
// Bit stride between consecutive lanes of an input word.
ap_uint<8> in_step = XF_PIXELDEPTH(IN_DEPTH);
// Total number of bits walked per input word (stride times lane count).
uint16_t in_sumloop = (XF_PIXELDEPTH(IN_DEPTH) << XF_BITSHIFT(NPC));
uint16_t npc = XF_NPIXPERCYCLE(NPC);
// tmp_res[0] = val1 * val2, tmp_res[1] = val3 * val3.
ap_int<32> tmp_res[2];
ap_int<32> det_res;
// NOTE(review): trace_res holds val1 + val2 in 17 bits; if
// XF_PTNAME(IN_DEPTH) is wider than 16 bits the sum is truncated here -
// confirm this is intended.
ap_int<17> trace_res;
// clang-format off
#pragma HLS RESOURCE variable=trace_res core=DSP48 latency=2
// clang-format on
// trace_res squared.
ap_int<50> trace_res1;
// clang-format off
#pragma HLS RESOURCE variable=trace_res1 core=DSP48 latency=2
// clang-format on
// (trace_res squared * thresold) >> 16, stored in 32 bits.
ap_int<32> trace_res2;
// clang-format off
#pragma HLS RESOURCE variable=trace_res2 core=DSP48 latency=2
// clang-format on
ap_uint<13> row, col;
// i walks bit offsets inside a word, j counts lanes written to dst_buf.
ap_uint<10> i, j;
uint16_t shift = 0;
Row_Loop:
for (row = 0; row < img_height; row++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
#pragma HLS LOOP_FLATTEN off
// clang-format on
Col_Loop:
for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
// Read one word from each of the three inputs at the same linear index.
tmp_src1 = _src1_mat.read(row * img_width + col);
tmp_src2 = _src2_mat.read(row * img_width + col);
tmp_src3 = _src3_mat.read(row * img_width + col);
Determinant_Loop:
for (i = 0, j = 0; i < in_sumloop; i += in_step) {
// clang-format off
#pragma HLS unroll
// clang-format on
XF_PTNAME(IN_DEPTH) val1, val2, val3;
// Slice the in_step-bit lane starting at bit i out of each word.
val1 = tmp_src1.range(i + (in_step - 1), i);
val2 = tmp_src2.range(i + (in_step - 1), i);
val3 = tmp_src3.range(i + (in_step - 1), i);
// TODO:shift according to box filter
if (_filter_type == XF_FILTER_7X7) {
// Shift by 0 leaves the values unchanged (placeholder, see TODO above).
val1 = val1 >> 0;
val2 = val2 >> 0;
val3 = val3 >> 0;
} else {
val1 = val1 >> 2;
val2 = val2 >> 2;
val3 = val3 >> 2;
}
/* Compute determinant */
tmp_res[0] = ((ap_int<32>)val1 * (ap_int<32>)val2);
tmp_res[1] = ((ap_int<32>)val3 * (ap_int<32>)val3);
det_res = tmp_res[0] - tmp_res[1];
/* Compute trace */
trace_res = val1 + val2;
/* Compute det - k*trace^2 */
trace_res1 = trace_res * trace_res;
// thresold plays the role of k; the product is scaled down by 2^16.
trace_res2 = (trace_res1 * thresold) >> 16;
if (_filter_type == XF_FILTER_7X7) {
// 7x7 path: the score is additionally shifted right by 8 before the cast.
dst_buf[j++] = (XF_PTNAME(OUT_DEPTH))((det_res - trace_res2) >> 8);
} else {
dst_buf[j++] = (XF_PTNAME(OUT_DEPTH))(det_res - trace_res2);
}
}
tmp_dst = 0;
xfPackPixels<NPC, OUT_WW, OUT_DEPTH>(&dst_buf[0], tmp_dst, 0, npc, shift);
// shift is reset after every pack; presumably xfPackPixels advances it
// through a reference parameter - confirm.
shift = 0;
_dst_mat.write(row * img_width + col, (tmp_dst)); // Write the packed scores to the output matrix
}
}
}
// xFComputeScore
#endif // _XF_HARRIS_UTILS_H_