Program Listing for File xf_pyr_dense_optical_flow_median_blur.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/video/xf_pyr_dense_optical_flow_median_blur.hpp
)
/*
* Copyright 2019 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __XF_PYR_DENSE_OPTICAL_FLOW_MEDIAN_BLUR__
#define __XF_PYR_DENSE_OPTICAL_FLOW_MEDIAN_BLUR__
template <int NPC, int DEPTH, int WIN_SZ, int WIN_SZ_SQ, int FLOW_WIDTH, int FLOW_INT>
void auMedianProc(ap_fixed<FLOW_WIDTH, FLOW_INT> OutputValues[1],
ap_fixed<FLOW_WIDTH, FLOW_INT> src_buf[WIN_SZ][1 + (WIN_SZ - 1)],
ap_uint<8> win_size) {
// clang-format off
#pragma HLS INLINE
// clang-format on
ap_fixed<FLOW_WIDTH, FLOW_INT> array[WIN_SZ_SQ];
// #pragma HLS RESOURCE variable=array core=DSP48
// clang-format off
#pragma HLS ARRAY_PARTITION variable=array complete dim=1
// clang-format on
int array_ptr = 0;
// OutputValues[0] = src_buf[WIN_SZ>>1][WIN_SZ>>1];
// return;
Compute_Grad_Loop:
for (int copy_arr = 0; copy_arr < WIN_SZ; copy_arr++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=WIN_SZ max=WIN_SZ
#pragma HLS UNROLL
// clang-format on
for (int copy_in = 0; copy_in < WIN_SZ; copy_in++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=WIN_SZ max=WIN_SZ
#pragma HLS UNROLL
// clang-format on
array[array_ptr] = src_buf[copy_arr][copy_in];
array_ptr++;
}
}
// OutputValues[0] = array[(WIN_SZ_SQ)>>1];
// return;
auApplyMaskLoop:
for (int16_t j = 0; j <= WIN_SZ_SQ - 1; j++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=WIN_SZ max=WIN_SZ
// clang-format on
int16_t tmp = j & 0x0001;
if (tmp == 0) {
auSortLoop1:
for (int i = 0; i <= ((WIN_SZ_SQ >> 1) - 1); i++) // even sort
{
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=WIN_SZ max=WIN_SZ
#pragma HLS unroll
// clang-format on
int c = (i * 2);
int c1 = (c + 1);
if (array[c] < array[c1]) {
ap_fixed<FLOW_WIDTH, FLOW_INT> temp = array[c];
array[c] = array[c1];
array[c1] = temp;
}
}
}
else {
auSortLoop2:
for (int i = 0; i <= ((WIN_SZ_SQ >> 1) - 1); i++) // odd sort WINDOW_SIZE_H>>1 -1
{
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=WIN_SZ max=WIN_SZ
#pragma HLS unroll
// clang-format on
int c = (i * 2);
int c1 = (c + 1);
int c2 = (c + 2);
if (array[c1] < array[c2]) {
ap_fixed<FLOW_WIDTH, FLOW_INT> temp = array[c1];
array[c1] = array[c2];
array[c2] = temp;
}
}
}
}
// OutputValues[0] = auapplymedian3x3<DEPTH, WIN_SZ>(array, WIN_SZ);
OutputValues[0] = array[(WIN_SZ_SQ) >> 1];
return;
}
template <int ROWS,
int COLS,
int DEPTH,
int NPC,
int WORDWIDTH,
int TC,
int WIN_SZ,
int WIN_SZ_SQ,
int FLOW_WIDTH,
int FLOW_INT>
void ProcessMedian3x3(hls::stream<ap_fixed<FLOW_WIDTH, FLOW_INT> >& _src_mat,
hls::stream<ap_fixed<FLOW_WIDTH, FLOW_INT> >& _out_mat,
hls::stream<bool>& flag,
ap_fixed<FLOW_WIDTH, FLOW_INT> buf[WIN_SZ][(COLS >> NPC)],
ap_fixed<FLOW_WIDTH, FLOW_INT> src_buf[WIN_SZ][1 + (WIN_SZ - 1)],
ap_fixed<FLOW_WIDTH, FLOW_INT> OutputValues[1],
ap_fixed<FLOW_WIDTH, FLOW_INT>& P0,
uint16_t img_width,
uint16_t img_height,
uint16_t& shift_x,
ap_uint<13> row_ind[WIN_SZ],
ap_uint<13> row,
ap_uint<8> win_size) {
// clang-format off
#pragma HLS INLINE
// clang-format on
ap_fixed<FLOW_WIDTH, FLOW_INT> buf_cop[WIN_SZ];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=buf_cop complete dim=1
// clang-format on
uint16_t npc = 1;
Col_Loop:
for (ap_uint<16> col = 0; col < img_width + (WIN_SZ >> 1); col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=TC
#pragma HLS pipeline
#pragma HLS LOOP_FLATTEN OFF
// clang-format on
if (row < img_height && col < img_width)
buf[row_ind[win_size - 1]][col] = _src_mat.read(); // Read data
else
buf[row_ind[win_size - 1]][col] = 0;
for (int copy_buf_var = 0; copy_buf_var < WIN_SZ; copy_buf_var++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
#pragma HLS UNROLL
// clang-format on
if ((row > (img_height - 1)) && (copy_buf_var > (win_size - 1 - (row - (img_height - 1))))) {
buf_cop[copy_buf_var] = buf[(row_ind[win_size - 1 - (row - (img_height - 1))])][col];
} else {
buf_cop[copy_buf_var] = buf[(row_ind[copy_buf_var])][col];
}
}
// if(NPC == AU_NPPC8)
// {
// for(int extract_px=0;extract_px<win_size;extract_px++)
// {
// #pragma HLS LOOP_TRIPCOUNT min=WIN_SZ max=WIN_SZ
// auExtractPixels<NPC, WORDWIDTH, DEPTH>(&src_buf[extract_px][win_size-1], buf_cop[extract_px], 0);
// }
// }
// else
{
for (int extract_px = 0; extract_px < WIN_SZ; extract_px++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=WIN_SZ max=WIN_SZ
#pragma HLS UNROLL
// clang-format on
if (col < img_width) {
src_buf[extract_px][win_size - 1] = buf_cop[extract_px];
} else {
src_buf[extract_px][win_size - 1] = src_buf[extract_px][win_size - 2];
}
}
}
auMedianProc<NPC, DEPTH, WIN_SZ, WIN_SZ_SQ, FLOW_WIDTH, FLOW_INT>(OutputValues, src_buf, win_size);
if (col >= (win_size >> 1)) {
// auPackPixels<NPC, WORDWIDTH, DEPTH>(&OutputValues[0], P0, 0, 1, shift_x);
// shift_x = 0;
// P0 = 0;
// auPackPixels<NPC, WORDWIDTH, DEPTH>(&OutputValues[0], P0, 1, (npc-1), shift_x);
if (flag.read()) {
_out_mat.write(OutputValues[0]);
} else {
_out_mat.write(OutputValues[0]); // can use the disable medianblur filter flag at a later point
}
}
for (int wrap_buf = 0; wrap_buf < WIN_SZ; wrap_buf++) {
// clang-format off
#pragma HLS UNROLL
#pragma HLS LOOP_TRIPCOUNT min=WIN_SZ max=WIN_SZ
// clang-format on
for (int col_warp = 0; col_warp < WIN_SZ - 1; col_warp++) {
// clang-format off
#pragma HLS UNROLL
#pragma HLS LOOP_TRIPCOUNT min=WIN_SZ max=WIN_SZ
// clang-format on
if ((col >= (img_width - 1) - (win_size >> 1)) && (wrap_buf >= win_size >> 1)) {
src_buf[wrap_buf][col_warp] = src_buf[win_size - 1][col_warp];
}
if (col == 0) {
src_buf[wrap_buf][col_warp] = src_buf[wrap_buf][win_size - 1];
} else {
src_buf[wrap_buf][col_warp] = src_buf[wrap_buf][col_warp + 1];
}
}
}
} // Col_Loop
}
template <int ROWS,
int COLS,
int DEPTH,
int NPC,
int WORDWIDTH,
int TC,
int WIN_SZ,
int WIN_SZ_SQ,
int FLOW_WIDTH,
int FLOW_INT,
bool USE_URAM>
void auMedian3x3(hls::stream<ap_fixed<FLOW_WIDTH, FLOW_INT> >& _src_mat,
hls::stream<ap_fixed<FLOW_WIDTH, FLOW_INT> >& _out_mat,
hls::stream<bool>& flag,
ap_uint<8> win_size,
uint16_t img_height,
uint16_t img_width) {
ap_uint<13> row_ind[WIN_SZ];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=row_ind complete dim=1
// clang-format on
ap_uint<8> buf_size = 1 + (WIN_SZ - 1);
uint16_t shift_x = 0;
ap_uint<16> row, col;
ap_fixed<FLOW_WIDTH, FLOW_INT> OutputValues[1];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=OutputValues complete dim=1
// clang-format on
ap_fixed<FLOW_WIDTH, FLOW_INT> src_buf[WIN_SZ][1 + (WIN_SZ - 1)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=src_buf complete dim=1
#pragma HLS ARRAY_PARTITION variable=src_buf complete dim=2
// clang-format on
// src_buf1 et al merged
ap_fixed<FLOW_WIDTH, FLOW_INT> P0;
ap_fixed<FLOW_WIDTH, FLOW_INT> buf[WIN_SZ][(COLS >> NPC)];
if (USE_URAM) {
// clang-format off
#pragma HLS ARRAY_RESHAPE variable=buf complete dim=1
#pragma HLS RESOURCE variable=buf core=RAM_S2P_URAM
// clang-format on
} else {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=buf complete dim=1
#pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
// clang-format on
}
// initializing row index
for (int init_row_ind = 0; init_row_ind < win_size; init_row_ind++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
// clang-format on
row_ind[init_row_ind] = init_row_ind;
}
read_lines:
for (int init_buf = row_ind[win_size >> 1]; init_buf < row_ind[win_size - 1]; init_buf++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
// clang-format on
for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
#pragma HLS LOOP_FLATTEN OFF
// clang-format on
buf[init_buf][col] = _src_mat.read();
}
}
// takes care of top borders
for (col = 0; col < img_width; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=TC
// clang-format on
for (int init_buf = 0; init_buf<WIN_SZ>> 1; init_buf++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=WIN_SZ max=WIN_SZ
#pragma HLS UNROLL
// clang-format on
buf[init_buf][col] = buf[row_ind[win_size >> 1]][col];
}
}
Row_Loop:
for (row = (win_size >> 1); row < img_height + (win_size >> 1); row++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=ROWS
// clang-format on
// //initialize buffers to be sent for sorting
// for(int init_src=0;init_src<(win_size>>1);init_src++)
// {
// for(int init_src1=0;init_src1<win_size;init_src1++)
// {
// #pragma HLS UNROLL
// src_buf[init_src1][init_src] = buf[row_ind[init_src1]][0];
// }
// }
P0 = 0;
ProcessMedian3x3<ROWS, COLS, DEPTH, NPC, WORDWIDTH, TC, WIN_SZ, WIN_SZ_SQ, FLOW_WIDTH, FLOW_INT>(
_src_mat, _out_mat, flag, buf, src_buf, OutputValues, P0, img_width, img_height, shift_x, row_ind, row,
win_size);
// update indices
ap_uint<13> zero_ind = row_ind[0];
for (int init_row_ind = 0; init_row_ind < WIN_SZ - 1; init_row_ind++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=WIN_SZ max=WIN_SZ
#pragma HLS UNROLL
// clang-format on
row_ind[init_row_ind] = row_ind[init_row_ind + 1];
}
row_ind[win_size - 1] = zero_ind;
} // Row_Loop
}
template <int ROWS,
int COLS,
int DEPTH,
int NPC,
int WORDWIDTH,
int PIPELINEFLAG,
int WIN_SZ,
int WIN_SZ_SQ,
int FLOW_WIDTH,
int FLOW_INT,
bool USE_URAM>
void auMedianBlur(hls::stream<ap_fixed<FLOW_WIDTH, FLOW_INT> >& _src,
hls::stream<ap_fixed<FLOW_WIDTH, FLOW_INT> >& _dst,
hls::stream<bool>& flag,
ap_uint<8> win_size,
int _border_type,
uint16_t imgheight,
uint16_t imgwidth) {
// clang-format off
#pragma HLS inline off
// clang-format on
// #pragma HLS license key=IPAUVIZ_CV_BASIC
// assert(_border_type == AU_BORDER_CONSTANT && "Only AU_BORDER_CONSTANT is supported");
#ifndef __SYNTHESIS__
assert(((imgheight <= ROWS) && (imgwidth <= COLS)) && "ROWS and COLS should be greater than input image");
assert((win_size <= WIN_SZ) && "win_size must not be greater than WIN_SZ");
#endif
imgwidth = imgwidth >> NPC;
auMedian3x3<ROWS, COLS, DEPTH, NPC, WORDWIDTH, (COLS >> NPC) + (WIN_SZ >> 1), WIN_SZ, WIN_SZ_SQ, FLOW_WIDTH,
FLOW_INT, USE_URAM>(_src, _dst, flag, WIN_SZ, imgheight, imgwidth);
}
#endif