Program Listing for File xf_remap.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/imgproc/xf_remap.hpp)
/*
* Copyright 2019 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _XF_REMAP_HPP_
#define _XF_REMAP_HPP_
#ifndef __cplusplus
#error C++ is needed to include this header.
#endif
#include "../common/xf_common.hpp"
#include "../common/xf_utility.hpp"
#include "hls_stream.h"
#include <algorithm>
#define XF_RESIZE_INTER_TAB_SIZE 32
#define XF_RESIZE_INTER_BITS 5
namespace xf {
namespace cv {
/**
 * Nearest-neighbour remap kernel.
 *
 * Streams `src` row by row into a WIN_ROW-deep cyclic row buffer and, once
 * WIN_ROW / 2 rows have been buffered, produces one output pixel per loop
 * iteration: the buffered source pixel at ((int)(mapx + 0.5f),
 * (int)(mapy + 0.5f)), or 0 when that coordinate is rejected by the range
 * check below.
 *
 * Template parameters (as used in this function):
 *   SRC_T, DST_T, MAP_T - element types of the source, destination and map Mats.
 *   PLANES   - number of 8-bit planes packed/unpacked per pixel on the URAM path.
 *   WIN_ROW  - number of source rows kept in the cyclic buffer.
 *   ROWS, COLS - compile-time upper bounds for `rows` / `cols` (asserted in
 *                non-synthesis builds).
 *   NPC      - pixels-per-clock parameter forwarded to XF_TNAME / Mat.
 *   USE_URAM - true: store pixels in `bufUram`; false: store them in `buf`.
 *
 * @param src   source image, read sequentially with an incrementing index
 * @param dst   destination image, written sequentially with an incrementing index
 * @param mapx  per-output-pixel source x coordinate (read via read_float)
 * @param mapy  per-output-pixel source y coordinate (read via read_float)
 * @param rows  run-time image height
 * @param cols  run-time image width
 */
template <int SRC_T, int DST_T, int PLANES, int MAP_T, int WIN_ROW, int ROWS, int COLS, int NPC, bool USE_URAM>
void xFRemapNNI(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
xf::cv::Mat<MAP_T, ROWS, COLS, NPC>& mapx,
xf::cv::Mat<MAP_T, ROWS, COLS, NPC>& mapy,
uint16_t rows,
uint16_t cols) {
// Row buffer used when USE_URAM is false: one slot per buffered row (i % WIN_ROW).
XF_TNAME(DST_T, NPC) buf[WIN_ROW][COLS];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=buf complete dim=1
// clang-format on
// Most recently read source pixel.
XF_TNAME(SRC_T, NPC) s;
// Linear indices into src, the two maps, and dst respectively.
int read_pointer_src = 0, read_pointer_map = 0, write_pointer = 0;
// Row buffer used when USE_URAM is true: per plane and per buffered row,
// eight 8-bit samples are packed into each 64-bit word (column index j / 8).
ap_uint<64> bufUram[PLANES][WIN_ROW][(COLS + 7) / 8];
// clang-format off
#pragma HLS resource variable=bufUram core=RAM_T2P_URAM latency=2
// clang-format on
//#pragma HLS dependence variable=bufUram inter false
// clang-format off
#pragma HLS ARRAY_PARTITION variable=bufUram complete dim=2
#pragma HLS ARRAY_PARTITION variable=bufUram complete dim=1
// clang-format on
// Staging registers holding the pixels of the 8-column group currently being
// packed into one bufUram word (slot j % 8 is refreshed each iteration).
XF_TNAME(SRC_T, NPC) sx8[8];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=sx8 complete dim=1
// clang-format on
// Output pixel for the current iteration.
XF_TNAME(DST_T, NPC) d;
#ifndef __SYNTHESIS__
assert(rows <= ROWS);
assert(cols <= COLS);
#endif
// Output generation lags the input by ishift rows so that rows below the
// current output row are already buffered.
int ishift = WIN_ROW / 2;
// r[slot] records the loop row index i most recently stored in that buffer
// slot; it is used to check that a requested source row is still resident.
int r[WIN_ROW] = {};
const int row_tripcount = ROWS + WIN_ROW;
loop_height:
// Runs ishift extra rows past the image so the last output rows get produced.
for (int i = 0; i < rows + ishift; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN OFF
#pragma HLS LOOP_TRIPCOUNT min=1 max=row_tripcount
// clang-format on
loop_width:
for (int j = 0; j < cols; j++) {
// clang-format off
#pragma HLS PIPELINE II=1
#pragma HLS dependence variable=buf inter false
#pragma HLS dependence variable=bufUram inter false
#pragma HLS dependence variable=r inter false
#pragma HLS LOOP_TRIPCOUNT min=1 max=COLS
// clang-format on
// Input side: consume one source pixel while source rows remain.
if (i < rows && j < cols) {
s = src.read(read_pointer_src++);
if (USE_URAM) {
sx8[j % 8] = s;
// Rewrite the whole 64-bit word for this 8-column group from sx8, one
// plane at a time; `bit` is the offset of plane pl inside a pixel.
for (int pl = 0, bit = 0; pl < PLANES; pl++, bit += 8) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
for (int k = 0; k < 8; k++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
bufUram[pl][i % WIN_ROW][j / 8](k * 8 + 7, k * 8) = sx8[k](bit + 7, bit);
}
}
}
}
// NOTE(review): the two stores below run on every iteration, including the
// trailing i >= rows iterations, where s still holds the last pixel read.
if (!USE_URAM) buf[i % WIN_ROW][j] = s;
r[i % WIN_ROW] = i;
// Output side: starts once ishift rows have been consumed.
if (i >= ishift) {
// Both maps are read at the same index; the index advances once per output pixel.
float mx_fl = mapx.read_float(read_pointer_map);
float my_fl = mapy.read_float(read_pointer_map++);
// Add 0.5 and truncate to pick the nearest integer source coordinate.
int x = (int)(mx_fl + 0.5f);
int y = (int)(my_fl + 0.5f);
// Accept only coordinates inside the image whose source row is still held
// in the cyclic buffer (slot y % WIN_ROW was last written for row y).
// The y >= 0 test comes first so r[] is never indexed with a negative y.
bool in_range = (y >= 0 && my_fl <= (rows - 1) && r[y % WIN_ROW] == y && x >= 0 && mx_fl <= (cols - 1));
// The inner if/else selects the storage; the final else pairs with in_range.
if (in_range)
if (USE_URAM) {
// Unpack the 8-pixel word containing column x, plane by plane, then
// select the pixel at position x % 8.
XF_TNAME(DST_T, NPC) dx9[8];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=dx9 complete dim=1
// clang-format on
for (int pl = 0, bit = 0; pl < PLANES; pl++, bit += 8) {
ap_uint<72> tempvalue[PLANES]; //
tempvalue[pl] = bufUram[pl][y % WIN_ROW][x / 8];
for (int k = 0; k < 8; k++) {
dx9[k](bit + 7, bit) = tempvalue[pl].range(k * 8 + 7, k * 8);
}
}
d = dx9[x % 8];
} else
d = buf[y % WIN_ROW][x];
else
// Rejected coordinates produce a zero pixel.
d = 0;
dst.write(write_pointer++, d);
}
}
}
}
#define TWO_POW_16 65536
/**
 * Bilinear remap kernel.
 *
 * Streams `src` row by row into a cyclic row buffer and, once WIN_ROW / 2 rows
 * have been buffered, produces one output pixel per loop iteration. Each map
 * coordinate is converted to fixed point with XF_RESIZE_INTER_BITS fractional
 * bits; the integer part (x, y) selects a 2x2 neighbourhood
 * (d00 = (y, x), d01 = (y, x+1), d10 = (y+1, x), d11 = (y+1, x+1)) and the
 * fractional part (iu, iv) yields the four weights k00..k11. Coordinates that
 * fail the range check produce 0.
 *
 * Template parameters (as used in this function):
 *   SRC_T, DST_T, MAP_T - element types of the source, destination and map Mats.
 *   PLANES   - number of 8-bit channels interpolated per pixel.
 *   WIN_ROW  - number of source rows tracked in the cyclic buffer; must be even
 *              (asserted in non-synthesis builds).
 *   ROWS, COLS - compile-time upper bounds for `rows` / `cols`.
 *   NPC      - pixels-per-clock parameter forwarded to XF_TNAME / Mat.
 *   USE_URAM - true: use the packed 3x3-block `bufUram` storage;
 *              false: use the parity-banked `buf` storage.
 *
 * @param src   source image, read sequentially with an incrementing index
 * @param dst   destination image, written sequentially with an incrementing index
 * @param mapx  per-output-pixel source x coordinate (read via read_float)
 * @param mapy  per-output-pixel source y coordinate (read via read_float)
 * @param rows  run-time image height
 * @param cols  run-time image width
 */
template <int SRC_T, int DST_T, int PLANES, int MAP_T, int WIN_ROW, int ROWS, int COLS, int NPC, bool USE_URAM>
void xFRemapLI(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
               xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
               xf::cv::Mat<MAP_T, ROWS, COLS, NPC>& mapx,
               xf::cv::Mat<MAP_T, ROWS, COLS, NPC>& mapy,
               uint16_t rows,
               uint16_t cols) {
    // Non-URAM storage, indexed [row / 2][row % 2][col / 2][col % 2]. The two
    // parity dimensions are fully partitioned so the four pixels of any 2x2
    // neighbourhood live in four different banks.
    // Add one to always get zero for boundary interpolation. Maybe need
    // initialization here?
    XF_TNAME(DST_T, NPC)
    buf[WIN_ROW / 2 + 1][2][COLS / 2 + 1][2]; // AK,ZoTech: static added for
                                              // initialization, otherwise X are
                                              // generated in co-sim.
// clang-format off
#pragma HLS array_partition complete variable=buf dim=2
#pragma HLS array_partition complete variable=buf dim=4
    // clang-format on
    // Most recently read source pixel (0 outside the image, see below).
    XF_TNAME(SRC_T, NPC) s;
    // URAM storage granularity is 3x3-pel block in 2x2-pixel picture grid, it
    // fits to one URAM word
    ap_uint<72> bufUram[PLANES][(WIN_ROW + 1) / 2][(COLS + 1) / 2];
// clang-format off
#pragma HLS resource variable=bufUram core=RAM_T2P_URAM latency=2
#pragma HLS array_partition complete variable=bufUram dim=1
    // clang-format on
    // Holds one packed 3-pixel row segment per column pair so it can be
    // combined with the following row when the URAM word is written.
    ap_uint<24> lineBuf[PLANES][(COLS + 1) / 2];
// clang-format off
#pragma HLS resource variable=lineBuf core=RAM_S2P_BRAM latency=1
#pragma HLS array_partition complete variable=lineBuf dim=1
    // clang-format on
    XF_TNAME(MAP_T, NPC) mx;
    XF_TNAME(MAP_T, NPC) my;
    // Linear indices into src, the two maps, and dst respectively.
    int read_pointer_src = 0, read_pointer_map = 0, write_pointer = 0;
#ifndef __SYNTHESIS__
    assert(rows <= ROWS);
    assert(cols <= COLS);
#endif
    // Output generation lags the input by ishift rows.
    int ishift = WIN_ROW / 2;
    // r1/r2 hold identical contents: the loop row index i most recently stored
    // in each buffer slot. Two copies are kept so residency of row y and row
    // y + 1 can be checked from separate arrays.
    int r1[WIN_ROW] = {};
    int r2[WIN_ROW] = {};
    const int row_tripcount = ROWS + WIN_ROW;
    // Column / row phase flags for the URAM packing state machine.
    bool store_col = 1;
    bool store_row = 1;
    ap_uint<16> temppix[PLANES];     // = 0;
    ap_uint<24> pixval[PLANES];      // = 0;
    ap_uint<48> pixval_2[PLANES];    // = 0;
    ap_uint<24> prev_pixval[PLANES]; // = 0;
    ap_uint<72> tempbuf[PLANES];
    for (int pl = 0; pl < PLANES; pl++) {
// clang-format off
#pragma HLS UNROLL
        // clang-format on
        temppix[pl] = 0;
        pixval[pl] = 0;
        pixval_2[pl] = 0;
        prev_pixval[pl] = 0;
        tempbuf[pl] = 0;
    }
loop_height:
    // Runs ishift extra rows past the image so the last output rows get produced.
    for (int i = 0; i < rows + ishift; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN OFF
#pragma HLS LOOP_TRIPCOUNT min=1 max=row_tripcount
        // clang-format on
        // Initialize for every row
        store_col = 1;
    loop_width:
        // One extra column (j == cols) is processed per row; it feeds a zero
        // pixel into the buffers and produces no output.
        for (int j = 0; j < cols + 1; j++) {
// clang-format off
#pragma HLS PIPELINE II=1
#pragma HLS dependence variable=buf inter false
#pragma HLS dependence variable=bufUram inter false
#pragma HLS dependence variable=bufUram intra false
#pragma HLS dependence variable=r1 inter false
#pragma HLS dependence variable=r2 inter false
#pragma HLS LOOP_TRIPCOUNT min=1 max=COLS+2
            // clang-format on
            // Input side: read a source pixel inside the image, else use 0.
            if (i < rows && j < cols) {
                s = src.read(read_pointer_src++);
            } else {
                s = 0;
            }
            if (USE_URAM && i < rows) {
                // `bit` is the offset of plane pl inside a pixel.
                for (int pl = 0, bit = 0; pl < PLANES; pl++, bit += 8) {
// clang-format off
#pragma HLS UNROLL
                    // clang-format on
                    if (store_col && (j != 0)) {
                        // Assemble three horizontally adjacent samples: the two
                        // saved in temppix plus the current one.
                        pixval[pl].range(15, 0) = temppix[pl];
                        pixval[pl].range(23, 16) = s.range(bit + 7, bit);
                        if (store_row) {
                            // Store every 3rd row in a buffer
                            // NOTE(review): store_row toggles once per row
                            // iteration, so this branch is taken on alternate rows.
                            lineBuf[pl][(j / 2) - 1] = pixval[pl];
                        } else {
                            // Read the stored row and fill in
                            prev_pixval[pl] = lineBuf[pl][(j / 2) - 1];
                        }
                        if (i != 0) {
                            if (store_row) {
                                // Top 24 bits of the word written for block row (i - 1) / 2.
                                bufUram[pl][((i - 1) / 2) % (WIN_ROW / 2)][(j / 2) - 1].range(71, 48) = pixval[pl];
                            } else {
                                // Lower 48 bits: buffered row segment followed by the current one.
                                pixval_2[pl].range(23, 0) = prev_pixval[pl];
                                pixval_2[pl].range(47, 24) = pixval[pl];
                                bufUram[pl][((i - 1) / 2) % (WIN_ROW / 2)][(j / 2) - 1].range(47, 0) = pixval_2[pl];
                            }
                        }
                    }
                    // Remember the current sample in the low or high byte of
                    // temppix depending on the column phase.
                    if (store_col) {
                        temppix[pl].range(7, 0) = s.range(bit + 7, bit);
                    } else {
                        temppix[pl].range(15, 8) = s.range(bit + 7, bit);
                    }
                }
                store_col = !(store_col);
            }
            if (!USE_URAM) {
                // Both branches perform the same store; the split on row parity
                // is kept from the original code.
                if ((i % WIN_ROW) % 2) {
                    buf[(i % WIN_ROW) / 2][(i % WIN_ROW) % 2][j / 2][j % 2] = s; //.range(bit+7,bit);
                } else {
                    buf[(i % WIN_ROW) / 2][(i % WIN_ROW) % 2][j / 2][j % 2] = s; //.range(bit+7,bit);
                }
            }
            r1[i % WIN_ROW] = i;
            r2[i % WIN_ROW] = i;
            // Output side: starts once ishift rows have been consumed; the
            // extra column j == cols produces no output.
            if (i >= ishift && j < cols) {
                // Both maps are read at the same index; it advances once per output pixel.
                float x_fl = mapx.read_float(read_pointer_map);
                float y_fl = mapy.read_float(read_pointer_map++);
                int x_fix = (int)((float)x_fl * (float)XF_RESIZE_INTER_TAB_SIZE); // mapx data in
                // A16.XF_RESIZE_INTER_TAB_SIZE
                // format
                int y_fix = (int)((float)y_fl * (float)XF_RESIZE_INTER_TAB_SIZE); // mapy data in
                // A16.XF_RESIZE_INTER_TAB_SIZE
                // format
                // Integer and fractional parts of the fixed-point coordinates.
                int x = x_fix >> XF_RESIZE_INTER_BITS;
                int y = y_fix >> XF_RESIZE_INTER_BITS;
                int x_frac = x_fix & (XF_RESIZE_INTER_TAB_SIZE - 1);
                int y_frac = y_fix & (XF_RESIZE_INTER_TAB_SIZE - 1);
                int ynext = y + 1;
                // Fractional offsets as pure-fraction fixed-point values.
                ap_ufixed<XF_RESIZE_INTER_BITS, 0> iu, iv;
                iu(XF_RESIZE_INTER_BITS - 1, 0) = x_frac;
                iv(XF_RESIZE_INTER_BITS - 1, 0) = y_frac;
                // Note that the range here is larger than expected by 1 horizontal and
                // 1 vertical pixel, to allow
                // Interpolating at the edge of the image
                // The y >= 0 test comes first so r1/r2 are never indexed with a
                // negative y.
                bool in_range = (y >= 0 && y_fl <= (rows - 1) && r1[y % WIN_ROW] == y && r2[ynext % WIN_ROW] == ynext &&
                                 x >= 0 && x_fl <= (cols - 1));
                int xa0, xa1, ya0, ya1;
// The buffer is essentially cyclic partitioned, but we have
// to do this manually because HLS can't figure it out.
// The code below is weird, but it is this code expanded.
//  if ((y % WIN_ROW) % 2) {
//                     // Case 1, where y hits in bank 1 and ynext in bank 0
//                     ya0 = (ynext%WIN_ROW)/2;
//                     ya1 = (y%WIN_ROW)/2;
//                 } else {
//                     // The simpler case, where y hits in bank 0 and ynext
//                     hits in bank 1
//                     ya0 = (y%WIN_ROW)/2;
//                     ya1 = (ynext%WIN_ROW)/2;
//                 }
// Both cases reduce to this, if WIN_ROW is a multiple of two.
#ifndef __SYNTHESIS__
                assert(((WIN_ROW & 1) == 0) && "WIN_ROW must be a multiple of two");
#endif
                // xa0/ya0 address the even-parity bank, xa1/ya1 the odd-parity
                // bank. For an odd coordinate the even bank must supply the
                // *next* column/row, hence the "+ x % 2" / "+ y % 2".
                xa0 = x / 2 + x % 2;
                xa1 = x / 2;
                ya0 = (y / 2 + y % 2) % (WIN_ROW / 2);
                ya1 = (y / 2) % (WIN_ROW / 2);
                XF_TNAME(DST_T, NPC) d;
                for (int ch = 0; ch < PLANES; ch++) {
                    XF_CTUNAME(DST_T, NPC) d00, d01, d10, d11;
                    if (in_range) {
                        if (USE_URAM) {
                            // Unpack the 3x3 block word and pick the 2x2
                            // neighbourhood at offset (y % 2, x % 2) inside it.
                            XF_TNAME(DST_T, NPC) d3x3[9];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=d3x3 complete
                            // clang-format on
                            tempbuf[ch] = bufUram[ch][ya1][xa1];
                            for (int k = 0; k < 9; k++) {
                                d3x3[k] = tempbuf[ch].range(k * 8 + 7, k * 8);
                            }
                            d00 = d3x3[(y % 2) * 3 + x % 2];
                            d01 = d3x3[(y % 2) * 3 + x % 2 + 1];
                            d10 = d3x3[(y % 2 + 1) * 3 + x % 2];
                            d11 = d3x3[(y % 2 + 1) * 3 + x % 2 + 1];
                        } else {
                            // One read from each of the four parity banks:
                            // (even row, even col), (even row, odd col),
                            // (odd row, even col), (odd row, odd col).
                            d00 = buf[ya0][0][xa0][0].range((ch + 1) * 8 - 1, ch * 8);
                            d01 = buf[ya0][0][xa1][1].range((ch + 1) * 8 - 1, ch * 8);
                            d10 = buf[ya1][1][xa0][0].range((ch + 1) * 8 - 1, ch * 8);
                            d11 = buf[ya1][1][xa1][1].range((ch + 1) * 8 - 1, ch * 8);
                            if (x % 2) {
                                // Odd x: the even-column bank holds column x + 1,
                                // so exchange left and right in both rows.
                                // Equivalent to std::swap(d00,d01); std::swap(d10,d11);
                                int t = d00;
                                d00 = d01;
                                d01 = t;
                                int t2 = d10;
                                d10 = d11;
                                d11 = t2; // was "d11 = d10", which discarded the saved value
                            }
                            if (y % 2) {
                                // Odd y: the even-row bank holds row y + 1, so
                                // exchange top and bottom in both columns.
                                // Equivalent to std::swap(d00,d10); std::swap(d01,d11);
                                int t = d00;
                                d00 = d10;
                                d10 = t;
                                int t2 = d01;
                                d01 = d11;
                                d11 = t2; // was "d11 = d01", which discarded the saved value
                            }
                            // if(x == (cols-1))
                            //{
                            //  d01=0;d11=0;
                            //}
                        }
                    }
                    // Bilinear weights; they sum to exactly 1 (asserted below).
                    ap_ufixed<2 * XF_RESIZE_INTER_BITS + 1, 1> k01 = (1 - iv) * (iu); // iu-iu*iv
                    ap_ufixed<2 * XF_RESIZE_INTER_BITS + 1, 1> k10 = (iv) * (1 - iu); // iv-iu*iv
                    ap_ufixed<2 * XF_RESIZE_INTER_BITS + 1, 1> k11 = (iv) * (iu);     // iu*iv
                    ap_ufixed<2 * XF_RESIZE_INTER_BITS + 1, 1> k00 =
                        1 - iv - k01; //(1-iv)*(1-iu) = 1-iu-iv+iu*iv = 1-iv-k01
#ifndef __SYNTHESIS__
                    assert(k00 + k01 + k10 + k11 == 1);
#endif
                    // Write the interpolated channel, or 0 for rejected coordinates.
                    if (in_range)
                        d.range((ch + 1) * 8 - 1, ch * 8) = d00 * k00 + d01 * k01 + d10 * k10 + d11 * k11;
                    else
                        d.range((ch + 1) * 8 - 1, ch * 8) = 0;
                }
                dst.write(write_pointer++, d);
            }
        }
        store_row = !(store_row);
    }
}
/**
 * Top-level remap entry point: checks the template configuration (in
 * non-synthesis builds) and dispatches to the nearest-neighbour or bilinear
 * kernel according to INTERPOLATION_TYPE.
 *
 * Template parameters:
 *   WIN_ROWS           - forwarded to the kernels as their WIN_ROW parameter.
 *   INTERPOLATION_TYPE - XF_INTERPOLATION_NN or XF_INTERPOLATION_BILINEAR;
 *                        any other value trips an assert in non-synthesis
 *                        builds and produces no output.
 *   SRC_T, DST_T       - must be equal, and XF_8UC1 or XF_8UC3.
 *   MAP_T              - must be XF_32FC1.
 *   ROWS, COLS         - compile-time Mat dimensions.
 *   NPC                - must be XF_NPPC1.
 *   USE_URAM           - forwarded to the kernels (default false).
 *
 * @param _src_mat       source image; its rows/cols members give the run-time size
 * @param _remapped_mat  destination image
 * @param _mapx_mat      x-coordinate map
 * @param _mapy_mat      y-coordinate map
 */
template <int WIN_ROWS,
          int INTERPOLATION_TYPE,
          int SRC_T,
          int MAP_T,
          int DST_T,
          int ROWS,
          int COLS,
          int NPC,
          bool USE_URAM = false>
void remap(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src_mat,
           xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _remapped_mat,
           xf::cv::Mat<MAP_T, ROWS, COLS, NPC>& _mapx_mat,
           xf::cv::Mat<MAP_T, ROWS, COLS, NPC>& _mapy_mat) {
// clang-format off
#pragma HLS inline off
#pragma HLS dataflow
// clang-format on
#ifndef __SYNTHESIS__
    assert((MAP_T == XF_32FC1) && "The MAP_T must be XF_32FC1");
    assert(((SRC_T == XF_8UC1) || (SRC_T == XF_8UC3)) && "The SRC_T must be XF_8UC1 or XF_8UC3");
    // Previously the second operand tested SRC_T, so DST_T was never compared
    // against XF_8UC3 by this check.
    assert(((DST_T == XF_8UC1) || (DST_T == XF_8UC3)) && "The DST_T must be XF_8UC1 or XF_8UC3");
    assert((SRC_T == DST_T) && "Source Mat type and Destination Mat type must be the same");
    assert((NPC == XF_NPPC1) && "The NPC must be XF_NPPC1");
#endif
    // Run-time image size is taken from the source Mat.
    uint16_t rows = _src_mat.rows;
    uint16_t cols = _src_mat.cols;
    if (INTERPOLATION_TYPE == XF_INTERPOLATION_NN) {
        xFRemapNNI<SRC_T, DST_T, XF_CHANNELS(SRC_T, NPC), MAP_T, WIN_ROWS, ROWS, COLS, NPC, USE_URAM>(
            _src_mat, _remapped_mat, _mapx_mat, _mapy_mat, rows, cols);
    } else if (INTERPOLATION_TYPE == XF_INTERPOLATION_BILINEAR) {
        xFRemapLI<SRC_T, DST_T, XF_CHANNELS(SRC_T, NPC), MAP_T, WIN_ROWS, ROWS, COLS, NPC, USE_URAM>(
            _src_mat, _remapped_mat, _mapx_mat, _mapy_mat, rows, cols);
    } else {
#ifndef __SYNTHESIS__
        assert(((INTERPOLATION_TYPE == XF_INTERPOLATION_NN) || (INTERPOLATION_TYPE == XF_INTERPOLATION_BILINEAR)) &&
               "The INTERPOLATION_TYPE must be either XF_INTERPOLATION_NN or "
               "XF_INTERPOLATION_BILINEAR");
#endif
    }
}
} // namespace cv
} // namespace xf
#endif //_XF_REMAP_HPP_