Program Listing for File xf_bilateral_filter.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/imgproc/xf_bilateral_filter.hpp)
/*
* Copyright 2019 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __BILATERAL_FILTER__
#define __BILATERAL_FILTER__
#include "hls_stream.h"
#include "ap_int.h"
#include "../common/xf_common.hpp"
#include "../common/xf_utility.hpp"
#include <cmath>
#include <cstdlib>
namespace xf {
namespace cv {
// Reciprocal helper: returns 1 / in_val narrowed to float.
// The numerator is the double literal 1.0, so the division itself is carried out
// in double precision before the quotient is converted back to float.
static float xFBilateralFloatInv(float in_val) {
    const double reciprocal = 1.0 / in_val;
    return static_cast<float>(reciprocal);
}
// Product helper: multiplies the two float operands and returns the float result.
static float xFBilateralFloatMul(float in_val1, float in_val2) {
    const float product = in_val1 * in_val2;
    return product;
}
// Exponential helper: thin wrapper that returns expf(in_val).
static float xFBilateralExpf(float in_val) {
    return expf(in_val);
}
// Computes one bilateral-filtered output pixel from a single WIN_SZ x WIN_SZ window.
//
// The top-left WIN_SZ x WIN_SZ region of src_buf is copied into a fully partitioned local
// window. For every 8-bit plane, the taps whose squared distance from the window centre does
// not exceed (WIN_SZ >> 1)^2 are accumulated as weight * pixel; the normalised sum, rounded by
// adding 0.5, is written to bits [k + 7 : k] of OutputValues[0].
//
// Parameters:
//   OutputValues           - result array; only element 0 is written.
//   src_buf                - window source; columns 0 .. WIN_SZ - 1 of every row are read.
//   win_size               - not referenced in this function (the template parameter WIN_SZ is used).
//   exp_lut_sigma_color    - weight table, indexed as
//                            [tap counter >> 1][distances_array_revmap[squared distance]][intensity difference].
//   distances_array_revmap - maps a squared distance to its slot in the second LUT dimension.
//
// The template parameter WIN_SZ_SQ is not referenced in this function.
template <int NPC, int DEPTH, int PLANES, int WIN_SZ, int WIN_SZ_SQ, int NUM_DIST, int FPRES_SC>
static void xFBilateralProc(XF_DTUNAME(DEPTH, NPC) OutputValues[XF_NPIXPERCYCLE(NPC)],
                            XF_DTUNAME(DEPTH, NPC) src_buf[WIN_SZ][XF_NPIXPERCYCLE(NPC) + (WIN_SZ - 1)],
                            ap_uint<8> win_size,
                            ap_ufixed<FPRES_SC, 1> exp_lut_sigma_color[WIN_SZ * WIN_SZ][NUM_DIST][256 * PLANES],
                            ap_int<8> distances_array_revmap[(WIN_SZ >> 1) * (WIN_SZ >> 1) + 1]) {
    // clang-format off
    #pragma HLS INLINE
    // clang-format on

    // Local copy of the window so every tap can be accessed independently.
    XF_DTUNAME(DEPTH, NPC) pixel_mat[WIN_SZ][WIN_SZ];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=pixel_mat complete dim=1
    #pragma HLS ARRAY_PARTITION variable=pixel_mat complete dim=2
    // clang-format on
    for (int i = 0; i < WIN_SZ; i++) {
        for (int j = 0; j < WIN_SZ; j++) {
            // clang-format off
            #pragma HLS UNROLL
            // clang-format on
            pixel_mat[i][j] = src_buf[i][j];
        }
    }

    // Values that do not depend on the plane or tap being processed.
    const ap_uint<8> sub = WIN_SZ >> 1;    // window radius
    const ap_uint<8> sub_sq = sub * sub;   // squared radius: taps farther away are skipped
    XF_DTUNAME(DEPTH, NPC) center_px = pixel_mat[WIN_SZ >> 1][WIN_SZ >> 1];

    // NOTE: the weight is held as ap_ufixed<16, 1> regardless of FPRES_SC.
    ap_ufixed<16, 1> color_weights;
    ap_ufixed<32, 16> weight_sum = 0;
    ap_ufixed<32, 16> px_sum = 0;
    int buf_indx = 0;
    ap_int<24> diffpx;

    // c counts planes, k is the bit offset of the current plane inside the pixel word.
    for (ap_uint<5> c = 0, k = 0; c < PLANES; c++, k += 8) {
        // clang-format off
        #pragma HLS unroll
        // clang-format on
        weight_sum = 0;
        px_sum = 0;
        buf_indx = 0;
        for (ap_uint<5> i = 0; i < WIN_SZ; i++) {
            for (ap_uint<5> j = 0; j < WIN_SZ; j++) {
                // clang-format off
                #pragma HLS unroll
                // clang-format on
                ap_int<8> ei = i - sub;
                ap_int<8> ej = j - sub;
                ap_uint<8> comp = ei * ei;
                comp += ej * ej;

                // Taps outside the circular footprint contribute nothing and do not
                // advance the tap counter.
                if (comp > sub_sq) {
                    continue;
                }

                if (PLANES == 3) {
                    // Sum of the per-channel absolute differences to the centre pixel.
                    diffpx = std::abs(pixel_mat[i][j].range(7, 0) - center_px.range(7, 0)) +
                             std::abs(pixel_mat[i][j].range(15, 8) - center_px.range(15, 8)) +
                             std::abs(pixel_mat[i][j].range(23, 16) - center_px.range(23, 16));
                } else {
                    diffpx = (pixel_mat[i][j] - center_px);
                }
                if (diffpx < 0) {
                    diffpx = -diffpx;
                }

                if (comp == 0) {
                    // Centre tap always has weight 1.
                    color_weights = 1;
                } else {
                    // Two consecutive taps share one LUT replica (buf_indx >> 1).
                    color_weights = (ap_ufixed<16, 1>)(
                        exp_lut_sigma_color[buf_indx >> 1][distances_array_revmap[comp]][diffpx]);
                }
                px_sum += (color_weights) * (ap_uint<16>)(pixel_mat[i][j].range(k + 7, k));
                weight_sum += color_weights;
                buf_indx++;
            }
        }

        // Normalise in float and round by adding 0.5 before the value is stored in the plane bits.
        float val = (float)1.0 / (float)weight_sum;
        float mul_val = (float)px_sum * (float)val;
        OutputValues[0].range(k + 7, k) = ((ap_ufixed<32, 16>)mul_val + (ap_ufixed<32, 16>)(0.5));
    }
}
// Processes one row step of the bilateral filter.
//
// For every column step the function
//   1. reads one word from _src_mat into line-buffer row row_ind[win_size - 1]
//      (only while row < img_height and col is inside the image),
//   2. gathers WIN_SZ words for the current column from buf through row_ind,
//   3. updates the sliding window src_buf, replicating values at the left/right
//      edges and at the bottom border,
//   4. calls xFBilateralProc and writes the result to _dst_mat.
//
// Two code paths exist: NPC != XF_NPPC1 (several pixels per word) and the else
// branch (one pixel per word).
//
// Parameters:
//   _src_mat / _dst_mat    - input / output images, accessed with read(rd_ind++) and write(wr_ind++, ...).
//   buf                    - line buffer of WIN_SZ rows of packed words.
//   src_buf                - sliding window; cleared to 0 at the start of every call.
//   OutputValues           - per-pixel results handed to xfPackPixels / written directly.
//   P0                     - packed output word (multi-pixel path only).
//   img_width, img_height  - run-time image size.
//   shift_x                - bit offset passed to xfPackPixels; reset to 0 before each pack.
//   row_ind                - maps window row position to the physical row of buf.
//   row                    - current row counter supplied by the caller.
//   win_size               - run-time window size.
//   exp_lut_sigma_color, distances_array_revmap - forwarded unchanged to xFBilateralProc.
//   rd_ind, wr_ind         - running read / write indices, updated in place.
//
// The template parameters WORDWIDTH is not referenced in this function.
template <int TYPE,
int ROWS,
int COLS,
int PLANES,
int DEPTH,
int NPC,
int WORDWIDTH,
int TC,
int WIN_SZ,
int WIN_SZ_SQ,
int NUM_DIST,
int FPRES_SC>
static void ProcessBilateralNXN(xf::cv::Mat<TYPE, ROWS, COLS, NPC>& _src_mat,
xf::cv::Mat<TYPE, ROWS, COLS, NPC>& _dst_mat,
XF_TNAME(DEPTH, NPC) buf[WIN_SZ][(COLS >> XF_BITSHIFT(NPC))],
XF_DTUNAME(DEPTH, NPC) src_buf[WIN_SZ][XF_NPIXPERCYCLE(NPC) + (WIN_SZ - 1)],
XF_DTUNAME(DEPTH, NPC) OutputValues[XF_NPIXPERCYCLE(NPC)],
XF_TNAME(DEPTH, NPC) & P0,
uint16_t img_width,
uint16_t img_height,
uint16_t& shift_x,
ap_uint<13> row_ind[WIN_SZ],
ap_uint<13> row,
ap_uint<8> win_size,
ap_ufixed<FPRES_SC, 1> exp_lut_sigma_color[WIN_SZ * WIN_SZ][NUM_DIST][256 * PLANES],
ap_int<8> distances_array_revmap[(WIN_SZ >> 1) * (WIN_SZ >> 1) + 1],
int& rd_ind,
int& wr_ind) {
// clang-format off
#pragma HLS INLINE
// clang-format on
// One packed word per window row for the column currently being processed.
// Declared outside Col_Loop, so values persist from one column step to the next.
XF_TNAME(DEPTH, NPC) buf_cop[WIN_SZ];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=buf_cop complete dim=1
// clang-format on
uint16_t npc = XF_NPIXPERCYCLE(NPC);
// Number of extra column steps run after the last image column:
// WIN_SZ >> 1 when one pixel is processed per step, otherwise 1.
uint16_t col_loop_var = 0;
if (npc == 1) {
col_loop_var = (WIN_SZ >> 1);
} else {
col_loop_var = 1;
}
// Clear the whole sliding window before the row is processed.
for (int extract_px = 0; extract_px < WIN_SZ; extract_px++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
#pragma HLS unroll
// clang-format on
for (int ext_copy = 0; ext_copy < npc + WIN_SZ - 1; ext_copy++) {
// clang-format off
#pragma HLS unroll
// clang-format on
src_buf[extract_px][ext_copy] = 0;
}
}
Col_Loop:
for (ap_uint<13> col = 0; col < ((img_width) >> XF_BITSHIFT(NPC)) + col_loop_var; col++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=TC
#pragma HLS pipeline
#pragma HLS LOOP_FLATTEN OFF
// clang-format on
// Fetch the next input word into the newest line-buffer row while inside the image.
if (row < img_height && col < (img_width >> XF_BITSHIFT(NPC)))
buf[row_ind[win_size - 1]][col] = _src_mat.read(rd_ind++); // data[rd_ind++]; // Read data
if (NPC != XF_NPPC1) {
// ---- Multi-pixel-per-word path ----
for (int copy_buf_var = 0; copy_buf_var < WIN_SZ; copy_buf_var++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
#pragma HLS UNROLL
// clang-format on
// Bottom border: once row is past the last image row, window rows below the
// image reuse the line at row_ind[win_size - 1 - (row - (img_height - 1))].
// NOTE(review): this branch reads buf[...][col] without the col-range guard used
// in the else branch below; col can equal img_width >> XF_BITSHIFT(NPC) here,
// which is past the last valid column when img_width == COLS - confirm intended.
if ((row > (img_height - 1)) && (copy_buf_var > (win_size - 1 - (row - (img_height - 1))))) {
buf_cop[copy_buf_var] = buf[(row_ind[win_size - 1 - (row - (img_height - 1))])][col];
} else {
if (col < (img_width >> XF_BITSHIFT(NPC)))
buf_cop[copy_buf_var] = buf[(row_ind[copy_buf_var])][col];
else
// Past the last image column: keep the word from the previous column step.
buf_cop[copy_buf_var] = buf_cop[copy_buf_var];
}
}
XF_DTUNAME(DEPTH, NPC) src_buf_temp_copy[WIN_SZ][XF_NPIXPERCYCLE(NPC)];
XF_DTUNAME(DEPTH, NPC) src_buf_temp_copy_extract[XF_NPIXPERCYCLE(NPC)];
// Split each packed word into individual pixels (xfExtractPixels is defined elsewhere;
// presumably it fills src_buf_temp_copy_extract with the npc pixels of toextract - verify).
for (int extract_px = 0; extract_px < WIN_SZ; extract_px++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
#pragma HLS unroll
// clang-format on
XF_TNAME(DEPTH, NPC) toextract = buf_cop[extract_px];
xfExtractPixels<NPC, XF_WORDWIDTH(DEPTH, NPC), XF_DEPTH(DEPTH, NPC)>(src_buf_temp_copy_extract,
toextract, 0);
// xfExtractPixels(src_buf_temp_copy_extract, toextract, 0);
for (int ext_copy = 0; ext_copy < npc; ext_copy++) {
// clang-format off
#pragma HLS unroll
// clang-format on
src_buf_temp_copy[extract_px][ext_copy] = src_buf_temp_copy_extract[ext_copy];
}
}
// Fill the right-hand WIN_SZ >> 1 columns of the window: with the first pixels of the
// new word, or, on the extra step after the last image column, with a copy of the
// window column at index npc + (WIN_SZ >> 1) - 1 (right-edge replication).
for (int extract_px = 0; extract_px < WIN_SZ; extract_px++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
// clang-format on
for (int col_warp = 0; col_warp < (WIN_SZ >> 1); col_warp++) {
// clang-format off
#pragma HLS UNROLL
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
// clang-format on
if (col == img_width >> XF_BITSHIFT(NPC)) {
src_buf[extract_px][col_warp + npc + (WIN_SZ >> 1)] =
src_buf[extract_px][npc + (WIN_SZ >> 1) - 1];
} else {
src_buf[extract_px][col_warp + npc + (WIN_SZ >> 1)] = src_buf_temp_copy[extract_px][col_warp];
}
}
}
// First column step: fill the left npc + (WIN_SZ >> 1) window columns with the first
// pixel of the row (left-edge replication).
if (col == 0) {
for (int extract_px = 0; extract_px < WIN_SZ; extract_px++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
// clang-format on
for (int col_warp = 0; col_warp < npc + (WIN_SZ >> 1); col_warp++) {
// clang-format off
#pragma HLS UNROLL
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
// clang-format on
src_buf[extract_px][col_warp] = src_buf_temp_copy[extract_px][0];
}
}
}
// Run the filter once per pixel of the word: copy the WIN_SZ x WIN_SZ sub-window that
// starts at column offset applymedian and pass it to xFBilateralProc.
// (The "median" in these names is presumably inherited from another filter - verify.)
XF_DTUNAME(DEPTH, NPC) src_buf_temp_med_apply[WIN_SZ][XF_NPIXPERCYCLE(NPC) + (WIN_SZ - 1)];
for (int applymedian = 0; applymedian < npc; applymedian++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
for (int copyi = 0; copyi < WIN_SZ; copyi++) {
for (int copyj = 0; copyj < WIN_SZ; copyj++) {
src_buf_temp_med_apply[copyi][copyj] = src_buf[copyi][copyj + applymedian];
}
}
XF_DTUNAME(DEPTH, NPC) OutputValues_percycle[XF_NPIXPERCYCLE(NPC)];
xFBilateralProc<NPC, DEPTH, PLANES, WIN_SZ, WIN_SZ_SQ, NUM_DIST, FPRES_SC>(
OutputValues_percycle, src_buf_temp_med_apply, WIN_SZ, exp_lut_sigma_color, distances_array_revmap);
OutputValues[applymedian] = OutputValues_percycle[0];
}
// Nothing is written on the first column step; from col >= 1 on, the npc results are
// packed into P0 and written out.
if (col >= 1) {
shift_x = 0;
P0 = 0;
xfPackPixels<NPC, XF_WORDWIDTH(DEPTH, NPC), XF_DEPTH(DEPTH, NPC)>(OutputValues, P0, 0, npc, shift_x);
_dst_mat.write(wr_ind++, P0);
}
// Slide the window left by npc pixels ...
for (int extract_px = 0; extract_px < WIN_SZ; extract_px++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
// clang-format on
for (int col_warp = 0; col_warp < (WIN_SZ >> 1); col_warp++) {
// clang-format off
#pragma HLS UNROLL
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
// clang-format on
src_buf[extract_px][col_warp] = src_buf[extract_px][col_warp + npc];
}
}
// ... and place the pixels of the current word behind the shifted part.
for (int extract_px = 0; extract_px < WIN_SZ; extract_px++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
// clang-format on
for (int col_warp = 0; col_warp < npc; col_warp++) {
// clang-format off
#pragma HLS UNROLL
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
// clang-format on
src_buf[extract_px][col_warp + (WIN_SZ >> 1)] = src_buf_temp_copy[extract_px][col_warp];
}
}
} else {
// ---- Single-pixel-per-word path ----
for (int copy_buf_var = 0; copy_buf_var < WIN_SZ; copy_buf_var++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
#pragma HLS UNROLL
// clang-format on
// Same bottom-border selection as in the multi-pixel path.
// NOTE(review): buf[...][col] is read for every col of Col_Loop, which runs up to
// img_width + (WIN_SZ >> 1) - 1; the value is only used when col < img_width
// (see below), but the read itself is past the row end when img_width == COLS -
// confirm this is acceptable for C simulation.
if ((row > (img_height - 1)) && (copy_buf_var > (win_size - 1 - (row - (img_height - 1))))) {
buf_cop[copy_buf_var] = buf[(row_ind[win_size - 1 - (row - (img_height - 1))])][col];
} else {
buf_cop[copy_buf_var] = buf[(row_ind[copy_buf_var])][col];
}
}
// Load the newest window column; past the image width, replicate the previous column.
for (int extract_px = 0; extract_px < WIN_SZ; extract_px++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
#pragma HLS UNROLL
// clang-format on
if (col < img_width) {
src_buf[extract_px][win_size - 1] = buf_cop[extract_px];
} else {
src_buf[extract_px][win_size - 1] = src_buf[extract_px][win_size - 2];
}
}
xFBilateralProc<NPC, DEPTH, PLANES, WIN_SZ, WIN_SZ_SQ, NUM_DIST, FPRES_SC>(
OutputValues, src_buf, win_size, exp_lut_sigma_color, distances_array_revmap);
// The first WIN_SZ >> 1 column steps only prime the window; output starts afterwards.
if (col >= (WIN_SZ >> 1)) {
_dst_mat.write(wr_ind++, OutputValues[0]);
}
// Shift the window left by one column. On the first column step every column is
// instead filled with the newest column (left-edge replication).
for (int wrap_buf = 0; wrap_buf < WIN_SZ; wrap_buf++) {
// clang-format off
#pragma HLS UNROLL
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
// clang-format on
for (int col_warp = 0; col_warp < WIN_SZ - 1; col_warp++) {
// clang-format off
#pragma HLS UNROLL
#pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
// clang-format on
if (col == 0) {
src_buf[wrap_buf][col_warp] = src_buf[wrap_buf][win_size - 1];
} else {
src_buf[wrap_buf][col_warp] = src_buf[wrap_buf][col_warp + 1];
}
}
}
}
} // Col_Loop
}
// Drives the bilateral filter over the whole image using a WIN_SZ-row line buffer.
//
// Sequence:
//   1. row_ind[] is set to the identity mapping 0 .. win_size - 1.
//   2. Input rows are read from _src_mat into the line-buffer rows starting at
//      row_ind[win_size >> 1] and ending just before row_ind[win_size - 1].
//   3. Line-buffer rows 0 .. (WIN_SZ >> 1) - 1 are filled with copies of row
//      row_ind[win_size >> 1] (top border).
//   4. For row = win_size >> 1 .. img_height + (win_size >> 1) - 1, ProcessBilateralNXN
//      is called and row_ind[] is rotated by one position.
//
// exp_lut_sigma_color and distances_array_revmap are forwarded unchanged.
template <int TYPE,
          int ROWS,
          int COLS,
          int PLANES,
          int DEPTH,
          int NPC,
          int WORDWIDTH,
          int TC,
          int WIN_SZ,
          int WIN_SZ_SQ,
          int NUM_DIST,
          int FPRES_SC>
static void xFBilateralFilterNXN(xf::cv::Mat<TYPE, ROWS, COLS, NPC>& _src_mat,
                                 xf::cv::Mat<TYPE, ROWS, COLS, NPC>& _dst_mat,
                                 ap_uint<8> win_size,
                                 uint16_t img_height,
                                 uint16_t img_width,
                                 ap_ufixed<FPRES_SC, 1> exp_lut_sigma_color[WIN_SZ * WIN_SZ][NUM_DIST][256 * PLANES],
                                 ap_int<8> distances_array_revmap[(WIN_SZ >> 1) * (WIN_SZ >> 1) + 1]) {
    // clang-format off
    #pragma HLS INLINE
    // clang-format on

    // Maps a window row position to the physical row of the line buffer.
    ap_uint<13> row_ind[WIN_SZ];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=row_ind complete dim=1
    // clang-format on

    uint16_t shift_x = 0;
    ap_uint<13> row;
    ap_uint<13> col;
    int rd_ind = 0;
    int wr_ind = 0;

    // Per-pixel filter results for one column step.
    XF_DTUNAME(DEPTH, NPC) OutputValues[XF_NPIXPERCYCLE(NPC)];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=OutputValues complete dim=1
    // clang-format on

    // Sliding pixel window shared by all column steps of a row.
    XF_DTUNAME(DEPTH, NPC) src_buf[WIN_SZ][XF_NPIXPERCYCLE(NPC) + (WIN_SZ - 1)];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=src_buf complete dim=1
    #pragma HLS ARRAY_PARTITION variable=src_buf complete dim=2
    // clang-format on

    // Packed output word and the line buffer of packed input words.
    XF_TNAME(DEPTH, NPC) P0;
    XF_TNAME(DEPTH, NPC) buf[WIN_SZ][(COLS >> XF_BITSHIFT(NPC))];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=buf complete dim=1
    #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
    // clang-format on

    // Start with the identity row mapping.
    for (int slot = 0; slot < win_size; slot++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
        // clang-format on
        row_ind[slot] = slot;
    }

    // Pre-load the first input rows into the middle of the line buffer.
read_lines:
    for (int line = row_ind[win_size >> 1]; line < row_ind[win_size - 1]; line++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
        // clang-format on
        for (col = 0; col < (img_width >> XF_BITSHIFT(NPC)); col++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=1 max=TC
            #pragma HLS pipeline
            #pragma HLS LOOP_FLATTEN OFF
            // clang-format on
            buf[line][col] = _src_mat.read(rd_ind++);
        }
    }

    // Top border: the rows above the first image row replicate that first row.
    for (col = 0; col < (img_width >> XF_BITSHIFT(NPC)); col++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=1 max=TC
        // clang-format on
        for (int border_row = 0; border_row < (WIN_SZ >> 1); border_row++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
            #pragma HLS UNROLL
            // clang-format on
            buf[border_row][col] = buf[row_ind[win_size >> 1]][col];
        }
    }

Row_Loop:
    for (row = (win_size >> 1); row < img_height + (win_size >> 1); row++) {
        // clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=1 max=ROWS
        // clang-format on
        P0 = 0;
        ProcessBilateralNXN<TYPE, ROWS, COLS, PLANES, DEPTH, NPC, WORDWIDTH, TC, WIN_SZ, WIN_SZ_SQ, NUM_DIST,
                            FPRES_SC>(_src_mat, _dst_mat, buf, src_buf, OutputValues, P0, img_width, img_height,
                                      shift_x, row_ind, row, win_size, exp_lut_sigma_color,
                                      distances_array_revmap, rd_ind, wr_ind);

        // Rotate the row mapping: the oldest line becomes the slot for the next input row.
        ap_uint<13> recycled_line = row_ind[0];
        for (int slot = 0; slot < WIN_SZ - 1; slot++) {
            // clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=3 max=WIN_SZ
            #pragma HLS UNROLL
            // clang-format on
            row_ind[slot] = row_ind[slot + 1];
        }
        row_ind[win_size - 1] = recycled_line;
    } // Row_Loop
}
// Builds the weight tables for the bilateral filter and runs the NxN filter.
//
// Steps:
//   1. Compute 1 / sigma_color^2 and 1 / sigma_space^2.
//   2. Count the taps of the WIN_SZ x WIN_SZ window that lie inside the circular
//      footprint (squared distance <= (WIN_SZ >> 1)^2); the count includes the centre tap.
//   3. List the distinct non-zero squared distances and build the reverse map from a
//      squared distance to its position in that list (-1 where no entry exists).
//   4. Fill exp_lut_sigma_color with
//      exp(-0.5 * diff^2 / sigma_color^2) * exp(-0.5 * dist^2 / sigma_space^2).
//   5. Call xFBilateralFilterNXN.
//
// _border_type is not referenced in this function (the checks below are disabled).
template <int TYPE, int ROWS, int COLS, int PLANES, int DEPTH, int NPC, int WORDWIDTH, int WIN_SZ>
static void xFbilateralFilterKernel(xf::cv::Mat<TYPE, ROWS, COLS, NPC>& _src_mat,
                                    xf::cv::Mat<TYPE, ROWS, COLS, NPC>& _dst_mat,
                                    int _border_type,
                                    uint16_t imgheight,
                                    uint16_t imgwidth,
                                    float sigma_color,
                                    float sigma_space) {
    // clang-format off
    #pragma HLS INLINE OFF
    #pragma HLS ALLOCATION function instances=xf::cv::xFBilateralFloatMul limit=1
    #pragma HLS ALLOCATION function instances=xf::cv::xFBilateralFloatInv limit=1
    #pragma HLS ALLOCATION function instances=xf::cv::xFBilateralExpf limit=1
    // clang-format on

    // Disabled parameter checks, kept for reference:
    // assert(_border_type == XF_BORDER_REPLICATE && "Only XF_BORDER_REPLICATE is supported");
    // assert(((imgheight <= ROWS ) && (imgwidth <= COLS)) && "ROWS and COLS should be greater than input image");
    // assert(((WIN_SZ == 7 ) || (WIN_SZ == 5) || (WIN_SZ == 3 )) && "Window Size should only be 3, 5 or 7");

    // Inverse squares of both sigmas.
    float sigma_color_sqinv = xf::cv::xFBilateralFloatInv(xf::cv::xFBilateralFloatMul(sigma_color, sigma_color));
    float sigma_space_sqinv = xf::cv::xFBilateralFloatInv(xf::cv::xFBilateralFloatMul(sigma_space, sigma_space));

    // Number of distinct non-zero squared distances kept for the window size:
    //   3x3 -> 1, 5x5 -> 3, any other size -> 6 (the 7x7 case: 1, 2, 4, 5, 8, 9).
    // TODO: Come up with a formula to get the number of distances
    const int NUM_DIST = (WIN_SZ == 3) ? 1 : ((WIN_SZ == 5) ? 3 : 6);

    // Total bit width of a stored weight (ap_ufixed<FPRES_SC, 1>: one integer bit).
    const int FPRES_SC = 16;

    // The filter footprint is circular: taps whose squared distance from the centre
    // exceeds (WIN_SZ >> 1)^2 are ignored. Count the taps that remain.
    ap_uint<8> sub = WIN_SZ >> 1;
    ap_uint<8> sub_sq = sub * sub;
    int comp_sz = 0;
    for (int win_r = 0; win_r < WIN_SZ; win_r++) {
        for (int win_c = 0; win_c < WIN_SZ; win_c++) {
            int dist_sq = (win_r - sub) * (win_r - sub) + (win_c - sub) * (win_c - sub);
            if (dist_sq <= sub_sq) {
                comp_sz++;
            }
        }
    }

    // Collect the non-zero squared distances that fall inside the footprint.
    ap_uint<8> distances_array[(WIN_SZ >> 1) * (WIN_SZ >> 1) + 1];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=distances_array complete dim=1
    // clang-format on
    int dist_index = 0;
    for (int dy = 0; dy <= (WIN_SZ >> 1); dy++) {
        for (int dx = 0; dx <= dy; dx++) {
            int dist_sq = (dy) * (dy) + (dx) * (dx);
            if (dist_sq <= sub_sq && dist_sq != 0) {
                distances_array[dist_index] = dist_sq;
                dist_index++;
            }
        }
    }

    // Reverse map: squared distance -> position in distances_array, or -1 when the
    // squared distance is not in the list. This lets the LUT hold NUM_DIST entries
    // instead of one entry per possible squared distance.
    ap_int<8> distances_array_revmap[(WIN_SZ >> 1) * (WIN_SZ >> 1) + 1];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=distances_array_revmap complete dim=1
    // clang-format on
    int index = 0;
    for (int dist_sq = 0; dist_sq <= sub_sq; dist_sq++) {
        if (distances_array[index] == dist_sq) {
            distances_array_revmap[dist_sq] = index;
            index++;
        } else {
            distances_array_revmap[dist_sq] = -1;
        }
    }

    // Weight table. The first dimension holds identical replicas (one per counted tap),
    // the second selects the spatial distance, the third the intensity difference.
    static const int array_size = comp_sz;
    ap_ufixed<FPRES_SC, 1> exp_lut_sigma_color[WIN_SZ * WIN_SZ][NUM_DIST][256 * PLANES];
    // clang-format off
    #pragma HLS ARRAY_PARTITION variable=exp_lut_sigma_color complete dim=1
    #pragma HLS ARRAY_PARTITION variable=exp_lut_sigma_color complete dim=2
    // clang-format on
    if (NPC == 8) {
        // clang-format off
        #pragma HLS ARRAY_PARTITION variable=exp_lut_sigma_color complete dim=3
        // clang-format on
    }

    for (unsigned int diff = 0; diff < (256 * PLANES); diff++) {
        ap_uint<32> jsq = (ap_uint<16>)diff * (ap_uint<16>)diff;
        for (int dist_slot = 0; dist_slot < NUM_DIST; dist_slot++) {
            // clang-format off
            #pragma HLS PIPELINE
            // clang-format on
            for (unsigned short replica = 0; replica < array_size; replica++) {
                // clang-format off
                #pragma HLS UNROLL
                // clang-format on
                // exp(-0.5 * diff^2 / sigma_color^2)
                float color_term = xf::cv::xFBilateralExpf(
                    xf::cv::xFBilateralFloatMul(-0.5, xf::cv::xFBilateralFloatMul(sigma_color_sqinv, jsq)));
                // exp(-0.5 * dist^2 / sigma_space^2)
                float space_term = xf::cv::xFBilateralExpf(xf::cv::xFBilateralFloatMul(
                    -0.5, xf::cv::xFBilateralFloatMul(sigma_space_sqinv, distances_array[dist_slot])));
                exp_lut_sigma_color[replica][dist_slot][diff] =
                    xf::cv::xFBilateralFloatMul(color_term, space_term);
            }
        }
    }

    xFBilateralFilterNXN<TYPE, ROWS, COLS, PLANES, DEPTH, NPC, WORDWIDTH,
                         (COLS >> (XF_BITSHIFT(NPC))) + (WIN_SZ >> 1), WIN_SZ, WIN_SZ * WIN_SZ, NUM_DIST, FPRES_SC>(
        _src_mat, _dst_mat, WIN_SZ, imgheight, imgwidth, exp_lut_sigma_color, distances_array_revmap);
}
// Public entry point of the bilateral filter.
//
// Forwards to xFbilateralFilterKernel with the channel count XF_CHANNELS(TYPE, NPC),
// the window size WINDOW_SIZE, BORDER_TYPE as the border argument, and the image
// height / width taken from _src_mat.rows / _src_mat.cols.
//
//   _src_mat    - input image.
//   _dst_mat    - output image.
//   sigma_color - passed through as the kernel's sigma_color.
//   sigma_space - passed through as the kernel's sigma_space.
template <int WINDOW_SIZE, int BORDER_TYPE, int TYPE, int ROWS, int COLS, int NPC = 1>
void bilateralFilter(xf::cv::Mat<TYPE, ROWS, COLS, NPC>& _src_mat,
                     xf::cv::Mat<TYPE, ROWS, COLS, NPC>& _dst_mat,
                     float sigma_color,
                     float sigma_space) {
    // clang-format off
    #pragma HLS INLINE OFF
    // clang-format on
    const int planes = XF_CHANNELS(TYPE, NPC);
    const int word_width = (TYPE << (XF_BITSHIFT(NPC)));
    xFbilateralFilterKernel<TYPE, ROWS, COLS, planes, TYPE, NPC, word_width, WINDOW_SIZE>(
        _src_mat, _dst_mat, BORDER_TYPE, _src_mat.rows, _src_mat.cols, sigma_color, sigma_space);
}
} // namespace cv
} // namespace xf
#endif