Program Listing for File xf_quantizationdithering.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/imgproc/xf_quantizationdithering.hpp)
/*
* Copyright 2020 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _XF_QUANTIZATION_DITHERING_
#define _XF_QUANTIZATION_DITHERING_
#include "ap_int.h"
#include "../common/xf_common.hpp"
#include "../common/xf_utility.hpp"
#include "../core/xf_math.h"
#include "hls_stream.h"
#include "math.h"
namespace xf {
namespace cv {
// Compile-time floor(log2(x)). Both x == 0 and x == 1 evaluate to 0.
// Kept as a single return expression so it stays usable as a C++11 constexpr
// function (it is evaluated inside enum initializers).
constexpr unsigned XF_LOG2(unsigned x) {
    return (x >= 2u) ? (XF_LOG2(x / 2u) + 1u) : 0u;
}
// Returns true when n is a positive power of two (1, 2, 4, ...), false for
// zero, negative values and everything else.
// The OUT_TYPE template parameter is not used by the body.
template <int OUT_TYPE>
bool isPowerOfTwo(int n) {
    // A positive value is a power of two exactly when it has a single set bit,
    // i.e. clearing its lowest set bit leaves nothing. The n > 0 test comes
    // first so n - 1 is never evaluated for non-positive inputs.
    const bool is_positive = (n > 0);
    return is_positive && ((n & (n - 1)) == 0);
}
/**
 * Quantizes every channel of stream_in down to SCALE_FACTOR levels and writes
 * the result to stream_out, diffusing the quantization error of each pixel to
 * its neighbours.
 *
 * Error diffusion as implemented below:
 *   - 7/16 of the previous pixel's error is added to the current pixel
 *     (same row).
 *   - A per-column offset, (e[c-2] + 5*e[c-1] + 3*e[c]) / 16, is stored in
 *     offset_buffer and added to the pixel of that column in the next row.
 *   NOTE(review): the 7/5/3/1 weights match the Floyd-Steinberg coefficients;
 *   confirm against the library documentation before relying on that name.
 *
 * Template parameters:
 *   IN_TYPE / OUT_TYPE     pixel types of the input / output Mat (8/10/12/16
 *                          bit, 1 or 3 channels; checked by assertion in
 *                          simulation builds only).
 *   ROWS / COLS            maximum image size the instance supports.
 *   SCALE_FACTOR           number of output levels; power of two,
 *                          <= 2^(output bit depth) and <= MAX_REPRESENTED_VALUE.
 *   MAX_REPRESENTED_VALUE  must equal 2^(input bit depth).
 *   NPC                    pixels per word; XF_NPPC1 or XF_NPPC2.
 *
 * Parameters:
 *   stream_in   source image; rows/cols are taken from this Mat.
 *   stream_out  destination image; one word is written per input word read.
 *
 * The column loop runs one iteration longer than the number of words per row.
 * That extra "flush" iteration only commits the last pending offset to
 * offset_buffer and resets the running error of the last pixel lane; it reads
 * no input and writes no output.
 */
template <int IN_TYPE, int OUT_TYPE, int ROWS, int COLS, int SCALE_FACTOR, int MAX_REPRESENTED_VALUE, int NPC>
void xf_QuatizationDithering(xf::cv::Mat<IN_TYPE, ROWS, COLS, NPC>& stream_in,
                             xf::cv::Mat<OUT_TYPE, ROWS, COLS, NPC>& stream_out) {
    enum {
        PLANES = XF_CHANNELS(IN_TYPE, NPC),
        PIXELWIDTH_IN = XF_PIXELWIDTH(IN_TYPE, NPC),
        BITDEPTH_IN = PIXELWIDTH_IN / PLANES,
        PIXELWIDTH_OUT = XF_PIXELWIDTH(OUT_TYPE, NPC),
        // NOTE(review): divides by the INPUT channel count, so this presumes
        // IN_TYPE and OUT_TYPE have the same number of channels - confirm.
        BITDEPTH_OUT = PIXELWIDTH_OUT / PLANES,
        QUANTIZATION_INTERVAL = MAX_REPRESENTED_VALUE / SCALE_FACTOR,
        LOG2_SCALE_FACTOR = XF_LOG2(SCALE_FACTOR),
        LOG2_QUANTIZATION_INTERVAL = XF_LOG2(QUANTIZATION_INTERVAL),
        LOG2_MAX_REPRESENTED_VALUE = XF_LOG2(MAX_REPRESENTED_VALUE),
        // One offset entry per word of a full-width row.
        DEPTH_OFFSETBUFFER = (COLS + (NPC - 1)) / NPC
    };
#ifndef __SYNTHESIS__
    assert(((stream_in.rows <= ROWS) && (stream_in.cols <= COLS)) &&
           "ROWS and COLS should be greater than input image");
    assert(((stream_out.rows <= ROWS) && (stream_out.cols <= COLS)) &&
           "ROWS and COLS should be greater than output image");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2)) && "The NPC must be XF_NPPC1 or XF_NPPC2");
    assert(((IN_TYPE == XF_8UC1) || (IN_TYPE == XF_8UC3) || (IN_TYPE == XF_10UC1) || (IN_TYPE == XF_10UC3) ||
            (IN_TYPE == XF_12UC1) || (IN_TYPE == XF_12UC3) || (IN_TYPE == XF_16UC1) || (IN_TYPE == XF_16UC3)) &&
           "The IN_TYPE must be XF_8UC1 or XF_8UC3 or XF_10UC1 or XF_10UC3 or "
           "XF_12UC1 or XF_12UC3 or XF_16UC1 or XF_16UC3");
    assert(((OUT_TYPE == XF_8UC1) || (OUT_TYPE == XF_8UC3) || (OUT_TYPE == XF_10UC1) || (OUT_TYPE == XF_10UC3) ||
            (OUT_TYPE == XF_12UC1) || (OUT_TYPE == XF_12UC3) || (OUT_TYPE == XF_16UC1) || (OUT_TYPE == XF_16UC3)) &&
           "The OUT_TYPE must be XF_8UC1 or XF_8UC3 or XF_10UC1 or XF_10UC3 or "
           "XF_12UC1 or XF_12UC3 or XF_16UC1 or XF_16UC3");
    bool scale_power_of_2 = isPowerOfTwo<OUT_TYPE>(SCALE_FACTOR);
    assert((scale_power_of_2 == 1) && "The SCALE_FACTOR must be power of two");
    assert((SCALE_FACTOR <= (1 << BITDEPTH_OUT)) &&
           "The SCALE_FACTOR must be "
           "less than or equal to "
           "2^(output pixel bit width)");
    assert((MAX_REPRESENTED_VALUE == (1 << BITDEPTH_IN)) &&
           "The MAX_REPRESENTED_VALUE must be 2^(input pixel bit width)");
    assert((SCALE_FACTOR <= MAX_REPRESENTED_VALUE) &&
           "The SCALE_FACTOR must be less than or equal to MAX_REPRESENTED_VALUE");
#endif
    unsigned short height = stream_in.rows;
    unsigned short width = stream_in.cols;
    // Number of NPC-wide words per image row.
    unsigned short imgInput_ncpr = (width + (NPC - 1)) >> XF_BITSHIFT(NPC);
    // One extra iteration per row to flush the last pending offset.
    short in_col_loop_bound = imgInput_ncpr + 1;

    // Offset buffer: error carried from the previous row, one entry per
    // channel, pixel lane and word column.
    ap_int<BITDEPTH_IN> offset_buffer[PLANES][NPC][DEPTH_OFFSETBUFFER];
// clang-format off
#pragma HLS ARRAY_PARTITION variable = offset_buffer complete dim = 1
#pragma HLS ARRAY_PARTITION variable = offset_buffer complete dim = 2
    // clang-format on

    // The first row has no row above it, so it starts with zero offsets.
    for (int col_index = 0; col_index < imgInput_ncpr; col_index++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min = COLS / NPC max = COLS / NPC
#pragma HLS PIPELINE II = 1
        // clang-format on
        for (int npc_index = 0; npc_index < NPC; npc_index++) {
// clang-format off
#pragma HLS unroll
            // clang-format on
            for (int channel_index = 0; channel_index < PLANES; channel_index++) {
// clang-format off
#pragma HLS unroll
                // clang-format on
                offset_buffer[channel_index][npc_index][col_index] = 0;
            } // channel_index
        }     // npc_index
    }         // col_index

    // Offset computed in the current iteration, and the one computed in the
    // previous iteration (which is what gets committed to offset_buffer).
    ap_int<BITDEPTH_IN> offset_NPC[PLANES][NPC];
// clang-format off
#pragma HLS ARRAY_PARTITION variable = offset_NPC complete dim = 0
    // clang-format on
    ap_int<BITDEPTH_IN> offset_prev_NPC[PLANES][NPC];
// clang-format off
#pragma HLS ARRAY_PARTITION variable = offset_prev_NPC complete dim = 0
    // clang-format on

    int read_index = 0;
    int write_index = 0;

    // Running quantization errors seen by each pixel lane: q_err_2nd is the
    // error of the pixel just before it, q_err_1st the one before that.
    // q_err_3rd is only zeroed below and not otherwise used; the current
    // pixel's error lives in the local q_err_3rd_local.
    ap_int<BITDEPTH_IN> q_err_1st[PLANES][NPC];
    ap_int<BITDEPTH_IN> q_err_2nd[PLANES][NPC];
    ap_int<BITDEPTH_IN> q_err_3rd[PLANES][NPC];
// clang-format off
#pragma HLS ARRAY_PARTITION variable = q_err_1st complete dim = 0
#pragma HLS ARRAY_PARTITION variable = q_err_2nd complete dim = 0
#pragma HLS ARRAY_PARTITION variable = q_err_3rd complete dim = 0
    // clang-format on

    // Zero the running errors once, before the first row. The per-row reset
    // of the lane that feeds the next row happens in the flush iteration.
    for (int channel_index = 0; channel_index < PLANES; channel_index++) {
// clang-format off
#pragma HLS unroll
        // clang-format on
        for (int pix_num = 0; pix_num < NPC; pix_num++) {
// clang-format off
#pragma HLS unroll
            // clang-format on
            q_err_1st[channel_index][pix_num] = 0;
            q_err_2nd[channel_index][pix_num] = 0;
            q_err_3rd[channel_index][pix_num] = 0;
        } // npc
    }     // channel_index

LOOP_ROW:
    for (short row_index = 0; row_index < height; row_index++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min = ROWS max = ROWS
    // clang-format on
    LOOP_COL:
        for (short col_index = 0; col_index < in_col_loop_bound; col_index++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min = COLS / NPC max = COLS / NPC
#pragma HLS PIPELINE II = 1
#pragma HLS DEPENDENCE variable = offset_buffer inter false
            // clang-format on
            // Zero-initialised so the flush iteration, which reads no input
            // word, does not slice an indeterminate value below.
            XF_TNAME(IN_TYPE, NPC) read_word = 0;
            XF_TNAME(OUT_TYPE, NPC) write_word;
            if (col_index < imgInput_ncpr) read_word = stream_in.read(read_index++);
            ap_uint<BITDEPTH_IN> read_word_extract[PLANES][NPC];
// clang-format off
#pragma HLS ARRAY_PARTITION variable = read_word_extract complete dim = 0
            // clang-format on
            // bit1/bit1_out: bit offset of the pixel inside the input/output word.
            for (int pixel = 0, bit1 = 0, bit1_out = 0; pixel < NPC;
                 pixel++, bit1 += (PLANES * BITDEPTH_IN), bit1_out += (PLANES * BITDEPTH_OUT)) {
// clang-format off
#pragma HLS unroll
                // clang-format on
                // bit2/bit2_out: bit offset of the channel inside the pixel.
                for (int channel = 0, bit2 = 0, bit2_out = 0; channel < PLANES;
                     channel++, bit2 += BITDEPTH_IN, bit2_out += BITDEPTH_OUT) {
// clang-format off
#pragma HLS unroll
                    // clang-format on
                    ap_uint<BITDEPTH_IN> in_pixel = read_word.range(bit1 + (bit2 + BITDEPTH_IN - 1), bit1 + bit2);

                    // 7/16 of the previous pixel's error (bits above bit 3 of 7 * error).
                    ap_int<BITDEPTH_IN + 2> q_2nd_err_scale7 = q_err_2nd[channel][pixel] * 7;
                    ap_int<BITDEPTH_IN - 2> q_2nd_err_scale7by16 = q_2nd_err_scale7.range(BITDEPTH_IN + 1, 4);

                    // Offset carried from the previous row. On the flush
                    // iteration col_index == imgInput_ncpr, which equals
                    // DEPTH_OFFSETBUFFER for a full-width image, so indexing
                    // the buffer there would read one element past its end.
                    // Nothing computed on that iteration reaches the output,
                    // so a zero offset is used instead.
                    ap_int<BITDEPTH_IN> offset_in = 0;
                    if (col_index < imgInput_ncpr) offset_in = offset_buffer[channel][pixel][col_index];

                    ap_int<BITDEPTH_IN + 2> quatizer_in =
                        (ap_int<BITDEPTH_IN + 2>)offset_in + (ap_int<BITDEPTH_IN + 2>)in_pixel + q_2nd_err_scale7by16;

                    // Quantize: drop the low LOG2_QUANTIZATION_INTERVAL bits and
                    // round using the highest dropped bit.
                    ap_int<BITDEPTH_IN + 2> round_out =
                        (ap_int<LOG2_SCALE_FACTOR + 2>)quatizer_in.range(BITDEPTH_IN + 1, LOG2_QUANTIZATION_INTERVAL) +
                        quatizer_in[LOG2_QUANTIZATION_INTERVAL - 1];

                    // Signed residual of the current pixel: the dropped bits,
                    // with the rounding bit replicated as the sign bit.
                    ap_int<LOG2_QUANTIZATION_INTERVAL + 1> q_err_3rd_local;
                    q_err_3rd_local.range(LOG2_QUANTIZATION_INTERVAL - 1, 0) =
                        quatizer_in.range(LOG2_QUANTIZATION_INTERVAL - 1, 0);
                    q_err_3rd_local[LOG2_QUANTIZATION_INTERVAL] = quatizer_in[LOG2_QUANTIZATION_INTERVAL - 1];

                    // Offset for the next row: (e[c-2] + 5*e[c-1] + 3*e[c]) / 16.
                    ap_int<BITDEPTH_IN + 4> sum_tmp =
                        q_err_1st[channel][pixel] + q_err_2nd[channel][pixel] * 5 + q_err_3rd_local * 3;
                    offset_NPC[channel][pixel] = sum_tmp.range(BITDEPTH_IN + 3, 4);

                    // Commit the offset computed one iteration ago; this is
                    // why the column loop needs the extra flush iteration.
                    if (col_index != 0) offset_buffer[channel][pixel][col_index - 1] = offset_prev_NPC[channel][pixel];

                    if (col_index == in_col_loop_bound - 1 && pixel == NPC - 1) {
                        // End of row: the next row starts without horizontal error.
                        q_err_1st[channel][pixel] = 0;
                        q_err_2nd[channel][pixel] = 0;
                    } else {
                        // Shift the error history on to the lane that handles
                        // the next pixel (wrapping to lane 0 of the next word).
                        if (pixel != NPC - 1) {
                            q_err_1st[channel][pixel + 1] = q_err_2nd[channel][pixel];
                            q_err_2nd[channel][pixel + 1] = q_err_3rd_local;
                        } else {
                            q_err_1st[channel][0] = q_err_2nd[channel][pixel];
                            q_err_2nd[channel][0] = q_err_3rd_local;
                        }
                    }

                    // Clamp to [0, SCALE_FACTOR - 1]: bit LOG2_SCALE_FACTOR + 1
                    // set means the value went negative, bit LOG2_SCALE_FACTOR
                    // set means it overflowed the top level.
                    ap_uint<BITDEPTH_OUT> out_tmp;
                    if ((col_index == in_col_loop_bound - 1) || (round_out[LOG2_SCALE_FACTOR + 1] == 1)) {
                        out_tmp = 0;
                    } else {
                        if (round_out[LOG2_SCALE_FACTOR] == 0) {
                            out_tmp = (ap_uint<BITDEPTH_OUT>)round_out.range(BITDEPTH_OUT - 1, 0);
                        } else {
                            out_tmp = SCALE_FACTOR - 1;
                        }
                    }
                    write_word.range(bit1_out + (bit2_out + BITDEPTH_OUT - 1), bit1_out + bit2_out) = out_tmp;
                    offset_prev_NPC[channel][pixel] = offset_NPC[channel][pixel];
                }
            }
            // The flush iteration produces no output word.
            if (col_index < imgInput_ncpr) stream_out.write(write_index++, write_word);
        } // LOOP_COL
    }     // LOOP_ROW
}
} // namespace cv
} // namespace xf
#endif //_XF_QUANTIZATION_DITHERING_