.. _program_listing_file__tmp_ws_src_vitis_common_include_imgproc_xf_resize_nn_bilinear.hpp: Program Listing for File xf_resize_nn_bilinear.hpp ================================================== |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/imgproc/xf_resize_nn_bilinear.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* * Copyright 2019 Xilinx, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _XF_RESIZE_NN_BILINEAR_ #define _XF_RESIZE_NN_BILINEAR_ #include "hls_stream.h" #include "ap_int.h" #include "../common/xf_common.hpp" #include "../common/xf_utility.hpp" #ifndef __SYNTHESIS__ #include #endif template void interpolatePixel(XF_CTUNAME(DEPTH, NPPC) A0, XF_CTUNAME(DEPTH, NPPC) B0, XF_CTUNAME(DEPTH, NPPC) A1, XF_CTUNAME(DEPTH, NPPC) B1, ap_ufixed<12, 2> Wx, ap_ufixed<12, 2> Wy, XF_CTUNAME(DEPTH, NPPC) & pixel) { // clang-format off #pragma HLS inline // clang-format on if (INTERPOLATION_TYPE == XF_INTERPOLATION_NN) { pixel = A0; } else { ap_ufixed<12, 2> Wxy; ap_int<16> val0, val1, val2; ap_fixed<28, 18> P1, P2, P3, P4; ap_ufixed<28, 18> one_num = 1.0; Wxy = (Wx * Wy); // Wx - 0.32, Wy-0.32 (Wx*Wy-0.64) Wxy - 0.32 val0 = (A0 + B1 - (B0 + A1)); val1 = (B0 - A0); val2 = (A1 - A0); P1 = (val0 * Wxy); // val0(16.0) * Wxy(0.32) = P1(16.32) P2 = (val1 * Wy); // val1(16.0) * Wy(0.32) = P2(16.32) P3 = (val2 * Wx); // val1(16.0) * Wx(0.32) = P3(16.32) P4 = (A0); // A0(8.0) P4(8.32) pixel = (XF_CTUNAME(DEPTH, NPPC))((ap_fixed<32, 22>)(P1 + P2 + P3 + P4)); // to get only integer part from sum of 8.32's , right shift by 32 } } template void computeOutputPixel(XF_TNAME(DEPTH, NPPC) A0[NUMBEROFINPUTWORDS], XF_TNAME(DEPTH, NPPC) B0[NUMBEROFINPUTWORDS], ap_uint initIndex, ap_uint indexx[XF_NPIXPERCYCLE(NPPC)], ap_ufixed Wx[XF_NPIXPERCYCLE(NPPC)], ap_ufixed Wy, XF_TNAME(DEPTH, NPPC) & pixel) { // clang-format off #pragma HLS inline // clang-format on const int PIXELDEPTH = XF_DTPIXELDEPTH(DEPTH, NPPC); /*if(indexx[XF_NPIXPERCYCLE(NPPC)-1] > (initIndex+NUMBEROFINPUTWORDS*XF_NPIXPERCYCLE(NPPC)-1)) { fprintf(stderr, "Insufficient number of words to resize in X\n"); return; }*/ assert((indexx[XF_NPIXPERCYCLE(NPPC) - 1] < (initIndex + NUMBEROFINPUTWORDS * XF_NPIXPERCYCLE(NPPC) - 1)) && "Insufficient number of words to resize in X"); XF_PTUNAME(DEPTH) unpackX1[XF_NPIXPERCYCLE(NPPC) * NUMBEROFINPUTWORDS]; // clang-format off #pragma HLS ARRAY_PARTITION variable=unpackX1 complete dim=1 // clang-format on XF_PTUNAME(DEPTH) unpackX2[XF_NPIXPERCYCLE(NPPC) * NUMBEROFINPUTWORDS]; // clang-format off #pragma HLS ARRAY_PARTITION variable=unpackX2 complete dim=1 // clang-format on XF_PTUNAME(DEPTH) outputPixel[XF_NPIXPERCYCLE(NPPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=outputPixel complete dim=1 // clang-format on for (int k = 0; k < NUMBEROFINPUTWORDS; k++) { // clang-format off #pragma HLS UNROLL // clang-format on for (int i = 0; i < XF_NPIXPERCYCLE(NPPC); i++) { // clang-format off #pragma HLS UNROLL // clang-format on unpackX1[k * XF_NPIXPERCYCLE(NPPC) + i] = A0[k].range((i + 1) * XF_DTPIXELDEPTH(DEPTH, NPPC) * XF_CHANNELS(DEPTH, NPPC) - 1, i * XF_DTPIXELDEPTH(DEPTH, NPPC) * XF_CHANNELS(DEPTH, NPPC)); unpackX2[k * XF_NPIXPERCYCLE(NPPC) + i] = B0[k].range((i + 1) * XF_DTPIXELDEPTH(DEPTH, NPPC) * XF_CHANNELS(DEPTH, NPPC) - 1, i * XF_DTPIXELDEPTH(DEPTH, NPPC) * XF_CHANNELS(DEPTH, NPPC)); } } for (int i = 0; i < XF_NPIXPERCYCLE(NPPC); i++) { // clang-format off #pragma HLS UNROLL // clang-format on for (int k = 0; k < XF_CHANNELS(DEPTH, NPPC); k++) { // clang-format off #pragma HLS UNROLL // clang-format on XF_CTUNAME(DEPTH, NPPC) unpackX1temp[XF_NPIXPERCYCLE(NPPC) * NUMBEROFINPUTWORDS]; // clang-format off #pragma HLS ARRAY_PARTITION variable=unpackX1temp complete dim=1 // clang-format on XF_CTUNAME(DEPTH, NPPC) unpackX2temp[XF_NPIXPERCYCLE(NPPC) * NUMBEROFINPUTWORDS]; // clang-format off #pragma HLS ARRAY_PARTITION variable=unpackX2temp complete dim=1 // clang-format on for (int l = 0; l < XF_NPIXPERCYCLE(NPPC) * NUMBEROFINPUTWORDS; l++) { // clang-format off #pragma HLS UNROLL // clang-format on unpackX1temp[l] = unpackX1[l].range((k + 1) * PIXELDEPTH - 1, k * PIXELDEPTH); unpackX2temp[l] = unpackX2[l].range((k + 1) * PIXELDEPTH - 1, k * PIXELDEPTH); } XF_CTUNAME(DEPTH, NPPC) currentoutput; interpolatePixel( unpackX1temp[indexx[i] - initIndex], unpackX2temp[indexx[i] - initIndex], unpackX1temp[indexx[i] - initIndex + 1], unpackX2temp[indexx[i] - initIndex + 1], Wx[i], Wy, currentoutput); outputPixel[i].range((k + 1) * PIXELDEPTH - 1, k * PIXELDEPTH) = currentoutput; } } for (int i = 0; i < XF_NPIXPERCYCLE(NPPC); i++) { // clang-format off #pragma HLS UNROLL // clang-format on pixel.range((i + 1) * XF_DTPIXELDEPTH(DEPTH, NPPC) * XF_CHANNELS(DEPTH, NPPC) - 1, i * XF_DTPIXELDEPTH(DEPTH, NPPC) * XF_CHANNELS(DEPTH, NPPC)) = outputPixel[i]; } } static uint64_t xfUDivResize(uint64_t in_n, unsigned short in_d) { // clang-format off #pragma HLS INLINE OFF // clang-format on uint64_t out_res = in_n / in_d; return out_res; } template void scaleMult(ap_ufixed scalex, ap_fixed scaleXParallel[XF_NPIXPERCYCLE(NPPC)]) { // clang-format off #pragma HLS INLINE // clang-format on for (int i = 0; i < XF_NPIXPERCYCLE(NPPC); i++) { // clang-format off #pragma HLS PIPELINE // clang-format on scaleXParallel[i] = (ap_fixed)scalex * (ap_uint<8>)i; } return; } template void scaleCompute(int currindex, ap_ufixed inscale, ap_fixed& ind_pre) { if (INTERPOLATION_TYPE == XF_INTERPOLATION_NN) { ind_pre = (ap_fixed)currindex * inscale + (ap_fixed)0.001; } else { ind_pre = ((ap_fixed)currindex + (ap_fixed)0.5) * inscale - (ap_fixed)0.5; } } template void computeInterpolation(int inrows, int incols, int j, int output_rows_count, ap_ufixed scalex, ap_fixed scaleXParallel[XF_NPIXPERCYCLE(NPPC)], ap_ufixed scaley, ap_uint indexx[XF_NPIXPERCYCLE(NPPC)], ap_uint& indexy, ap_uint& nextYScale, ap_ufixed WeightX[XF_NPIXPERCYCLE(NPPC)], ap_ufixed& WeightY, ap_fixed indexx_pre_comp, ap_fixed indexy_pre_comp) { const int INDEX_INT = T_INDEX_INT; const int WEIGHT_WIDTH = T_WEIGHT_WIDTH; const int WEIGHT_INT = T_WEIGHT_INT; const int SCALE_WIDTH = T_SCALE_WIDTH; const int SCALE_INT = T_SCALE_INT; const int COMP_INDEX_WIDTH = T_COMP_INDEX_WIDTH; const int COMP_INDEX_INT = T_COMP_INDEX_INT; ap_fixed indexx_pre = 0; ap_fixed indexy_pre = 0; if (INTERPOLATION_TYPE == XF_INTERPOLATION_NN) { indexy_pre = indexy_pre_comp; nextYScale = indexy_pre + scaley; indexy = (ap_uint)indexy_pre; } else { indexy_pre = indexy_pre_comp; nextYScale = indexy_pre + (ap_fixed)scaley; if (indexy_pre < 0) { indexy_pre = 0; } else if (indexy_pre > inrows - 1) { indexy_pre = inrows - 1; } indexy = (ap_uint)indexy_pre; WeightY = ((ap_fixed)indexy_pre - (ap_fixed)indexy); } // fprintf(stderr,"\nIndexX:"); for (int i = 0; i < XF_NPIXPERCYCLE(NPPC); i++) { ap_fixed indexy_pre = 0; if (INTERPOLATION_TYPE == XF_INTERPOLATION_NN) { indexx_pre = indexx_pre_comp + scaleXParallel[i]; indexx[i] = (ap_uint)indexx_pre; } else { indexx_pre = indexx_pre_comp + scaleXParallel[i]; if (indexx_pre < 0) { indexx_pre = 0; } else if (indexx_pre > incols - 1) { indexx_pre = incols - 1; } indexx[i] = (ap_uint)indexx_pre; WeightX[i] = ((ap_fixed)indexx_pre - (ap_fixed)indexx[i]); } // fprintf(stderr,"\t%d(%f)<%f>",(int)indexx[i],(float)indexx_pre,(float)WeightX[i]); } } template void resizeNNBilinear(xf::cv::Mat& imgInput, xf::cv::Mat& imgOutput) { const int INDEX_INT = 17; const int WEIGHT_WIDTH = 48; const int WEIGHT_INT = 16; const int SCALE_WIDTH = 48; const int SCALE_INT = 16; const int PRE_INDEX_WIDTH = 10; const int PRE_INDEX_INT = 17; const int COMP_INDEX_WIDTH = 42; // SCALE_WIDTH+PRE_INDEX_WIDTH; const int COMP_INDEX_INT = 20; // SCALE_INT+PRE_INDEX_INT; const int BUFFER_WORDS = MAX_DOWN_SCALE; const int BUFFER_DUP_FACTOR = (BUFFER_WORDS + 1) >> 1; uint64_t xnew, ynew; xnew = (imgInput.cols); ynew = (imgInput.rows); //(float)(out_height); xnew = xnew << 32; ynew = ynew << 32; ap_ufixed scalex, scaley; uint64_t Xscale64, Yscale64; // Q32.32 Xscale64 = xfUDivResize(xnew, (imgOutput.cols)); Yscale64 = xfUDivResize(ynew, (imgOutput.rows)); ap_ufixed<64, 32> temp_scale_conv; // clang-format off #pragma HLS ALLOCATION function instances=scaleCompute limit=1 #pragma HLS ALLOCATION function instances=xfUDivResize limit=1 // clang-format on temp_scale_conv = *(ap_ufixed<64, 32>*)&Xscale64; scalex = temp_scale_conv; temp_scale_conv = *(ap_ufixed<64, 32>*)&Yscale64; scaley = temp_scale_conv; int imgInput_cols_align_npc = ((imgInput.cols + (NPPC - 1)) >> XF_BITSHIFT(NPPC)) << XF_BITSHIFT(NPPC); int imgOutput_cols_align_npc = ((imgOutput.cols + (NPPC - 1)) >> XF_BITSHIFT(NPPC)) << XF_BITSHIFT(NPPC); ap_fixed scaleXParallel[XF_NPIXPERCYCLE(NPPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=scaleXParallel complete dim=1 // clang-format on scaleMult(scalex, scaleXParallel); XF_TNAME(SRC_TYPE, NPPC) line_buffer[3][BUFFER_DUP_FACTOR][(INWIDTH + NPPC - 1) >> (XF_BITSHIFT(NPPC))]; // clang-format off #pragma HLS ARRAY_PARTITION variable=line_buffer complete dim=1 #pragma HLS ARRAY_PARTITION variable=line_buffer complete dim=2 // clang-format on int input_read_pointer = 0; int read_rows_count = 0; int output_write_pointer = 0; for (int i = 0; i < 2; i++) // read two rows { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=1 max=2 // clang-format on for (int j = 0; j < (imgInput_cols_align_npc >> (XF_BITSHIFT(NPPC))); j++) { // clang-format off #pragma HLS PIPELINE #pragma HLS LOOP_TRIPCOUNT min=1 max=INWIDTH/NPPC XF_TNAME(SRC_TYPE, NPPC) read_word = imgInput.read(input_read_pointer); // clang-format on for (int k = 0; k < BUFFER_DUP_FACTOR; k++) { line_buffer[i][k][j] = read_word; } input_read_pointer++; } read_rows_count++; } int output_rows_count = 0; int first_row_index = 0; int second_row_index = 1; int read_row_index = 2; int loop_row_count = (imgOutput.rows > imgInput.rows) ? imgOutput.rows : imgInput.rows; int loop_col_count = (imgOutput_cols_align_npc > imgInput_cols_align_npc) ? imgOutput_cols_align_npc : imgInput_cols_align_npc; const int LOOPCOUNTROW = (INHEIGHT > OUTHEIGHT) ? INHEIGHT : OUTHEIGHT; const int LOOPCOUNTCOL = (INWIDTH > OUTWIDTH) ? INWIDTH : OUTWIDTH; ap_uint indexx[XF_NPIXPERCYCLE(NPPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=indexx complete dim=1 // clang-format on ap_uint indexy = 0; ap_uint nextYScale = 0; ap_ufixed WeightX[XF_NPIXPERCYCLE(NPPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=WeightX complete dim=1 // clang-format on ap_ufixed WeightY = 0; XF_TNAME(SRC_TYPE, NPPC) P0Buf[BUFFER_DUP_FACTOR << 1]; // clang-format off #pragma HLS ARRAY_PARTITION variable=P0Buf complete dim=1 // clang-format on XF_TNAME(SRC_TYPE, NPPC) P1Buf[BUFFER_DUP_FACTOR << 1]; // clang-format off #pragma HLS ARRAY_PARTITION variable=P1Buf complete dim=1 // clang-format on ap_fixed indexx_pre_comp = 0; ap_fixed indexy_pre_comp = 0; for (int i = 0; i < loop_row_count; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=1 max=LOOPCOUNTROW // clang-format on scaleCompute( output_rows_count, scaley, indexy_pre_comp); for (int j = 0; j < (loop_col_count >> (XF_BITSHIFT(NPPC))); j++) { // clang-format off #pragma HLS PIPELINE #pragma HLS LOOP_TRIPCOUNT min=1 max=LOOPCOUNTCOL/NPPC // clang-format on scaleCompute( j << (XF_BITSHIFT(NPPC)), scalex, indexx_pre_comp); computeInterpolation( imgInput.rows, imgInput.cols, j << (XF_BITSHIFT(NPPC)), output_rows_count, scalex, scaleXParallel, scaley, indexx, indexy, nextYScale, WeightX, WeightY, indexx_pre_comp, indexy_pre_comp); int indexstores = first_row_index; XF_TNAME(SRC_TYPE, NPPC) read_pixel; bool flag_write = 0; if (read_rows_count != imgInput.rows) { if ((nextYScale >= read_rows_count - 1)) // check if the next index y needed needs to be read. { if (j < (imgInput_cols_align_npc >> (XF_BITSHIFT(NPPC)))) { read_pixel = imgInput.read(input_read_pointer); flag_write = 1; input_read_pointer++; } else { flag_write = 0; } } else { flag_write = 0; } } else { flag_write = 0; } if (indexstores == 0) { for (int k = 0; k < BUFFER_DUP_FACTOR; k++) { // clang-format off #pragma HLS UNROLL // clang-format on int idx = (indexx[0] >> XF_BITSHIFT(NPPC)) + (k << 1); int idx_nxt = idx + (indexx[0] == (imgInput.cols - 1) ? 0 : 1); P0Buf[(k << 1)] = line_buffer[0][k][idx]; P0Buf[(k << 1) + 1] = line_buffer[0][k][idx_nxt]; P1Buf[(k << 1)] = line_buffer[1][k][idx]; P1Buf[(k << 1) + 1] = line_buffer[1][k][idx_nxt]; } if (flag_write) { for (int k = 0; k < BUFFER_DUP_FACTOR; k++) { // clang-format off #pragma HLS UNROLL // clang-format on line_buffer[2][k][j] = read_pixel; } } } else if (indexstores == 1) { for (int k = 0; k < BUFFER_DUP_FACTOR; k++) { // clang-format off #pragma HLS UNROLL // clang-format on int idx = (indexx[0] >> XF_BITSHIFT(NPPC)) + (k << 1); int idx_nxt = idx + (indexx[0] == (imgInput.cols - 1) ? 0 : 1); P0Buf[(k << 1)] = line_buffer[1][k][idx]; P0Buf[(k << 1) + 1] = line_buffer[1][k][idx_nxt]; P1Buf[(k << 1)] = line_buffer[2][k][idx]; P1Buf[(k << 1) + 1] = line_buffer[2][k][idx_nxt]; } if (flag_write) { for (int k = 0; k < BUFFER_DUP_FACTOR; k++) { // clang-format off #pragma HLS UNROLL // clang-format on line_buffer[0][k][j] = read_pixel; } } } else { for (int k = 0; k < BUFFER_DUP_FACTOR; k++) { // clang-format off #pragma HLS UNROLL // clang-format on int idx = (indexx[0] >> XF_BITSHIFT(NPPC)) + (k << 1); int idx_nxt = idx + (indexx[0] == (imgInput.cols - 1) ? 0 : 1); P0Buf[(k << 1)] = line_buffer[2][k][idx]; P0Buf[(k << 1) + 1] = line_buffer[2][k][idx_nxt]; P1Buf[(k << 1)] = line_buffer[0][k][idx]; P1Buf[(k << 1) + 1] = line_buffer[0][k][idx_nxt]; } if (flag_write) { for (int k = 0; k < BUFFER_DUP_FACTOR; k++) { // clang-format off #pragma HLS UNROLL // clang-format on line_buffer[1][k][j] = read_pixel; } } } if ((output_rows_count <= imgOutput.rows - 1) && (((indexy == read_rows_count - 1) && (read_rows_count == imgInput.rows)) || (indexy == read_rows_count - 2))) { if (j < (imgOutput_cols_align_npc >> (XF_BITSHIFT(NPPC)))) { if (indexy == read_rows_count - 1) { for (int k = 0; k < BUFFER_WORDS; k++) { // clang-format off #pragma HLS UNROLL // clang-format on P0Buf[k] = P1Buf[k]; } } XF_TNAME(SRC_TYPE, NPPC) temp_store_output; computeOutputPixel(P0Buf, P1Buf, ((indexx[0] >> XF_BITSHIFT(NPPC)) << XF_BITSHIFT(NPPC)), indexx, WeightX, WeightY, temp_store_output); imgOutput.write(output_write_pointer, temp_store_output); output_write_pointer++; } } } if ((output_rows_count <= imgOutput.rows - 1) && (((indexy == read_rows_count - 1) && (read_rows_count == imgInput.rows)) || (indexy == read_rows_count - 2))) { output_rows_count++; } if (read_rows_count != imgInput.rows) { if ((nextYScale >= read_rows_count - 1)) // check if the next index y needed needs to be read. { first_row_index++; second_row_index++; read_row_index++; if (read_row_index == 3) { read_row_index = 0; } if (first_row_index == 3) { first_row_index = 0; } if (second_row_index == 3) { second_row_index = 0; } read_rows_count++; } } } } #endif