.. _program_listing_file__tmp_ws_src_vitis_common_include_imgproc_xf_resize_up_area.hpp: Program Listing for File xf_resize_up_area.hpp ============================================== |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/imgproc/xf_resize_up_area.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* * Copyright 2019 Xilinx, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _XF_RESIZE_UP_AREA_ #define _XF_RESIZE_UP_AREA_ #include "hls_stream.h" #include "ap_int.h" #include "../common/xf_common.hpp" #include "../core/xf_math.h" #include "../common/xf_utility.hpp" //#define POW32 4294967296 // 2^32 /* * Coreprocessing Processing Block * * PixelValue = A0*(1-Wx)*(1-Wy) + B0*(Wx)*(1-Wy) + A1*(1-Wx)*(Wy) + B1*(Wx)*(Wy) * = Wx*Wy*(A0+B1-B0-A1) + Wx*(B0-A0) + Wy*(A1-A0) + A0 */ static void CoreProcessUpArea( uchar_t A0, uchar_t B0, uchar_t A1, uchar_t B1, uint32_t Wx, uint32_t Wy, uchar_t* pixel) { // clang-format off #pragma HLS PIPELINE // clang-format on uint32_t Wxy; int16_t val0, val1, val2; int64_t P1, P2, P3, P4; Wxy = ((uint64_t)Wx * Wy) >> 32; // Wx - 0.32, Wy-0.32 (Wx*Wy-0.64) Wxy - 0.32 val0 = (A0 + B1 - (B0 + A1)); val1 = (B0 - A0); val2 = (A1 - A0); P1 = ((int64_t)val0 * Wxy); // val0(16.0) * Wxy(0.32) = P1(16.32) P2 = ((int64_t)val1 * Wx); // val1(16.0) * Wx(0.32) = P2(16.32) P3 = ((int64_t)val2 * Wy); // val1(16.0) * Wy(0.32) = P3(16.32) P4 = ((int64_t)A0 << 32); // A0(8.0) P4(8.32) *pixel = (uchar_t)((P1 + P2 + P3 + P4) >> 32); // to get only integer part from sum of 8.32's , right shift by 32 } /* * Processes the 8 pixel block * outputs 8 pixles packed into 64-bit */ template static XF_TNAME(DEPTH, NPC) ProcessBlockAreaUp(ap_uint<13> Offset[], uint32_t Weight[], uint32_t Yweight, XF_TNAME(DEPTH, NPC) D0[2], XF_TNAME(DEPTH, NPC) D1[2], ap_uint<13> blockstart, ap_uint<13> indoffset) { // clang-format off #pragma HLS ARRAY_PARTITION variable=D0 dim=1 #pragma HLS ARRAY_PARTITION variable=D1 dim=1 #pragma HLS INLINE // clang-format on XF_PTUNAME(DEPTH) line0[2 * (1 << XF_BITSHIFT(NPC))], line1[2 * (1 << XF_BITSHIFT(NPC))]; // holds the unpacked pixeldata // clang-format off #pragma HLS ARRAY_PARTITION variable=line0 complete dim=1 #pragma HLS ARRAY_PARTITION variable=line1 complete dim=1 // clang-format on uchar_t i, input_read, Pixels; uint16_t block_start_ind = (blockstart >> XF_BITSHIFT(NPC)) << XF_BITSHIFT(NPC); // for(i=0;i<2;i++){ //#pragma HLS unroll // xfExtractPixels(line0,D0[i],i<(line1,D1[i],i<(line0_0, D0[0], 0); xfExtractPixels(line0_1, D0[1], 0); xfExtractPixels(line1_0, D1[0], 0); xfExtractPixels(line1_1, D1[1], 0); for (int ii = 0; ii < 2 * (1 << XF_BITSHIFT(NPC)); ii++) { // clang-format off #pragma HLS UNROLL // clang-format on if (ii < (1 << XF_BITSHIFT(NPC))) { line0[ii] = line0_0[ii]; line1[ii] = line1_0[ii]; } else { line0[ii] = line0_1[ii - (1 << XF_BITSHIFT(NPC))]; line1[ii] = line1_1[ii - (1 << XF_BITSHIFT(NPC))]; } } XF_TNAME(DEPTH, NPC) val = 0; int shift = 0; process_block_loop: for (i = 0; i < (1 << XF_BITSHIFT(NPC)); i++) { // clang-format off #pragma HLS UNROLL // clang-format on // input_read = (NPC == XF_NPPC1) ?0:Offset[indoffset+i] - block_start_ind; if (NPC == XF_NPPC1) { input_read = 0; } else { input_read = Offset[indoffset + i] - block_start_ind; } for (ap_uint<5> c = 0, k = 0; c < 3; c++, k += 8) { if (PLANES != 1) { CoreProcessUpArea(line0[input_read].range(k + 7, k), line0[input_read + 1].range(k + 7, k), line1[input_read].range(k + 7, k), line1[input_read + 1].range(k + 7, k), Weight[indoffset + i], Yweight, &Pixels); val.range((i * XF_PIXELWIDTH(DEPTH, NPC)) + k + 7, (i * XF_PIXELWIDTH(DEPTH, NPC)) + k) = Pixels; } else { if (c == 0) { CoreProcessUpArea(line0[input_read], line0[input_read + 1], line1[input_read], line1[input_read + 1], Weight[indoffset + i], Yweight, &Pixels); shift = i << XF_BITSHIFT(NPC); val.range(shift + 7, shift) = Pixels; } } } } return val; } static uint64_t xFUDivAreaUp(uint64_t in_n, unsigned short in_d) { // clang-format off #pragma HLS INLINE OFF // clang-format on uint64_t out_res = in_n / in_d; return out_res; } template void xFResizeAreaUpScale(xf::cv::Mat& stream_in, xf::cv::Mat& resize_out) { enum { DEPTH_LBUF = (SRC_COLS + NPC - 1) / NPC }; XF_TNAME(DEPTH, NPC) lbuf_in0[DEPTH_LBUF]; // input buffers (ping pong) XF_TNAME(DEPTH, NPC) lbuf_in1[DEPTH_LBUF]; // input buffers (ping pong) XF_TNAME(DEPTH, NPC) lbuf_in2[DEPTH_LBUF]; // input buffers (ping pong) ap_uint<13> Hoffset[DST_COLS], Voffset[DST_ROWS]; // offset buffers which indicate from where the data is to be read uint32_t Hweight[DST_COLS], Vweight[DST_ROWS + 1]; // buffers which hold the weights for each corresponding input pixels if (NPC == XF_NPPC8) { // clang-format off #pragma HLS ARRAY_PARTITION variable=Hoffset cyclic factor=8 dim=1 #pragma HLS ARRAY_PARTITION variable=Hweight cyclic factor=8 dim=1 // clang-format on } else if (NPC == XF_NPPC4) { // clang-format off #pragma HLS ARRAY_PARTITION variable=Hoffset cyclic factor=4 dim=1 #pragma HLS ARRAY_PARTITION variable=Hweight cyclic factor=4 dim=1 // clang-format on } else if (NPC == XF_NPPC2) { // clang-format off #pragma HLS ARRAY_PARTITION variable=Hoffset cyclic factor=2 dim=1 #pragma HLS ARRAY_PARTITION variable=Hweight cyclic factor=2 dim=1 // clang-format on } unsigned short height = stream_in.rows; unsigned short width = stream_in.cols; unsigned short out_height = resize_out.rows; unsigned short out_width = resize_out.cols; unsigned short imgInput_ncpr = (width + (NPC - 1)) >> XF_BITSHIFT(NPC); unsigned short imgOutput_ncpr = (out_width + (NPC - 1)) >> XF_BITSHIFT(NPC); uchar_t idx = 0, repcount = 0, datacount = 0; uint16_t Hstart[((DST_COLS + NPC - 1) >> XF_BITSHIFT(NPC)) + 1]; // Buffers holding the starting offset for each 8pixel block short x, read_line, block_ind, block_start, bufferIndex; short prev_y = -1, j = 0, i = 0, k, ii = 0, Yoffset, offset_temp, prev_offset_temp = 0; uint32_t Xscale, Yscale, Yweight; uint64_t inv_Xscale, inv_Yscale; int64_t Xtemp = 0, Ytemp = 0; int read_index = 0, write_index = 0; // float Xscale_float,Yscale_float,inv_Xscale_float,inv_Yscale_float; XF_TNAME(DEPTH, NPC) D0[2], D1[2]; // Holds the packed pixels required for processing // clang-format off #pragma HLS ARRAY_PARTITION variable=D0 dim=1 #pragma HLS ARRAY_PARTITION variable=D1 dim=1 // clang-format on // Xscale_float = (width<> 32; // Extracting only the integer part,x(16.0) Xscale(0.32) Xtemp = ((uint64_t)(x + 1) << 32) - (offset_temp + 1) * inv_Xscale; // inv_Xscale 32.32 if (Xtemp < 0) Hweight[x] = 0; else Hweight[x] = (uint32_t)(Xtemp & 0xFFFFFFFF); // Extracting fractional part if (npc_counter == 0) Hstart[Hstart_index++] = offset_temp; Hoffset[x] = offset_temp; prev_offset_temp = offset_temp; if (npc_counter == NPC - 1) npc_counter = 0; else npc_counter++; } /* Calculating required Vertical parameters*/ Voffset_loop: for (x = 0; x < out_height; x++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=1 max=DST_ROWS // clang-format on offset_temp = ((uint64_t)x * Yscale + 429496) >> 32; // Yscale(0.32) Extracting only the integer part Ytemp = ((uint64_t)(x + 1) << 32) - (offset_temp + 1) * inv_Yscale; if (Ytemp < 0) Vweight[x] = 0; else Vweight[x] = offset_temp < (height - 1) ? (uint32_t)(Ytemp & 0xFFFFFFFF) : 0; // Voffset[x] = (offset_temp+1)<(height)?(offset_temp+1):height; if ((offset_temp) < height) { Voffset[x] = (offset_temp); } else { Voffset[x] = height - 1; } } idx = 0; bool read_flag = 0; ap_uint<2> l0 = 0, l1 = 1, l2 = 2, read_into = 2; ap_uint<16> lind0 = 0, lind1 = 1, lind2 = 65535, out_j = 0; for (x = 0; x < imgInput_ncpr; x++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=1 max = SRC_TC // clang-format on XF_TNAME(DEPTH, NPC) tmp_in = stream_in.read(read_index++); lbuf_in0[x] = tmp_in; } out_j++; for (x = 0; x < imgInput_ncpr; x++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=1 max = SRC_TC // clang-format on XF_TNAME(DEPTH, NPC) tmp_in = stream_in.read(read_index++); lbuf_in1[x] = tmp_in; } out_j++; int test = (int)lbuf_in0[0].range(7, 0); outerloop: for (j = 0; j < out_height; j++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=1 max = DST_ROWS // clang-format on if (read_flag) { if (read_into == 2) { lind2 = out_j; } else if (read_into == 1) { lind1 = out_j; } else { lind0 = out_j; } out_j++; } Yoffset = Voffset[j]; // row to be read Yweight = Vweight[j]; // weight of the next row if (Yoffset == lind0 && (Yoffset + 1) == lind1) { read_into = 2; l0 = 0; l1 = 1; } else if (Yoffset == lind1 && (Yoffset + 1) == lind2) { read_into = 0; l0 = 1; l1 = 2; } else if (Yoffset == lind2 && (Yoffset + 1) == lind0) { read_into = 1; l0 = 2; l1 = 0; } if (j < out_height - 1) { if (Voffset[j + 1] != Voffset[j]) { read_flag = 1; } else { read_flag = 0; } } else { read_flag = 0; } innerloop: for (i = 0; i < out_width; i = i + (1 << XF_BITSHIFT(NPC))) { // clang-format off #pragma HLS PIPELINE #pragma HLS LOOP_FLATTEN OFF #pragma HLS LOOP_TRIPCOUNT min=1 max=DST_TC avg=DST_TC // clang-format on block_ind = i >> XF_BITSHIFT(NPC); block_start = (NPC == XF_NPPC1) ? (ap_uint<13>)Hoffset[block_ind] : (ap_uint<13>) Hstart[block_ind]; // block_start is index of the input pixel to be read in image dimesions bufferIndex = block_start >> XF_BITSHIFT(NPC); if (read_flag && i < width && out_j < height) { if (read_into == 0) { lbuf_in0[i >> XF_BITSHIFT(NPC)] = stream_in.read(read_index++); for (k = 0; k < 2; k++) { // clang-format off #pragma HLS UNROLL // clang-format on if ((k + bufferIndex) < imgInput_ncpr) { D0[k] = lbuf_in1[bufferIndex + k]; D1[k] = lbuf_in2[bufferIndex + k]; } else { D0[k] = 0; D1[k] = 0; } } } else if (read_into == 1) { lbuf_in1[i >> XF_BITSHIFT(NPC)] = stream_in.read(read_index++); for (k = 0; k < 2; k++) { // clang-format off #pragma HLS UNROLL // clang-format on if ((k + bufferIndex) < imgInput_ncpr) { D0[k] = lbuf_in2[bufferIndex + k]; D1[k] = lbuf_in0[bufferIndex + k]; } else { D0[k] = 0; D1[k] = 0; } } } else { lbuf_in2[i >> XF_BITSHIFT(NPC)] = stream_in.read(read_index++); for (k = 0; k < 2; k++) { // clang-format off #pragma HLS UNROLL // clang-format on if ((k + bufferIndex) < imgInput_ncpr) { D0[k] = lbuf_in0[bufferIndex + k]; D1[k] = lbuf_in1[bufferIndex + k]; } else { D0[k] = 0; D1[k] = 0; } } } } else { for (k = 0; k < 2; k++) { // clang-format off #pragma HLS UNROLL // clang-format on if ((k + bufferIndex) < imgInput_ncpr) { if (l0 == 0) { D0[k] = lbuf_in0[bufferIndex + k]; if (l1 == 1) D1[k] = lbuf_in1[bufferIndex + k]; else D1[k] = lbuf_in2[bufferIndex + k]; } else if (l0 == 1) { D0[k] = lbuf_in1[bufferIndex + k]; if (l1 == 0) D1[k] = lbuf_in0[bufferIndex + k]; else D1[k] = lbuf_in2[bufferIndex + k]; } else { D0[k] = lbuf_in2[bufferIndex + k]; if (l1 == 0) D1[k] = lbuf_in0[bufferIndex + k]; else D1[k] = lbuf_in1[bufferIndex + k]; } } else { D0[k] = 0; D1[k] = 0; } } } XF_TNAME(DEPTH, NPC) out_pix = ProcessBlockAreaUp(Hoffset, Hweight, Yweight, D0, D1, block_start, i); resize_out.write(write_index++, out_pix); } } } #endif