Program Listing for File xf_letterbox.hpp

Return to documentation for file (/tmp/ws/src/vitis_common/include/dnn/xf_letterbox.hpp)

/*
 * Copyright 2019 Xilinx, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _XF_INSERTBORDER_
#define _XF_INSERTBORDER_

#include "hls_stream.h"
#include "ap_int.h"
#include "common/xf_common.hpp"
#include "common/xf_utility.hpp"

namespace xf {
namespace cv {

/**
 * @brief Copies _src into _dst at a column/row offset and fills the rest of
 *        the written words with INSERT_VAL.
 *
 * The offsets are dx = (_dst.cols - _src.cols) >> 1 columns and
 * dy = (_dst.rows - _src.rows) >> 1 rows. One packed word (NPC pixels) is
 * written to _dst per loop iteration, at consecutive indices starting from 0;
 * source words are read from _src at consecutive indices starting from 0.
 * Every channel is handled as an 8-bit slice of the packed word.
 *
 * NOTE(review): there is no check that _dst is at least as large as _src;
 * dx/dy are stored in unsigned short, so a smaller destination would not
 * yield a meaningful offset - confirm callers guarantee dst >= src.
 *
 * @tparam TYPE        Pixel type of both images (also used as DEPTH for XF_TNAME).
 * @tparam SRC_ROWS    Compile-time row bound of the source Mat type.
 * @tparam SRC_COLS    Compile-time column bound of the source Mat type.
 * @tparam DST_ROWS    Compile-time row bound of the destination Mat type.
 * @tparam DST_COLS    Compile-time column bound of the destination Mat type.
 * @tparam NPC         Number of pixels packed per word.
 * @tparam INSERT_VAL  Value written into every 8-bit channel slot of the fill pixels.
 *
 * @param _src  Input image; its runtime rows/cols give the region to copy.
 * @param _dst  Output image; its runtime rows/cols give the padded frame size.
 */
template <int TYPE, int SRC_ROWS, int SRC_COLS, int DST_ROWS, int DST_COLS, int NPC, int INSERT_VAL>
void insertBorder(xf::cv::Mat<TYPE, SRC_ROWS, SRC_COLS, NPC>& _src, xf::cv::Mat<TYPE, DST_ROWS, DST_COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
    // clang-format on

    enum { DEPTH = TYPE, PLANES = XF_CHANNELS(TYPE, NPC) };
    // The #else branch holds fixed small dimensions; it is disabled by "#if 1".
#if 1
    unsigned short in_height = _src.rows;
    unsigned short in_width = _src.cols;
    unsigned short out_height = _dst.rows;
    unsigned short out_width = _dst.cols;
#else
    unsigned short in_height = 2;
    unsigned short in_width = 4;
    unsigned short out_height = 2;
    unsigned short out_width = 4;
#endif

    // Left and top border sizes in pixels: half of the size difference, rounded down.
    unsigned short dx = (out_width - in_width) >> 1;
    unsigned short dy = (out_height - in_height) >> 1;

    // *_ncpr: number of packed words per row (width rounded up to a multiple of NPC).
    // *_width_align_npc: that word count converted back to pixels.
    unsigned short imgInput_ncpr = (in_width + (NPC - 1)) >> XF_BITSHIFT(NPC);
    unsigned short imgInput_width_align_npc = imgInput_ncpr << XF_BITSHIFT(NPC);
    unsigned short imgOutput_ncpr = (out_width + (NPC - 1)) >> XF_BITSHIFT(NPC);
    unsigned short imgOutput_width_align_npc = imgOutput_ncpr << XF_BITSHIFT(NPC);

    // Pixels in the last input word of a row that lie beyond in_width, and the
    // count of pixels in that word that are inside the image.
    unsigned short padded_in_col_due_2_xfmat = imgInput_width_align_npc - in_width;
    unsigned short non_padded_in_col_due_2_xfmat = NPC - padded_in_col_due_2_xfmat;

    // Iterate over the larger of the two heights.
    // NOTE(review): row_pad_enable is assigned here but not read later in this function.
    unsigned short row_loop_bound;
    bool row_pad_enable;
    if (out_height > in_height) {
        row_loop_bound = out_height;
        row_pad_enable = 1;
    } else {
        row_loop_bound = in_height;
        row_pad_enable = 0;
    }

    // Iterate over the larger of the two words-per-row counts.
    // NOTE(review): col_pad_enable is assigned here but not read later in this function.
    unsigned short col_loop_bound;
    bool col_pad_enable;
    if (imgOutput_ncpr > imgInput_ncpr) {
        col_loop_bound = imgOutput_ncpr;
        col_pad_enable = 1;
    } else {
        col_loop_bound = imgInput_ncpr;
        col_pad_enable = 0;
    }

    // Running word indices passed to _src.read() / _dst.write().
    // NOTE(review): write_col_index is declared but not used in this function.
    uint32_t read_index = 0;
    ap_uint<32> write_index = 0;
    ap_uint<16> write_col_index = 0;

    // Remaining left-border pixels in the current row, and remaining top-border
    // rows (this counter keeps decrementing and goes negative once past the top border).
    short pad_col_count_left = dx;
    short pad_row_count_top = dy;
    // Index of the next input word within the current row, and its value during
    // the previous iteration.
    unsigned short input_col_index = 0;
    unsigned short input_col_index_prev = 0;
    unsigned short input_row_index = 0;

    // Build a word whose every 8-bit channel slot holds INSERT_VAL.
    XF_TNAME(DEPTH, NPC) read_word_init;
    for (int pixel = 0, bit1 = 0; pixel < NPC; pixel++, bit1 += (PLANES * 8)) {
// clang-format off
#pragma HLS unroll
        // clang-format on
        for (int channel = 0, bit2 = 0; channel < PLANES; channel++, bit2 += 8) {
// clang-format off
#pragma HLS unroll
            // clang-format on
            read_word_init.range(bit1 + (bit2 + 7), bit1 + bit2) = INSERT_VAL;
        }
    }

    // read_word_tmp: raw word for this iteration (from _src or the fill word).
    // read_word:     read_word_tmp with out-of-image tail pixels replaced by INSERT_VAL.
    // read_word_prev: read_word carried over from the previous read in this row;
    //                 starts each row as the fill word.
    XF_TNAME(DEPTH, NPC) read_word_tmp = 0;
    XF_TNAME(DEPTH, NPC) read_word = 0;
    XF_TNAME(DEPTH, NPC) read_word_prev = read_word_init;
    XF_TNAME(DEPTH, NPC) write_word = 0;

    // Position of the current output word within the row/column iteration space.
    short col_index = 0;
    short row_index = 0;

    // Single flattened loop over rows x words-per-row; one output word per iteration.
    // NOTE(review): the LOOP_TRIPCOUNT max below is SRC_COLS, while the actual
    // bound is row_loop_bound * col_loop_bound - confirm the intended estimate.
LOOP_ROW_COL:
    for (int row_col_index = 0; row_col_index < row_loop_bound * col_loop_bound; row_col_index++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=SRC_COLS
#pragma HLS pipeline II=1
        // clang-format on
        // Read a source word only when: fewer than NPC left-border pixels remain,
        // the row still has unread input words, the top border has been passed,
        // and the current row is above the bottom of the source image.
        bool input_read_en = pad_col_count_left < NPC && input_col_index < imgInput_ncpr && pad_row_count_top <= 0 &&
                             (row_index - dy) < in_height;
        if (input_read_en == 1) {
            read_word_tmp = _src.read(read_index++);
        } else
            read_word_tmp = read_word_init;

        // For the last input word of a row, overwrite the pixels that lie past
        // in_width with INSERT_VAL; all other pixels are copied through.
        for (int pixel = 0, bit1 = 0; pixel < NPC; pixel++, bit1 += (PLANES * 8)) {
// clang-format off
#pragma HLS unroll
            // clang-format on
            for (int channel = 0, bit2 = 0; channel < PLANES; channel++, bit2 += 8) {
// clang-format off
#pragma HLS unroll
                // clang-format on

                if ((pixel >= non_padded_in_col_due_2_xfmat) && (input_col_index == (imgInput_ncpr - 1)))
                    read_word.range(bit1 + (bit2 + 7), bit1 + bit2) = INSERT_VAL;
                else
                    read_word.range(bit1 + (bit2 + 7), bit1 + bit2) =
                        read_word_tmp.range(bit1 + (bit2 + 7), bit1 + bit2);
            }
        }

        // The output word is assembled from the tail of the previous word and the
        // head of the current word: prev_valid_elements pixels come from
        // read_word_prev, current_valid_elements pixels from read_word.
        short prev_valid_elements = pad_col_count_left;
        short current_valid_elements = NPC - pad_col_count_left;

        // Unpack both words into per-pixel, per-channel bytes.
        ap_uint<8> read_word_extract[NPC][PLANES];
        ap_uint<8> read_word_extract_prev[NPC][PLANES];
// clang-format off
#pragma HLS array_partition variable=read_word_extract dim=0
#pragma HLS array_partition variable=read_word_extract_prev dim=0
        // clang-format on
        for (int pixel = 0, bit1 = 0; pixel < NPC; pixel++, bit1 += (PLANES * 8)) {
// clang-format off
#pragma HLS unroll
            // clang-format on
            for (int channel = 0, bit2 = 0; channel < PLANES; channel++, bit2 += 8) {
// clang-format off
#pragma HLS unroll
                // clang-format on
                read_word_extract[pixel][channel] = read_word.range(bit1 + (bit2 + 7), bit1 + bit2);
                read_word_extract_prev[pixel][channel] = read_word_prev.range(bit1 + (bit2 + 7), bit1 + bit2);
            }
        }

        // Normally image data is emitted exactly when a source word was read.
        // When the row is shifted (current_valid_elements < NPC) and the previous
        // iteration consumed the last input word, allow one more image-data word
        // (input_col_index may equal imgInput_ncpr) so the pixels still held in
        // read_word_prev are written out.
        bool out_write_en;
        if (current_valid_elements < NPC && input_col_index_prev == (imgInput_ncpr - 1))
            out_write_en = pad_col_count_left < NPC && input_col_index < (imgInput_ncpr + 1) &&
                           pad_row_count_top <= 0 && (row_index - dy) < in_height;
        else
            out_write_en = input_read_en;

        // Select each output byte: carried-over pixels from the previous word
        // first, then pixels from the current word; INSERT_VAL when no image
        // data is emitted in this iteration.
        ap_uint<8> DDR_write_data[NPC][PLANES];
// clang-format off
#pragma HLS array_partition variable=DDR_write_data dim=0
        // clang-format on
        for (int pixel = 0; pixel < NPC; pixel++) {
            for (int channel = 0; channel < PLANES; channel++) {
                if (out_write_en == 1) {
                    if ((pixel + current_valid_elements) < NPC)
                        DDR_write_data[pixel][channel] =
                            read_word_extract_prev[pixel + current_valid_elements][channel];
                    else
                        DDR_write_data[pixel][channel] = read_word_extract[pixel - prev_valid_elements][channel];
                } else {
                    DDR_write_data[pixel][channel] = INSERT_VAL;
                }
            }
        }

        // Repack the selected bytes into one output word and write it.
        XF_TNAME(DEPTH, NPC) out_pix;
        ap_uint<PLANES * 8> plane_tmp;
        for (int pixel = 0, bit1 = 0; pixel < NPC; pixel++, bit1 += (PLANES * 8)) {
// clang-format off
#pragma HLS unroll
            // clang-format on
            for (int channel = 0, bit2 = 0; channel < PLANES; channel++, bit2 += 8) {
// clang-format off
#pragma HLS unroll
                // clang-format on
                plane_tmp.range(bit2 + 7, bit2) = DDR_write_data[pixel][channel];
            }
            out_pix.range(bit1 + (PLANES * 8) - 1, bit1) = plane_tmp;
        }
        _dst.write(write_index++, out_pix);

        // State update for the next iteration.

        // Consume a whole word of left border if at least NPC border pixels remain;
        // otherwise, at the last word of the row, reload the left-border count and
        // count down one row of top border.
        if (pad_col_count_left >= NPC)
            pad_col_count_left -= NPC;
        else if (col_index == (col_loop_bound - 1)) {
            pad_col_count_left = dx;
            pad_row_count_top--;
        }
        // Remember the input column used in this iteration, then either restart the
        // row (resetting the carried word to the fill word) or advance after a read.
        input_col_index_prev = input_col_index;
        if (col_index == (col_loop_bound - 1)) {
            input_col_index = 0;
            read_word_prev = read_word_init;
        } else if (input_read_en == 1) {
            input_col_index++;
            read_word_prev = read_word;
        }

        // Advance the flattened (row, column) position.
        if (col_index < (col_loop_bound - 1)) {
            col_index++;
        } else {
            col_index = 0;
            row_index++;
        }

    } // LOOP_ROW_COL
}

/**
 * @brief Resizes _src and then pads the resized image into _dst.
 *
 * Runs two stages under HLS DATAFLOW: xf::cv::resize writes into an
 * intermediate image constructed with rows_out_resize x cols_out_resize,
 * and xf::cv::insertBorder copies that image into _dst, filling the
 * remaining area with INSERT_VAL.
 *
 * NOTE(review): xf::cv::resize is not declared by any header this file
 * includes directly; presumably the including translation unit pulls in the
 * resize header first - confirm.
 *
 * @tparam INTERPOLATION_TYPE  Interpolation selector forwarded to xf::cv::resize.
 * @tparam TYPE                Pixel type of source, intermediate and destination images.
 * @tparam SRC_ROWS            Compile-time row bound of the source Mat type.
 * @tparam SRC_COLS            Compile-time column bound of the source Mat type.
 * @tparam DST_ROWS            Compile-time row bound of the intermediate and destination Mat types.
 * @tparam DST_COLS            Compile-time column bound of the intermediate and destination Mat types.
 * @tparam NPC                 Number of pixels packed per word.
 * @tparam MAX_DOWN_SCALE      Forwarded unchanged to xf::cv::resize.
 * @tparam INSERT_VAL          Fill value forwarded to xf::cv::insertBorder.
 *
 * @param _src             Input image.
 * @param _dst             Output image receiving the padded result.
 * @param rows_out_resize  Runtime row count of the intermediate (resized) image.
 * @param cols_out_resize  Runtime column count of the intermediate (resized) image.
 */
template <int INTERPOLATION_TYPE,
          int TYPE,
          int SRC_ROWS,
          int SRC_COLS,
          int DST_ROWS,
          int DST_COLS,
          int NPC,
          int MAX_DOWN_SCALE,
          int INSERT_VAL>
void letterbox(xf::cv::Mat<TYPE, SRC_ROWS, SRC_COLS, NPC>& _src,
               xf::cv::Mat<TYPE, DST_ROWS, DST_COLS, NPC>& _dst,
               int rows_out_resize,
               int cols_out_resize) {
// clang-format off
#pragma HLS INLINE OFF
    // clang-format on
    // The intermediate image must have the Mat type that both stages below are
    // instantiated for (DST_ROWS x DST_COLS), not externally defined macros.
    xf::cv::Mat<TYPE, DST_ROWS, DST_COLS, NPC> out_mat_resize(rows_out_resize, cols_out_resize);
// clang-format off
#pragma HLS stream variable=out_mat_resize.data depth=2
#pragma HLS DATAFLOW
    // clang-format on
    xf::cv::resize<INTERPOLATION_TYPE, TYPE, SRC_ROWS, SRC_COLS, DST_ROWS, DST_COLS, NPC, MAX_DOWN_SCALE>(
        _src, out_mat_resize);
    // Forward the caller's fill value instead of a fixed constant.
    xf::cv::insertBorder<TYPE, DST_ROWS, DST_COLS, DST_ROWS, DST_COLS, NPC, INSERT_VAL>(out_mat_resize, _dst);
}
} // namespace cv
} // namespace xf

#endif