// Program Listing for File xf_pyr_down_gaussian_blur.hpp
//
// Return to documentation for file (/tmp/ws/src/vitis_common/include/imgproc/xf_pyr_down_gaussian_blur.hpp)

/*
 * Copyright 2019 Xilinx, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _XF_PYR_DOWN_GAUSSIAN_DOWN_
#define _XF_PYR_DOWN_GAUSSIAN_DOWN_

#include "ap_int.h"
#include "hls_stream.h"
#include "../common/xf_common.hpp"

/**
 * @brief Filters one pixel window with the fixed 5x5 pyramid-down kernel.
 *
 * The window held in @p src_buf is flattened row by row into a local array and
 * every 8-bit plane of the pixel is then filtered independently. The weights
 * are applied with shifts and adds only and correspond to:
 *
 *      1   4   6   4   1
 *      4  16  24  16   4
 *      6  24  36  24   6
 *      4  16  24  16   4
 *      1   4   6   4   1
 *
 * The weights add up to 256, so the accumulated sum is rounded (+128) and
 * divided by 256 (>> 8) before it is stored.
 *
 * @tparam NPC        Pixels-per-cycle selector; only OutputValues[0] is written.
 * @tparam DEPTH      Pixel type selector handed to XF_PTUNAME.
 * @tparam WIN_SZ     Window edge length. The window is flattened with a row
 *                    stride of WIN_SZ while the taps below are addressed with a
 *                    hard-coded stride of 5, so the two only agree for WIN_SZ == 5.
 * @tparam WIN_SZ_SQ  Length of the flattened window; must hold WIN_SZ * WIN_SZ entries.
 * @tparam PLANES     Number of 8-bit planes per pixel. Planes are sliced out of a
 *                    32-bit word, so presumably PLANES * 8 <= 32 (confirm against callers).
 * @param OutputValues Result array; plane p lands in bits [8p+7 : 8p] of element 0.
 * @param src_buf      Filter window, indexed [row][column]; columns 0..WIN_SZ-1 are read.
 * @param win_size     Not used by this function; kept so existing callers still compile.
 */
template <int NPC, int DEPTH, int WIN_SZ, int WIN_SZ_SQ, int PLANES>
void xFPyrDownApplykernel(XF_PTUNAME(DEPTH) OutputValues[XF_NPIXPERCYCLE(NPC)],
                          XF_PTUNAME(DEPTH) src_buf[WIN_SZ][XF_NPIXPERCYCLE(NPC) + (WIN_SZ - 1)],
                          ap_uint<8> win_size) {
// clang-format off
    #pragma HLS INLINE
    // clang-format on
    // Flattened copy of the window: element (r, c) is stored at index r * WIN_SZ + c.
    ap_uint<32> array[WIN_SZ_SQ];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=array complete dim=1
    // clang-format on

    int array_ptr = 0;

Compute_Grad_Loop:
    for (int copy_arr = 0; copy_arr < WIN_SZ; copy_arr++) {
// clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
        #pragma HLS UNROLL
        // clang-format on
        for (int copy_in = 0; copy_in < WIN_SZ; copy_in++) {
// clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
            #pragma HLS UNROLL
            // clang-format on
            array[array_ptr] = src_buf[copy_arr][copy_in];
            array_ptr++;
        }
    }

    ap_uint<32> out_pixel = 0;

    // One iteration per 8-bit plane; lsb/msb delimit that plane inside the packed word.
    // (The former `int k[25]` weight table was never read -- it was shadowed by the
    // loop counter of the same name -- so it has been removed.)
    for (int plane = 0, lsb = 0; plane < PLANES; plane++, lsb += 8) {
// clang-format off
        #pragma HLS PIPELINE II=1
        // clang-format on
        const int msb = lsb + 7;

        // Weight 1: the four corners.
        out_pixel = array[0 * 5 + 0].range(msb, lsb) + array[0 * 5 + 4].range(msb, lsb) +
                    array[4 * 5 + 0].range(msb, lsb) + array[4 * 5 + 4].range(msb, lsb);

        // Weight 4: border taps next to the corners (two groups of four).
        out_pixel += (array[0 * 5 + 1].range(msb, lsb) + array[0 * 5 + 3].range(msb, lsb) +
                      array[1 * 5 + 0].range(msb, lsb) + array[1 * 5 + 4].range(msb, lsb))
                     << 2;
        out_pixel += (array[4 * 5 + 1].range(msb, lsb) + array[4 * 5 + 3].range(msb, lsb) +
                      array[3 * 5 + 0].range(msb, lsb) + array[3 * 5 + 4].range(msb, lsb))
                     << 2;

        // Weight 6 = 4 + 2: middle tap of each border.
        out_pixel += (array[0 * 5 + 2].range(msb, lsb) + array[2 * 5 + 0].range(msb, lsb) +
                      array[2 * 5 + 4].range(msb, lsb) + array[4 * 5 + 2].range(msb, lsb))
                     << 2;
        out_pixel += (array[0 * 5 + 2].range(msb, lsb) + array[2 * 5 + 0].range(msb, lsb) +
                      array[2 * 5 + 4].range(msb, lsb) + array[4 * 5 + 2].range(msb, lsb))
                     << 1;

        // Weight 16: diagonal neighbours of the centre.
        out_pixel += (array[1 * 5 + 1].range(msb, lsb) + array[1 * 5 + 3].range(msb, lsb) +
                      array[3 * 5 + 1].range(msb, lsb) + array[3 * 5 + 3].range(msb, lsb))
                     << 4;

        // Weight 24 = 16 + 8: horizontal/vertical neighbours of the centre.
        out_pixel += (array[1 * 5 + 2].range(msb, lsb) + array[2 * 5 + 1].range(msb, lsb) +
                      array[2 * 5 + 3].range(msb, lsb) + array[3 * 5 + 2].range(msb, lsb))
                     << 4;
        out_pixel += (array[1 * 5 + 2].range(msb, lsb) + array[2 * 5 + 1].range(msb, lsb) +
                      array[2 * 5 + 3].range(msb, lsb) + array[3 * 5 + 2].range(msb, lsb))
                     << 3;

        // Weight 36 = 32 + 4: the centre tap.
        out_pixel += (array[2 * 5 + 2].range(msb, lsb)) << 5;
        out_pixel += (array[2 * 5 + 2].range(msb, lsb)) << 2;

        // Normalise by the weight sum (256) with round-to-nearest.
        OutputValues[0].range(msb, lsb) = (unsigned char)((out_pixel + 128) >> 8);
    }
    return;
}

/**
 * @brief Produces one output row of the 5x5 pyramid-down blur.
 *
 * For every column the function
 *   1. stores a fresh input word (or 0 once the image is exhausted) in the line
 *      buffer row addressed by row_ind[win_size - 1],
 *   2. gathers the vertical slice of the window for that column, re-using the
 *      last valid row for taps that would fall below the image,
 *   3. places that slice in the right-most window column (past the right image
 *      edge the previous column is repeated instead),
 *   4. runs the kernel and, once col >= win_size / 2, emits the result, and
 *   5. shifts the window one column to the left; at col == 0 the whole window is
 *      filled with the column that was just loaded.
 *
 * @param _src_mat     Input pixel stream; read only while row < img_height and col < img_width.
 * @param _out_mat     Output pixel stream.
 * @param buf          Line buffer holding WIN_SZ image rows.
 * @param src_buf      Sliding filter window, indexed [row][column].
 * @param OutputValues Scratch array receiving the kernel result.
 * @param P0           Not referenced in this function.
 * @param img_width    Row length in stream words.
 * @param img_height   Number of image rows.
 * @param shift_x      Not referenced in this function.
 * @param row_ind      Maps window row -> physical row of @p buf.
 * @param row          Index of the row currently being loaded.
 * @param win_size     Run-time window size.
 */
template <int ROWS, int COLS, int DEPTH, int NPC, int WORDWIDTH, int TC, int WIN_SZ, int WIN_SZ_SQ, int PLANES>
void xFPyrDownprocessgaussian(hls::stream<XF_TNAME(DEPTH, NPC)>& _src_mat,
                              hls::stream<XF_TNAME(DEPTH, NPC)>& _out_mat,
                              XF_TNAME(DEPTH, NPC) buf[WIN_SZ][(COLS >> XF_BITSHIFT(NPC)) + (WIN_SZ >> 1)],
                              XF_PTUNAME(DEPTH) src_buf[WIN_SZ][XF_NPIXPERCYCLE(NPC) + (WIN_SZ - 1)],
                              XF_PTUNAME(DEPTH) OutputValues[XF_NPIXPERCYCLE(NPC)],
                              XF_PTUNAME(DEPTH) & P0,
                              uint16_t img_width,
                              uint16_t img_height,
                              uint16_t& shift_x,
                              ap_uint<13> row_ind[WIN_SZ],
                              ap_uint<13> row,
                              ap_uint<8> win_size) {
// clang-format off
    #pragma HLS INLINE
    // clang-format on

    // Vertical slice of the window for the current column, one entry per window row.
    XF_TNAME(DEPTH, NPC) col_taps[WIN_SZ];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=col_taps complete dim=1
    // clang-format on

Col_Loop:
    for (ap_uint<13> col = 0; col < img_width + (WIN_SZ >> 1); col++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN OFF
        #pragma HLS LOOP_TRIPCOUNT min=1 max=TC
        #pragma HLS pipeline
        // clang-format on

        // Step 1: refill the newest line-buffer row; outside the image store zero.
        if (row >= img_height || col >= img_width) {
            buf[row_ind[win_size - 1]][col] = 0;
        } else {
            buf[row_ind[win_size - 1]][col] = _src_mat.read(); // Read data
        }

        // Step 2: collect the column slice. Below the image, taps past the last
        // valid window row all read that last valid row.
        for (int tap = 0; tap < WIN_SZ; tap++) {
// clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
            #pragma HLS UNROLL
            // clang-format on
            if ((row <= (img_height - 1)) || (tap <= (win_size - 1 - (row - (img_height - 1))))) {
                col_taps[tap] = buf[(row_ind[tap])][col];
            } else {
                col_taps[tap] = buf[(row_ind[win_size - 1 - (row - (img_height - 1))])][col];
            }
        }

        // Step 3: load the right-most window column; past the right image edge
        // repeat the column that precedes it.
        for (int win_row = 0; win_row < WIN_SZ; win_row++) {
// clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
            #pragma HLS UNROLL
            // clang-format on
            if (col >= img_width) {
                src_buf[win_row][win_size - 1] = src_buf[win_row][win_size - 2];
            } else {
                src_buf[win_row][win_size - 1] = col_taps[win_row];
            }
        }

        // Step 4: filter, and emit once the window centre has reached column 0.
        xFPyrDownApplykernel<NPC, DEPTH, WIN_SZ, WIN_SZ_SQ, PLANES>(OutputValues, src_buf, win_size);
        if (col >= (win_size >> 1)) {
            _out_mat.write(OutputValues[0]);
        }

        // Step 5: slide the window left by one column. On the very first column
        // every older column is seeded with the newest one instead.
        for (int win_row = 0; win_row < WIN_SZ; win_row++) {
// clang-format off
            #pragma HLS UNROLL
            #pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
            // clang-format on
            for (int win_col = 0; win_col < WIN_SZ - 1; win_col++) {
// clang-format off
                #pragma HLS UNROLL
                #pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
                // clang-format on
                if (col != 0) {
                    src_buf[win_row][win_col] = src_buf[win_row][win_col + 1];
                } else {
                    src_buf[win_row][win_col] = src_buf[win_row][win_size - 1];
                }
            }
        }
    } // Col_Loop
}

/**
 * @brief Streams an image through the pyramid-down blur, one row at a time.
 *
 * The function keeps WIN_SZ image rows in a line buffer (`buf`). `row_ind` maps
 * window row -> physical buffer row and is rotated after every processed row, so
 * rows are never copied inside the buffer.
 *
 * Phases:
 *   1. `row_ind` is set to the identity mapping.
 *   2. `read_lines` pre-loads buffer rows win_size/2 .. win_size-2 from the stream.
 *   3. The rows above win_size/2 are filled with a copy of row win_size/2.
 *   4. `Row_Loop` runs img_height times; each iteration processes one row
 *      and then rotates `row_ind`.
 *
 * NOTE(review): phase 2 reads its rows unconditionally, so the input presumably
 * has to contain at least that many rows -- confirm against callers.
 *
 * @tparam TC        Trip-count hint for the column loops.
 * @param _src_mat   Input pixel stream.
 * @param _out_mat   Output pixel stream.
 * @param win_size   Run-time window size; the visible caller passes WIN_SZ.
 * @param img_height Number of image rows.
 * @param img_width  Row length in stream words.
 */
template <int ROWS, int COLS, int DEPTH, int NPC, int WORDWIDTH, int TC, int WIN_SZ, int WIN_SZ_SQ, int PLANES>
void xf_pyrdown_gaussian_nxn(hls::stream<XF_TNAME(DEPTH, NPC)>& _src_mat,
                             hls::stream<XF_TNAME(DEPTH, NPC)>& _out_mat,
                             ap_uint<8> win_size,
                             uint16_t img_height,
                             uint16_t img_width) {
    ap_uint<13> row_ind[WIN_SZ];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=row_ind complete dim=1
    // clang-format on

    uint16_t shift_x = 0;
    ap_uint<13> row, col;

    XF_TNAME(DEPTH, NPC) OutputValues[XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=OutputValues complete dim=1
    // clang-format on

    XF_PTUNAME(DEPTH) src_buf[WIN_SZ][XF_NPIXPERCYCLE(NPC) + (WIN_SZ - 1)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=src_buf complete dim=1
    #pragma HLS ARRAY_PARTITION variable=src_buf complete dim=2
    // clang-format on
    // src_buf1 et al merged
    XF_TNAME(DEPTH, NPC) P0;

    XF_TNAME(DEPTH, NPC) buf[WIN_SZ][(COLS >> XF_BITSHIFT(NPC)) + (WIN_SZ >> 1)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=buf complete dim=1
    #pragma HLS RESOURCE variable=buf core=RAM_S2P_BRAM
    // clang-format on

    // Initialise the row index table. Every one of the WIN_SZ entries is set
    // because the rotation at the end of Row_Loop reads all of them; bounding
    // this loop by the run-time win_size would leave entries indeterminate when
    // win_size < WIN_SZ and would write past the array when win_size > WIN_SZ.
    for (int init_row_ind = 0; init_row_ind < WIN_SZ; init_row_ind++) {
// clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
        // clang-format on
        row_ind[init_row_ind] = init_row_ind;
    }

    // Pre-load the rows between the window centre and the newest row.
read_lines:
    for (int init_buf = row_ind[win_size >> 1]; init_buf < row_ind[win_size - 1]; init_buf++) {
// clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
        // clang-format on
        for (col = 0; col < img_width; col++) {
// clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=1 max=TC
            #pragma HLS LOOP_FLATTEN OFF
            #pragma HLS pipeline
            // clang-format on
            buf[init_buf][col] = _src_mat.read();
        }
    }

    // takes care of top borders: rows above the centre repeat the first image row
    for (col = 0; col < img_width; col++) {
// clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=1 max=TC
        // clang-format on
        for (int init_buf = 0; init_buf < (WIN_SZ >> 1); init_buf++) {
// clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
            #pragma HLS UNROLL
            // clang-format on
            buf[init_buf][col] = buf[row_ind[win_size >> 1]][col];
        }
    }

Row_Loop:
    for (row = (win_size >> 1); row < img_height + (win_size >> 1); row++) {
// clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=1 max=ROWS
        // clang-format on
        P0 = 0;
        xFPyrDownprocessgaussian<ROWS, COLS, DEPTH, NPC, WORDWIDTH, TC, WIN_SZ, WIN_SZ_SQ, PLANES>(
            _src_mat, _out_mat, buf, src_buf, OutputValues, P0, img_width, img_height, shift_x, row_ind, row, win_size);

        // update indices: rotate the table so the oldest buffer row becomes the
        // one that is overwritten during the next iteration
        ap_uint<13> zero_ind = row_ind[0];
        for (int init_row_ind = 0; init_row_ind < WIN_SZ - 1; init_row_ind++) {
// clang-format off
            #pragma HLS LOOP_TRIPCOUNT min=1 max=WIN_SZ
            #pragma HLS UNROLL
            // clang-format on
            row_ind[init_row_ind] = row_ind[init_row_ind + 1];
        }
        row_ind[win_size - 1] = zero_ind;

    } // Row_Loop
}

/**
 * @brief Entry point of the pyramid-down Gaussian blur stage.
 *
 * Checks the run-time dimensions against the template limits (simulation builds
 * only), converts the image width from pixels to stream words and hands the
 * streams to xf_pyrdown_gaussian_nxn.
 *
 * @param _src         Input pixel stream.
 * @param _dst         Output pixel stream.
 * @param win_size     Only checked by the assertion below; the compile-time
 *                     WIN_SZ is what gets forwarded as the window size.
 * @param _border_type Not referenced in this function.
 * @param imgheight    Image height in rows; asserted to be <= ROWS.
 * @param imgwidth     Image width in pixels; asserted to be <= COLS.
 */
template <int ROWS,
          int COLS,
          int DEPTH,
          int NPC,
          int WORDWIDTH,
          int PIPELINEFLAG,
          int WIN_SZ,
          int WIN_SZ_SQ,
          int PLANES>
void xFPyrDownGaussianBlur(hls::stream<XF_TNAME(DEPTH, NPC)>& _src,
                           hls::stream<XF_TNAME(DEPTH, NPC)>& _dst,
                           ap_uint<8> win_size,
                           int _border_type,
                           uint16_t imgheight,
                           uint16_t imgwidth) {
#ifndef __SYNTHESIS__
    assert(((imgheight <= ROWS) && (imgwidth <= COLS)) && "ROWS and COLS should be greater than input image");

    assert((win_size <= WIN_SZ) && "win_size must not be greater than WIN_SZ");
#endif
    // Row length measured in stream words rather than pixels.
    const uint16_t width_in_words = imgwidth >> XF_BITSHIFT(NPC);

    // Worst-case column-loop trip count: one full row plus the right border.
    const int col_trip_count = (COLS >> XF_BITSHIFT(NPC)) + (WIN_SZ >> 1);

    xf_pyrdown_gaussian_nxn<ROWS, COLS, DEPTH, NPC, WORDWIDTH, col_trip_count, WIN_SZ, WIN_SZ_SQ, PLANES>(
        _src, _dst, WIN_SZ, imgheight, width_in_words);
}

#endif