// Program Listing for File xf_distancetransform.hpp
//
// Return to documentation for file (/tmp/ws/src/vitis_common/include/imgproc/xf_distancetransform.hpp)

/*
 * Copyright 2021 Xilinx, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __XF_VITIS_DISTANCETRANSFORM_HPP__
#define __XF_VITIS_DISTANCETRANSFORM_HPP__

#include "ap_int.h"
#include "../common/xf_common.hpp"
#include "../common/xf_utility.hpp"
#include "../common/xf_video_mem.hpp"
#include "hls_stream.h"

namespace xf {
namespace cv {

// Width of the neighbourhood window used by both passes: each pixel looks at
// three entries of the previously processed row plus the previously processed
// pixel of the current row.
constexpr int _WINDOW_SIZE_ = 3;
constexpr int K_ROWS = _WINDOW_SIZE_;

// Sentinel used as "no distance known yet" for the borders of the line buffer.
// INT_MAX / 4 is small enough that adding HV_DIST or DIAG_DIST to it still
// fits in a signed 32-bit int.
constexpr int SET_MAX_VAL = (int)(2147483647 >> 2);

// Step costs in fixed point with 16 fractional bits (value * 2^16, truncated).
// Multiplying by (1 << 16) is exact in double precision, so these are
// bit-identical to the former std::pow(2.0, 16.0) form while being true
// compile-time constants that do not depend on <cmath>.
// NOTE(review): 0.955 / 1.3693 look like the usual 3x3 L2 mask weights
// (as used by OpenCV's distanceTransform) - confirm against the reference.
constexpr int HV_DIST = (int)(0.954999983 * (1 << 16));  // horizontal / vertical step
constexpr int DIAG_DIST = (int)(1.36930001 * (1 << 16)); // diagonal step

// Converts a fixed-point distance (16 fractional bits) back to float.
constexpr float scale = 1.f / (1 << 16);

// Some global constants
using K_COL_IDX_T = uint8_t;
using COL_IDX_T = uint16_t; // Supports up to 65,535
using ROW_IDX_T = uint16_t; // Supports up to 65,535
using SIZE_IDX_T = uint32_t;

template <int IN_PTR, int FW_PTR, int ROWS, int COLS, int USE_URAM>
class dt_kernel_fw_pass {
   public:
    // Internal regsiters/buffers
    xf::cv::LineBuffer<1, COLS + 2, ap_uint<FW_PTR>, (USE_URAM ? RAM_S2P_URAM : RAM_S2P_BRAM), 1> buff;
    int im_h, im_w;

    // Internal Registers
    COL_IDX_T num_clks_per_row; // No.of clocks required for processing one row
    SIZE_IDX_T rd_ptr;          // Read pointer
    SIZE_IDX_T wr_ptr;          // Write pointer

    // Default Constructor
    dt_kernel_fw_pass() {
        num_clks_per_row = 0;
        rd_ptr = 0;
        wr_ptr = 0;
    }

    dt_kernel_fw_pass(int rows, int cols) {
        im_h = rows;
        im_w = cols;
    }

    // Internal functions
    void initialize_f() {
// clang-format off
#pragma HLS INLINE
        // clang-format on

        // Computing no.of clocks required for processing a row of given image
        // dimensions
        num_clks_per_row = im_w;

        // Read/Write pointer set to start location of input image
        rd_ptr = 0;
        wr_ptr = 0;

        return;
    };

    int fl = 0;

    void apply_f(ap_uint<IN_PTR> _src_data,
                 ap_uint<FW_PTR>& local_fw_pass_data,
                 ap_uint<FW_PTR> patch_top_0,
                 ap_uint<FW_PTR> patch_top_1,
                 ap_uint<FW_PTR> patch_top_2,
                 ap_uint<FW_PTR> patch_left) {
// clang-format off
#pragma HLS INLINE
        // clang-format on

        ap_uint<FW_PTR> tmp = 0;
        if (!_src_data)
            tmp = (ap_uint<FW_PTR>)0;
        else {
            int pt0 = patch_top_0 + DIAG_DIST;
            int pt1 = patch_top_1 + HV_DIST;
            int pt2 = patch_top_2 + DIAG_DIST;
            int pl = patch_left + HV_DIST;

            int t0 = (pt0 > pt1) ? pt1 : pt0;
            int t1 = (pt2 > pl) ? pl : pt2;
            tmp = (ap_uint<FW_PTR>)((t0 > t1) ? t1 : t0);
        }
        local_fw_pass_data = tmp;
    };

    void process_row_f(ROW_IDX_T r, ap_uint<IN_PTR>* _src, ap_uint<FW_PTR>* _fw_pass) {
// clang-format off
#pragma HLS INLINE
        // clang-format on

        // --------------------------------------
        // Constants
        // --------------------------------------
        const uint32_t _TC = COLS; // MAX Trip Count per row

        // --------------------------------------
        // Internal variables
        // --------------------------------------
        COL_IDX_T col_loop_cnt = num_clks_per_row;

        ap_uint<FW_PTR> patch_top[_WINDOW_SIZE_], patch_left;
        patch_top[1] = buff.val[0][0];
        patch_top[2] = buff.val[0][1];
        patch_left = SET_MAX_VAL;

    // --------------------------------------
    // Process columns of the row
    // --------------------------------------
    COL_LOOP:
        for (COL_IDX_T c = 0; c < col_loop_cnt; c++) {
// clang-format off
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT min=1 max=_TC
            // clang-format on
            patch_top[0] = patch_top[1];
            patch_top[1] = patch_top[2];
            patch_top[2] = buff.val[0][c + 2];

            ap_uint<FW_PTR> local_fw_pass_data;
            ap_uint<IN_PTR> in_data = _src[rd_ptr++];

            apply_f(in_data, local_fw_pass_data, patch_top[0], patch_top[1], patch_top[2], patch_left);

            buff.val[0][c + 1] = local_fw_pass_data;
            patch_left = local_fw_pass_data;
            _fw_pass[wr_ptr++] = local_fw_pass_data;
        }

        return;
    };

    bool process_image_f(ap_uint<IN_PTR>* _src, ap_uint<FW_PTR>* _fw_pass) {
// clang-format off
#pragma HLS INLINE OFF
        // clang-format on
        // Constant declaration
        const uint32_t _TC = COLS;

        // ----------------------------------
        // Start process with initialization
        // ----------------------------------
        initialize_f();

    // ----------------------------------
    // Initialize Line Buffer
    // ----------------------------------
    BORDER_INIT:
        for (COL_IDX_T c = 0; c < num_clks_per_row + 2; c++) {
// clang-format off
#pragma HLS PIPELINE
#pragma HLS LOOP_TRIPCOUNT min=1 max=_TC
            // clang-format on

            buff.val[0][c] = SET_MAX_VAL;
        }

    // ----------------------------------
    // Processing each row of the image
    // ----------------------------------
    ROW_LOOP:
        for (ROW_IDX_T r = 0; r < im_h; r++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=ROWS
            // clang-format on

            process_row_f(r, _src, _fw_pass);
        }

        return 1;
    };
};

// ======================================================================================

template <int FW_PTR, int ROWS, int COLS, int USE_URAM>
class dt_kernel_bk_pass {
   public:
    // Internal regsiters/buffers
    xf::cv::LineBuffer<1, COLS + 2, ap_uint<FW_PTR>, (USE_URAM ? RAM_S2P_URAM : RAM_S2P_BRAM), 1> buff;
    int im_h, im_w;
    bool flag;

    // Internal Registers
    COL_IDX_T num_clks_per_row; // No.of clocks required for processing one row
    SIZE_IDX_T rd_ptr;          // Read pointer
    SIZE_IDX_T wr_ptr;          // Write pointer

    // ping-pong BRAMs for forward-pass data and backward distance information
    ap_uint<FW_PTR> fw_ram1[COLS], fw_ram2[COLS];
    float dist_ram1[COLS], dist_ram2[COLS];

    // Default Constructor
    dt_kernel_bk_pass() {
        num_clks_per_row = 0;
        rd_ptr = 0;
        wr_ptr = 0;
    }

    dt_kernel_bk_pass(int rows, int cols) {
        im_h = rows;
        im_w = cols;
    }

    // Internal functions
    void initialize_b() {
// clang-format off
#pragma HLS INLINE
        // clang-format on

        // Computing no.of clocks required for processing a row of given image
        // dimensions
        num_clks_per_row = im_w;

        int total_copy = im_h * im_w;

        // Read/Write pointer set to start location of input image
        rd_ptr = total_copy;
        wr_ptr = total_copy;

        // Initializing the flags to '0'
        flag = 0;

        return;
    };

    void apply_b(ap_uint<FW_PTR> _fw_data,
                 ap_uint<FW_PTR>& local_dist_data,
                 ap_uint<FW_PTR> patch_top_0,
                 ap_uint<FW_PTR> patch_top_1,
                 ap_uint<FW_PTR> patch_top_2,
                 ap_uint<FW_PTR> patch_left) {
// clang-format off
#pragma HLS INLINE
        // clang-format on

        int dist = (int)_fw_data;
        if (dist > HV_DIST) {
            int t_d = dist;
            int pt0 = patch_top_0 + DIAG_DIST;
            int pt1 = patch_top_1 + HV_DIST;
            int pt2 = patch_top_2 + DIAG_DIST;
            int pl = patch_left + HV_DIST;

            int t0 = (pt0 > pt1) ? pt1 : pt0;
            int t1 = (pt2 > pl) ? pl : pt2;
            int t2 = (t0 > t1) ? t1 : t0;
            dist = (t_d > t2) ? t2 : t_d;
        }
        local_dist_data = (ap_uint<FW_PTR>)dist;
    };

    void process_row_b(ap_uint<FW_PTR>* _fw_ram, float* _dist_ram) {
// clang-format off
#pragma HLS INLINE OFF
        // clang-format on

        // --------------------------------------
        // Constants
        // --------------------------------------
        const uint32_t _TC = COLS; // MAX Trip Count per row

        // --------------------------------------
        // Internal variables
        // --------------------------------------
        COL_IDX_T col_loop_cnt = num_clks_per_row;

        ap_uint<FW_PTR> patch_top[_WINDOW_SIZE_], patch_left;
        patch_top[1] = buff.val[0][0];
        patch_top[2] = buff.val[0][1];
        patch_left = SET_MAX_VAL;

    // --------------------------------------
    // Process columns of the row
    // --------------------------------------
    COL_LOOP:
        for (COL_IDX_T c = 0; c < col_loop_cnt; c++) {
// clang-format off
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT min=1 max=_TC
            // clang-format on

            patch_top[0] = patch_top[1];
            patch_top[1] = patch_top[2];
            patch_top[2] = buff.val[0][c + 2];

            ap_uint<FW_PTR> local_dist_data;
            ap_uint<FW_PTR> fw_data = _fw_ram[im_w - c - 1];

            apply_b(fw_data, local_dist_data, patch_top[0], patch_top[1], patch_top[2], patch_left);

            buff.val[0][c + 1] = local_dist_data;
            patch_left = local_dist_data;
            float tmp = ((float)local_dist_data * scale);
            _dist_ram[im_w - c - 1] = tmp;
        }

        return;
    };

    void read_fw_to_ram(ap_uint<FW_PTR>* _fw_pass, ap_uint<FW_PTR>* ram, int _rd_ptr) {
// clang-format off
#pragma HLS INLINE OFF
        // clang-format on
        int ptr = _rd_ptr;
        for (int j = 0; j < im_w; j++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=COLS
#pragma HLS PIPELINE II=1
            // clang-format on
            ram[j] = _fw_pass[ptr++];
        }
    };

    void write_dist_to_mem(float* ram, float* _dst, int _wr_ptr) {
// clang-format off
#pragma HLS INLINE OFF
        // clang-format on
        int ptr = _wr_ptr;
        for (int j = 0; j < im_w; j++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=COLS
#pragma HLS PIPELINE II=1
            // clang-format on
            _dst[ptr++] = ram[j];
        }
    };

    void process_image_b(ap_uint<FW_PTR>* _fw_pass, float* _dst) {
// clang-format off
#pragma HLS INLINE OFF
        // clang-format on

        // Constant declaration
        const uint32_t _TC = COLS;

        // ----------------------------------
        // Start process with initialization
        // ----------------------------------
        initialize_b();

    // ----------------------------------
    // Initialize Line Buffer
    // ----------------------------------
    BORDER_INIT:
        for (COL_IDX_T c = 0; c < num_clks_per_row + 2; c++) {
// clang-format off
#pragma HLS PIPELINE
#pragma HLS LOOP_TRIPCOUNT min=1 max=_TC
            // clang-format on

            buff.val[0][c] = SET_MAX_VAL;
        }

        // ----------------------------------
        // Processing each row of the image
        // ----------------------------------
        rd_ptr -= im_w;
        read_fw_to_ram(_fw_pass, fw_ram1, rd_ptr);

        rd_ptr -= im_w;
        read_fw_to_ram(_fw_pass, fw_ram2, rd_ptr);
        process_row_b(fw_ram1, dist_ram1);

    ROW_LOOP:
        for (ROW_IDX_T r = 0; r < im_h - 2; r++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=ROWS
            // clang-format on

            rd_ptr -= im_w;
            wr_ptr -= im_w;
            if (flag == 0) {
                read_fw_to_ram(_fw_pass, fw_ram1, rd_ptr);
                process_row_b(fw_ram2, dist_ram2);
                write_dist_to_mem(dist_ram1, _dst, wr_ptr);
                flag = 1;
            } else {
                read_fw_to_ram(_fw_pass, fw_ram2, rd_ptr);
                process_row_b(fw_ram1, dist_ram1);
                write_dist_to_mem(dist_ram2, _dst, wr_ptr);
                flag = 0;
            }
        }

        wr_ptr -= im_w;
        if (flag == 0) {
            process_row_b(fw_ram2, dist_ram2);
            write_dist_to_mem(dist_ram1, _dst, wr_ptr);
            flag = 1;
        } else {
            process_row_b(fw_ram1, dist_ram1);
            write_dist_to_mem(dist_ram2, _dst, wr_ptr);
            flag = 0;
        }

        wr_ptr -= im_w;
        if (flag == 0) {
            write_dist_to_mem(dist_ram1, _dst, wr_ptr);
        } else {
            write_dist_to_mem(dist_ram2, _dst, wr_ptr);
        }

        return;
    };
};

// ======================================================================================

/**
 * @brief Two-pass 3x3 distance transform of a binary image.
 *
 * A forward pass writes fixed-point distances for every pixel to `_fw_pass`;
 * a backward pass then reads that buffer from its end, refines the distances
 * and stores them as float in `_dst`. Zero input pixels get distance 0.
 *
 * @tparam IN_PTR   Bit width of one input pixel word; must be 8.
 * @tparam FW_PTR   Bit width of one intermediate distance word; must be 32.
 * @tparam ROWS     Maximum supported image height.
 * @tparam COLS     Maximum supported image width.
 * @tparam USE_URAM Non-zero selects URAM, zero BRAM, for the line buffers.
 *
 * @param _src     Input image, rows * cols pixels, read row by row.
 * @param _dst     Output image, rows * cols float distances.
 * @param _fw_pass Scratch buffer of rows * cols words shared by both passes.
 * @param rows     Image height, 1..ROWS.
 * @param cols     Image width, 1..COLS.
 */
template <int IN_PTR, int FW_PTR, int ROWS, int COLS, int USE_URAM>
void distanceTransform(ap_uint<IN_PTR>* _src, float* _dst, ap_uint<FW_PTR>* _fw_pass, int rows, int cols) {
// clang-format off
#pragma HLS INLINE OFF
    // clang-format on

    assert(((rows <= ROWS) && (cols <= COLS)) &&
           "ROWS and COLS must be greater or equal to rows and cols respectively.");
    // Non-positive sizes would make the passes compute buffer offsets from a
    // zero or negative element count.
    assert(((rows > 0) && (cols > 0)) && "rows and cols must be positive.");
    assert((IN_PTR == 8) &&
           "The input must be a grayscale image, encoded with "
           "binary values (0 or 255), which means the pointer "
           "width must be '8'.");
    assert((FW_PTR == 32) && "FW_PTR, is the forwards-pass datawidth, which must be '32'.");

    xf::cv::dt_kernel_fw_pass<IN_PTR, FW_PTR, ROWS, COLS, USE_URAM> dt_fw(rows, cols);
    xf::cv::dt_kernel_bk_pass<FW_PTR, ROWS, COLS, USE_URAM> dt_bk(rows, cols);

    // Iteration 0 runs the forward pass, iteration 1 the backward pass.
    // NOTE(review): dispatching the two passes from a loop rather than as two
    // plain calls is presumably deliberate for HLS scheduling of the shared
    // _fw_pass buffer - confirm before simplifying.
    for (int i = 0; i < 2; i++) {
        if (i == 0)
            dt_fw.process_image_f(_src, _fw_pass);
        else
            dt_bk.process_image_b(_fw_pass, _dst);
    }

    return;
}

} // namespace cv
} // namespace xf

#endif //__XF_VITIS_DISTANCETRANSFORM_HPP__