Program Listing for File xf_hog_descriptor_norm.hpp

↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/imgproc/xf_hog_descriptor_norm.hpp)
/*
 * Copyright 2019 Xilinx, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _XF_HOG_DESCRIPTOR_NORM_HPP_
#define _XF_HOG_DESCRIPTOR_NORM_HPP_

#ifndef __cplusplus
#error C++ is needed to include this header
#endif

#define XF_CLIP_VAL 13108 //  Q0.2 in Q0.16 format
#define XF_MAX_VAL_16_BIT 65535
#define XF_3_P_6 921 //  Q2.8 format

/*******************************************************************************
 *                       xFDHOGnormalizeKernel
 *******************************************************************************
 *  This function performs normalization and computes the partial
 *  renormalization factor.
 *
 *  HA arrays: contains binned data from the histogram computation function (I)
 *
 *  ssv arrays: contains sum of squared data from the histogram computation
 *          function (I)
 *
 *  norm_block: holds the normalized data (O)
 *
 *  fx_rnf_sq: square root of the sum of squares of the clipped, normalized
 *          block values, consumed by the re-normalization step (O)
 *
 *  bj : index for horizontal blocks
 *
 *******************************************************************************/
template <typename ssv_type, typename tmp_nf_sq24_type>
void xFDHOGnormalizeKernel1(ssv_type ssv_1, ssv_type ssv_2, uint16_t bj, tmp_nf_sq24_type& tmp_nf_sq24) {
// clang-format off
    #pragma HLS INLINE OFF
    // clang-format on

    // temporary arrays to hold the ssv values of 4 cells
    ap_uint<51> tmp_nf_1, tmp_nf_2;

    tmp_nf_1 = ssv_1[bj] + ssv_2[bj];
    tmp_nf_2 = ssv_1[bj + 1] + ssv_2[bj + 1];
    ap_ufixed<50, 50, AP_TRN, AP_SAT> tmp_nf_sum;

    // contains the added up ssv values Q31.16 format
    tmp_nf_sum = (tmp_nf_1 + tmp_nf_2);

    // after square root the format is Q16.8
    tmp_nf_sq24 = xFSqrtHOG<24>(tmp_nf_sum) + XF_3_P_6;
}

template <typename tmp_nf_sq24_type, typename norm_fact_type>
void xFDHOGnormalizeKernelInv(tmp_nf_sq24_type tmp_nf_sq24, norm_fact_type& nf, char& n) {
    // Second stage of block normalization: converts the Q16.8 norm produced by
    // xFDHOGnormalizeKernel1 into its reciprocal.
    //
    // The value 16 is forwarded as the second argument of xFInverse24 --
    // presumably the integer-bit count of the Q16.8 input; confirm against the
    // xFInverse24 definition.
    int input_int_bits = 16;

    // xFInverse24 returns the reciprocal in nf and reports through n how many
    // fractional bits it carries, i.e. nf is in Q(32-n).n format.
    nf = xFInverse24(tmp_nf_sq24, input_int_bits, &n);
}

template <int NOHC,
          int NOHCPB,
          int NOVCPB,
          typename hist_type,
          int NOB,
          typename norm_block_type,
          typename fx_rnf_sq_type,
          typename norm_fact_type>
void xFDHOGnormalizeKernel2(hist_type HA_1[][NOB],
                            hist_type HA_2[][NOB],
                            norm_fact_type nf,
                            norm_block_type* norm_block,
                            fx_rnf_sq_type& fx_rnf_sq,
                            uint16_t bj,
                            char n) {
// clang-format off
    #pragma HLS INLINE OFF
    // clang-format on

    // HA_1 nad HA_2 in the Q15.8 format
    uint32_t tmp_clip_1, tmp_clip_2;

    // offsets to index the norm_array
    uchar_t offset_1[2], offset_2[2];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=offset_1 complete dim=1
    #pragma HLS ARRAY_PARTITION variable=offset_2 complete dim=1
    // clang-format on

    offset_1[0] = 0;
    offset_1[1] = NOB << 1;
    offset_2[0] = NOB;
    offset_2[1] = (NOB << 1) + NOB;

    /* keeping the clip value and norm factor in
                    the same format in the same format  */
    uint16_t fx_clip_val = XF_CLIP_VAL; // 0.2 taken in Q0.16 format
    ap_uint<16> i, j;

    ap_uint<33> rnf_sum = 0;
norm_loop:
    for (j = 0; j < NOHCPB; j++) {
    num_of_bins_loop:
        for (i = 0; i < NOB; i++) {
// clang-format off
            #pragma HLS LOOP_FLATTEN
            #pragma HLS PIPELINE
            // clang-format on

            // normalization I
            // // (Q15.8 x Q(32-n).n) >> n-8 -> Q16.16 format
            tmp_clip_1 = (((HA_1[bj + j][i]) * nf) >> (n - 8));
            tmp_clip_2 = (((HA_2[bj + j][i]) * nf) >> (n - 8));

            // clipping
            if (tmp_clip_1 > fx_clip_val) tmp_clip_1 = fx_clip_val;

            if (tmp_clip_2 > fx_clip_val) tmp_clip_2 = fx_clip_val;

            // norm_block format -> Q0.16
            norm_block[offset_1[j] + i] = tmp_clip_1;
            norm_block[offset_2[j] + i] = tmp_clip_2;

            // rnf_sum -> Q0.32
            rnf_sum += (norm_block[offset_1[j] + i] * norm_block[offset_1[j] + i]) +
                       (norm_block[offset_2[j] + i] * norm_block[offset_2[j] + i]);
        }
    }

    ap_ufixed<33, 33, AP_TRN, AP_SAT> fx_rnf_sum = rnf_sum;

    ap_uint17_t fx_rnf_sq_2 = (ap_uint17_t)(xFSqrtHOG<17>(fx_rnf_sum)); // Q1.16 format result
    fx_rnf_sq = fx_rnf_sq_2 >> 1;                                       // Q1.15 format (to take in 16 bits)
}

/*****************************************************************************
 *                       xFDHOGReNormalizeKernel
 *****************************************************************************
 *  This function performs the renormalization operation
 *
 *  norm_block: normalized block data (I)
 *
 *  fx_rnf_sq: norm of the clipped, normalized block; its inverse is used to
 *          rescale every value of the block (I)
 *
 *  _block_strm: output mat containing the normalized data (O)
 *
 *****************************************************************************/

template <int NOHC,
          int NOHCPB,
          int NOVCPB,
          int NOB,
          int ROWS,
          int COLS,
          int DEPTH,
          int NPC,
          int WORDWIDTH,
          typename norm_block_type,
          typename fx_rnf_sq_type>
void xFDHOGReNormalizeKernel(norm_block_type* norm_block,
                             fx_rnf_sq_type fx_rnf_sq,
                             hls::stream<XF_SNAME(WORDWIDTH)>& _block_strm) {
// clang-format off
    #pragma HLS INLINE OFF
    // clang-format on

    // Re-normalization: multiplies each of the NOB * NOHCPB * NOVCPB values in
    // norm_block by the inverse of fx_rnf_sq, saturates the result to 16 bits,
    // packs the results side by side into one stream word and writes exactly
    // one word to _block_strm.
    //
    //   norm_block   clipped, normalized block values (I)
    //   fx_rnf_sq    norm of that block; inverted here to obtain the scale (I)
    //   _block_strm  receives the packed, re-normalized block (O)
    //
    // NOHC, ROWS, COLS and NPC are not used in this body.

    // n_rnf receives the number of fractional bits of the returned inverse.
    char n_rnf;
    uint32_t rnf = xf::cv::Inverse(fx_rnf_sq, 1, &n_rnf); // output in Q(32-n_rnf).n_rnf format

    // Start from an all-zero word. The loop below only fills
    // NOB * NOHCPB * NOVCPB fields of `step` bits each through range() writes;
    // without the initializer any bits outside those fields would reach the
    // stream with indeterminate values.
    XF_SNAME(WORDWIDTH) block_data = 0;
    uchar_t step = XF_PIXELDEPTH(DEPTH); // width in bits of one packed value
    ap_uint<10> offset = 0;              // bit position of the next field

renorm_loop2:
    for (uchar_t k = 0; k < (NOB * NOHCPB * NOVCPB); k++) {
// clang-format off
        #pragma HLS PIPELINE
        // clang-format on

        // NOTE(review): the product is formed at the width given by the usual
        // C++ promotions of norm_block_type and uint32_t -- confirm that the
        // range of rnf keeps it from wrapping before the shift.
        ap_uint32_t tmp_block_data = (norm_block[k] * rnf) >> n_rnf; // output in format Q0.16

        // saturate: anything that does not fit in 16 bits becomes 0xFFFF
        if (tmp_block_data > XF_MAX_VAL_16_BIT) tmp_block_data = XF_MAX_VAL_16_BIT;

        // packing the data to the output variable, lowest field first
        block_data.range(offset + (step - 1), offset) = tmp_block_data;
        offset += step;
    }

    _block_strm.write(block_data);
}

/*****************************************************************************
 *              xFDHOGNormalize
 *****************************************************************************
 * This function acts as a wrapper function for normalize and renormalize
 * functions
 *
 * Inputs - HA arrays, ssv arrays and bi index
 *
 * Outputs - _block_strm (stream)
 *
 *****************************************************************************/
template <int ROWS,
          int COLS,
          int DEPTH,
          int NPC,
          int WORDWIDTH,
          int NOHC,
          int NOHCPB,
          int NOVCPB,
          int NOHW,
          int NOVW,
          int NODPB,
          int WIN_HEIGHT,
          int WIN_WIDTH,
          int CELL_HEIGHT,
          int CELL_WIDTH,
          int NOHB,
          int NOB,
          typename hist_type,
          typename ssv_type>
void xFDHOGNormalize(hist_type HA_1[][NOB],
                     hist_type HA_2[][NOB],
                     ssv_type* ssv_1,
                     ssv_type* ssv_2,
                     hls::stream<XF_SNAME(WORDWIDTH)>& _block_strm,
                     uint16_t bi,
                     uint16_t nohb,
                     uint16_t nohc) {
    // Overview
    // --------
    // Every block goes through four stages, in this order:
    //   1. xFDHOGnormalizeKernel1   : block norm from the ssv arrays
    //   2. xFDHOGnormalizeKernelInv : reciprocal of that norm
    //   3. xFDHOGnormalizeKernel2   : scale + clip the histograms, norm of the result
    //   4. xFDHOGReNormalizeKernel  : rescale and write one word to _block_strm
    //
    // The stages are staggered by one block each: loop iteration bj runs stage 1
    // on block bj, stage 2 on block bj-1, stage 3 on block bj-2 and stage 4 on
    // block bj-3. Two copies of every intermediate buffer (suffix _1 / _2) are
    // kept and `flag` selects which copy each stage reads or writes, so that the
    // four calls of one iteration touch disjoint buffers (presumably so that HLS
    // can schedule them concurrently).
    //
    // The code before the loop fills the stagger (blocks 0..2), the three
    // if/else pairs after the loop drain it.
    //
    // bi and nohc are not referenced in this body.
    //
    // NOTE(review): the fill/drain code is unconditional, so stage 1 always runs
    // for blocks 0, 1 and 2 and three words are always written after the loop;
    // this looks like it assumes nohb >= 3 -- confirm against the caller.

    // number of horizontal block index
    uint16_t bj = 0;

    // Double-buffered intermediates, declared as one-element arrays:
    //   tmp_nf_sq24_x : output of stage 1
    //   nf_x, n_x     : reciprocal and its fractional-bit count (stage 2)
    ap_uint<26> tmp_nf_sq24_1[1], tmp_nf_sq24_2[1];
    uint32_t nf_1[1], nf_2[1];
    char n_1[1], n_2[1];
// clang-format off
    #pragma HLS RESOURCE variable=nf_1 core=RAM_1P_LUTRAM
    #pragma HLS RESOURCE variable=nf_2 core=RAM_1P_LUTRAM
    #pragma HLS RESOURCE variable=tmp_nf_sq24_1 core=RAM_1P_LUTRAM
    #pragma HLS RESOURCE variable=tmp_nf_sq24_2 core=RAM_1P_LUTRAM
    #pragma HLS RESOURCE variable=n_1 core=RAM_1P_LUTRAM
    #pragma HLS RESOURCE variable=n_2 core=RAM_1P_LUTRAM
    // clang-format on

    // taking each bin as 16-bit unsigned type
    // (two copies of the normalized block, written by stage 3, read by stage 4)
    uint16_t norm_block_1[NODPB], norm_block_2[NODPB];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=norm_block_1 complete dim=1
    #pragma HLS ARRAY_PARTITION variable=norm_block_2 complete dim=1
    // clang-format on

    // temporary variable for the renormalization (to hold the sum value)
    uint16_t fx_rnf_sq_1[1], fx_rnf_sq_2[1];

    // flag == 1: stage 4 reads the _1 buffers next; flag == 0: the _2 buffers.
    bool flag = 1;

    // ---- fill, step 1: stage 1 on block 0 ----
    xFDHOGnormalizeKernel1(ssv_1, ssv_2, bj, tmp_nf_sq24_1[0]);

    // ---- fill, step 2: stage 1 on block 1, stage 2 on block 0 ----
    bj++;
    xFDHOGnormalizeKernel1(ssv_1, ssv_2, bj, tmp_nf_sq24_2[0]);
    xFDHOGnormalizeKernelInv(tmp_nf_sq24_1[0], nf_1[0], n_1[0]);

    // ---- fill, step 3: stage 1 on block 2, stage 2 on block 1, stage 3 on block 0 ----
    bj++;
    xFDHOGnormalizeKernel1(ssv_1, ssv_2, bj, tmp_nf_sq24_1[0]);
    xFDHOGnormalizeKernelInv(tmp_nf_sq24_2[0], nf_2[0], n_2[0]);
    xFDHOGnormalizeKernel2<NOHC, NOHCPB, NOVCPB>(HA_1, HA_2, nf_1[0], norm_block_1, fx_rnf_sq_1[0], bj - 2, n_1[0]);

    // ---- steady state: all four stages active, buffers swap every iteration ----
no_of_HB:
    for (bj = 3; bj < nohb; bj++) {
// clang-format off
        #pragma HLS LOOP_TRIPCOUNT min=NOHB max=NOHB
        // clang-format on

        if (flag) {
            // stage 1 -> _2, stage 2 -> _1, stage 3 -> _2, stage 4 <- _1
            xFDHOGnormalizeKernel1(ssv_1, ssv_2, bj, tmp_nf_sq24_2[0]);
            xFDHOGnormalizeKernelInv(tmp_nf_sq24_1[0], nf_1[0], n_1[0]);
            xFDHOGnormalizeKernel2<NOHC, NOHCPB, NOVCPB>(HA_1, HA_2, nf_2[0], norm_block_2, fx_rnf_sq_2[0], bj - 2,
                                                         n_2[0]);
            xFDHOGReNormalizeKernel<NOHC, NOHCPB, NOVCPB, NOB, ROWS, COLS, DEPTH, NPC, WORDWIDTH>(
                norm_block_1, fx_rnf_sq_1[0], _block_strm);
            flag = 0;
        }

        else {
            // mirror image of the branch above with the _1 / _2 roles exchanged
            xFDHOGnormalizeKernel1(ssv_1, ssv_2, bj, tmp_nf_sq24_1[0]);
            xFDHOGnormalizeKernelInv(tmp_nf_sq24_2[0], nf_2[0], n_2[0]);
            xFDHOGnormalizeKernel2<NOHC, NOHCPB, NOVCPB>(HA_1, HA_2, nf_1[0], norm_block_1, fx_rnf_sq_1[0], bj - 2,
                                                         n_1[0]);
            xFDHOGReNormalizeKernel<NOHC, NOHCPB, NOVCPB, NOB, ROWS, COLS, DEPTH, NPC, WORDWIDTH>(
                norm_block_2, fx_rnf_sq_2[0], _block_strm);
            flag = 1;
        }
    }

    // ---- drain, step 1: no more stage 1; stages 2, 3 and 4 still run ----
    // bj is incremented by hand here so that the `bj - 2` passed to stage 3 in
    // the next step addresses the following block.
    if (flag) {
        xFDHOGnormalizeKernelInv(tmp_nf_sq24_1[0], nf_1[0], n_1[0]);
        xFDHOGnormalizeKernel2<NOHC, NOHCPB, NOVCPB>(HA_1, HA_2, nf_2[0], norm_block_2, fx_rnf_sq_2[0], bj - 2, n_2[0]);
        xFDHOGReNormalizeKernel<NOHC, NOHCPB, NOVCPB, NOB, ROWS, COLS, DEPTH, NPC, WORDWIDTH>(
            norm_block_1, fx_rnf_sq_1[0], _block_strm);
        flag = 0;
        bj++;
    }

    else {
        xFDHOGnormalizeKernelInv(tmp_nf_sq24_2[0], nf_2[0], n_2[0]);
        xFDHOGnormalizeKernel2<NOHC, NOHCPB, NOVCPB>(HA_1, HA_2, nf_1[0], norm_block_1, fx_rnf_sq_1[0], bj - 2, n_1[0]);
        xFDHOGReNormalizeKernel<NOHC, NOHCPB, NOVCPB, NOB, ROWS, COLS, DEPTH, NPC, WORDWIDTH>(
            norm_block_2, fx_rnf_sq_2[0], _block_strm);
        flag = 1;
        bj++;
    }

    // ---- drain, step 2: stage 3 on the last block, stage 4 on the one before ----
    if (flag) {
        xFDHOGnormalizeKernel2<NOHC, NOHCPB, NOVCPB>(HA_1, HA_2, nf_2[0], norm_block_2, fx_rnf_sq_2[0], bj - 2, n_2[0]);
        xFDHOGReNormalizeKernel<NOHC, NOHCPB, NOVCPB, NOB, ROWS, COLS, DEPTH, NPC, WORDWIDTH>(
            norm_block_1, fx_rnf_sq_1[0], _block_strm);
        flag = 0;
    }

    else {
        xFDHOGnormalizeKernel2<NOHC, NOHCPB, NOVCPB>(HA_1, HA_2, nf_1[0], norm_block_1, fx_rnf_sq_1[0], bj - 2, n_1[0]);
        xFDHOGReNormalizeKernel<NOHC, NOHCPB, NOVCPB, NOB, ROWS, COLS, DEPTH, NPC, WORDWIDTH>(
            norm_block_2, fx_rnf_sq_2[0], _block_strm);
        flag = 1;
    }

    // ---- drain, step 3: stage 4 on the last block ----
    if (flag) {
        xFDHOGReNormalizeKernel<NOHC, NOHCPB, NOVCPB, NOB, ROWS, COLS, DEPTH, NPC, WORDWIDTH>(
            norm_block_1, fx_rnf_sq_1[0], _block_strm);
    } else {
        xFDHOGReNormalizeKernel<NOHC, NOHCPB, NOVCPB, NOB, ROWS, COLS, DEPTH, NPC, WORDWIDTH>(
            norm_block_2, fx_rnf_sq_2[0], _block_strm);
    }
}

#endif // _XF_HOG_DESCRIPTOR_NORM_HPP_