Program Listing for File xf_addweighted_aie.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/aie/imgproc/xf_addweighted_aie.hpp)
/*
* Copyright 2021 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <adf.h>
#include <aie_api/aie.hpp>
#include <common/xf_aie_utils.hpp>
#ifndef _AIE_ADDWEIGHTED_H_
#define _AIE_ADDWEIGHTED_H_
// Shift count shared by the fixed-point conversion and the result scaling:
// it is the second argument of every float2fix() call on alpha/beta/gamma and
// the shift passed to accum::to_vector() when results are written back.
#define SHIFT_CNT 10
namespace xf {
namespace cv {
namespace aie {
template <typename T, int N>
__attribute__((noinline)) void addweighted(const T* restrict src1,
const T* restrict src2,
T* restrict dst,
const int16_t width,
const int16_t height,
const float& alpha,
const float& beta,
const float& gamma) {
int16_t alpha_q1dot15 = float2fix(alpha, SHIFT_CNT); //(alpha * (1 << 15));
int16_t beta_q1dot15 = float2fix(beta, SHIFT_CNT); //(beta * (1 << 15));
int16_t gamma_q1dot15 = float2fix(gamma, SHIFT_CNT); //(gamma * (1 << 15));
::aie::vector<T, N> coeff(alpha_q1dot15, beta_q1dot15);
::aie::vector<T, N> gamma_coeff;
::aie::accum<acc32, N> gamma_acc;
for (int i = 0; i < N; i++) {
gamma_coeff[i] = gamma_q1dot15;
}
gamma_acc.template from_vector(gamma_coeff, 0);
for (int j = 0; j < width * height; j += N) // 16 samples per loop
chess_prepare_for_pipelining chess_loop_range(14, ) // loop_range(14) - loop : 1 cycle
{
::aie::vector<T, N> data_buf1 = ::aie::load_v<16>(src1);
src1 += N;
::aie::vector<T, N> data_buf2 = ::aie::load_v<16>(src2);
src2 += N;
::aie::accum<acc32, N> acc = ::aie::accumulate<16>(
gamma_acc, coeff, 0, data_buf1, data_buf2); // weight[0] * data_buf1 + weight[1] * data_buf2
::aie::store_v(dst, acc.template to_vector<T>(SHIFT_CNT));
dst += N;
}
}
/**
 * @brief Window-based entry point: runs addweighted<int16_t, 16> on one tile.
 *
 * Reads the tile width/height from the first input window, copies the tile
 * metadata from the first input to the output, and then computes
 * alpha * in1 + beta * in2 + gamma over the image data of the tile.
 *
 * Declared inline because this is a non-template function defined in a header;
 * without it, including the header from more than one translation unit
 * produces multiple definitions of the same symbol.
 *
 * @param img_in1  First input window (metadata + int16 image data).
 * @param img_in2  Second input window; only its image data is read here.
 * @param img_out  Output window; receives metadata from img_in1 and the result.
 * @param alpha    Weight applied to the first input.
 * @param beta     Weight applied to the second input.
 * @param gamma    Constant offset added to every output element.
 */
inline void addweighted_api(input_window_int16* img_in1,
                            input_window_int16* img_in2,
                            output_window_int16* img_out,
                            const float& alpha,
                            const float& beta,
                            const float& gamma) {
    int16* ptr0 = (int16*)img_in1->ptr;
    int16* ptr1 = (int16*)img_in2->ptr;
    int16* ptr_out = (int16*)img_out->ptr;

    // Tile dimensions are taken from the first input only.
    const int16_t img_width = xfcvGetTileWidth(ptr0);
    const int16_t img_height = xfcvGetTileHeight(ptr0);

    // Propagate the tile metadata to the output before writing pixel data.
    xfcvCopyMetaData(ptr0, ptr_out);
    // NOTE(review): presumably marks the output tile for unsigned saturation - confirm in xf_aie_utils.hpp.
    xfcvUnsignedSaturation(ptr_out);

    // Skip past the metadata to the start of the pixel data in each buffer.
    int16* ptr_src1 = xfcvGetImgDataPtr(ptr0);
    int16* ptr_src2 = xfcvGetImgDataPtr(ptr1);
    int16* ptr_dst = xfcvGetImgDataPtr(ptr_out);

    addweighted<int16_t, 16>(ptr_src1, ptr_src2, ptr_dst, img_width, img_height, alpha, beta, gamma);

    /*
    Earlier intrinsic-based version of the same computation, kept commented out
    for reference (appears equivalent to the addweighted<int16_t, 16> call above):

    int16_t alpha_q1dot15 = float2fix(alpha, SHIFT_CNT); //(alpha * (1 << 15));
    int16_t beta_q1dot15 = float2fix(beta, SHIFT_CNT); //(beta * (1 << 15));
    int16_t gamma_q1dot15 = float2fix(gamma, SHIFT_CNT); //(gamma * (1 << 15));
    v16int16 data_buf1;
    v16int16 data_buf2;
    v16int16 gamma_reg;
    v16int16 coeff_v16;
    v16acc48 gamma_out;
    v16acc48 acc;
    // loading accumulator with gamma value
    for (int i = 0; i < 16; i++) {
    gamma_reg = upd_elem(gamma_reg, i, gamma_q1dot15);
    }
    gamma_out = ups(gamma_reg, 0);
    // loading alpha, beta into vec register
    coeff_v16 = upd_elem(coeff_v16, 0, alpha_q1dot15);
    coeff_v16 = upd_elem(coeff_v16, 1, beta_q1dot15);
    // process loop
    for (int j = 0; j < img_width * img_height; j += 16) // 16 samples per loop
    chess_prepare_for_pipelining chess_loop_range(14, ) // loop_range(14) - loop : 1 cycle
    {
    data_buf1 = *(ptr_src1++);
    data_buf2 = *(ptr_src2++);
    acc = mac16(gamma_out, concat(data_buf1, data_buf2), 0, 0x73727170, 0x77767574, 0x3120, coeff_v16, 0, 0, 0,
    1);
    *(ptr_dst++) = srs(acc, SHIFT_CNT);
    }
    */
}
} // aie
} // cv
} // xf
#endif