Program Listing for File xf_cvtcolor_aie.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/aie/imgproc/xf_cvtcolor_aie.hpp
)
/*
* Copyright 2021 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <adf.h>
#include <aie_api/aie.hpp>
#include <common/xf_aie_utils.hpp>
#ifndef _AIE_CVT_COLOR_H_
#define _AIE_CVT_COLOR_H_
namespace xf {
namespace cv {
namespace aie {
/****************************************************************************
* CalculateY - calculates the Y(luma) component using R,G,B values
* Y = (0.257 * R) + (0.504 * G) + (0.098 * B) + 16
* An offset of 16 is added to the resultant value
***************************************************************************/
// int16_t y_wei[16]={ 8422, 16516, 3212, 0};
/* const int16_t R_WEI=float2fix(0.257,11) ;
const int16_t G_WEI=float2fix(0.504,11) ;
const int16_t B_WEI=float2fix(0.098,11) ;
const int16_t WEI=float2fix(0.5,11) ;
printf("weights are %d %d %d\n",R_WEI,G_WEI,B_WEI,WEI);*/
int16_t y_wei[16] = {526, 1032, 201, 2048};
int16_t const_val1[16] = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
int16_t rounding_val[16] = {1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024};
template <typename T, int N>
void calculate_Y(const T* restrict ptr1,
const T* restrict ptr2,
const T* restrict ptr3,
T* restrict ptr_out1,
const T& img_width,
const T& img_height) {
::aie::vector<T, N> data_buf1, data_buf2, data_buf3, data_out, round_buff;
::aie::vector<T, N> const_val(16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16);
::aie::vector<T, N> weights(526, 1032);
::aie::vector<T, N> weights1(201, 2048);
::aie::accum<acc32, N> acc;
acc.from_vector(round_buff);
for (int i = 0; i < (img_height * img_width); i += N) chess_prepare_for_pipelining chess_loop_range(14, ) {
data_buf1 = ::aie::load_v<N>(ptr1);
ptr1 += N;
data_buf2 = ::aie::load_v<N>(ptr2);
ptr2 += N;
acc = ::aie::accumulate<N>(acc, weights, 0, data_buf1, data_buf2);
data_buf3 = ::aie::load_v<N>(ptr3);
ptr3 += N;
acc = ::aie::accumulate<N>(acc, weights1, 0, data_buf3, const_val);
::aie::store_v(ptr_out1, acc.template to_vector<T>(11));
ptr_out1 += N;
acc.from_vector(round_buff);
}
}
void calculate_Y_api(input_window_int16* ptr1_img_buffer,
input_window_int16* ptr2_img_buffer,
input_window_int16* ptr3_img_buffer,
output_window_int16* ptr_out1) {
int16_t* r_in_ptr = (int16_t*)ptr1_img_buffer->ptr;
int16_t* g_in_ptr = (int16_t*)ptr2_img_buffer->ptr;
int16_t* b_in_ptr = (int16_t*)ptr3_img_buffer->ptr;
int16_t* y_out_ptr = (int16_t*)ptr_out1->ptr;
const int16_t img_width = xfcvGetTileWidth(r_in_ptr);
const int16_t img_height = xfcvGetTileHeight(r_in_ptr);
xfcvCopyMetaData(r_in_ptr, y_out_ptr);
xfcvUnsignedSaturation(y_out_ptr);
int16* restrict ptr1 = xfcvGetImgDataPtr(r_in_ptr);
int16* restrict ptr2 = xfcvGetImgDataPtr(g_in_ptr);
int16* restrict ptr3 = xfcvGetImgDataPtr(b_in_ptr);
int16* restrict data_out = xfcvGetImgDataPtr(y_out_ptr);
calculate_Y<int16_t, 16>(ptr1, ptr2, ptr3, data_out, img_width, img_height);
}
/***********************************************************************
* CalculateU - calculates the U(Chroma) component using R,G,B values
* U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128
* an offset of 128 is added to the resultant value
**********************************************************************/
/***********************************************************************
* CalculateV - calculates the V(Chroma) component using R,G,B values
* V = (0.439 * R) - (0.368 * G) - (0.071 * B) + 128
* an offset of 128 is added to the resultant value
**********************************************************************/
/* const int16_t VR_WEI=float2fix(0.439,7) ;
const int16_t VG_WEI=float2fix(-0.368,7) ;
const int16_t VB_WEI=float2fix(-0.071,7) ;
const int16_t V_WEI=float2fix(0.5,7) ;
printf("weights are %d %d %d\n",VR_WEI,VG_WEI,VB_WEI,V_WEI);
const int16_t UR_WEI=float2fix(-0.148,7) ;
const int16_t UG_WEI=float2fix(-0.291,7) ;
const int16_t UB_WEI=float2fix(0.439,7) ;
const int16_t U_WEI=float2fix(0.5,7) ;
printf("weights are %d %d %d\n",UR_WEI,UG_WEI,UB_WEI,U_WEI);*/
int16_t UV_wei[16] = {-19, 0, -37, 0, 56, 0, 1, 0, 56, 0, -47, 0, -9, 0, 1, 0};
int16_t weight[16] = {16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448,
16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448};
// int16_t UV_wei[16]={ -38, 0, -74, 0, 112, 0, 256, 0 , 112, 0, -94, 0, -18, 0, 256, 0 };
// int16_t weight[16]={ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
template <typename T, int N>
void calculate_UV(const T* restrict ptr1,
const T* restrict ptr2,
const T* restrict ptr3,
T* restrict ptr_out2,
T* restrict ptr_out3,
const T& img_width,
const T& img_height) {
constexpr unsigned Lanes = 16;
constexpr unsigned Points = 2;
constexpr unsigned CoeffStep = 1;
constexpr unsigned DataStepY = 2;
using mul_ops = ::aie::sliding_mul_y_ops<Lanes, Points, CoeffStep, DataStepY, int16, int16>;
::aie::vector<int16_t, 16> kernel_coeff(-19, 0, -37, 0, 56, 0, 1, 0, 56, 0, -47, 0, -9, 0, 1, 0);
::aie::accum<acc48, 16> acc_u, acc_v;
::aie::vector<T, 32> data_buf1;
::aie::vector<T, 32> data_buf2;
::aie::vector<T, 16> weights(16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448,
16448, 16448, 16448, 16448);
for (int i = 0; i < img_height; i += 2) chess_prepare_for_pipelining chess_loop_range(16, ) {
for (int j = 0; j < img_width; j += (2 * 16)) chess_prepare_for_pipelining chess_loop_range(4, ) {
data_buf1.insert(0, ::aie::load_v<16>(ptr1));
ptr1 += 16;
data_buf1.insert(1, ::aie::load_v<16>(ptr1));
ptr1 += 16; // | loading R channel
acc_u = mul_ops::mul(kernel_coeff, 0, data_buf1, 0);
acc_v = mul_ops::mul(kernel_coeff, 8, data_buf1, 0);
data_buf2.insert(0, ::aie::load_v<16>(ptr2));
ptr2 += 16;
data_buf2.insert(1, ::aie::load_v<16>(ptr2));
ptr2 += 16;
acc_u = mul_ops::mac(acc_u, kernel_coeff, 2, data_buf2, 0);
acc_v = mul_ops::mac(acc_v, kernel_coeff, 10, data_buf2, 0);
data_buf1.insert(0, ::aie::load_v<16>(ptr3));
ptr3 += 16;
data_buf1.insert(1, ::aie::load_v<16>(ptr3));
ptr3 += 16;
acc_u = mul_ops::mac(acc_u, kernel_coeff, 4, data_buf1, 0);
acc_v = mul_ops::mac(acc_v, kernel_coeff, 12, data_buf1, 0);
data_buf2.insert(0, weights);
data_buf2.insert(1, weights);
acc_u = mul_ops::mac(acc_u, kernel_coeff, 6, data_buf2, 0);
acc_v = mul_ops::mac(acc_v, kernel_coeff, 14, data_buf2, 0);
::aie::store_v(ptr_out2, acc_u.template to_vector<int16>(7));
::aie::store_v(ptr_out3, acc_v.template to_vector<int16>(7));
ptr_out2 += 16;
ptr_out3 += 16;
}
ptr1 += img_width;
ptr2 += img_width;
ptr3 += img_width;
}
}
void calculate_UV_api(input_window_int16* ptr1_img_buffer,
input_window_int16* ptr2_img_buffer,
input_window_int16* ptr3_img_buffer,
output_window_int16* ptr_out2,
output_window_int16* ptr_out3) {
int16_t* r_in_ptr = (int16_t*)ptr1_img_buffer->ptr;
int16_t* g_in_ptr = (int16_t*)ptr2_img_buffer->ptr;
int16_t* b_in_ptr = (int16_t*)ptr3_img_buffer->ptr;
int16_t* u_out_ptr = (int16_t*)ptr_out2->ptr;
int16_t* v_out_ptr = (int16_t*)ptr_out3->ptr;
const int16_t img_width = xfcvGetTileWidth(g_in_ptr);
const int16_t img_height = xfcvGetTileHeight(b_in_ptr);
xfcvCopyMetaData(g_in_ptr, u_out_ptr);
xfcvCopyMetaData(b_in_ptr, v_out_ptr);
xfcvUnsignedSaturation(u_out_ptr);
xfcvUnsignedSaturation(v_out_ptr);
xfcvSetUVMetaData(u_out_ptr);
xfcvSetUVMetaData(v_out_ptr);
int16* restrict ptr1 = xfcvGetImgDataPtr(r_in_ptr);
int16* restrict ptr2 = xfcvGetImgDataPtr(g_in_ptr);
int16* restrict ptr3 = xfcvGetImgDataPtr(b_in_ptr);
int16* restrict data_out2 = xfcvGetImgDataPtr(u_out_ptr);
int16* restrict data_out3 = xfcvGetImgDataPtr(v_out_ptr);
calculate_UV<int16_t, 16>(ptr1, ptr2, ptr3, data_out2, data_out3, img_width, img_height);
}
void cvtcolor_api(input_window_int16* img_r,
input_window_int16* img_g,
input_window_int16* img_b,
output_window_int16* img_y,
output_window_int16* img_u,
output_window_int16* img_v) {
calculate_Y_api(img_r, img_g, img_b, img_y);
calculate_UV_api(img_r, img_g, img_b, img_u, img_v);
}
} // aie
} // cv
} // xf
#endif