.. _program_listing_file__tmp_ws_src_vitis_common_include_aie_imgproc_xf_cvtcolor_aie.hpp: Program Listing for File xf_cvtcolor_aie.hpp ============================================ |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/aie/imgproc/xf_cvtcolor_aie.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* * Copyright 2021 Xilinx, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #ifndef _AIE_CVT_COLOR_H_ #define _AIE_CVT_COLOR_H_ namespace xf { namespace cv { namespace aie { /**************************************************************************** * CalculateY - calculates the Y(luma) component using R,G,B values * Y = (0.257 * R) + (0.504 * G) + (0.098 * B) + 16 * An offset of 16 is added to the resultant value ***************************************************************************/ // int16_t y_wei[16]={ 8422, 16516, 3212, 0}; /* const int16_t R_WEI=float2fix(0.257,11) ; const int16_t G_WEI=float2fix(0.504,11) ; const int16_t B_WEI=float2fix(0.098,11) ; const int16_t WEI=float2fix(0.5,11) ; printf("weights are %d %d %d\n",R_WEI,G_WEI,B_WEI,WEI);*/ int16_t y_wei[16] = {526, 1032, 201, 2048}; int16_t const_val1[16] = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}; int16_t rounding_val[16] = {1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024}; template void calculate_Y(const T* restrict ptr1, const T* restrict ptr2, const T* restrict ptr3, T* restrict ptr_out1, const T& img_width, const T& img_height) { ::aie::vector data_buf1, data_buf2, data_buf3, data_out, round_buff; ::aie::vector const_val(16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16); ::aie::vector weights(526, 1032); ::aie::vector weights1(201, 2048); ::aie::accum acc; acc.from_vector(round_buff); for (int i = 0; i < (img_height * img_width); i += N) chess_prepare_for_pipelining chess_loop_range(14, ) { data_buf1 = ::aie::load_v(ptr1); ptr1 += N; data_buf2 = ::aie::load_v(ptr2); ptr2 += N; acc = ::aie::accumulate(acc, weights, 0, data_buf1, data_buf2); data_buf3 = ::aie::load_v(ptr3); ptr3 += N; acc = ::aie::accumulate(acc, weights1, 0, data_buf3, const_val); ::aie::store_v(ptr_out1, acc.template to_vector(11)); ptr_out1 += N; acc.from_vector(round_buff); } } void calculate_Y_api(input_window_int16* ptr1_img_buffer, input_window_int16* ptr2_img_buffer, input_window_int16* ptr3_img_buffer, output_window_int16* ptr_out1) { int16_t* r_in_ptr = (int16_t*)ptr1_img_buffer->ptr; int16_t* g_in_ptr = (int16_t*)ptr2_img_buffer->ptr; int16_t* b_in_ptr = (int16_t*)ptr3_img_buffer->ptr; int16_t* y_out_ptr = (int16_t*)ptr_out1->ptr; const int16_t img_width = xfcvGetTileWidth(r_in_ptr); const int16_t img_height = xfcvGetTileHeight(r_in_ptr); xfcvCopyMetaData(r_in_ptr, y_out_ptr); xfcvUnsignedSaturation(y_out_ptr); int16* restrict ptr1 = xfcvGetImgDataPtr(r_in_ptr); int16* restrict ptr2 = xfcvGetImgDataPtr(g_in_ptr); int16* restrict ptr3 = xfcvGetImgDataPtr(b_in_ptr); int16* restrict data_out = xfcvGetImgDataPtr(y_out_ptr); calculate_Y(ptr1, ptr2, ptr3, data_out, img_width, img_height); } /*********************************************************************** * CalculateU - calculates the U(Chroma) component using R,G,B values * U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128 * an offset of 128 is added to the resultant value **********************************************************************/ /*********************************************************************** * CalculateV - calculates the V(Chroma) component using R,G,B values * V = (0.439 * R) - (0.368 * G) - (0.071 * B) + 128 * an offset of 128 is added to the resultant value **********************************************************************/ /* const int16_t VR_WEI=float2fix(0.439,7) ; const int16_t VG_WEI=float2fix(-0.368,7) ; const int16_t VB_WEI=float2fix(-0.071,7) ; const int16_t V_WEI=float2fix(0.5,7) ; printf("weights are %d %d %d\n",VR_WEI,VG_WEI,VB_WEI,V_WEI); const int16_t UR_WEI=float2fix(-0.148,7) ; const int16_t UG_WEI=float2fix(-0.291,7) ; const int16_t UB_WEI=float2fix(0.439,7) ; const int16_t U_WEI=float2fix(0.5,7) ; printf("weights are %d %d %d\n",UR_WEI,UG_WEI,UB_WEI,U_WEI);*/ int16_t UV_wei[16] = {-19, 0, -37, 0, 56, 0, 1, 0, 56, 0, -47, 0, -9, 0, 1, 0}; int16_t weight[16] = {16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448}; // int16_t UV_wei[16]={ -38, 0, -74, 0, 112, 0, 256, 0 , 112, 0, -94, 0, -18, 0, 256, 0 }; // int16_t weight[16]={ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; template void calculate_UV(const T* restrict ptr1, const T* restrict ptr2, const T* restrict ptr3, T* restrict ptr_out2, T* restrict ptr_out3, const T& img_width, const T& img_height) { constexpr unsigned Lanes = 16; constexpr unsigned Points = 2; constexpr unsigned CoeffStep = 1; constexpr unsigned DataStepY = 2; using mul_ops = ::aie::sliding_mul_y_ops; ::aie::vector kernel_coeff(-19, 0, -37, 0, 56, 0, 1, 0, 56, 0, -47, 0, -9, 0, 1, 0); ::aie::accum acc_u, acc_v; ::aie::vector data_buf1; ::aie::vector data_buf2; ::aie::vector weights(16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448, 16448); for (int i = 0; i < img_height; i += 2) chess_prepare_for_pipelining chess_loop_range(16, ) { for (int j = 0; j < img_width; j += (2 * 16)) chess_prepare_for_pipelining chess_loop_range(4, ) { data_buf1.insert(0, ::aie::load_v<16>(ptr1)); ptr1 += 16; data_buf1.insert(1, ::aie::load_v<16>(ptr1)); ptr1 += 16; // | loading R channel acc_u = mul_ops::mul(kernel_coeff, 0, data_buf1, 0); acc_v = mul_ops::mul(kernel_coeff, 8, data_buf1, 0); data_buf2.insert(0, ::aie::load_v<16>(ptr2)); ptr2 += 16; data_buf2.insert(1, ::aie::load_v<16>(ptr2)); ptr2 += 16; acc_u = mul_ops::mac(acc_u, kernel_coeff, 2, data_buf2, 0); acc_v = mul_ops::mac(acc_v, kernel_coeff, 10, data_buf2, 0); data_buf1.insert(0, ::aie::load_v<16>(ptr3)); ptr3 += 16; data_buf1.insert(1, ::aie::load_v<16>(ptr3)); ptr3 += 16; acc_u = mul_ops::mac(acc_u, kernel_coeff, 4, data_buf1, 0); acc_v = mul_ops::mac(acc_v, kernel_coeff, 12, data_buf1, 0); data_buf2.insert(0, weights); data_buf2.insert(1, weights); acc_u = mul_ops::mac(acc_u, kernel_coeff, 6, data_buf2, 0); acc_v = mul_ops::mac(acc_v, kernel_coeff, 14, data_buf2, 0); ::aie::store_v(ptr_out2, acc_u.template to_vector(7)); ::aie::store_v(ptr_out3, acc_v.template to_vector(7)); ptr_out2 += 16; ptr_out3 += 16; } ptr1 += img_width; ptr2 += img_width; ptr3 += img_width; } } void calculate_UV_api(input_window_int16* ptr1_img_buffer, input_window_int16* ptr2_img_buffer, input_window_int16* ptr3_img_buffer, output_window_int16* ptr_out2, output_window_int16* ptr_out3) { int16_t* r_in_ptr = (int16_t*)ptr1_img_buffer->ptr; int16_t* g_in_ptr = (int16_t*)ptr2_img_buffer->ptr; int16_t* b_in_ptr = (int16_t*)ptr3_img_buffer->ptr; int16_t* u_out_ptr = (int16_t*)ptr_out2->ptr; int16_t* v_out_ptr = (int16_t*)ptr_out3->ptr; const int16_t img_width = xfcvGetTileWidth(g_in_ptr); const int16_t img_height = xfcvGetTileHeight(b_in_ptr); xfcvCopyMetaData(g_in_ptr, u_out_ptr); xfcvCopyMetaData(b_in_ptr, v_out_ptr); xfcvUnsignedSaturation(u_out_ptr); xfcvUnsignedSaturation(v_out_ptr); xfcvSetUVMetaData(u_out_ptr); xfcvSetUVMetaData(v_out_ptr); int16* restrict ptr1 = xfcvGetImgDataPtr(r_in_ptr); int16* restrict ptr2 = xfcvGetImgDataPtr(g_in_ptr); int16* restrict ptr3 = xfcvGetImgDataPtr(b_in_ptr); int16* restrict data_out2 = xfcvGetImgDataPtr(u_out_ptr); int16* restrict data_out3 = xfcvGetImgDataPtr(v_out_ptr); calculate_UV(ptr1, ptr2, ptr3, data_out2, data_out3, img_width, img_height); } void cvtcolor_api(input_window_int16* img_r, input_window_int16* img_g, input_window_int16* img_b, output_window_int16* img_y, output_window_int16* img_u, output_window_int16* img_v) { calculate_Y_api(img_r, img_g, img_b, img_y); calculate_UV_api(img_r, img_g, img_b, img_u, img_v); } } // aie } // cv } // xf #endif