.. _program_listing_file__tmp_ws_src_vitis_common_include_aie_imgproc_xf_addweighted_aie.hpp: Program Listing for File xf_addweighted_aie.hpp =============================================== |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/aie/imgproc/xf_addweighted_aie.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* * Copyright 2021 Xilinx, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #ifndef _AIE_ADDWEIGHTED_H_ #define _AIE_ADDWEIGHTED_H_ #define SHIFT_CNT 10 namespace xf { namespace cv { namespace aie { template __attribute__((noinline)) void addweighted(const T* restrict src1, const T* restrict src2, T* restrict dst, const int16_t width, const int16_t height, const float& alpha, const float& beta, const float& gamma) { int16_t alpha_q1dot15 = float2fix(alpha, SHIFT_CNT); //(alpha * (1 << 15)); int16_t beta_q1dot15 = float2fix(beta, SHIFT_CNT); //(beta * (1 << 15)); int16_t gamma_q1dot15 = float2fix(gamma, SHIFT_CNT); //(gamma * (1 << 15)); ::aie::vector coeff(alpha_q1dot15, beta_q1dot15); ::aie::vector gamma_coeff; ::aie::accum gamma_acc; for (int i = 0; i < N; i++) { gamma_coeff[i] = gamma_q1dot15; } gamma_acc.template from_vector(gamma_coeff, 0); for (int j = 0; j < width * height; j += N) // 16 samples per loop chess_prepare_for_pipelining chess_loop_range(14, ) // loop_range(14) - loop : 1 cycle { ::aie::vector data_buf1 = ::aie::load_v<16>(src1); src1 += N; ::aie::vector data_buf2 = ::aie::load_v<16>(src2); src2 += N; ::aie::accum acc = ::aie::accumulate<16>( gamma_acc, coeff, 0, data_buf1, data_buf2); // weight[0] * data_buf1 + weight[1] * data_buf2 ::aie::store_v(dst, acc.template to_vector(SHIFT_CNT)); dst += N; } } void addweighted_api(input_window_int16* img_in1, input_window_int16* img_in2, output_window_int16* img_out, const float& alpha, const float& beta, const float& gamma) { int16* ptr0 = (int16*)img_in1->ptr; int16* ptr1 = (int16*)img_in2->ptr; int16* ptr_out = (int16*)img_out->ptr; const int16_t img_width = xfcvGetTileWidth(ptr0); const int16_t img_height = xfcvGetTileHeight(ptr0); xfcvCopyMetaData(ptr0, ptr_out); xfcvUnsignedSaturation(ptr_out); int16* ptr_src1 = xfcvGetImgDataPtr(ptr0); int16* ptr_src2 = xfcvGetImgDataPtr(ptr1); int16* ptr_dst = xfcvGetImgDataPtr(ptr_out); addweighted(ptr_src1, ptr_src2, ptr_dst, img_width, img_height, alpha, beta, gamma); /* int16_t alpha_q1dot15 = float2fix(alpha, SHIFT_CNT); //(alpha * (1 << 15)); int16_t beta_q1dot15 = float2fix(beta, SHIFT_CNT); //(beta * (1 << 15)); int16_t gamma_q1dot15 = float2fix(gamma, SHIFT_CNT); //(gamma * (1 << 15)); v16int16 data_buf1; v16int16 data_buf2; v16int16 gamma_reg; v16int16 coeff_v16; v16acc48 gamma_out; v16acc48 acc; // loading accumulator with gama value for (int i = 0; i < 16; i++) { gamma_reg = upd_elem(gamma_reg, i, gamma_q1dot15); } gamma_out = ups(gamma_reg, 0); // loading alpha, beta into vec register coeff_v16 = upd_elem(coeff_v16, 0, alpha_q1dot15); coeff_v16 = upd_elem(coeff_v16, 1, beta_q1dot15); // process loop for (int j = 0; j < img_width * img_height; j += 16) // 16 samples per loop chess_prepare_for_pipelining chess_loop_range(14, ) // loop_range(14) - loop : 1 cycle { data_buf1 = *(ptr_src1++); data_buf2 = *(ptr_src2++); acc = mac16(gamma_out, concat(data_buf1, data_buf2), 0, 0x73727170, 0x77767574, 0x3120, coeff_v16, 0, 0, 0, 1); *(ptr_dst++) = srs(acc, SHIFT_CNT); } */ } } // aie } // cv } // xf #endif