.. _program_listing_file__tmp_ws_src_vitis_common_include_aie_imgproc_xf_blobfromimage_aie.hpp: Program Listing for File xf_blobfromimage_aie.hpp ================================================= |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/aie/imgproc/xf_blobfromimage_aie.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* * Copyright 2021 Xilinx, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #ifndef _AIE_BLOBFROMIAMGE_H_ #define _AIE_BLOBFROMIAMGE_H_ #define img_height 32 #define img_width 128 /*#define alpha 0.342 #define beta 0.0039215686274509803921568627451 #define gama 5.2432 #define threshold1 -127 #define threshold2 127*/ namespace xf { namespace cv { namespace aie { enum ops { mean_sub, scale_n_clip, clip, scale_n_bias, scale_n_bias_mean_sub, fused_op }; void mean_subtraction(v8float* restrict ptr_in, v8float* restrict ptr_out, float alpha) { v8float data_buf1 = null_v8float(); v8float chess_storage(WR2) alpha_acc = null_v8float(); v8float chess_storage(WD0) data_out = null_v8float(); for (int i = 0; i < 8; i++) { alpha_acc = upd_elem(alpha_acc, i, alpha); } for (int j = 0; j < (img_height * img_width); j += 8) // 8x samples per loop chess_prepare_for_pipelining chess_loop_range(14, ) { data_buf1 = *(ptr_in++); // in:00++8|_________|_________|_________ data_out = fpsub(data_buf1, concat(alpha_acc, undef_v8float()), 0, 0x76543210); *(ptr_out++) = (v8float)data_out; } } void clip_fun(v8float* restrict ptr_in, v8float* restrict ptr_out, int th1, int th2) { v8float data_buf1 = null_v8float(); v8float chess_storage(WR2) thresh1_acc = null_v8float(); v8float chess_storage(WR3) thresh2_acc = null_v8float(); v8float chess_storage(WD0) temp_out = null_v8float(); v8float chess_storage(WD1) data_out = null_v8float(); for (int i = 0; i < 8; i++) { thresh1_acc = upd_elem(thresh1_acc, i, th1); thresh2_acc = upd_elem(thresh2_acc, i, th2); } for (int j = 0; j < (img_height * img_width); j += 8) // 8x samples per loop chess_prepare_for_pipelining chess_loop_range(14, ) { data_buf1 = *(ptr_in++); // in:00++8|_________|_________|_________ temp_out = fpmax(thresh1_acc, concat(data_buf1, undef_v8float()), 0, 0x76543210); data_out = fpmin(thresh2_acc, concat(temp_out, undef_v8float()), 0, 0x76543210); *(ptr_out++) = (v8float)data_out; } } void scale_n_bias_fun(v8float* restrict ptr_in, v8float* restrict ptr_out, float beta, float gama) { v8float data_buf1 = null_v8float(); v8float bias_acc = null_v8float(); v8float scale = null_v8float(); v8float chess_storage(WD0) data_out = null_v8float(); for (int i = 0; i < 8; i++) { scale = upd_elem(scale, i, beta); bias_acc = upd_elem(bias_acc, i, gama); } for (int j = 0; j < (img_height * img_width); j += 8) // 8x samples per loop chess_prepare_for_pipelining chess_loop_range(14, ) { data_buf1 = *(ptr_in++); // in:00++8|_________|_________|_________ data_out = fpmac(bias_acc, concat(data_buf1, undef_v8float()), 0, 0x76543210, scale, 0, 0x76543210); *(ptr_out++) = (v8float)data_out; } } void scale_n_clip_fun(v8float* restrict ptr_in, v8float* restrict ptr_out, float beta, int th1, int th2) { v8float data_buf1 = null_v8float(); v8float scale = null_v8float(); v8float bias_acc = null_v8float(); v8float chess_storage(WR2) thresh1_acc = null_v8float(); v8float chess_storage(WR3) thresh2_acc = null_v8float(); v8float chess_storage(WD0) temp_out = null_v8float(); v8float chess_storage(WD1) data_out = null_v8float(); v8float* restrict ptr_out_temp = ptr_out; for (int i = 0; i < 8; i++) { scale = upd_elem(scale, i, beta); thresh1_acc = upd_elem(thresh1_acc, i, th1); thresh2_acc = upd_elem(thresh2_acc, i, th2); } for (int j = 0; j < (img_height * img_width); j += 8) // 8x samples per loop chess_prepare_for_pipelining chess_loop_range(14, ) { data_buf1 = *(ptr_in++); // in:00++8|_________|_________|_________ temp_out = fpmac(bias_acc, concat(data_buf1, undef_v8float()), 0, 0x76543210, scale, 0, 0x76543210); *(ptr_out_temp++) = (v8float)data_out; } ptr_out_temp = ptr_out_temp - ((img_height * img_width) / 8); for (int j = 0; j < (img_height * img_width); j += 8) // 8x samples per loop chess_prepare_for_pipelining chess_loop_range(14, ) { data_buf1 = *(ptr_out_temp++); // in:00++8|_________|_________|_________ temp_out = fpmax(thresh1_acc, concat(data_buf1, undef_v8float()), 0, 0x76543210); data_out = fpmin(thresh2_acc, concat(temp_out, undef_v8float()), 0, 0x76543210); *(ptr_out++) = (v8float)data_out; } } void scale_n_bias_mean_sub_fun( v8float* restrict ptr_in, v8float* restrict ptr_out, float alpha, float beta, float gama) { v8float chess_storage(WR2) data_buf1 = null_v8float(); v8float chess_storage(WR3) bias_acc = null_v8float(); v8float scale = null_v8float(); v8float alpha_acc = null_v8float(); v8float chess_storage(WD0) temp_out = null_v8float(); v8float chess_storage(WD1) data_out = null_v8float(); for (int i = 0; i < 8; i++) { alpha_acc = upd_elem(alpha_acc, i, alpha); scale = upd_elem(scale, i, beta); bias_acc = upd_elem(bias_acc, i, gama); } for (int j = 0; j < (img_height * img_width); j += 8) // 8x samples per loop chess_prepare_for_pipelining chess_loop_range(14, ) { data_buf1 = *(ptr_in++); // in:00++8|_________|_________|_________ temp_out = fpsub(data_buf1, concat(alpha_acc, undef_v8float()), 0, 0); data_out = fpmac(bias_acc, concat(temp_out, undef_v8float()), 0, 0x76543210, scale, 0, 0x76543210); *(ptr_out++) = (v8float)data_out; } } void fused_op_fun( v8float* restrict ptr_in, v8float* restrict ptr_out, float alpha, float beta, float gama, int th1, int th2) { v8float* restrict ptr_out_temp = ptr_out; v8float chess_storage(WR2) data_buf1 = null_v8float(); v8float data_buf = null_v8float(); v8float bias_acc = null_v8float(); v8float scale = null_v8float(); v8float alpha_acc = null_v8float(); v8float chess_storage(WD0) temp_out = null_v8float(); v8float chess_storage(WD1) data_out = null_v8float(); for (int i = 0; i < 8; i++) { alpha_acc = upd_elem(alpha_acc, i, alpha); bias_acc = upd_elem(bias_acc, i, gama); scale = upd_elem(scale, i, beta); } for (int j = 0; j < (img_height * img_width); j += 8) // 8x samples per loop chess_prepare_for_pipelining chess_loop_range(14, ) { data_buf1 = *(ptr_in++); // in:00++8|_________|_________|_________ temp_out = fpsub(data_buf1, concat(alpha_acc, undef_v8float()), 0, 0); data_out = fpmac(bias_acc, concat(temp_out, undef_v8float()), 0, 0x76543210, scale, 0, 0x76543210); *(ptr_out_temp++) = (v8float)data_out; } ptr_out_temp = ptr_out_temp - ((img_height * img_width) / 8); v8float chess_storage(WR2) thresh1_acc = null_v8float(); v8float chess_storage(WR3) thresh2_acc = null_v8float(); for (int i = 0; i < 8; i++) { thresh1_acc = upd_elem(thresh1_acc, i, th1); thresh2_acc = upd_elem(thresh2_acc, i, th2); } for (int j = 0; j < (img_height * img_width); j += 8) // 8x samples per loop chess_prepare_for_pipelining chess_loop_range(14, ) { data_buf = *(ptr_out_temp++); // in:00++8|_________|_________|_________ temp_out = fpmax(thresh1_acc, concat(data_buf, undef_v8float()), 0, 0x76543210); data_out = fpmin(thresh2_acc, concat(temp_out, undef_v8float()), 0, 0x76543210); *(ptr_out++) = (v8float)data_out; } } // void blobFromImage( input_window_float * img_in, output_window_float * restrict img_out,float alpha, float beta, // float gama,int threshold1,int threshold2) void blobFromImage_api(input_window_float* img_in, output_window_float* img_out) // void blobFromImage( input_window_float * img_in, output_window_float * img_out) { float alpha = 0.342; float beta = 0.0039215686274509803921568627451; float gama = 5.2432; int threshold1 = -127; int threshold2 = 127; v8float* restrict ptr_img_buffer = (v8float*)img_in->ptr; v8float* restrict ptr_out_buffer = (v8float*)img_out->ptr; v8float* restrict ptr_in = (v8float*)ptr_img_buffer; v8float* restrict ptr_out = (v8float*)ptr_out_buffer; switch (OPMODE) { case mean_sub: mean_subtraction(ptr_in, ptr_out, alpha); break; case scale_n_clip: scale_n_clip_fun(ptr_in, ptr_out, beta, threshold1, threshold2); break; case clip: clip_fun(ptr_in, ptr_out, threshold1, threshold2); break; case scale_n_bias: scale_n_bias_fun(ptr_in, ptr_out, beta, gama); break; case scale_n_bias_mean_sub: scale_n_bias_mean_sub_fun(ptr_in, ptr_out, alpha, beta, gama); break; case fused_op: fused_op_fun(ptr_in, ptr_out, alpha, beta, gama, threshold1, threshold2); break; } } } // aie } // cv } // xf #endif