Program Listing for File xf_blobfromimage_aie.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/aie/imgproc/xf_blobfromimage_aie.hpp)
/*
* Copyright 2021 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <adf.h>
#ifndef _AIE_BLOBFROMIAMGE_H_
#define _AIE_BLOBFROMIAMGE_H_
#define img_height 32
#define img_width 128
/*#define alpha 0.342
#define beta 0.0039215686274509803921568627451
#define gama 5.2432
#define threshold1 -127
#define threshold2 127*/
namespace xf {
namespace cv {
namespace aie {
// Processing modes of this kernel. blobFromImage_api() switches on OPMODE and
// calls the routine named beside each mode. Values are the default enumerator
// numbering, spelled out explicitly.
enum ops {
    mean_sub = 0,              // mean_subtraction()
    scale_n_clip = 1,          // scale_n_clip_fun()
    clip = 2,                  // clip_fun()
    scale_n_bias = 3,          // scale_n_bias_fun()
    scale_n_bias_mean_sub = 4, // scale_n_bias_mean_sub_fun()
    fused_op = 5               // fused_op_fun()
};
/**
 * Subtracts a constant from every element of the input tile.
 *
 * Walks img_height * img_width floats, one v8float (eight floats) per
 * iteration, and applies fpsub(in, alpha) to each vector.
 *
 * @param ptr_in  Source buffer, read as consecutive v8float vectors.
 * @param ptr_out Destination buffer, written as consecutive v8float vectors.
 * @param alpha   Value replicated into all eight lanes and subtracted.
 */
void mean_subtraction(v8float* restrict ptr_in, v8float* restrict ptr_out, float alpha) {
    v8float chess_storage(WR2) mean_vec = null_v8float();
    v8float chess_storage(WD0) result = null_v8float();

    // Replicate alpha into each of the eight lanes.
    for (int lane = 0; lane < 8; lane++) {
        mean_vec = upd_elem(mean_vec, lane, alpha);
    }

    for (int px = 0; px < (img_height * img_width); px += 8) // one v8float per pass
        chess_prepare_for_pipelining chess_loop_range(14, ) {
            v8float pixels = *(ptr_in++);
            // Offsets 0x76543210 pair lane k of the input with lane k of mean_vec.
            result = fpsub(pixels, concat(mean_vec, undef_v8float()), 0, 0x76543210);
            *(ptr_out++) = result;
        }
}
/**
 * Clips every element of the input tile between two thresholds.
 *
 * Walks img_height * img_width floats, one v8float per iteration, computing
 * fpmin(th2, fpmax(th1, in)) for each vector.
 *
 * @param ptr_in  Source buffer, read as consecutive v8float vectors.
 * @param ptr_out Destination buffer, written as consecutive v8float vectors.
 * @param th1     Threshold fed to fpmax (acts as the lower bound).
 * @param th2     Threshold fed to fpmin (acts as the upper bound).
 */
void clip_fun(v8float* restrict ptr_in, v8float* restrict ptr_out, int th1, int th2) {
    v8float chess_storage(WR2) lower_vec = null_v8float();
    v8float chess_storage(WR3) upper_vec = null_v8float();
    v8float chess_storage(WD0) floored = null_v8float();
    v8float chess_storage(WD1) clipped = null_v8float();

    // Replicate both thresholds into all eight lanes.
    for (int lane = 0; lane < 8; lane++) {
        lower_vec = upd_elem(lower_vec, lane, th1);
        upper_vec = upd_elem(upper_vec, lane, th2);
    }

    for (int px = 0; px < (img_height * img_width); px += 8) // one v8float per pass
        chess_prepare_for_pipelining chess_loop_range(14, ) {
            v8float pixels = *(ptr_in++);
            floored = fpmax(lower_vec, concat(pixels, undef_v8float()), 0, 0x76543210);
            clipped = fpmin(upper_vec, concat(floored, undef_v8float()), 0, 0x76543210);
            *(ptr_out++) = clipped;
        }
}
/**
 * Scales every element of the input tile and adds a bias.
 *
 * Walks img_height * img_width floats, one v8float per iteration, using a
 * single fpmac with the bias vector as accumulator, the input as first
 * multiplicand and the scale vector as second multiplicand.
 *
 * @param ptr_in  Source buffer, read as consecutive v8float vectors.
 * @param ptr_out Destination buffer, written as consecutive v8float vectors.
 * @param beta    Scale factor replicated into all eight lanes.
 * @param gama    Bias replicated into all eight lanes.
 */
void scale_n_bias_fun(v8float* restrict ptr_in, v8float* restrict ptr_out, float beta, float gama) {
    v8float offset_vec = null_v8float();
    v8float gain_vec = null_v8float();
    v8float chess_storage(WD0) result = null_v8float();

    // Replicate scale and bias into all eight lanes.
    for (int lane = 0; lane < 8; lane++) {
        gain_vec = upd_elem(gain_vec, lane, beta);
        offset_vec = upd_elem(offset_vec, lane, gama);
    }

    for (int px = 0; px < (img_height * img_width); px += 8) // one v8float per pass
        chess_prepare_for_pipelining chess_loop_range(14, ) {
            v8float pixels = *(ptr_in++);
            result = fpmac(offset_vec, concat(pixels, undef_v8float()), 0, 0x76543210, gain_vec, 0, 0x76543210);
            *(ptr_out++) = result;
        }
}
/**
 * Scales every element of the input tile by beta, then clips the scaled
 * values between th1 and th2.
 *
 * Runs two passes over img_height * img_width floats, one v8float per
 * iteration:
 *   pass 1: fpmac(0, in, beta) is written to the output buffer;
 *   pass 2: the output buffer is re-read and fpmin(th2, fpmax(th1, x)) is
 *           written back in place.
 *
 * @param ptr_in  Source buffer, read as consecutive v8float vectors.
 * @param ptr_out Destination buffer; also used as scratch between the passes.
 * @param beta    Scale factor replicated into all eight lanes.
 * @param th1     Threshold fed to fpmax (acts as the lower bound).
 * @param th2     Threshold fed to fpmin (acts as the upper bound).
 */
void scale_n_clip_fun(v8float* restrict ptr_in, v8float* restrict ptr_out, float beta, int th1, int th2) {
    v8float data_buf1 = null_v8float();
    v8float scale = null_v8float();
    // Left all-zero on purpose: it is the accumulator of the fpmac below, so
    // the multiply-accumulate reduces to a plain multiply by the scale.
    v8float bias_acc = null_v8float();
    v8float chess_storage(WR2) thresh1_acc = null_v8float();
    v8float chess_storage(WR3) thresh2_acc = null_v8float();
    v8float chess_storage(WD0) temp_out = null_v8float();
    v8float chess_storage(WD1) data_out = null_v8float();
    // Second cursor over the output buffer, used for the intermediate result.
    // NOTE(review): this aliases ptr_out while both are restrict-qualified;
    // every vector is read before it is overwritten in pass 2 — confirm the
    // toolchain accepts this.
    v8float* restrict ptr_out_temp = ptr_out;

    // Replicate the scale and both thresholds into all eight lanes.
    for (int i = 0; i < 8; i++) {
        scale = upd_elem(scale, i, beta);
        thresh1_acc = upd_elem(thresh1_acc, i, th1);
        thresh2_acc = upd_elem(thresh2_acc, i, th2);
    }

    // Pass 1: scale the input into the output buffer.
    for (int j = 0; j < (img_height * img_width); j += 8) // 8x samples per loop
        chess_prepare_for_pipelining chess_loop_range(14, ) {
            data_buf1 = *(ptr_in++);
            temp_out = fpmac(bias_acc, concat(data_buf1, undef_v8float()), 0, 0x76543210, scale, 0, 0x76543210);
            // Store the scaled vector. The earlier code stored data_out here,
            // which is still all zeros at this point, so pass 2 clipped zeros
            // and the input never reached the output.
            *(ptr_out_temp++) = temp_out;
        }

    // Rewind the scratch cursor to the start of the output buffer.
    ptr_out_temp = ptr_out_temp - ((img_height * img_width) / 8);

    // Pass 2: clip the scaled values in place.
    for (int j = 0; j < (img_height * img_width); j += 8) // 8x samples per loop
        chess_prepare_for_pipelining chess_loop_range(14, ) {
            data_buf1 = *(ptr_out_temp++);
            temp_out = fpmax(thresh1_acc, concat(data_buf1, undef_v8float()), 0, 0x76543210);
            data_out = fpmin(thresh2_acc, concat(temp_out, undef_v8float()), 0, 0x76543210);
            *(ptr_out++) = data_out;
        }
}
/**
 * Subtracts alpha from every element of the input tile, then scales the
 * difference by beta and adds gama.
 *
 * Walks img_height * img_width floats, one v8float per iteration: an fpsub
 * against the alpha vector followed by an fpmac with the bias vector as
 * accumulator and the scale vector as multiplier.
 *
 * @param ptr_in  Source buffer, read as consecutive v8float vectors.
 * @param ptr_out Destination buffer, written as consecutive v8float vectors.
 * @param alpha   Value subtracted first.
 * @param beta    Scale factor applied to the difference.
 * @param gama    Bias added after scaling.
 */
void scale_n_bias_mean_sub_fun(
    v8float* restrict ptr_in, v8float* restrict ptr_out, float alpha, float beta, float gama) {
    v8float chess_storage(WR2) pixels = null_v8float();
    v8float chess_storage(WR3) offset_vec = null_v8float();
    v8float gain_vec = null_v8float();
    v8float mean_vec = null_v8float();
    v8float chess_storage(WD0) centered = null_v8float();
    v8float chess_storage(WD1) result = null_v8float();

    // Replicate the three scalars into all eight lanes.
    for (int lane = 0; lane < 8; lane++) {
        mean_vec = upd_elem(mean_vec, lane, alpha);
        gain_vec = upd_elem(gain_vec, lane, beta);
        offset_vec = upd_elem(offset_vec, lane, gama);
    }

    for (int px = 0; px < (img_height * img_width); px += 8) // one v8float per pass
        chess_prepare_for_pipelining chess_loop_range(14, ) {
            pixels = *(ptr_in++);
            // Offsets argument is 0 here (not 0x76543210); mean_vec holds the
            // same value in every lane.
            centered = fpsub(pixels, concat(mean_vec, undef_v8float()), 0, 0);
            result = fpmac(offset_vec, concat(centered, undef_v8float()), 0, 0x76543210, gain_vec, 0, 0x76543210);
            *(ptr_out++) = result;
        }
}
/**
 * Applies mean subtraction, scale-and-bias, and clipping to the input tile.
 *
 * Runs two passes over img_height * img_width floats, one v8float per
 * iteration:
 *   pass 1: fpsub against alpha, then fpmac with gama as accumulator and beta
 *           as multiplier; the result is written to the output buffer;
 *   pass 2: the output buffer is re-read and fpmin(th2, fpmax(th1, x)) is
 *           written back in place.
 *
 * @param ptr_in  Source buffer, read as consecutive v8float vectors.
 * @param ptr_out Destination buffer; also holds the intermediate result.
 * @param alpha   Value subtracted first.
 * @param beta    Scale factor applied to the difference.
 * @param gama    Bias added after scaling.
 * @param th1     Threshold fed to fpmax (acts as the lower bound).
 * @param th2     Threshold fed to fpmin (acts as the upper bound).
 */
void fused_op_fun(
    v8float* restrict ptr_in, v8float* restrict ptr_out, float alpha, float beta, float gama, int th1, int th2) {
    // Second cursor over the output buffer for the intermediate result.
    v8float* restrict scratch = ptr_out;
    v8float chess_storage(WR2) pixels = null_v8float();
    v8float mean_vec = null_v8float();
    v8float offset_vec = null_v8float();
    v8float gain_vec = null_v8float();
    v8float chess_storage(WD0) stage = null_v8float();
    v8float chess_storage(WD1) result = null_v8float();

    // Replicate the three scalars into all eight lanes.
    for (int lane = 0; lane < 8; lane++) {
        mean_vec = upd_elem(mean_vec, lane, alpha);
        offset_vec = upd_elem(offset_vec, lane, gama);
        gain_vec = upd_elem(gain_vec, lane, beta);
    }

    // Pass 1: (in - alpha) * beta + gama, written to the output buffer.
    for (int px = 0; px < (img_height * img_width); px += 8) // one v8float per pass
        chess_prepare_for_pipelining chess_loop_range(14, ) {
            pixels = *(ptr_in++);
            stage = fpsub(pixels, concat(mean_vec, undef_v8float()), 0, 0);
            result = fpmac(offset_vec, concat(stage, undef_v8float()), 0, 0x76543210, gain_vec, 0, 0x76543210);
            *(scratch++) = result;
        }

    // Rewind the scratch cursor to the start of the output buffer.
    scratch = scratch - ((img_height * img_width) / 8);

    // The threshold vectors are created only now: they are pinned to WR2/WR3,
    // and WR2 is also the storage requested for `pixels` in pass 1.
    v8float chess_storage(WR2) lower_vec = null_v8float();
    v8float chess_storage(WR3) upper_vec = null_v8float();
    for (int lane = 0; lane < 8; lane++) {
        lower_vec = upd_elem(lower_vec, lane, th1);
        upper_vec = upd_elem(upper_vec, lane, th2);
    }

    // Pass 2: clip the intermediate values in place.
    for (int px = 0; px < (img_height * img_width); px += 8) // one v8float per pass
        chess_prepare_for_pipelining chess_loop_range(14, ) {
            v8float reread = *(scratch++);
            stage = fpmax(lower_vec, concat(reread, undef_v8float()), 0, 0x76543210);
            result = fpmin(upper_vec, concat(stage, undef_v8float()), 0, 0x76543210);
            *(ptr_out++) = result;
        }
}
// void blobFromImage( input_window_float * img_in, output_window_float * restrict img_out,float alpha, float beta,
// float gama,int threshold1,int threshold2)
/**
 * Kernel entry point: runs the preprocessing routine selected by OPMODE on
 * one input window and writes the result to the output window.
 *
 * The parameters (alpha, beta, gama and the two thresholds) are fixed inside
 * the function. OPMODE is not defined in this header and must be supplied by
 * the including code as one of the `ops` enumerators.
 *
 * @param img_in  Input window; its `ptr` is read as v8float vectors.
 * @param img_out Output window; its `ptr` is written as v8float vectors.
 */
void blobFromImage_api(input_window_float* img_in, output_window_float* img_out) {
    const float alpha = 0.342;
    const float beta = 0.0039215686274509803921568627451;
    const float gama = 5.2432;
    const int threshold1 = -127;
    const int threshold2 = 127;

    // View both window buffers as arrays of eight-float vectors.
    v8float* restrict ptr_in = (v8float*)img_in->ptr;
    v8float* restrict ptr_out = (v8float*)img_out->ptr;

    switch (OPMODE) {
        case mean_sub:
            mean_subtraction(ptr_in, ptr_out, alpha);
            break;
        case scale_n_clip:
            scale_n_clip_fun(ptr_in, ptr_out, beta, threshold1, threshold2);
            break;
        case clip:
            clip_fun(ptr_in, ptr_out, threshold1, threshold2);
            break;
        case scale_n_bias:
            scale_n_bias_fun(ptr_in, ptr_out, beta, gama);
            break;
        case scale_n_bias_mean_sub:
            scale_n_bias_mean_sub_fun(ptr_in, ptr_out, alpha, beta, gama);
            break;
        case fused_op:
            fused_op_fun(ptr_in, ptr_out, alpha, beta, gama, threshold1, threshold2);
            break;
        default:
            // Any other OPMODE value leaves the output window untouched.
            break;
    }
}
} // aie
} // cv
} // xf
#endif