.. _program_listing_file__tmp_ws_src_vitis_common_include_video_xf_kalmanfilter.hpp: Program Listing for File xf_kalmanfilter.hpp ============================================ |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/video/xf_kalmanfilter.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* * Copyright 2019 Xilinx, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _XF_KALMANFILTER_HPP_ #define _XF_KALMANFILTER_HPP_ #define DEBUG 0 #include "common/xf_common.hpp" #include "ap_int.h" namespace xf { namespace cv { template float KF_dotProduct(float dot_in1[PROC], float dot_in2[PROC]) { // clang-format off #pragma HLS ARRAY_PARTITION variable=dot_in1 complete dim=1 #pragma HLS ARRAY_PARTITION variable=dot_in2 complete dim=1 #pragma HLS inline off // clang-format on enum { TA_L1 = (PROC / 2 + (((PROC % 2) != 0) & (PROC != 1))), TA_L2 = (TA_L1 / 2 + (((TA_L1 % 2) != 0) & (TA_L1 != 1))), TA_L3 = (TA_L2 / 2 + (((TA_L2 % 2) != 0) & (TA_L2 != 1))), TA_L4 = (TA_L3 / 2 + (((TA_L3 % 2) != 0) & (TA_L3 != 1))), TA_L5 = (TA_L4 / 2 + (((TA_L4 % 2) != 0) & (TA_L4 != 1))), TA_L6 = (TA_L5 / 2 + (((TA_L5 % 2) != 0) & (TA_L5 != 1))), TA_L7 = (TA_L6 / 2 + (((TA_L6 % 2) != 0) & (TA_L6 != 1))), TA_L8 = (TA_L7 / 2 + (((TA_L7 % 2) != 0) & (TA_L7 != 1))) }; float mul_out[PROC]; for (ap_uint<10> idx = 0; idx < PROC; idx++) { // clang-format off #pragma HLS unroll // clang-format on mul_out[idx] = 
dot_in1[idx] * dot_in2[idx]; } float add1_out[TA_L1]; float add2_out[TA_L2]; float add3_out[TA_L3]; float add4_out[TA_L4]; float add5_out[TA_L5]; float add6_out[TA_L6]; float add7_out[TA_L7]; float add8_out[TA_L8]; if (TA_L1 != 0) { for (ap_uint<10> idx = 0; idx < TA_L1; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L1 - 1 && PROC % 2 == 1) add1_out[idx] = mul_out[2 * idx]; else add1_out[idx] = mul_out[2 * idx] + mul_out[2 * idx + 1]; } } if (TA_L2 != 0) { for (ap_uint<10> idx = 0; idx < TA_L2; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L2 - 1 && TA_L1 % 2 == 1) add2_out[idx] = add1_out[2 * idx]; else add2_out[idx] = add1_out[2 * idx] + add1_out[2 * idx + 1]; } } if (TA_L3 != 0) { for (ap_uint<10> idx = 0; idx < TA_L3; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L3 - 1 && TA_L2 % 2 == 1) add3_out[idx] = add2_out[2 * idx]; else add3_out[idx] = add2_out[2 * idx] + add2_out[2 * idx + 1]; } } if (TA_L4 != 0) { for (ap_uint<10> idx = 0; idx < TA_L4; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L4 - 1 && TA_L3 % 2 == 1) add4_out[idx] = add3_out[2 * idx]; else add4_out[idx] = add3_out[2 * idx] + add3_out[2 * idx + 1]; } } if (TA_L5 != 0) { for (ap_uint<10> idx = 0; idx < TA_L5; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L5 - 1 && TA_L4 % 2 == 1) add5_out[idx] = add4_out[2 * idx]; else add5_out[idx] = add4_out[2 * idx] + add4_out[2 * idx + 1]; } } if (TA_L6 != 0) { for (ap_uint<10> idx = 0; idx < TA_L6; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L6 - 1 && TA_L5 % 2 == 1) add6_out[idx] = add5_out[2 * idx]; else add6_out[idx] = add5_out[2 * idx] + add5_out[2 * idx + 1]; } } if (TA_L7 != 0) { for (ap_uint<10> idx = 0; idx < TA_L7; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L7 - 1 && TA_L6 % 2 == 1) add7_out[idx] = 
add6_out[2 * idx]; else add7_out[idx] = add6_out[2 * idx] + add6_out[2 * idx + 1]; } } if (TA_L8 != 0) { for (ap_uint<10> idx = 0; idx < TA_L8; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L8 - 1 && TA_L7 % 2 == 1) add8_out[idx] = add7_out[2 * idx]; else add8_out[idx] = add7_out[2 * idx] + add7_out[2 * idx + 1]; } } float add_out; if (TA_L1 == 1) add_out = add1_out[0]; else if (TA_L2 == 1) add_out = add2_out[0]; else if (TA_L3 == 1) add_out = add3_out[0]; else if (TA_L4 == 1) add_out = add4_out[0]; else if (TA_L5 == 1) add_out = add5_out[0]; else if (TA_L6 == 1) add_out = add6_out[0]; else if (TA_L7 == 1) add_out = add7_out[0]; else if (TA_L8 == 1) add_out = add8_out[0]; else add_out = mul_out[0]; return (add_out); } template void KF_treeAdder(float in1[DEPTH], float* output) { // clang-format off #pragma HLS ARRAY_PARTITION variable=in1 complete dim=1 #pragma HLS inline // clang-format on enum { TA_L1 = (DEPTH / 2 + (((DEPTH % 2) != 0) & (DEPTH != 1))), TA_L2 = (TA_L1 / 2 + (((TA_L1 % 2) != 0) & (TA_L1 != 1))), TA_L3 = (TA_L2 / 2 + (((TA_L2 % 2) != 0) & (TA_L2 != 1))), TA_L4 = (TA_L3 / 2 + (((TA_L3 % 2) != 0) & (TA_L3 != 1))), TA_L5 = (TA_L4 / 2 + (((TA_L4 % 2) != 0) & (TA_L4 != 1))), TA_L6 = (TA_L5 / 2 + (((TA_L5 % 2) != 0) & (TA_L5 != 1))), TA_L7 = (TA_L6 / 2 + (((TA_L6 % 2) != 0) & (TA_L6 != 1))), TA_L8 = (TA_L7 / 2 + (((TA_L7 % 2) != 0) & (TA_L7 != 1))) }; float add1_out[TA_L1]; float add2_out[TA_L2]; float add3_out[TA_L3]; float add4_out[TA_L4]; float add5_out[TA_L5]; float add6_out[TA_L6]; float add7_out[TA_L7]; float add8_out[TA_L8]; if (TA_L1 != 0) { for (ap_uint<10> idx = 0; idx < TA_L1; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L1 - 1 && DEPTH % 2 == 1) add1_out[idx] = in1[2 * idx]; else add1_out[idx] = in1[2 * idx] + in1[2 * idx + 1]; } } if (TA_L2 != 0) { for (ap_uint<10> idx = 0; idx < TA_L2; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L2 - 
1 && TA_L1 % 2 == 1) add2_out[idx] = add1_out[2 * idx]; else add2_out[idx] = add1_out[2 * idx] + add1_out[2 * idx + 1]; } } if (TA_L3 != 0) { for (ap_uint<10> idx = 0; idx < TA_L3; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L3 - 1 && TA_L2 % 2 == 1) add3_out[idx] = add2_out[2 * idx]; else add3_out[idx] = add2_out[2 * idx] + add2_out[2 * idx + 1]; } } if (TA_L4 != 0) { for (ap_uint<10> idx = 0; idx < TA_L4; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L4 - 1 && TA_L3 % 2 == 1) add4_out[idx] = add3_out[2 * idx]; else add4_out[idx] = add3_out[2 * idx] + add3_out[2 * idx + 1]; } } if (TA_L5 != 0) { for (ap_uint<10> idx = 0; idx < TA_L5; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L5 - 1 && TA_L4 % 2 == 1) add5_out[idx] = add4_out[2 * idx]; else add5_out[idx] = add4_out[2 * idx] + add4_out[2 * idx + 1]; } } if (TA_L6 != 0) { for (ap_uint<10> idx = 0; idx < TA_L6; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L6 - 1 && TA_L5 % 2 == 1) add6_out[idx] = add5_out[2 * idx]; else add6_out[idx] = add5_out[2 * idx] + add5_out[2 * idx + 1]; } } if (TA_L7 != 0) { for (ap_uint<10> idx = 0; idx < TA_L7; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L7 - 1 && TA_L6 % 2 == 1) add7_out[idx] = add6_out[2 * idx]; else add7_out[idx] = add6_out[2 * idx] + add6_out[2 * idx + 1]; } } if (TA_L8 != 0) { for (ap_uint<10> idx = 0; idx < TA_L8; idx++) { // clang-format off #pragma HLS unroll // clang-format on if (idx == TA_L8 - 1 && TA_L7 % 2 == 1) add8_out[idx] = add7_out[2 * idx]; else add8_out[idx] = add7_out[2 * idx] + add7_out[2 * idx + 1]; } } float add_out; if (TA_L1 == 1) add_out = add1_out[0]; else if (TA_L2 == 1) add_out = add2_out[0]; else if (TA_L3 == 1) add_out = add3_out[0]; else if (TA_L4 == 1) add_out = add4_out[0]; else if (TA_L5 == 1) add_out = add5_out[0]; else if (TA_L6 == 1) add_out = add6_out[0]; 
else if (TA_L7 == 1) add_out = add7_out[0]; else if (TA_L8 == 1) add_out = add8_out[0]; else add_out = in1[0]; *output = (add_out); } template void KF_scaleSub(float in1[PROC], float scale, float in2[PROC], float out[PROC]) { // clang-format off #pragma HLS ARRAY_PARTITION variable=in1 complete dim=1 #pragma HLS ARRAY_PARTITION variable=in2 complete dim=1 #pragma HLS ARRAY_PARTITION variable=out complete dim=1 #pragma HLS inline off // clang-format on float scale_neg = -scale; for (int idx = 0; idx < PROC; idx++) { // clang-format off #pragma HLS unroll // clang-format on out[idx] = in1[idx] + (scale_neg * in2[idx]); } } template void KF_scale(float in[PROC], float scale, float out[PROC]) { // clang-format off #pragma HLS ARRAY_PARTITION variable=in complete dim=1 #pragma HLS ARRAY_PARTITION variable=out complete dim=1 #pragma HLS inline off // clang-format on for (int idx = 0; idx < PROC; idx++) { // clang-format off #pragma HLS unroll // clang-format on out[idx] = scale * in[idx]; } } template void KF_add(float in1[PROC], float in2[PROC], float out[PROC]) { // clang-format off #pragma HLS ARRAY_PARTITION variable=in1 complete dim=1 #pragma HLS ARRAY_PARTITION variable=in2 complete dim=1 #pragma HLS ARRAY_PARTITION variable=out complete dim=1 #pragma HLS inline off // clang-format on for (int idx = 0; idx < PROC; idx++) { // clang-format off #pragma HLS unroll // clang-format on out[idx] = in1[idx] + in2[idx]; } } template void KF_X_write(float xu_vector[512], xf::cv::Mat& Xout_mat) { // clang-format off #pragma HLS inline off // clang-format on for (int ptr = 0; ptr < N_STATE; ptr++) { // clang-format off #pragma HLS pipeline // clang-format on Xout_mat.write_float(ptr, xu_vector[ptr]); } } template void KF_UD_write(float U_matrix[PROC_MU][UMAT_DEPTH], float D_vector[512], xf::cv::Mat& Uout_mat, xf::cv::Mat& Dout_mat) { // clang-format off #pragma HLS inline off // clang-format on ap_uint<32> counter1 = 0; ap_uint<32> counter1_1 = 0; // for dim2 ap_uint<32> 
counter2 = 0; // for dim1 ap_uint<32> counter3 = 0; // for dim2 LOOPI_U: for (int ptr = 0; ptr < N_STATE * N_STATE; ptr++) { // clang-format off #pragma HLS pipeline // clang-format on ap_uint<8> dim1 = counter2; ap_uint<16> dim2 = counter1_1 + counter3; Uout_mat.write_float(ptr, U_matrix[dim1][dim2]); if (counter1 == N_STATE - 1) { if (counter2 == PROC_MU - 1) { counter2 = 0; counter3++; } else { counter2++; } counter1 = 0; counter1_1 = 0; } else { counter1++; counter1_1 += DEPTH_MU; } } for (int ptr = 0; ptr < N_STATE; ptr++) { // clang-format off #pragma HLS pipeline // clang-format on Dout_mat.write_float(ptr, D_vector[ptr]); } } template void MeasUpdate_1x(float Uin_matrix[PROC_MU][UMAT_DEPTH], float Din_vector[N_STATE], float Uout_matrix[PROC_MU][UMAT_DEPTH], float Dout_vector[N_STATE], float xu_vector[512], float h_vector[PROC_MU][DEPTH_MU], float r_value, float z_value, bool UDX_en) { // clang-format off //** comment for Ubar Dbar // f1= h(1) & g1 = f1*D(1) & a0 = r // --------------------------- // | Dbar(1) = D(1)*a0/a1 | Dbar(2) = D(2)*a1/a2 |..| Dbar(n) = D(n)*a(n-1)/a(n) // f1'=f1/a0 | Ubar1= U1-f1'k1 -> f2'=f2/a1 | Ubar2= U2-f2'k2 -> f3'=f3/a2 |..| Ubar(n)= Un-fn'kn -> f(n+1)'=f(n+1)/a(n) // k1 = {0,0..0} | k2 = k1 + g1U1 | k3 = k2 + g2U2 |..| k(n+1) = k(n) + g(n+1)U(n+1)_sv // f2 = U2*h | f3 = U3*h | f4 = U4*h |..| f(n+2) = mulAcc // g2 = f2*D(2) | g3 = f3*D(3) | g4 = f4*D(4) |..| g(n+2) = f(n+2)*g(n+2) // a1 = a0 + f1g1 | a2 = a1 + f2g2 | a3 = a2 + f3g3 |..| a(n+1) = a(n) + f(n+1)g(n+1) //############################################################################################################## // a0 pass | a1 pass | a2 pass |..| // a1 compute/pass | a2 compute/pass | a3 compute/pass |..| //############################################################################################################### // a_prev=a0,a_up=a1 | Dbar(1)=D(1)*a_prev/a_up | Dbar(2)=D(2)*a_prev/a_up |..| Dbar(n)=D(n)*a_prev/a_up // f'=f1/a_prev | Ubar1=U1-f'K 
->f'=f_nex/a_up | Ubar2=U2-f'K -> f'=f/a_up |..| Ubar(n)=U(n)-f'K -> f'=f/a_up // k=k1 | K= K + g*U1 | K= K + g*U2 |..| K= K + g*U2 // g=g1 | /*a1*/a_prev = a_up | /*a2*/a_prev = a_up |..| /*a(n)*/a_prev = a_up // | /*a2*/a_up = a_up +f_nex*g_nex | /*a3*/a_up = a_up + f_nex*g_nex |..| /*a(n+1)*/a_up = a_up +f_nex*g_nex // | g = g_nex | g = g_nex |..| g = g_nex // f=f2 | f_nex = U3*h | f_nex = f4 = U4*h |..| f_nex= f(n+2) // g=g2 | g_nex = g3 = f*D(3) | g_nex = g4 = f4*D(4) |..| g_nex= g(n+2)=D(n+4) //************* // clang-format on if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=Uin_matrix complete dim=1 // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=Uin_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=Uin_matrix complete dim=1 // clang-format on } if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=Uout_matrix complete dim=1 // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=Uout_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=Uout_matrix complete dim=1 // clang-format on } // clang-format off #pragma HLS ARRAY_PARTITION variable=h_vector complete dim=0 #pragma HLS inline off // clang-format on float res = z_value; float f1_value = h_vector[0][0]; float g1_value = f1_value * Din_vector[0]; float alpha_prev = r_value; // alpha0 float alpha_up = r_value + f1_value * g1_value; // alpha1 float f_dash_div = f1_value / r_value; // f1' = f1/alpha0 float f_dash; if (UDX_en == 0) f_dash = 0; else f_dash = f_dash_div; float kg_vector[PROC_MU][DEPTH_MU]; // clang-format off #pragma HLS ARRAY_PARTITION variable=kg_vector complete dim=0 // clang-format on for (int i = 0; i < PROC_MU; i++) { // clang-format off #pragma HLS unroll // clang-format on for (int j = 0; j < DEPTH_MU; j++) { // clang-format off #pragma HLS unroll // clang-format on kg_vector[i][j] = 0; } } float h_value2; if (PROC_MU == 1) h_value2 = h_vector[0][1]; else h_value2 = 
h_vector[1][0]; float f_nex = h_vector[0][0] * Uin_matrix[0][1 * DEPTH_MU] + h_value2; float g_nex = f_nex * Din_vector[1]; float fg_nex = f_nex * g_nex; float g_value = g1_value; LOOP2: for (int state = 0, u_offset = 0; state < N_STATE; state++, u_offset += DEPTH_MU) { // clang-format off #pragma HLS pipeline II=10 // clang-format on if (EKF_EN == 0) { //### needed for X update float hval = h_vector[state % PROC_MU][state / PROC_MU]; res -= hval * xu_vector[state]; } //####Dbar update float Din0 = Din_vector[state]; float Din2 = Din_vector[state + 2]; float alpha_div; if (UDX_en == 0) alpha_div = 1; else alpha_div = alpha_prev / alpha_up; Dout_vector[state] = Din0 * alpha_div; // Read col_j and col_j+2 from U matrix // For timing sake & II , Uin_matrix data is loaded in Uin0_col & Uin2_col float Uin0_col[PROC_MU][DEPTH_MU]; float Uin2_col[PROC_MU][DEPTH_MU]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Uin0_col complete dim=0 #pragma HLS ARRAY_PARTITION variable=Uin2_col complete dim=0 // clang-format on for (int i = 0; i < PROC_MU; i++) { // clang-format off #pragma HLS unroll // clang-format on for (int j = 0; j < DEPTH_MU; j++) { // clang-format off #pragma HLS unroll // clang-format on Uin0_col[i][j] = Uin_matrix[i][j + u_offset]; #if !__SYNTHESIS__ if ((j + u_offset + 2 * DEPTH_MU) < UMAT_DEPTH) Uin2_col[i][j] = Uin_matrix[i][j + u_offset + 2 * DEPTH_MU]; else Uin2_col[i][j] = 0; #else Uin2_col[i][j] = Uin_matrix[i][j + u_offset + 2 * DEPTH_MU]; #endif } } float tmp_kg_vector[PROC_MU][DEPTH_MU]; // clang-format off #pragma HLS ARRAY_PARTITION variable=tmp_kg_vector complete dim=0 // clang-format on for (int i = 0; i < PROC_MU; i++) { // clang-format off #pragma HLS unroll // clang-format on for (int j = 0; j < DEPTH_MU; j++) { // clang-format off #pragma HLS unroll // clang-format on tmp_kg_vector[i][j] = kg_vector[i][j]; } } float Uout_col[PROC_MU][DEPTH_MU]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Uout_col complete dim=0 // 
clang-format on LOOP5: for (ap_uint<8> u_seq = 0, var = 0; u_seq < DEPTH_MU; u_seq++, var += PROC_MU) { // clang-format off #pragma HLS unroll // clang-format on float u_readchunk[PROC_MU]; float k_readchunk[PROC_MU]; // clang-format off #pragma HLS ARRAY_PARTITION variable=u_readchunk complete dim=1 #pragma HLS ARRAY_PARTITION variable=k_readchunk complete dim=1 // clang-format on for (ap_uint<8> loadin = 0; loadin < PROC_MU; loadin++) { // clang-format off #pragma HLS unroll // clang-format on u_readchunk[loadin] = Uin0_col[loadin][u_seq]; k_readchunk[loadin] = tmp_kg_vector[loadin][u_seq]; } float u_writechunk[PROC_MU]; KF_scaleSub(u_readchunk, f_dash, k_readchunk, u_writechunk); for (ap_uint<8> loadin = 0; loadin < PROC_MU; loadin++) { // clang-format off #pragma HLS unroll // clang-format on Uout_col[loadin][u_seq] = u_writechunk[loadin]; } } // u seq loop for (int i = 0; i < PROC_MU; i++) { // clang-format off #pragma HLS unroll // clang-format on for (int j = 0; j < DEPTH_MU; j++) { // clang-format off #pragma HLS unroll // clang-format on Uout_matrix[i][j + u_offset] = Uout_col[i][j]; } } //###f_dash calculation float f_dash_temp = f_nex / alpha_up; if (UDX_en == 0) f_dash = 0; else f_dash = f_dash_temp; //##Update Kalman gain kg_vector float gu_vector[PROC_MU][DEPTH_MU]; LOOP61: for (ap_uint<8> k_seq = 0, var = 0; k_seq < DEPTH_MU; k_seq++, var += PROC_MU) { // clang-format off #pragma HLS unroll // clang-format on float u_readchunk[PROC_MU]; for (ap_uint<8> loadin = 0; loadin < PROC_MU; loadin++) { // clang-format off #pragma HLS unroll // clang-format on u_readchunk[loadin] = Uin0_col[loadin][k_seq]; } float gu_writechunk[PROC_MU]; KF_scale(u_readchunk, g_value, gu_writechunk); for (ap_uint<8> loadin = 0; loadin < PROC_MU; loadin++) { // clang-format off #pragma HLS unroll // clang-format on gu_vector[loadin][k_seq] = gu_writechunk[loadin]; } } // k seq loop LOOP62: for (ap_uint<8> k_seq = 0, var = 0; k_seq < DEPTH_MU; k_seq++, var += PROC_MU) { // 
clang-format off #pragma HLS unroll // clang-format on float k_readchunk[PROC_MU]; float gu_readchunk[PROC_MU]; for (ap_uint<8> loadin = 0; loadin < PROC_MU; loadin++) { // clang-format off #pragma HLS unroll // clang-format on k_readchunk[loadin] = tmp_kg_vector[loadin][k_seq]; gu_readchunk[loadin] = gu_vector[loadin][k_seq]; } float k_writechunk[PROC_MU]; KF_add(k_readchunk, gu_readchunk, k_writechunk); for (ap_uint<8> loadin = 0; loadin < PROC_MU; loadin++) { // clang-format off #pragma HLS unroll // clang-format on kg_vector[loadin][k_seq] = k_writechunk[loadin]; } } // k seq loop //### update alpha alpha_prev = alpha_up; alpha_up = alpha_up + f_nex * g_nex; //### f and g calculation g_value = g_nex; float dot_out[DEPTH_MU]; // clang-format off #pragma HLS ARRAY_PARTITION variable=dot_out complete dim=1 // clang-format on LOOP7: for (ap_uint<10> dot_seq = 0, var = 0; dot_seq < DEPTH_MU; dot_seq++, var += PROC_MU) { // clang-format off #pragma HLS unroll // clang-format on float dot_in1[PROC_MU]; float dot_in2[PROC_MU]; // clang-format off #pragma HLS ARRAY_PARTITION variable=dot_in1 complete dim=1 #pragma HLS ARRAY_PARTITION variable=dot_in2 complete dim=1 // clang-format on for (ap_uint<10> loadin = 0; loadin < PROC_MU; loadin++) { // clang-format off #pragma HLS unroll // clang-format on dot_in1[loadin] = Uin2_col[loadin][dot_seq]; dot_in2[loadin] = h_vector[loadin][dot_seq]; } // loadin loop dot_out[dot_seq] = KF_dotProduct(dot_in1, dot_in2); } // dot seq loop float tmp_ta; KF_treeAdder(dot_out, &tmp_ta); f_nex = tmp_ta; g_nex = tmp_ta * Din2; } // state loop for (ap_uint<8> x_update = 0; x_update < N_STATE; x_update++) { // clang-format off #pragma HLS pipeline // clang-format on float kg_temp = kg_vector[x_update % PROC_MU][x_update / PROC_MU] / alpha_prev; float kg; if (UDX_en == 0) kg = 0; else kg = kg_temp; xu_vector[x_update] = xu_vector[x_update] + kg * res; Dout_vector[x_update + N_STATE] = Dout_vector[x_update]; } } template void load_Uq(float 
T_matrix[PROC_TU][TMAT_DEPTH], float Uq_matrix[UQMAT_DEPTH]) { if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=T_matrix complete dim=1 #pragma HLS resource variable=T_matrix core=RAM_S2P_BRAM // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=T_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=T_matrix complete dim=1 // clang-format on } if (URAM_EN == 1) { // clang-format off #pragma HLS RESOURCE variable=Uq_matrix core=RAM_S2P_URAM // clang-format on } // clang-format off #pragma HLS inline off // clang-format on ap_uint<16> counter_trow = 0; ap_uint<32> offset_inc = N_STATE; LOOPI_UQ: for (int ptr = 0; ptr < N_STATE * N_STATE; ptr++) { // clang-format off #pragma HLS pipeline // clang-format on ap_uint<32> offset = offset_inc + counter_trow; ap_uint<8> dim1 = offset % PROC_TU; ap_uint<16> dim2 = offset / PROC_TU; T_matrix[dim1][dim2] = Uq_matrix[ptr]; if (counter_trow == N_STATE - 1) { counter_trow = 0; offset_inc += DEPTH_TU * PROC_TU; } else counter_trow++; } } /* MeasUpdate: measurement-update stage. Inputs are loaded first: when EKF_EN == 0, M_MEAS values are read from y_mat into ry_vector[M_MEAS + i]; otherwise one value each is read from y_mat (Zekf), from u_mat (hx, only when KF_C != 0) and from R_mat (Rekf), and N_STATE values of H_mat are scattered into H_matrix[ptr % PROC_MU][ptr / PROC_MU]. MeasUpdate_1x is then called once per loop pass, ping-ponging between (U_matrix, D_vector) and the local (Uint_matrix, Dint_vector) via the flip flag. The pass count is M_MEAS rounded up to an even number (2 when EKF_EN == 1), so the last pass always writes back into U_matrix / D_vector; the padding pass runs with UDX_en = 0. Finally xu_vector is written to Xout_mat when X_write_en is set, and U_matrix / D_vector are written to Uout_mat / Dout_mat when UD_write_en is set. */ template void MeasUpdate(float U_matrix[PROC_MU][UMAT_DEPTH], float H_matrix[PROC_MU][HMAT_DEPTH], float D_vector[512], float xu_vector[512], float ry_vector[512], #if KF_C != 0 xf::cv::Mat& u_mat, #endif xf::cv::Mat& y_mat, xf::cv::Mat& R_mat, xf::cv::Mat& H_mat, xf::cv::Mat& Xout_mat, xf::cv::Mat& Uout_mat, xf::cv::Mat& Dout_mat, bool X_write_en, bool UD_write_en) { if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=U_matrix complete dim=1 // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=U_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=U_matrix complete dim=1 // clang-format on } if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=H_matrix complete dim=1 // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=H_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=H_matrix complete dim=1 // clang-format on } // 
clang-format off #pragma HLS inline off // clang-format on enum { M_MEAS_align2 = (M_MEAS + (M_MEAS % 2)) }; float Uint_matrix[PROC_MU][UMAT_DEPTH]; if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=Uint_matrix complete dim=1 // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=Uint_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=Uint_matrix complete dim=1 // clang-format on } float Dint_vector[512]; ap_uint<8> meas_index; if (EKF_EN == 1) meas_index = xu_vector[511]; else meas_index = 0; /* hx is loaded from u_mat only when KF_C != 0, yet the EKF branch below always evaluates Zekf - hx; zero-initialise all three scalars so that expression never reads an indeterminate value. */ float hx = 0.0f, Zekf = 0.0f, Rekf = 0.0f; //##### Read Y measurements if (EKF_EN == 0) { LOOP1: for (ap_uint<8> ddr_ptr = 0; ddr_ptr < M_MEAS; ddr_ptr++) { // clang-format off #pragma HLS pipeline // clang-format on ry_vector[ddr_ptr + M_MEAS] = y_mat.read_float(ddr_ptr); } } else { Zekf = y_mat.read_float(0); #if KF_C != 0 hx = u_mat.read_float(0); #endif Rekf = R_mat.read_float(0); ap_uint<32> offset_incH = 0; LOOPI_H: for (int ptr = 0; ptr < N_STATE; ptr++) { // clang-format off #pragma HLS pipeline // clang-format on ap_uint<10> dim1 = ptr % PROC_MU; ap_uint<16> dim2 = ptr / PROC_MU; H_matrix[dim1][dim2] = H_mat.read_float(ptr); } } bool flip = 0; ap_uint<8> meas_loop_cnt; if (EKF_EN == 1) meas_loop_cnt = 2; else meas_loop_cnt = M_MEAS_align2; LOOP2: for (ap_uint<8> meas = 0; meas < meas_loop_cnt; meas++) { bool UDX_en; if (EKF_EN == 0) { if (meas == M_MEAS) UDX_en = 0; else UDX_en = 1; } else { if (meas == 1) UDX_en = 0; else UDX_en = 1; } float h_vector[PROC_MU][DEPTH_MU]; // clang-format off #pragma HLS ARRAY_PARTITION variable=h_vector complete dim=0 // clang-format on LOOPHM: for (ap_uint<8> i = 0; i < DEPTH_MU; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS pipeline // clang-format on for (ap_uint<8> j = 0; j < PROC_MU; j++) { // clang-format off #pragma HLS unroll // clang-format on h_vector[j][i] = H_matrix[j][meas * DEPTH_MU + i]; } } float r_value; // = ry_vector[meas]; float z_value; if (EKF_EN == 0) { 
z_value = ry_vector[meas + M_MEAS]; r_value = ry_vector[meas]; } else { z_value = Zekf - hx; r_value = Rekf; } if (flip == 0) { MeasUpdate_1x( U_matrix, D_vector, Uint_matrix, Dint_vector, xu_vector, h_vector, r_value, z_value, UDX_en); flip = 1; } else { MeasUpdate_1x( Uint_matrix, Dint_vector, U_matrix, D_vector, xu_vector, h_vector, r_value, z_value, UDX_en); flip = 0; } } //###### Write X corrected state vector if (X_write_en) KF_X_write(xu_vector, Xout_mat); //###### Write P corrected state vector if (UD_write_en) KF_UD_write(U_matrix, D_vector, Uout_mat, Dout_mat); } template void MeasUpdate_wrapper(float U_matrix[PROC_MU][UMAT_DEPTH], float H_matrix[PROC_MU][HMAT_DEPTH], float D_vector[512], float xu_vector[512], float ry_vector[512], float T_matrix[PROC_TU][TMAT_DEPTH], float Uq_matrix[UQMAT_DEPTH], #if KF_C != 0 xf::cv::Mat& u_mat, #endif xf::cv::Mat& y_mat, xf::cv::Mat& R_mat, xf::cv::Mat& H_mat, xf::cv::Mat& Xout_mat, xf::cv::Mat& Uout_mat, xf::cv::Mat& Dout_mat, bool X_write_en, bool UD_write_en) { if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=U_matrix complete dim=1 // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=U_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=U_matrix complete dim=1 // clang-format on } if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=H_matrix complete dim=1 // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=H_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=H_matrix complete dim=1 // clang-format on } if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=T_matrix complete dim=1 #pragma HLS resource variable=T_matrix core=RAM_S2P_BRAM // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=T_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=T_matrix complete dim=1 // clang-format on } // clang-format off #pragma HLS inline off // clang-format 
on LOOP1: for (int itr1 = 0; itr1 < 1; itr1++) { load_Uq(T_matrix, Uq_matrix); MeasUpdate( U_matrix, H_matrix, D_vector, xu_vector, ry_vector, #if KF_C != 0 u_mat, #endif y_mat, R_mat, H_mat, Xout_mat, Uout_mat, Dout_mat, X_write_en, UD_write_en); } } template void load_control_input( #if KF_C != 0 xf::cv::Mat& control_input, #endif float xu_vector[512]) { for (ap_uint<8> idx = 0; idx < U_SIZE; idx++) { // clang-format off #pragma HLS pipeline // clang-format on #if KF_C != 0 xu_vector[N_STATE + idx] = control_input.read_float(idx); #endif } } //########################################################################################// // For gemv operation, //... // 0__ 4__ 8 __ 11__ a| 0a 4b 8c // 12d 1__ 5__ 9 __ 12__ b| 1a 5b 9c // 13d // A = 2__ 6__ 10__ 13__ X = c| 1st = 2a 2nd += 6b 3rd += 10c 4th += 14d // 3__ 7__ 11__ 14__ d| 3a 7b 11c // 15d //##########################################################################################// // for x' = Ax, x_buffer[256-383] = A_buffer*x_buffer[0-127] // for x_tu = Bu, x_buffer[0-127] = B_buffer*x_buffer[256-383] //##########################################################################################// template void gemv(float AB_matrix[PROC_MU][ABMAT_DEPTH], float xu_vector[512], ap_uint<16> matrix_offset, ap_uint<10> vector_offset_in, ap_uint<10> vector_offset_out, ap_uint<8> outer_loop_bound) { // clang-format off #pragma HLS inline off // clang-format on // New Gemv design with 1 multipliers and 1 adders LOOP1: for (ap_uint<8> outer_loop = 0; outer_loop < outer_loop_bound; outer_loop++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=128 max=128 // clang-format on float input_x = xu_vector[vector_offset_in + outer_loop]; ap_uint<16> buffer_idx = 0; ap_uint<16> idx_inc; if (vector_offset_in == 0) idx_inc = DEPTH_MU; else idx_inc = DEPTH_MU_CTRL; LOOPF1: for (ap_uint<10> inner_loop = 0; inner_loop < N_STATE; inner_loop++) { // clang-format off #pragma HLS loop_flatten off #pragma HLS DEPENDENCE 
variable=xu_vector inter false #pragma HLS pipeline // clang-format on float input_A = AB_matrix[outer_loop % PROC_MU][matrix_offset + buffer_idx + outer_loop / PROC_MU]; float mul_out = input_A * input_x; ap_uint<32> offset; if (vector_offset_in != 0 && outer_loop != 0) offset = 0; else offset = 256; float intermediate_x = xu_vector[inner_loop + offset]; float add_input; if (vector_offset_in == 0 && outer_loop == 0) add_input = 0; else add_input = intermediate_x; float add_in2; if (C_CTRL == 0) { if (vector_offset_in != 0) add_in2 = 0; else add_in2 = mul_out; } else { add_in2 = mul_out; } xu_vector[inner_loop + vector_offset_out] = add_input + add_in2; buffer_idx += idx_inc; } // end proc loop } // end outer loop } template void state_predict(float AB_matrix[PROC_MU][ABMAT_DEPTH], float xu_vector[512]) { // clang-format off #pragma HLS inline off // clang-format on for (ap_uint<2> iteration = 0; iteration < 2; iteration++) { ap_uint<16> matrix_offset; ap_uint<10> vector_offset_in; ap_uint<10> vector_offset_out; ap_uint<8> outer_loop_bound; if (iteration == 0) { matrix_offset = 0; vector_offset_in = 0; vector_offset_out = 256; outer_loop_bound = N_STATE; } else { matrix_offset = UMAT_DEPTH; vector_offset_in = N_STATE; vector_offset_out = 0; if (C_CTRL == 0) outer_loop_bound = 1; else outer_loop_bound = C_CTRL; } gemv( AB_matrix, xu_vector, matrix_offset, vector_offset_in, vector_offset_out, outer_loop_bound); } } template void gemm_update(float AB_matrix[PROC_MU][ABMAT_DEPTH], float U_matrix[PROC_MU][UMAT_DEPTH], float T_matrix[PROC_TU][TMAT_DEPTH], ap_uint<10> out_col_start, ap_uint<10> out_col_cnt, ap_uint<10> iteration) { if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=U_matrix complete dim=1 // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=U_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=U_matrix complete dim=1 // clang-format on } if (URAM_EN == 0) { // clang-format off #pragma HLS 
ARRAY_PARTITION variable=AB_matrix complete dim=1 // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=AB_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=AB_matrix complete dim=1 // clang-format on } if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=T_matrix complete dim=1 #pragma HLS resource variable=T_matrix core=RAM_S2P_BRAM // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=T_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=T_matrix complete dim=1 // clang-format on } // clang-format off #pragma HLS inline off // clang-format on LOOP2: for (ap_uint<10> out_row0 = 0; out_row0 < N_STATE; out_row0++) { LOOP3: for (ap_uint<10> out_col_idx = 0; out_col_idx < out_col_cnt; out_col_idx++) { // clang-format off #pragma HLS loop_flatten #pragma HLS LOOP_TRIPCOUNT min=128 max=128 #pragma HLS DEPENDENCE variable=T_matrix inter false #pragma HLS pipeline // clang-format on ap_uint<8> out_col = out_col_start + out_col_idx; ap_uint<20> out_index0 = out_row0 * (DEPTH_TU * PROC_TU) + out_col; ap_uint<8> dim1_0_Tmatrix = out_index0 % PROC_TU; ap_uint<16> dim2_0_Tmatrix = out_index0 / PROC_TU; ap_uint<16> dim2_0_Amatrix = out_row0 * (DEPTH_MU) + iteration * 2; ap_uint<16> dim2_1_Amatrix = dim2_0_Amatrix + 1; ap_uint<16> dim2_0_Umatrix = out_col * (DEPTH_MU) + iteration * 2; ap_uint<16> dim2_1_Umatrix = dim2_0_Umatrix + 1; bool pad_en = (2 * iteration + 2) > DEPTH_MU; float input1_dotproduct[PROC_MU * 2]; float input2_dotproduct[PROC_MU * 2]; for (ap_uint<8> idx = 0; idx < PROC_MU; idx++) { // clang-format off #pragma HLS unroll // clang-format on input1_dotproduct[idx] = AB_matrix[idx][dim2_0_Amatrix]; if (pad_en) input1_dotproduct[idx + PROC_MU] = 0; else input1_dotproduct[idx + PROC_MU] = AB_matrix[idx][dim2_1_Amatrix]; input2_dotproduct[idx] = U_matrix[idx][dim2_0_Umatrix]; input2_dotproduct[idx + PROC_MU] = U_matrix[idx][dim2_1_Umatrix]; } float dot_output = KF_dotProduct<2 
* PROC_MU>(input1_dotproduct, input2_dotproduct); float read1 = T_matrix[dim1_0_Tmatrix][dim2_0_Tmatrix]; float write1; if (iteration == 0) write1 = dot_output; else write1 = read1 + dot_output; T_matrix[dim1_0_Tmatrix][dim2_0_Tmatrix] = write1; } } } template void AU_compute(float AB_matrix[PROC_MU][ABMAT_DEPTH], float U_matrix[PROC_MU][UMAT_DEPTH], float T_matrix[PROC_TU][TMAT_DEPTH]) { if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=U_matrix complete dim=1 // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=U_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=U_matrix complete dim=1 // clang-format on } if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=AB_matrix complete dim=1 // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=AB_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=AB_matrix complete dim=1 // clang-format on } if (URAM_EN == 0) { // clang-format off #pragma HLS ARRAY_PARTITION variable=T_matrix complete dim=1 #pragma HLS resource variable=T_matrix core=RAM_S2P_BRAM // clang-format on } else { // clang-format off #pragma HLS RESOURCE variable=T_matrix core=RAM_S2P_URAM #pragma HLS ARRAY_RESHAPE variable=T_matrix complete dim=1 // clang-format on } // clang-format off #pragma HLS inline off // clang-format on enum { GEMM_ITERATION = ((DEPTH_MU / 2) + (DEPTH_MU % 2)) }; LOOP1: for (ap_uint<10> iteration = 0, out_col_start = 0, out_col_cnt = N_STATE; iteration < GEMM_ITERATION; iteration++, out_col_start += (2 * PROC_MU), out_col_cnt -= (2 * PROC_MU)) { gemm_update( AB_matrix, U_matrix, T_matrix, out_col_start, out_col_cnt, iteration); } } // In update_T_matrix function... 
//
// for j= n..1
//{
//   Dbar[j] = trans(t[j]) * DpDq * t[j]; //Delta[j] = DpDq * t[j]
//
//   for i= 1..j-1
//   {
//      Ubar[i,j] = trans(t[i]) * Delta[j] / Dbar[j]
//      t[i] = t[i] - Ubar[i,j]*t[j]
//   }
//}
//
// Above pseudo code is modified as below
//
// for j= n..1
//{
//   for i= j..1
//   {
//      Delta[j] = DpDq * t[j]
//      Udash = dotproduct(trans(t[i]) ,Delta[j])
//      if(i=j)
//         Dbar[j] = Udash
//      Ubar[i,j] = Udash / Dbar[j]
//      t[i] = t[i] - Ubar[i,j]*t[j]
//   }
//}
//
// update_T_matrix: for one column index u_col_num, walks the rows of T_matrix from u_col_num
// down to 0 (plus one trailing pass), producing D_vector[u_col_num], the U_matrix entries for
// that column, and the updated T_matrix rows.
//
// NOTE(review): the template parameter list and the explicit template arguments of the
// KF_dotProduct / KF_treeAdder / KF_scaleSub calls appear to have been stripped from this
// listing; PROC_TU, PROC_MU, DEPTH_TU, DEPTH_MU, URAM_EN and the *_DEPTH bounds are presumably
// template parameters - confirm against the real header.
//
// Parameters:
//   Tj_vector     - row u_col_num of T_matrix, as filled by load_TjDeltaj_vector (read only).
//   Deltaj_vector - the same row scaled element-wise by D_vector entries (read only).
//   T_matrix      - read one row per outer iteration; rows below u_col_num are rewritten.
//   U_matrix      - receives Un_value at [u_row_num % PROC_MU][u_col_num * DEPTH_MU + u_row_num / PROC_MU].
//   D_vector      - entry u_col_num is overwritten with the dot product of row u_col_num with
//                   Deltaj_vector.
//   u_col_num     - column (j in the pseudo code above) being processed.
template void update_T_matrix(float Tj_vector[PROC_TU][DPDQ_DEPTH],
                              float Deltaj_vector[PROC_TU][DPDQ_DEPTH],
                              float T_matrix[PROC_TU][TMAT_DEPTH],
                              float U_matrix[PROC_MU][UMAT_DEPTH],
                              float D_vector[512],
                              ap_uint<10> u_col_num) {
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=T_matrix complete dim=1
#pragma HLS resource variable=T_matrix core=RAM_S2P_BRAM
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=T_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=T_matrix complete dim=1
        // clang-format on
    }
// clang-format off
#pragma HLS inline off
    // clang-format on

    // Dn_value holds the value latched for row u_col_num; every Un_value is divided by it.
    float Dn_value = 0;

    // One partial dot product per depth chunk; summed by KF_treeAdder on every inner iteration.
    float dotOutInt_ti_Deltaj[DEPTH_TU];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=dotOutInt_ti_Deltaj complete dim=1
    // clang-format on
    for (int i = 0; i < DEPTH_TU; i++) {
// clang-format off
#pragma HLS unroll
        // clang-format on
        dotOutInt_ti_Deltaj[i] = 0;
    }

    float Un_dash = 0;    // full-row dot product, latched on the last depth chunk
    float U_value_in = 0; // Un_value of the previous row, fed to KF_scaleSub

    // Two row buffers: the row read in this outer iteration goes into one while the row read in
    // the previous outer iteration is taken from the other (selected by bit 0 of `start`).
    float Ti_ping[PROC_TU][DEPTH_TU];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Ti_ping complete dim=0
    // clang-format on
    float Ti_pong[PROC_TU][DEPTH_TU];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Ti_pong complete dim=0
    // clang-format on

    //#########################
    // 1st iteration of LOOPM_1, T matrix's rows will not be updated
    // since this loop is running in ping-pong, 1st iteration will be idle for U_value and T matrix
    // After 1st iteration, T matrix row index = u_row_num+1
    //
    // The loop runs down to u_row_num == -1 so that the row buffered during the u_row_num == 0
    // pass still gets its write-back (to row index u_row_num + 1 == 0).
LOOPM_1:
    for (ap_int<16> u_row_num = u_col_num, start = 0; u_row_num >= -1; u_row_num--, start++) {
    LOOPM_2:
        for (ap_uint<10> depth_num = 0; depth_num < DEPTH_TU; depth_num++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=128*16 max=128*16
#pragma HLS DEPENDENCE variable=T_matrix inter false
#pragma HLS pipeline
            // clang-format on
            // index_num  : location of the chunk read in this iteration (row u_row_num).
            // index_num2 : location of the chunk written in this iteration (row u_row_num + 1).
            ap_uint<16> index_num;
            ap_uint<16> index_num2;
            index_num = u_row_num * DEPTH_TU + depth_num;
            index_num2 = (u_row_num + 1) * DEPTH_TU + depth_num;

            float Ti_chunk[PROC_TU];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Ti_chunk complete dim=1
            // clang-format on
            for (ap_uint<10> idx = 0; idx < PROC_TU; idx++) {
// clang-format off
#pragma HLS unroll
// clang-format on
// In non-synthesis builds the trailing pass (u_row_num == -1) substitutes 0 instead of reading
// T_matrix, because index_num was computed from a negative row there. The synthesis path reads
// unconditionally.
#if !__SYNTHESIS__
                float T_mat_read;
                if (u_row_num == -1)
                    T_mat_read = 0;
                else {
                    T_mat_read = T_matrix[idx][index_num];
                }
#else
                float T_mat_read = T_matrix[idx][index_num];
#endif
                Ti_chunk[idx] = T_mat_read;
                // Buffer the freshly read chunk; parity of `start` picks the buffer.
                if (start[0] == 0)
                    Ti_ping[idx][depth_num] = T_mat_read;
                else
                    Ti_pong[idx][depth_num] = T_mat_read;
            } // idx loop

            float Tj_for_delta[PROC_TU];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Tj_for_delta complete dim=1
            // clang-format on
            float deltaj_chunk[PROC_TU];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=deltaj_chunk complete dim=1
            // clang-format on
            for (ap_uint<10> idx = 0; idx < PROC_TU; idx++) {
// clang-format off
#pragma HLS unroll
                // clang-format on
                Tj_for_delta[idx] = Tj_vector[idx][depth_num];
                deltaj_chunk[idx] = Deltaj_vector[idx][depth_num];
            } // idx loop

            // Partial dot product of this chunk of row u_row_num with the matching Delta chunk.
            float temp_dotout;
            temp_dotout = KF_dotProduct(Ti_chunk, deltaj_chunk);
            dotOutInt_ti_Deltaj[depth_num] = temp_dotout;

            // The adder runs on every chunk, but its result is only latched on the last chunk,
            // when all DEPTH_TU partial sums belong to the current row.
            float Un_dash_temp;
            KF_treeAdder(dotOutInt_ti_Deltaj, &Un_dash_temp);
            if ((depth_num) == (DEPTH_TU - 1)) Un_dash = Un_dash_temp;

            // First outer pass (row == column): the full dot product becomes the divisor for the
            // remaining rows and is stored as D_vector[u_col_num].
            if (u_row_num == u_col_num && (depth_num) == (DEPTH_TU - 1)) {
                Dn_value = Un_dash;
                D_vector[u_col_num] = Un_dash;
            }

            float Un_value = Un_dash / Dn_value;
            ap_uint<8> dim1_Umat = u_row_num % PROC_MU;
            ap_uint<16> dim2_Umat = u_col_num * (DEPTH_MU) + u_row_num / PROC_MU;
            // Written on every chunk; the value stored on the last chunk is the one that remains.
            if (u_row_num != -1) {
                U_matrix[dim1_Umat][dim2_Umat] = Un_value;
            }

            // Fetch the chunk buffered during the previous outer pass (opposite buffer to the one
            // written above).
            float Ti_select[PROC_TU];
            for (ap_uint<10> idx = 0; idx < PROC_TU; idx++) {
                if (start[0] == 1)
                    Ti_select[idx] = Ti_ping[idx][depth_num];
                else
                    Ti_select[idx] = Ti_pong[idx][depth_num];
            }

            // NOTE(review): KF_scaleSub is not visible here; per the pseudo code it presumably
            // computes Ti_select - U_value_in * Tj_for_delta - confirm.
            float Ti_update_chunk[PROC_TU];
            KF_scaleSub(Ti_select, U_value_in, Tj_for_delta, Ti_update_chunk);
            for (ap_uint<10> idx = 0; idx < PROC_TU; idx++) {
// clang-format off
#pragma HLS unroll
                // clang-format on
                // No write-back on the first outer pass: nothing has been buffered yet.
                if (u_row_num != u_col_num) {
                    T_matrix[idx][index_num2] = Ti_update_chunk[idx];
                }
            } // idx loop

            // Hand this row's final U value to the next outer pass.
            if (depth_num == DEPTH_TU - 1) U_value_in = Un_value;
        } // depth_num
    }     // u_row_num
}

//####Load 1 ROW from T_matrix into Tj_vector. ROW id = u_col_num
//
// load_TjDeltaj_vector: copies row u_col_num of T_matrix into Tj_vector and stores the same
// row, multiplied element-wise by consecutive D_vector entries starting at index N_STATE, into
// Deltaj_vector.
//
// NOTE(review): the template parameter list appears to have been stripped from this listing;
// N_STATE, PROC_TU, DEPTH_TU and URAM_EN are presumably template parameters - confirm.
//
// Parameters:
//   T_matrix      - source; DEPTH_TU chunks of PROC_TU values are read starting at
//                   u_col_num * DEPTH_TU.
//   Tj_vector     - output: unscaled copy of the row.
//   Deltaj_vector - output: row value times D_vector[N_STATE + k], where k counts the elements
//                   in the order they are visited (idx2 fastest).
//   D_vector      - scale factors (read only here).
//   u_col_num     - row of T_matrix to load.
template void load_TjDeltaj_vector(float T_matrix[PROC_TU][TMAT_DEPTH],
                                   float Tj_vector[PROC_TU][DPDQ_DEPTH],
                                   float Deltaj_vector[PROC_TU][DPDQ_DEPTH],
                                   float D_vector[512],
                                   ap_uint<10> u_col_num) {
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=T_matrix complete dim=1
#pragma HLS resource variable=T_matrix core=RAM_S2P_BRAM
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=T_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=T_matrix complete dim=1
        // clang-format on
    }
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Tj_vector complete dim=1
#pragma HLS ARRAY_PARTITION variable=Deltaj_vector complete dim=1
#pragma HLS inline off
    // clang-format on

    // Running read index into D_vector; starts at N_STATE and advances once per element.
    ap_uint<10> dim1_D = N_STATE;
    for (ap_uint<14> idx1 = 0; idx1 < (DEPTH_TU); idx1++) {
        for (ap_uint<8> idx2 = 0; idx2 < PROC_TU; idx2++) {
// clang-format off
#pragma HLS pipeline
            // clang-format on
            ap_uint<16> dim2 = (idx1 + u_col_num * DEPTH_TU);
            float T_value = T_matrix[idx2][dim2];
            float D_value = D_vector[dim1_D++];
            Deltaj_vector[idx2][idx1] = T_value * D_value;
            Tj_vector[idx2][idx1] = T_value;
        }
    }
}

// UD_compute: for every column index from N_STATE - 1 down to 0, loads the matching T_matrix
// row (load_TjDeltaj_vector) and then runs update_T_matrix for that column, which fills
// U_matrix and D_vector and rewrites the lower rows of T_matrix.
//
// NOTE(review): the template parameter list and the explicit template arguments of the two
// calls appear to have been stripped from this listing - confirm against the real header.
//
// Parameters:
//   T_matrix - working matrix, read and modified in place.
//   U_matrix - output of update_T_matrix.
//   D_vector - read by load_TjDeltaj_vector, written by update_T_matrix.
template void UD_compute(float T_matrix[PROC_TU][TMAT_DEPTH],
                         float U_matrix[PROC_MU][UMAT_DEPTH],
                         float D_vector[512]) {
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=U_matrix complete dim=1
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=U_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=U_matrix complete dim=1
        // clang-format on
    }
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=T_matrix complete dim=1
#pragma HLS resource variable=T_matrix core=RAM_S2P_BRAM
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=T_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=T_matrix complete dim=1
        // clang-format on
    }
// clang-format off
#pragma HLS inline off
    // clang-format on

    // Scratch copies of the current row (plain and D-scaled), refreshed for every column.
    float Tj_vector[PROC_TU][DPDQ_DEPTH];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Tj_vector complete dim=1
    // clang-format on
    float Deltaj_vector[PROC_TU][DPDQ_DEPTH];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Deltaj_vector complete dim=1
    // clang-format on

    // Signed counter so that the `>= 0` test terminates after column 0.
    for (ap_int<10> u_col_num = N_STATE - 1; u_col_num >= 0; u_col_num--) {
        load_TjDeltaj_vector( T_matrix, Tj_vector, Deltaj_vector, D_vector, u_col_num);

        update_T_matrix( Tj_vector, Deltaj_vector, T_matrix, U_matrix, D_vector, u_col_num);
    }
}

// TimeUpdate: runs the time-update stage - AU_compute followed by UD_compute - and optionally
// writes the state and the U/D factors to the output matrices.
//
// When EKF_EN == 0 it additionally calls load_control_input (only if KF_C != 0) before
// AU_compute and state_predict before UD_compute; both are skipped when EKF_EN != 0.
//
// NOTE(review): the template parameter list and all explicit template arguments (including
// those of xf::cv::Mat) appear to have been stripped from this listing - confirm.
//
// Parameters:
//   T_matrix, AB_matrix, U_matrix, D_vector - working storage handed to the compute stages.
//   xu_vector   - passed to load_control_input / state_predict and to KF_X_write.
//   u_mat       - control input matrix (only present when KF_C != 0).
//   Xout_mat    - destination of KF_X_write.
//   Uout_mat, Dout_mat - destinations of KF_UD_write.
//   X_write_en  - when true, xu_vector is written to Xout_mat.
//   UD_write_en - when true, U_matrix / D_vector are written to Uout_mat / Dout_mat.
template void TimeUpdate(float T_matrix[PROC_TU][TMAT_DEPTH],
                         float AB_matrix[PROC_MU][ABMAT_DEPTH],
                         float xu_vector[512],
                         float U_matrix[PROC_MU][UMAT_DEPTH],
                         float D_vector[512],
#if KF_C != 0
                         xf::cv::Mat& u_mat,
#endif
                         xf::cv::Mat& Xout_mat,
                         xf::cv::Mat& Uout_mat,
                         xf::cv::Mat& Dout_mat,
                         bool X_write_en,
                         bool UD_write_en) {
// clang-format off
#pragma HLS inline off
    // clang-format on

// Both loops below execute exactly once.
// NOTE(review): the single-trip loops presumably exist to give the HLS tool distinct loop
// regions - confirm before simplifying.
LOOP1:
    for (int itr1 = 0; itr1 < 1; itr1++) {
#if KF_C != 0
        if (EKF_EN == 0) load_control_input(u_mat, xu_vector);
#endif

        AU_compute( AB_matrix, U_matrix, T_matrix);
    }

LOOP2:
    for (int itr1 = 0; itr1 < 1; itr1++) {
        if (EKF_EN == 0) state_predict(AB_matrix, xu_vector);

        UD_compute( T_matrix, U_matrix, D_vector);
    }

    if (X_write_en) KF_X_write(xu_vector, Xout_mat);

    if (UD_write_en) KF_UD_write(U_matrix, D_vector, Uout_mat, Dout_mat);
}

// initialization: loads the filter's input matrices into the on-chip working arrays.
//
// What is loaded, in order:
//   U0 -> U_matrix   (element (row, col) stored at [row % PROC_MU][col * DEPTH_MU + row / PROC_MU])
//   H  -> H_matrix   (only when EKF_EN == 0; the last slot of every row is zeroed first)
//   R  -> ry_vector[0 .. M_MEAS)
//   X0 -> xu_vector[0 .. N_STATE)
//   D0 -> D_vector[0 .. N_STATE), then copied to D_vector[N_STATE .. 2 * N_STATE)
//   A  -> AB_matrix  (the last slot of every A row and every B row is zeroed first)
//   B  -> AB_matrix at offset DEPTH_MU * N_STATE (only when EKF_EN != 1 and KF_C != 0)
//   Dq -> D_vector[2 * N_STATE .. 3 * N_STATE)
//   Uq -> Uq_matrix (flat) and into T_matrix at column offset N_STATE of each row
//
// When EKF_EN == 1 and read_opt_flag is set, the U0, R, D0, Dq and Uq loads are skipped
// (their loop counts are forced to 0); X0 and A are always loaded.
//
// NOTE(review): the template parameter list and the xf::cv::Mat template arguments appear to
// have been stripped from this listing; N_STATE, M_MEAS, C_CTRL, PROC_MU, PROC_TU, DEPTH_*,
// EKF_EN and URAM_EN are presumably template parameters - confirm.
template void initialization(xf::cv::Mat& A_mat,
#if KF_C != 0
                             xf::cv::Mat& B_mat,
#endif
                             xf::cv::Mat& Uq_mat,
                             xf::cv::Mat& Dq_mat,
                             xf::cv::Mat& H_mat,
                             xf::cv::Mat& X0_mat,
                             xf::cv::Mat& U0_mat,
                             xf::cv::Mat& D0_mat,
                             xf::cv::Mat& R_mat,
                             float H_matrix[PROC_MU][HMAT_DEPTH],
                             float U_matrix[PROC_MU][UMAT_DEPTH],
                             float xu_vector[512],
                             float ry_vector[512],
                             float D_vector[512],
                             float AB_matrix[PROC_MU][ABMAT_DEPTH],
                             float T_matrix[PROC_TU][TMAT_DEPTH],
                             float Uq_matrix[UQMAT_DEPTH],
                             bool read_opt_flag) {
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=H_matrix complete dim=1
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=H_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=H_matrix complete dim=1
        // clang-format on
    }
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=U_matrix complete dim=1
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=U_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=U_matrix complete dim=1
        // clang-format on
    }
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=AB_matrix complete dim=1
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=AB_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=AB_matrix complete dim=1
        // clang-format on
    }
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=T_matrix complete dim=1
#pragma HLS resource variable=T_matrix core=RAM_S2P_BRAM
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=T_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=T_matrix complete dim=1
        // clang-format on
    }
    if (URAM_EN == 1) {
// clang-format off
#pragma HLS RESOURCE variable=Uq_matrix core=RAM_S2P_URAM
        // clang-format on
    }
// clang-format off
#pragma HLS inline off
    // clang-format on

    //******************************Load U0 ****************************//
    int U0_loop_cnt;
    if (EKF_EN == 1 && read_opt_flag == 1)
        U0_loop_cnt = 0;
    else
        U0_loop_cnt = N_STATE * N_STATE;

    // counter1 counts elements within the current input row; counter1_1 is the matching
    // dim2 offset (counter1 * DEPTH_MU). counter2 / counter3 split the input row number into
    // (row % PROC_MU, row / PROC_MU).
    ap_uint<32> counter1 = 0;
    ap_uint<32> counter1_1 = 0; // for dim2
    ap_uint<32> counter2 = 0;   // for dim1
    ap_uint<32> counter3 = 0;   // for dim2
LOOPI_U:
    for (int ptr = 0; ptr < U0_loop_cnt; ptr++) {
// clang-format off
#pragma HLS pipeline
        // clang-format on
        ap_uint<8> dim1 = counter2;
        ap_uint<16> dim2 = counter1_1 + counter3;
        U_matrix[dim1][dim2] = U0_mat.read_float(ptr);

        if (counter1 == N_STATE - 1) {
            // End of an input row: advance the row counters and restart the column counters.
            if (counter2 == PROC_MU - 1) {
                counter2 = 0;
                counter3++;
            } else {
                counter2++;
            }
            counter1 = 0;
            counter1_1 = 0;
        } else {
            counter1++;
            counter1_1 += DEPTH_MU;
        }
    }

// Zero the last DEPTH_MU slot of each of the M_MEAS rows of H_matrix before H is loaded.
// NOTE(review): presumably clears padding when N_STATE is not a multiple of PROC_MU - confirm.
LOOPHZ:
    for (int ptr_zero = 0, dim2 = (DEPTH_MU - 1); ptr_zero < M_MEAS; ptr_zero++, dim2 += DEPTH_MU) {
// clang-format off
#pragma HLS pipeline
        // clang-format on
        for (int dim1 = 0; dim1 < PROC_MU; dim1++) {
// clang-format off
#pragma HLS unroll
            // clang-format on
            H_matrix[dim1][dim2] = 0;
        }
    }

    //******************************Load H ****************************//
    if (EKF_EN == 0) {
        // offset = row * (DEPTH_MU * PROC_MU) + column; split into (offset % PROC_MU, offset / PROC_MU).
        ap_uint<32> offset_incH = 0;
        ap_uint<32> counter_Hrow = 0;
    LOOPI_H:
        for (int ptr = 0; ptr < M_MEAS * N_STATE; ptr++) {
// clang-format off
#pragma HLS pipeline
            // clang-format on
            ap_uint<32> offset = offset_incH + counter_Hrow;
            ap_uint<10> dim1 = offset % PROC_MU;
            ap_uint<16> dim2 = offset / PROC_MU;

            H_matrix[dim1][dim2] = H_mat.read_float(ptr);

            if (counter_Hrow == N_STATE - 1) {
                counter_Hrow = 0;
                offset_incH += DEPTH_MU * PROC_MU;
            } else
                counter_Hrow++;
        }
    }

    //******************************Load R ****************************//
    int R_loop_cnt;
    if (EKF_EN == 1 && read_opt_flag == 1)
        R_loop_cnt = 0;
    else
        R_loop_cnt = M_MEAS;
LOOPI_R:
    for (int ptr = 0; ptr < R_loop_cnt; ptr++) {
// clang-format off
#pragma HLS pipeline
        // clang-format on
        ry_vector[ptr] = R_mat.read_float(ptr);
    }

//******************************Load X0 ****************************//
LOOPI_X:
    for (int ptr = 0; ptr < N_STATE; ptr++) {
// clang-format off
#pragma HLS pipeline
        // clang-format on
        xu_vector[ptr] = X0_mat.read_float(ptr);
    }

    //******************************Load D0 ****************************//
    int D0_loop_cnt;
    if (EKF_EN == 1 && read_opt_flag == 1)
        D0_loop_cnt = 0;
    else
        D0_loop_cnt = N_STATE;
LOOPI_D:
    for (int ptr = 0; ptr < D0_loop_cnt; ptr++) {
// clang-format off
#pragma HLS pipeline
        // clang-format on
        D_vector[ptr] = D0_mat.read_float(ptr);
    }

// Duplicate D0 into the second N_STATE-sized segment of D_vector (the segment that
// load_TjDeltaj_vector starts reading from).
LOOPI_T1:
    for (int ptr = 0; ptr < D0_loop_cnt; ptr++) {
// clang-format off
#pragma HLS pipeline
        // clang-format on
        D_vector[ptr + N_STATE] = D_vector[ptr];
    }

    //******************************Load A ****************************//
    // Zero the last slot of each of the N_STATE A rows (stride DEPTH_MU) and then of each of the
    // N_STATE B rows (stride DEPTH_MU_CTRL) before the matrices are loaded.
    ap_uint<16> dim2 = (DEPTH_MU - 1);
LOOPAZ:
    for (int ptr_zero = 0; ptr_zero < 2 * N_STATE; ptr_zero++) {
// clang-format off
#pragma HLS pipeline
        // clang-format on
        for (int dim1 = 0; dim1 < PROC_MU; dim1++) {
// clang-format off
#pragma HLS unroll
            // clang-format on
            AB_matrix[dim1][dim2] = 0;
        }
        if (ptr_zero < (N_STATE - 1))
            dim2 += DEPTH_MU;
        else
            dim2 += DEPTH_MU_CTRL;
    }

    ap_uint<32> offset_incA = 0;
    ap_uint<32> counter_Arow = 0;
LOOPI_A:
    for (int ptr = 0; ptr < N_STATE * N_STATE; ptr++) {
// clang-format off
#pragma HLS pipeline
        // clang-format on
        ap_uint<32> offset = offset_incA + counter_Arow;
        ap_uint<8> dim1 = offset % PROC_MU;
        ap_uint<16> dim2 = offset / PROC_MU;

        AB_matrix[dim1][dim2] = A_mat.read_float(ptr);

        if (counter_Arow == N_STATE - 1) {
            counter_Arow = 0;
            offset_incA += DEPTH_MU * PROC_MU;
        } else
            counter_Arow++;
    }

    //******************************Load B ****************************//
    ap_uint<32> offset_incB = 0;
    ap_uint<32> counter_Brow = 0;
    int B_loop_cnt;
    if (EKF_EN == 1)
        B_loop_cnt = 0;
    else
        B_loop_cnt = N_STATE * C_CTRL;
LOOPI_B:
    for (int ptr = 0; ptr < B_loop_cnt; ptr++) {
// clang-format off
#pragma HLS pipeline
        // clang-format on
        ap_uint<32> offset = offset_incB + counter_Brow;
        ap_uint<8> dim1 = offset % PROC_MU;
        ap_uint<16> dim2 = offset / PROC_MU;

// B is stored after the A region, i.e. at dim2 offset DEPTH_MU * N_STATE.
#if KF_C != 0
        AB_matrix[dim1][dim2 + (DEPTH_MU * N_STATE)] = B_mat.read_float(ptr);
#endif

        if (counter_Brow == C_CTRL - 1) {
            counter_Brow = 0;
            offset_incB += DEPTH_MU_CTRL * PROC_MU;
        } else
            counter_Brow++;
    }

    //******************************Load Dq only diagonal elements****************************//
    int Dq_loop_cnt;
    if (EKF_EN == 1 && read_opt_flag == 1)
        Dq_loop_cnt = 0;
    else
        Dq_loop_cnt = N_STATE;
LOOPI_T2:
    for (int ptr = 0; ptr < Dq_loop_cnt; ptr++) {
// clang-format off
#pragma HLS pipeline
        // clang-format on
        D_vector[ptr + 2 * N_STATE] = Dq_mat.read_float(ptr);
    }

    //******************************Load Uq ****************************//
    int Uq_loop_cnt;
    if (EKF_EN == 1 && read_opt_flag == 1)
        Uq_loop_cnt = 0;
    else
        Uq_loop_cnt = N_STATE * N_STATE;

    // Each Uq row lands in the matching T_matrix row starting at element offset N_STATE
    // (rows are DEPTH_TU * PROC_TU elements apart); a flat copy is kept in Uq_matrix.
    ap_uint<16> counter_trow = 0;
    ap_uint<32> offset_inc = N_STATE;
LOOPI_UQ:
    for (int ptr = 0; ptr < Uq_loop_cnt; ptr++) {
// clang-format off
#pragma HLS pipeline
        // clang-format on
        ap_uint<32> offset = offset_inc + counter_trow;
        ap_uint<8> dim1 = offset % PROC_TU;
        ap_uint<16> dim2 = offset / PROC_TU;

        float Uq_value = Uq_mat.read_float(ptr);
        T_matrix[dim1][dim2] = Uq_value;
        Uq_matrix[ptr] = Uq_value;

        if (counter_trow == N_STATE - 1) {
            counter_trow = 0;
            offset_inc += DEPTH_TU * PROC_TU;
        } else
            counter_trow++;
    }
}

// KalmanFilter_def: owns the filter's persistent (static) working arrays and dispatches the
// initialization, time-update and measurement-update stages according to the bits of `flag`.
//
// Bits of `flag` as used below:
//   bit 0 - run initialization (and, when EKF_EN == 1, reset xu_vector[511] to 0)
//   bit 1 - run TimeUpdate
//   bit 2 - run MeasUpdate_wrapper (and, when EKF_EN == 1, increment xu_vector[511] afterwards)
//   bit 3 - TimeUpdate: X_write_en
//   bit 4 - TimeUpdate: UD_write_en
//   bit 5, bit 6 - forwarded to MeasUpdate_wrapper (NOTE(review): presumably its X / UD write
//                  enables - confirm against MeasUpdate_wrapper)
//   bit 7 - initialization: read_opt_flag
//
// NOTE(review): the template parameter list and all explicit template arguments appear to have
// been stripped from this listing; N_STATE, M_MEAS, C_CTRL, PROC_TU, PROC_MU, EKF_EN and
// URAM_EN are presumably template parameters - confirm against the real header.
template void KalmanFilter_def(xf::cv::Mat& A_mat,
#if KF_C != 0
                               xf::cv::Mat& B_mat,
#endif
                               xf::cv::Mat& Uq_mat,
                               xf::cv::Mat& Dq_mat,
                               xf::cv::Mat& H_mat,
                               xf::cv::Mat& X0_mat,
                               xf::cv::Mat& U0_mat,
                               xf::cv::Mat& D0_mat,
                               xf::cv::Mat& R_mat,
#if KF_C != 0
                               xf::cv::Mat& u_mat,
#endif
                               xf::cv::Mat& y_mat,
                               xf::cv::Mat& Xout_mat,
                               xf::cv::Mat& Uout_mat,
                               xf::cv::Mat& Dout_mat,
                               unsigned char flag) {
// clang-format off
#pragma HLS inline off
    // clang-format on

    // Derived storage geometry. The DEPTH_* values are ceiling divisions: the number of
    // PROC_*-wide chunks needed to hold one row.
    enum {
        DEPTH_TU = ((2 * N_STATE) / PROC_TU + (((2 * N_STATE) % PROC_TU) != 0)),
        DEPTH_MU = (N_STATE / PROC_MU + ((N_STATE % PROC_MU) != 0)),
        DEPTH_MU_CTRL = (C_CTRL / PROC_MU + ((C_CTRL % PROC_MU) != 0)),
        UMAT_DEPTH = (DEPTH_MU * N_STATE),
        HMAT_DEPTH = (DEPTH_MU * M_MEAS),
        ABMAT_DEPTH = ((DEPTH_MU * N_STATE) + (DEPTH_MU_CTRL * N_STATE)),
        DPDQ_DEPTH = DEPTH_TU,
        TMAT_DEPTH = (DEPTH_TU * N_STATE),
        UQMAT_DEPTH = (N_STATE * N_STATE)
    };

    // All working arrays are static, so their contents persist across calls; this is what lets
    // separate calls run initialization, time update and measurement update.
    static float H_matrix[PROC_MU][HMAT_DEPTH];
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=H_matrix complete dim=1
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=H_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=H_matrix complete dim=1
        // clang-format on
    }

    static float U_matrix[PROC_MU][UMAT_DEPTH];
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=U_matrix complete dim=1
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=U_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=U_matrix complete dim=1
        // clang-format on
    }

    static float xu_vector[512];
    static float ry_vector[512];
    static float D_vector[512];

    static float AB_matrix[PROC_MU][ABMAT_DEPTH];
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=AB_matrix complete dim=1
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=AB_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=AB_matrix complete dim=1
        // clang-format on
    }

    static float T_matrix[PROC_TU][TMAT_DEPTH];
    if (URAM_EN == 0) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=T_matrix complete dim=1
#pragma HLS resource variable=T_matrix core=RAM_S2P_BRAM
        // clang-format on
    } else {
// clang-format off
#pragma HLS RESOURCE variable=T_matrix core=RAM_S2P_URAM
#pragma HLS ARRAY_RESHAPE variable=T_matrix complete dim=1
        // clang-format on
    }

    static float Uq_matrix[UQMAT_DEPTH];
    if (URAM_EN == 1) {
// clang-format off
#pragma HLS RESOURCE variable=Uq_matrix core=RAM_S2P_URAM
        // clang-format on
    }

    // Copy into an ap_uint so that individual bits can be indexed.
    ap_uint<8> flag_reg = flag;

    // In EKF mode the last element of xu_vector doubles as a counter: cleared on initialization
    // and incremented after each measurement update.
    // NOTE(review): its consumer is not visible in this part of the file - confirm its meaning.
    if (EKF_EN == 1) {
        if (flag_reg[0] == 1) xu_vector[511] = 0;
    }

    if (flag_reg[0])
        initialization( A_mat,
#if KF_C != 0
                        B_mat,
#endif
                        Uq_mat, Dq_mat, H_mat, X0_mat, U0_mat, D0_mat, R_mat, H_matrix, U_matrix, xu_vector,
                        ry_vector, D_vector, AB_matrix, T_matrix, Uq_matrix, flag_reg[7]);

    if (flag_reg[1])
        TimeUpdate( T_matrix, AB_matrix, xu_vector, U_matrix, D_vector,
#if KF_C != 0
                    u_mat,
#endif
                    Xout_mat, Uout_mat, Dout_mat, flag_reg[3], flag_reg[4]);

    if (flag_reg[2])
        MeasUpdate_wrapper( U_matrix, H_matrix, D_vector, xu_vector, ry_vector, T_matrix, Uq_matrix,
#if KF_C != 0
                            u_mat,
#endif
                            y_mat, R_mat, H_mat, Xout_mat, Uout_mat, Dout_mat, flag_reg[5], flag_reg[6]);

    if (EKF_EN == 1) {
        if (flag_reg[2] == 1) xu_vector[511]++;
    }
}

#if KF_C != 0
#endif
#if KF_C != 0
#endif
// KalmanFilter: public entry point. Validates the configuration constants and the dimensions
// of every input/output matrix with assert(), then forwards all arguments unchanged to
// KalmanFilter_def.
//
// Checked conditions (see the assert messages for the exact wording):
//   N_STATE and M_MEAS in 1..128, C_CTRL in 0..128, MTU and MMU in 1..N_STATE;
//   A, Uq, U0, Uout are N_STATE x N_STATE; B is N_STATE x C_CTRL (KF_C != 0 only);
//   Dq, X0, D0, Xout, Dout are N_STATE x 1; H is M_MEAS x N_STATE; R and y are M_MEAS x 1;
//   u is C_CTRL x 1 (KF_C != 0 only); TYPE == XF_32FC1 and NPC == XF_NPPC1.
//
// `flag` selects which stages run; its bit layout is interpreted in KalmanFilter_def.
//
// NOTE(review): the template parameter list and all explicit template arguments appear to have
// been stripped from this listing; N_STATE, M_MEAS, C_CTRL, MTU, MMU, TYPE and NPC are
// presumably template parameters - confirm against the real header.
template void KalmanFilter(xf::cv::Mat& A_mat,
#if KF_C != 0
                           xf::cv::Mat& B_mat,
#endif
                           xf::cv::Mat& Uq_mat,
                           xf::cv::Mat& Dq_mat,
                           xf::cv::Mat& H_mat,
                           xf::cv::Mat& X0_mat,
                           xf::cv::Mat& U0_mat,
                           xf::cv::Mat& D0_mat,
                           xf::cv::Mat& R_mat,
#if KF_C != 0
                           xf::cv::Mat& u_mat,
#endif
                           xf::cv::Mat& y_mat,
                           xf::cv::Mat& Xout_mat,
                           xf::cv::Mat& Uout_mat,
                           xf::cv::Mat& Dout_mat,
                           unsigned char flag) {
    // Configuration limits.
    assert((N_STATE > 0 && N_STATE <= 128) && "For N_STATE, possible options are 1 to 128");
    assert((M_MEAS > 0 && M_MEAS <= 128) && "For M_MEAS, possible options are 1 to 128");
    assert((C_CTRL >= 0 && C_CTRL <= 128) && "For C_CTRL, possible options are 0 to 128");
    assert((MTU > 0 && MTU <= N_STATE) && "For MTU, possible options are 1 to N_STATE");
    assert((MMU > 0 && MMU <= N_STATE) && "For MMU, possible options are 1 to N_STATE");

    // Input matrix dimensions.
    assert(((A_mat.rows == N_STATE) && (A_mat.cols == N_STATE)) && "A matrix dimension must be N_STATE x N_STATE");
#if KF_C != 0
    assert(((B_mat.rows == N_STATE) && (B_mat.cols == C_CTRL)) && "B matrix dimension must be N_STATE x C_CTRL");
#endif
    assert(((Uq_mat.rows == N_STATE) && (Uq_mat.cols == N_STATE)) && "Uq matrix dimension must be N_STATE x N_STATE");
    assert(((Dq_mat.rows == N_STATE) && (Dq_mat.cols == 1)) && "Dq matrix dimension must be N_STATE x 1");
    assert(((H_mat.rows == M_MEAS) && (H_mat.cols == N_STATE)) && "H matrix dimension must be M_MEAS x N_STATE");
    assert(((X0_mat.rows == N_STATE) && (X0_mat.cols == 1)) && "X0 matrix dimension must be N_STATE x 1");
    assert(((U0_mat.rows == N_STATE) && (U0_mat.cols == N_STATE)) && "U0 matrix dimension must be N_STATE x N_STATE");
    assert(((D0_mat.rows == N_STATE) && (D0_mat.cols == 1)) && "D0 matrix dimension must be N_STATE x 1");
    assert(((R_mat.rows == M_MEAS) && (R_mat.cols == 1)) && "R matrix dimension must be M_MEAS x 1");
#if KF_C != 0
    assert(((u_mat.rows == C_CTRL) && (u_mat.cols == 1)) && "u matrix dimension must be C_CTRL x 1");
#endif
    assert(((y_mat.rows == M_MEAS) && (y_mat.cols == 1)) && "y matrix dimension must be M_MEAS x 1");

    // Output matrix dimensions.
    assert(((Xout_mat.rows == N_STATE) && (Xout_mat.cols == 1)) && "Xout matrix dimension must be N_STATE x 1");
    assert(((Uout_mat.rows == N_STATE) && (Uout_mat.cols == N_STATE)) &&
           "Uout matrix dimension must be N_STATE x N_STATE");
    assert(((Dout_mat.rows == N_STATE) && (Dout_mat.cols == 1)) && "Dout matrix dimension must be N_STATE x 1");

    // Pixel type / parallelism restrictions.
    assert((TYPE == XF_32FC1) && "TYPE must be XF_32FC1");
    assert((NPC == XF_NPPC1) && "NPC must be XF_NPPC1");

    KalmanFilter_def( A_mat,
#if KF_C != 0
                      B_mat,
#endif
                      Uq_mat, Dq_mat, H_mat, X0_mat, U0_mat, D0_mat, R_mat,
#if KF_C != 0
                      u_mat,
#endif
                      y_mat, Xout_mat, Uout_mat, Dout_mat, flag);
}
} // namespace cv
} // namespace xf

#endif //_XF_KALMANFILTER_HPP_