.. _program_listing_file__tmp_ws_src_vitis_common_include_video_xf_pyr_dense_optical_flow.hpp:

Program Listing for File xf_pyr_dense_optical_flow.hpp
======================================================

|exhale_lsh| :ref:`Return to documentation for file <file__tmp_ws_src_vitis_common_include_video_xf_pyr_dense_optical_flow.hpp>` (``/tmp/ws/src/vitis_common/include/video/xf_pyr_dense_optical_flow.hpp``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   /*
    * Copyright 2019 Xilinx, Inc.
    *
    * Licensed under the Apache License, Version 2.0 (the "License");
    * you may not use this file except in compliance with the License.
    * You may obtain a copy of the License at
    *
    *     http://www.apache.org/licenses/LICENSE-2.0
    *
    * Unless required by applicable law or agreed to in writing, software
    * distributed under the License is distributed on an "AS IS" BASIS,
    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    * See the License for the specific language governing permissions and
    * limitations under the License.
    */

   #ifndef __XF_PYR_DENSE_OPTICAL_FLOW__
   #define __XF_PYR_DENSE_OPTICAL_FLOW__
   #include "hls_stream.h"
   #include "ap_int.h"
   #include "common/xf_common.hpp"
   #include "xf_pyr_dense_optical_flow_config_types.h"
   #include "xf_pyr_dense_optical_flow_scale.hpp"
   #include "xf_pyr_dense_optical_flow_median_blur.hpp"
   #include "xf_pyr_dense_optical_flow_find_gradients.hpp"
   #include "xf_pyr_dense_optical_flow_oflow_process.hpp"
   #include "math.h"

   // NOTE(review): throughout this listing the template parameter lists
   // (after `template`) and most template arguments (after `ap_fixed`,
   // `hls::stream`, `xf::cv::Mat`, and on the helper calls in
   // xFLKOpticalFlowDenseKernel) are missing. They appear to have been
   // stripped when the page was extracted; restore them from the real
   // header before compiling -- TODO confirm against the source file.

   /*
    * Packs two flow streams into one word per pixel.
    *
    * For every (i, j) in rows x cols, one value is read from each input
    * stream, the raw bits of each are read through a `short` pointer, and
    * the word (first << 16) | (second & 0xFFFF) is written to
    * `sitched_stream` at index i * cols + j.
    *
    * in_stream1    : source of the value placed in the upper 16 bits.
    * in_stream2    : source of the value placed in the lower 16 bits.
    * sitched_stream: destination; written once per pixel.
    * rows, cols    : loop bounds; rows * cols elements are consumed from
    *                 each input stream.
    * level         : only used to name the DEBUG dump files.
    *
    * With DEBUG enabled the values are also dumped as text to
    * postscaleU_hw<level>.txt and postscaleV_hw<level>.txt.
    */
   template
   void stitch_stream_fixed_int(hls::stream >& in_stream1,
                                hls::stream >& in_stream2,
                                xf::cv::Mat& sitched_stream,
                                unsigned int rows,
                                unsigned int cols,
                                unsigned int level) {
       // clang-format off
       #pragma HLS inline off
   // clang-format on
   #if DEBUG
       char name[200];
       sprintf(name, "postscaleU_hw%d.txt", level);
       FILE* fpU = fopen(name, "w");
       sprintf(name, "postscaleV_hw%d.txt", level);
       FILE* fpV = fopen(name, "w");
   #endif
       for (ap_uint<16> i = 0; i < rows; i++) {
           // clang-format off
           #pragma HLS LOOP_TRIPCOUNT min=1 max=MAXHEIGHT
           // clang-format on
           for (ap_uint<16> j = 0; j < cols; j++) {
               // clang-format off
               #pragma HLS LOOP_TRIPCOUNT min=1 max=MAXWIDTH
               #pragma HLS pipeline ii=1
               #pragma HLS LOOP_FLATTEN OFF
               // clang-format on
               ap_fixed reg1 = 0;
               ap_fixed reg2 = 0;
               reg1 = in_stream1.read();
               reg2 = in_stream2.read();
               // Reinterpret the fixed-point storage as 16-bit integers.
               // NOTE(review): presumably the flow type is exactly 16 bits
               // wide -- TODO confirm.
               short* shortconv1 = (short*)&reg1;
               short* shortconv2 = (short*)&reg2;
   #if DEBUG
               fprintf(fpU, "%f ", float(reg1));
               fprintf(fpV, "%f ", float(reg2));
   #endif
               // Mask the second value to 16 bits so its sign extension
               // cannot leak into the upper half of the packed word.
               int convert = (*shortconv2);
               unsigned int tempstore = convert & 0x0000FFFF;
               tempstore = (*shortconv1 << 16) | tempstore;
               sitched_stream.write(i * cols + j, tempstore);
           } // end j loop
   #if DEBUG
           fprintf(fpU, "\n");
           fprintf(fpV, "\n");
   #endif
       } // end i loop
   #if DEBUG
       fclose(fpU);
       fclose(fpV);
   #endif
   } // end stitch_stream_fixed_int()

   /*
    * Unpacks one word per pixel into two flow streams; the inverse of the
    * packing done by stitch_stream_fixed_int.
    *
    * For every (i, j) in rows x cols, the word at index i * cols + j is
    * read from `instream`; its upper 16 bits and lower 16 bits are stored
    * in two `short` temporaries whose raw bits are then read back through
    * an `ap_fixed` pointer and written to out_stream1 and out_stream2
    * respectively.
    *
    * instream   : packed source; read once per pixel.
    * out_stream1: receives the value taken from the upper 16 bits.
    * out_stream2: receives the value taken from the lower 16 bits.
    * rows, cols : loop bounds.
    * level      : only used to name the DEBUG dump files.
    *
    * With DEBUG enabled the values are also dumped as text to
    * prescaleU_hw<level>.txt and prescaleV_hw<level>.txt.
    */
   template
   void split_stream_int_fixed(xf::cv::Mat& instream,
                               hls::stream >& out_stream1,
                               hls::stream >& out_stream2,
                               unsigned int rows,
                               unsigned int cols,
                               unsigned int level) {
       // clang-format off
       #pragma HLS inline off
   // clang-format on
   #if DEBUG
       char name[200];
       sprintf(name, "prescaleU_hw%d.txt", level);
       FILE* fpU = fopen(name, "w");
       sprintf(name, "prescaleV_hw%d.txt", level);
       FILE* fpV = fopen(name, "w");
   #endif
       for (ap_uint<16> i = 0; i < rows; i++) {
           // clang-format off
           #pragma HLS LOOP_TRIPCOUNT min=1 max=MAXHEIGHT
           // clang-format on
           for (ap_uint<16> j = 0; j < cols; j++) {
               // clang-format off
               #pragma HLS LOOP_TRIPCOUNT min=1 max=MAXWIDTH
               #pragma HLS pipeline ii=1
               #pragma HLS LOOP_FLATTEN OFF
               // clang-format on
               unsigned int tempcopy = instream.read(i * cols + j);
               short splittemp1 = (tempcopy >> 16);
               short splittemp2 = (0x0000FFFF & tempcopy);
               // Reinterpret the 16-bit integers as fixed-point values.
               ap_fixed* uflow = (ap_fixed*)&splittemp1;
               ap_fixed* vflow = (ap_fixed*)&splittemp2;
               ap_fixed u = *uflow;
               ap_fixed v = *vflow;
               out_stream1.write(u);
               out_stream2.write(v);
   #if DEBUG
               fprintf(fpU, "%12.8f ", float(u));
               fprintf(fpV, "%12.8f ", float(v));
   #endif
           } // end j loop
   #if DEBUG
           fprintf(fpU, "\n");
           fprintf(fpV, "\n");
   #endif
       } // end i loop
   #if DEBUG
       fclose(fpU);
       fclose(fpV);
   #endif
   } // end split_stream_int_fixed()

   /*
    * Computes a per-pixel flow update (U, V) from five windowed sums.
    *
    * For every (i, j) in rows x cols:
    *   1. Read sigmaIx2, sigmaIy2, sigmaIxIy, sigmaItIx and sigmaItIy.
    *   2. det = sigmaIx2 * sigmaIy2 - sigmaIxIy^2.
    *   3. eig = ((sigmaIx2 + sigmaIy2)
    *             - sqrt(4 * sigmaIxIy^2 + (sigmaIx2 + sigmaIy2)^2))
    *            / (2 * WINSIZE * WINSIZE).
    *   4. If det == 0 or |eig| < 0.025, the flow is set to 0 and both
    *      flags to 0. Otherwise
    *        U = (sigmaIy2 * sigmaItIx - sigmaIxIy * sigmaItIy) / det
    *        V = (sigmaIx2 * sigmaItIy - sigmaIxIy * sigmaItIx) / det
    *      and both flags are set to 1.
    *   5. One element is always read from streamflowU_in / streamflowV_in.
    *      When init_flag == 0 it is added to (U, V); otherwise it is
    *      read only to drain the stream and then discarded.
    *   6. The flags are written to flagU / flagV and the flow to
    *      strmFlowU / strmFlowV.
    *
    * `scale_up_flag` is not referenced in the body, `level` is only used
    * to name the DEBUG dump files, and `count` is incremented but never
    * read.
    *
    * With DEBUG enabled, det and the resulting flow are dumped as text to
    * det_hw<level>.txt, flU_hw<level>.txt and flV_hw<level>.txt.
    */
   template
   void find_flow(hls::stream >& strmSigmaIx2,
                  hls::stream >& strmSigmaIy2,
                  hls::stream >& strmSigmaIxIy,
                  hls::stream >& strmSigmaItIx,
                  hls::stream >& strmSigmaItIy,
                  hls::stream >& streamflowU_in,
                  hls::stream >& streamflowV_in,
                  hls::stream >& strmFlowU,
                  hls::stream >& strmFlowV,
                  hls::stream& flagU,
                  hls::stream& flagV,
                  unsigned int rows,
                  unsigned int cols,
                  unsigned int level,
                  bool scale_up_flag,
                  ap_uint<1> init_flag) {
       // clang-format off
       #pragma HLS inline off
   // clang-format on
   #if DEBUG
       char filename0[200];
       char filename1[200];
       char filename2[200];
       sprintf(filename0, "flU_hw%d.txt", level);
       sprintf(filename1, "flV_hw%d.txt", level);
       sprintf(filename2, "det_hw%d.txt", level);
       FILE* fpdet = fopen(filename2, "w");
       FILE* fpglxup = fopen(filename0, "w");
       FILE* fpglyup = fopen(filename1, "w");
   #endif
       int count = 0;
       for (ap_uint<16> i = 0; i < rows; i++) {
           // clang-format off
           #pragma HLS LOOP_TRIPCOUNT min=1 max=MAXHEIGHT
           // clang-format on
           for (ap_uint<16> j = 0; j < cols; j++) {
               // clang-format off
               #pragma HLS LOOP_TRIPCOUNT min=1 max=MAXWIDTH
               #pragma HLS pipeline ii=1
               #pragma HLS LOOP_FLATTEN OFF
               // clang-format on
               ap_fixed flowU, flowV;
               ap_fixed sigmaIx2 = strmSigmaIx2.read();
               ap_fixed sigmaIy2 = strmSigmaIy2.read();
               ap_fixed sigmaIxIy = strmSigmaIxIy.read();
               ap_fixed sigmaItIx = strmSigmaItIx.read();
               ap_fixed sigmaItIy = strmSigmaItIy.read();

               ap_fixed<((SIXIY_WIDTH + 1) << 1) + 3, ((SIXIY_INT + 1) << 1) + 3> S12sq = sigmaIxIy * sigmaIxIy;
               ap_fixed det = (sigmaIx2 * sigmaIy2 - S12sq);

               ap_fixed S1122 = (sigmaIx2 + sigmaIy2);
               ap_fixed<(SIXIY_WIDTH + 1) << 1, (SIXIY_INT + 1) << 1> S1122sq = S1122 * S1122;
               S12sq = (S12sq << 2) + S1122sq; // multiply by 4
               static half div_by_eig = 1 / (2.0 * WINSIZE * WINSIZE);
               float S1122_h = S1122;
               float S12sq_h = S12sq;
               float eig_comp = S1122_h - sqrt(S12sq_h);
               // float eig_comp = (((A11 + A22) - sqrt( ((A11 + A22)*(A11 + A22))
               // + 4.0*A12*A12))/(2.0*WINSIZE*WINSIZE));
               float eig_comp2 = eig_comp * div_by_eig;
               // Absolute value of the scaled eigenvalue term.
               float eig_comp3 = (eig_comp2 < 0) ? -eig_comp2 : eig_comp2;
               bool tflagu;
               bool tflagv;
               if ((det == 0) || (eig_comp3 < 0.025)) {
                   // Singular or weakly conditioned system: emit zero flow
                   // and clear the flags.
                   flowU = (ap_fixed)0;
                   flowV = (ap_fixed)0;
                   count++;
                   tflagu = 0;
                   tflagv = 0;
               } else {
                   ap_fixed divideBy;
                   ap_fixed tempU;
                   ap_fixed tempV;
                   // One reciprocal shared by both components.
                   divideBy = (ap_fixed)(1.0) / ((ap_fixed)det);
                   tempU = ((ap_fixed)sigmaIy2 * sigmaItIx - (ap_fixed)sigmaIxIy * sigmaItIy) * (divideBy);
                   tempV = ((ap_fixed)sigmaIx2 * sigmaItIy - (ap_fixed)sigmaIxIy * sigmaItIx) * (divideBy);
                   flowU = ap_fixed(tempU);
                   flowV = ap_fixed(tempV);
                   tflagu = 1;
                   tflagv = 1;
               }
               if (init_flag == (ap_uint<1>)0) {
                   // Accumulate the incoming flow onto this update.
                   flowU += ap_fixed(streamflowU_in.read());
                   flowV += ap_fixed(streamflowV_in.read());
               } else {
                   // Still consume one element per pixel so the input
                   // streams are drained; the values are not used.
                   ap_fixed flow_dummyU = ap_fixed(streamflowU_in.read());
                   ap_fixed flow_dummyV = ap_fixed(streamflowV_in.read());
               }
               flagU.write(tflagu);
               flagV.write(tflagv);
   #if DEBUG
               fprintf(fpdet, "%12.4f ", float(det));
               fprintf(fpglxup, "%12.8f ", float(flowU));
               fprintf(fpglyup, "%12.8f ", float(flowV));
   #endif
               strmFlowU.write((ap_fixed)flowU);
               strmFlowV.write((ap_fixed)flowV);
           }
   #if DEBUG
           fprintf(fpdet, "\n");
           fprintf(fpglxup, "\n");
           fprintf(fpglyup, "\n");
   #endif
       }
   #if DEBUG
       fclose(fpdet);
       fclose(fpglxup);
       fclose(fpglyup);
   #endif
   } // end find_flow()

   /*
    * Top-level dataflow region for one pass of dense optical flow.
    *
    * Stages, in call order:
    *   1. split_stream_int_fixed : strmFlowin -> U / V streams
    *                               (prev_rows x prev_cols elements).
    *   2. scale_up (x2 calls)    : U and V from prev_rows x prev_cols to
    *                               rows x cols, controlled by
    *                               scale_up_flag and scale_in.
    *   3. findGradients          : currImg, nextImg and the scaled flow ->
    *                               It, Ix, Iy and the *_in1 flow copies.
    *   4. find_G_and_b_matrix    : Ix, Iy, It -> five sigma streams.
    *   5. find_flow              : sigma streams + *_in1 flow -> flow and
    *                               per-pixel flags.
    *   6. auMedianBlur (x2 calls): filters U and V using the flags.
    *   7. stitch_stream_fixed_int: filtered U / V -> strmFlow.
    *
    * In non-synthesis builds, asserts rows <= MAXHEIGHT and
    * cols <= MAXWIDTH.
    */
   template
   void xFLKOpticalFlowDenseKernel(xf::cv::Mat& currImg,
                                   xf::cv::Mat& nextImg,
                                   xf::cv::Mat& strmFlowin,
                                   xf::cv::Mat& strmFlow,
                                   const unsigned int rows,
                                   const unsigned int cols,
                                   const unsigned int prev_rows,
                                   const unsigned int prev_cols,
                                   const int level,
                                   const bool scale_up_flag,
                                   float scale_in,
                                   ap_uint<1> init_flag) {
       // Local aliases for the configured widths, taken from the TYPE_*
       // and WINDOW_SIZE_FL names.
       const int WINDOW_SIZE = WINDOW_SIZE_FL;
       const int RMAPPX_WIDTH = TYPE_RMAPPX_WIDTH;
       const int RMAPPX_INT = TYPE_RMAPPX_INT;
       const int SCALE_WIDTH = TYPE_SCALE_WIDTH;
       const int SCALE_INT = TYPE_SCALE_INT;
       const int IT_WIDTH = TYPE_IT_WIDTH;
       const int IT_INT = TYPE_IT_INT;
       const int SIXIY_WIDTH = TYPE_SIXIY_WIDTH;
       const int SIXIY_INT = TYPE_SIXIY_INT;
       const int SIXYIT_WIDTH = TYPE_SIXYIT_WIDTH;
       const int SIXYIT_INT = TYPE_SIXYIT_INT;
       const int DET_WIDTH = TYPE_DET_WIDTH;
       const int DET_INT = TYPE_DET_INT;
       const int DIVBY_WIDTH = TYPE_DIVBY_WIDTH;
       const int DIVBY_INT = TYPE_DIVBY_INT;
       const int FLCMP_WIDTH = TYPE_FLCMP_WIDTH;
       const int FLCMP_INT = TYPE_FLCMP_INT;
       const int SCCMP_WIDTH = FLOW_WIDTH + SCALE_WIDTH + 12;
       const int SCCMP_INT = FLOW_INT + 12;
       const int ITCMP_WIDTH = FLOW_WIDTH + 12 + 4;
       const int ITCMP_INT = FLOW_INT + 12;
       // clang-format off
       #pragma HLS dataflow
       // clang-format on

       hls::stream > strmIx("Ix"), strmIy("Iy");
       hls::stream > sigmaIx2("sigmaIx2"), sigmaIy2("sigmaIy2");
       hls::stream > sigmaIxIt("sigmaIxIt"), sigmaIyIt("sigmaIyIt");
       hls::stream > sigmaIxIy("sigmaIxIy");
       hls::stream > strmIt_float("It");
       hls::stream > strmFlowU_fil("U_median_in"), strmFlowV_fil("V_median_in");
       hls::stream > strmFlowU_fil_out("U_median_out"), strmFlowV_fil_out("V_median_out");
       hls::stream > strmFlowU_in1("U_in1"), strmFlowV_in1("V_in1");
       hls::stream > strmFlowU_split("Flow_stream_splitU"), strmFlowV_split("Flow_stream_splitV");
       hls::stream > strmFlowU_scaled("U_in_scaled"), strmFlowV_scaled("V_in_scaled");
       hls::stream flagU("compute flowU flag"), flagV("compute flowV flag");

       // Ix, Iy, and It will be consumed at the same time and without any d
       // Giving them a 64 depth buffer just in case.
       // clang-format off
       #pragma HLS STREAM variable=&strmIx depth=64
       #pragma HLS STREAM variable=&strmIy depth=64
       #pragma HLS STREAM variable=&strmIt_float depth=64
       #pragma HLS STREAM variable=&flagU depth=5000
       #pragma HLS STREAM variable=&flagV depth=5000
       // clang-format on

       // Flow U and V _in1 will be consumed at most 17*Width cycles after the _scaled.
       // 1920*17= 32640 (17 is arrived at by trial and experiment)
       // This ideally has to be taken care of by the data flow module.
       // clang-format off
       #pragma HLS STREAM variable=&strmFlowU_in1 depth=32640
       #pragma HLS STREAM variable=&strmFlowV_in1 depth=32640
   // clang-format on

   #ifndef __SYNTHESIS__
       assert(rows <= MAXHEIGHT);
       assert(cols <= MAXWIDTH);
   #endif

       // splitting the input flow streams to U and V to scale them up whenever scale up is enabled
       split_stream_int_fixed(strmFlowin, strmFlowU_split, strmFlowV_split, prev_rows, prev_cols, level);

       // scaling up U and V streams whenever scaleup is enabled
       scale_up(strmFlowU_split, strmFlowU_scaled, prev_rows, prev_cols, rows, cols, 2, scale_up_flag, scale_in);
       scale_up(strmFlowV_split, strmFlowV_scaled, prev_rows, prev_cols, rows, cols, 2, scale_up_flag, scale_in);

       // Finding the Temporal and space gradients for the input set of images
       findGradients(
           currImg, nextImg, strmIt_float, strmIx, strmIy, rows, cols, strmFlowU_scaled, strmFlowV_scaled,
           strmFlowU_in1, strmFlowV_in1, level);

       // finding the hessian matrix
       find_G_and_b_matrix(strmIx, strmIy, strmIt_float, sigmaIx2, sigmaIy2, sigmaIxIy, sigmaIxIt, sigmaIyIt, rows, cols,
                           level);

       // computing the optical flow
       find_flow(
           sigmaIx2, sigmaIy2, sigmaIxIy, sigmaIxIt, sigmaIyIt, strmFlowU_in1, strmFlowV_in1, strmFlowU_fil,
           strmFlowV_fil, flagU, flagV, rows, cols, level, scale_up_flag, init_flag);

       // filtering the flow vectors using median blur
       auMedianBlur(strmFlowU_fil, strmFlowU_fil_out, flagU, WINDOW_SIZE, 1, rows, cols);
       auMedianBlur(strmFlowV_fil, strmFlowV_fil_out, flagV, WINDOW_SIZE, 1, rows, cols);

       // stitching the U and V flow streams to a single flow stream
       stitch_stream_fixed_int(strmFlowU_fil_out, strmFlowV_fil_out, strmFlow, rows, cols, level);
   }
   #endif