.. _program_listing_file__tmp_ws_src_vitis_common_include_imgproc_xf_cvt_color.hpp: Program Listing for File xf_cvt_color.hpp ========================================= |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/vitis_common/include/imgproc/xf_cvt_color.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* * Copyright 2019 Xilinx, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _XF_CVT_COLOR_HPP_ #define _XF_CVT_COLOR_HPP_ #include "../common/xf_common.hpp" #include "hls_stream.h" #include "xf_cvt_color_1.hpp" #include "xf_cvt_color_utils.hpp" #include namespace xf { namespace cv { template void write_y_ro(xf::cv::Mat& src_y, xf::cv::Mat& out_y, uint16_t height, uint16_t width) { XF_SNAME(WORDWIDTH_SRC) tmp; unsigned long long int idx = 0; for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS #pragma HLS LOOP_FLATTEN off // clang-format on for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on tmp = src_y.read(i * (width >> XF_BITSHIFT(NPC)) + j); out_y.write(idx++, tmp); } } } // KernRgba2Yuv4 template void KernRgba2Yuv4_ro(xf::cv::Mat& src, xf::cv::Mat& dst1, xf::cv::Mat& dst2, xf::cv::Mat& dst3, uint16_t height, uint16_t width) { // width=width>>NPC; XF_PTNAME(XF_8UP) Y0[16], U[16], V[16]; uint8_t RGB[64]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Y0 
complete #pragma HLS ARRAY_PARTITION variable=U complete #pragma HLS ARRAY_PARTITION variable=V complete #pragma HLS ARRAY_PARTITION variable=RGB complete // clang-format on unsigned long long int y_idx = 0, u_idx = 0, v_idx = 0; XF_SNAME(WORDWIDTH_SRC) PackedPixels; XF_SNAME(WORDWIDTH_DST) YPacked, UPacked, VPacked; uint8_t offset; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS PIPELINE #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on PackedPixels = src.read(i * width + j); ExtractRGBAPixels(PackedPixels, RGB); // Converting from RGBA to YUV4 // Y = (0.257 * R) + (0.504 * G) + (0.098 * B) + 16 // U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128 // V = (0.439 * R) - (0.368 * G) - (0.071 * B) + 128 for (int l = 0; l<(1 << XF_BITSHIFT(NPC))>> 1; l++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC // clang-format on //#pragma HLS unroll if (PLANES == 4) { offset = l << 3; Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); Y0[(l << 1) + 1] = CalculateY(RGB[offset + 4], RGB[offset + 5], RGB[offset + 6]); U[(l << 1)] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); U[(l << 1) + 1] = CalculateU(RGB[offset + 4], RGB[offset + 5], RGB[offset + 6]); V[(l << 1)] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); V[(l << 1) + 1] = CalculateV(RGB[offset + 4], RGB[offset + 5], RGB[offset + 6]); } else { offset = l * 6; Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); Y0[(l << 1) + 1] = CalculateY(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]); U[(l << 1)] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); U[(l << 1) + 1] = CalculateU(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]); V[(l << 1)] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); 
V[(l << 1) + 1] = CalculateV(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]); } } YPacked = PackPixels(Y0); UPacked = PackPixels(U); VPacked = PackPixels(V); dst1.write(y_idx++, YPacked); dst2.write(u_idx++, UPacked); dst3.write(v_idx++, VPacked); } } } // KernRgba2Iyuv template void KernRgba2Iyuv_ro(xf::cv::Mat& rgba, xf::cv::Mat& y_plane, xf::cv::Mat& u_plane, xf::cv::Mat& v_plane, uint16_t height, uint16_t width) { ap_uint8_t Y0[16], U[16], V[16]; uint8_t RGB[64]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Y0 complete #pragma HLS ARRAY_PARTITION variable=U complete #pragma HLS ARRAY_PARTITION variable=V complete #pragma HLS ARRAY_PARTITION variable=RGB complete // clang-format on unsigned long long int y_idx = 0, out_idx = 0, out_idx1 = 0; XF_SNAME(WORDWIDTH_SRC) PackedPixels; XF_SNAME(WORDWIDTH_DST) YPacked, UPacked, VPacked; uint8_t Ycount = 0, UVcount = 0; int offset; uchar_t UVoffset_ind, l; ap_uint<13> i, j; UVoffset_ind = (1 << XF_BITSHIFT(NPC)) >> 1; bool evenRow = true, evenBlock = true; rowloop: for (i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on PackedPixels = rgba.read(i * width + j); ExtractRGBAPixels(PackedPixels, RGB); for (l = 0; l<(1 << XF_BITSHIFT(NPC))>> 1; l++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on if (PLANES == 4) { offset = l << 3; Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); Y0[(l << 1) + 1] = CalculateY(RGB[offset + 4], RGB[offset + 5], RGB[offset + 6]); } else { offset = l * 6; Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); Y0[(l << 1) + 1] = CalculateY(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]); } if (evenRow) // As Sampling rate is 2, Calculating U and V 
components // only for even rows { /* 128 is added to U and V values to make them always positive and in * studio range 16-240 */ if (evenBlock) { U[l] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); V[l] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); } else { U[UVoffset_ind + l] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); V[UVoffset_ind + l] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); } } } YPacked = PackPixels(Y0); y_plane.write(y_idx++, YPacked); if (evenRow & !evenBlock) { UPacked = PackPixels(U); VPacked = PackPixels(V); u_plane.write(out_idx++, UPacked); v_plane.write(out_idx1++, VPacked); } evenBlock = evenBlock ? false : true; } evenRow = evenRow ? false : true; } // if(((ROWS+1)>>1) & 0x1) // { // Filling the empty region with zeros, when the height is // multiple // of 2 but not a multiple of 4 // for( i = 0; i < width; i++) // { //#pragma HLS LOOP_TRIPCOUNT min=TC max=TC // u_plane.write(0); // v_plane.write(0); // } // } } // KernRgba2Nv12 template void KernRgba2Nv12_ro(xf::cv::Mat& rgba, xf::cv::Mat& y_plane, xf::cv::Mat& uv_plane, uint16_t height, uint16_t width) { // width=width>>NPC; XF_PTNAME(XF_8UP) Y0[16], UV[16]; uint8_t RGB[64]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Y0 complete #pragma HLS ARRAY_PARTITION variable=UV complete #pragma HLS ARRAY_PARTITION variable=RGB complete // clang-format on XF_SNAME(WORDWIDTH_SRC) PackedPixels; XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked; unsigned long long int idx = 0, idx1 = 0; uint8_t offset; bool evenRow = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on PackedPixels = rgba.read(i * width + j); ExtractRGBAPixels(PackedPixels, RGB); for (int l = 0; l<(1 << 
XF_BITSHIFT(NPC))>> 1; l++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on if (PLANES == 4) { offset = l << 3; Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); Y0[(l << 1) + 1] = CalculateY(RGB[offset + 4], RGB[offset + 5], RGB[offset + 6]); } else { offset = l * 6; Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); Y0[(l << 1) + 1] = CalculateY(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]); } if (evenRow) { UV[l << 1] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); UV[(l << 1) + 1] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); } } YPacked = PackPixels(Y0); y_plane.write(idx++, YPacked); if (evenRow) { UVPacked = PackPixels(UV); uv_plane.write(idx1++, UVPacked); } } evenRow = evenRow ? false : true; } } // KernRgba2Nv12 template void Kernbgr2Nv12_ro(xf::cv::Mat& rgba, xf::cv::Mat& y_plane, xf::cv::Mat& uv_plane, uint16_t height, uint16_t width) { // width=width>>NPC; XF_PTNAME(XF_8UP) Y0[16], UV[16]; uint8_t RGB[64]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Y0 complete #pragma HLS ARRAY_PARTITION variable=UV complete #pragma HLS ARRAY_PARTITION variable=RGB complete // clang-format on XF_SNAME(WORDWIDTH_SRC) PackedPixels; XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked; unsigned long long int idx = 0, idx1 = 0; uint8_t offset; bool evenRow = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on PackedPixels = rgba.read(i * width + j); ExtractRGBAPixels(PackedPixels, RGB); for (int l = 0; l<(1 << XF_BITSHIFT(NPC))>> 1; l++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on if (PLANES == 4) { // offset = l 
<< 3; // Y0[(l<<1)] = CalculateY(RGB[offset+0], // RGB[offset+1], RGB[offset+2]); // Y0[(l<<1)+1] = CalculateY(RGB[offset+4], // RGB[offset+5], RGB[offset+6]); } else { offset = l * 6; Y0[(l << 1)] = CalculateY(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]); Y0[(l << 1) + 1] = CalculateY(RGB[offset + 5], RGB[offset + 4], RGB[offset + 3]); } if (evenRow) { UV[l << 1] = CalculateU(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]); UV[(l << 1) + 1] = CalculateV(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]); } } YPacked = PackPixels(Y0); y_plane.write(idx++, YPacked); if (evenRow) { UVPacked = PackPixels(UV); uv_plane.write(idx1++, UVPacked); } } evenRow = evenRow ? false : true; } } // KernRgba2Nv21 template void KernRgba2Nv21_ro(xf::cv::Mat& rgba, xf::cv::Mat& y_plane, xf::cv::Mat& vu_plane, uint16_t height, uint16_t width) { // width=width>>NPC; uint16_t i, j, k, l; ap_uint8_t Y0[16], VU[16]; uint8_t RGB[64]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Y0 complete #pragma HLS ARRAY_PARTITION variable=VU complete #pragma HLS ARRAY_PARTITION variable=RGB complete // clang-format on XF_SNAME(WORDWIDTH_SRC) PackedPixels; XF_SNAME(WORDWIDTH_Y) YPacked, VUPacked; uint8_t offset; unsigned long long int idx = 0, idx1 = 0; bool evenRow = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on PackedPixels = (XF_SNAME(WORDWIDTH_SRC))rgba.read(i * width + j); ExtractRGBAPixels(PackedPixels, RGB); for (int l = 0; l<(1 << XF_BITSHIFT(NPC))>> 1; l++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on if (PLANES == 4) { offset = l << 3; Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); Y0[(l << 1) + 1] = CalculateY(RGB[offset + 
4], RGB[offset + 5], RGB[offset + 6]); } else { offset = l * 6; Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); Y0[(l << 1) + 1] = CalculateY(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]); } if (evenRow) { VU[(l << 1)] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); VU[(l << 1) + 1] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]); } } YPacked = PackPixels(Y0); y_plane.write(idx++, YPacked); if (evenRow) { VUPacked = PackPixels(VU); vu_plane.write(idx1++, VUPacked); } } evenRow = evenRow ? false : true; } } template void Kernbgr2Nv21_ro(xf::cv::Mat& rgba, xf::cv::Mat& y_plane, xf::cv::Mat& vu_plane, uint16_t height, uint16_t width) { // width=width>>NPC; uint16_t i, j, k, l; ap_uint8_t Y0[16], VU[16]; uint8_t RGB[64]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Y0 complete #pragma HLS ARRAY_PARTITION variable=VU complete #pragma HLS ARRAY_PARTITION variable=RGB complete // clang-format on XF_SNAME(WORDWIDTH_SRC) PackedPixels; XF_SNAME(WORDWIDTH_Y) YPacked, VUPacked; uint8_t offset; unsigned long long int idx = 0, idx1 = 0; bool evenRow = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on PackedPixels = (XF_SNAME(WORDWIDTH_SRC))rgba.read(i * width + j); ExtractRGBAPixels(PackedPixels, RGB); for (int l = 0; l<(1 << XF_BITSHIFT(NPC))>> 1; l++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on if (PLANES == 4) { // offset = l << 3; // Y0[(l<<1)] = CalculateY(RGB[offset+0], // RGB[offset+1], RGB[offset+2]); // Y0[(l<<1)+1] = CalculateY(RGB[offset+4], // RGB[offset+5], RGB[offset+6]); } else { offset = l * 6; Y0[(l << 1)] = CalculateY(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]); 
Y0[(l << 1) + 1] = CalculateY(RGB[offset + 5], RGB[offset + 4], RGB[offset + 3]); } if (evenRow) { VU[(l << 1)] = CalculateV(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]); VU[(l << 1) + 1] = CalculateU(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]); } } YPacked = PackPixels(Y0); y_plane.write(idx++, YPacked); if (evenRow) { VUPacked = PackPixels(VU); vu_plane.write(idx1++, VUPacked); } } evenRow = evenRow ? false : true; } } // KernIyuv2Rgba template void KernIyuv2Rgba_ro(xf::cv::Mat& in_y, xf::cv::Mat& in_u, xf::cv::Mat& in_v, xf::cv::Mat& _rgba, uint16_t height, uint16_t width) { // width=width>>NPC; // ap_uint<13> i,j,k; // uchar_t k; XF_PTNAME(XF_8UP) RGB[64], Ybuf[16], Ubuf[16], Vbuf[16]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=Ybuf complete #pragma HLS ARRAY_PARTITION variable=Ubuf complete #pragma HLS ARRAY_PARTITION variable=Vbuf complete // clang-format on hls::stream<XF_SNAME(WORDWIDTH_SRC)> UStream, VStream; // clang-format off #pragma HLS STREAM variable=&UStream depth=COLS #pragma HLS STREAM variable=&VStream depth=COLS // clang-format on XF_SNAME(WORDWIDTH_SRC) YPacked, UPacked, VPacked; XF_SNAME(WORDWIDTH_DST) PackedPixels; unsigned long long int idx = 0, out_idx = 0; uint8_t Y00, Y01; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t U, V; uint8_t offset; bool evenRow = true, evenBlock = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YPacked = in_y.read(i * width + j); xfExtractPixels(Ybuf, YPacked, 0); if (evenBlock) { if (evenRow) { UPacked = in_u.read(idx); UStream.write(UPacked); VPacked = in_v.read(idx++); VStream.write(VPacked); } else { /* Copy of the U and V values are pushed into stream to be used for * next row */ UPacked =
UStream.read(); VPacked = VStream.read(); } xfExtractPixels(Ubuf, UPacked, 0); xfExtractPixels(Vbuf, VPacked, 0); offset = 0; } else { offset = (1 << XF_BITSHIFT(NPC)) >> 1; } for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // Y00 and Y01 have a U and V values in common // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on // Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0; // Y01 = (Ybuf[(k<<1) + 1] > 16) ? (Ybuf[(k<<1)+1]-16) : 0; if ((Ybuf[k << 1] > 16)) { Y00 = (Ybuf[k << 1] - 16); } else { Y00 = 0; } if ((Ybuf[(k << 1) + 1] > 16)) { Y01 = (Ybuf[(k << 1) + 1] - 16); } else { Y01 = 0; } U = Ubuf[k + offset] - 128; V = Vbuf[k + offset] - 128; V2Rtemp = V * (short int)V2R; U2Gtemp = (short int)U2G * U; V2Gtemp = (short int)V2G * V; U2Btemp = U * (short int)U2B; // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U RGB[(k << 3)] = CalculateR(Y00, V2Rtemp, V); // R0 RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k << 3) + 3] = 255; // A RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1 RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B1 RGB[(k << 3) + 7] = 255; // A } PackedPixels = PackRGBAPixels(RGB); _rgba.write(out_idx++, PackedPixels); evenBlock = evenBlock ? false : true; } evenRow = evenRow ? 
false : true; } } // KernIyuv2Nv12 template void KernIyuv2Nv12_ro(xf::cv::Mat& _u, xf::cv::Mat& _v, xf::cv::Mat& _uv, uint16_t height, uint16_t width) { ap_uint<13> i, j; XF_PTNAME(XF_8UP) U[16], V[16]; // clang-format off #pragma HLS ARRAY_PARTITION variable=U complete #pragma HLS ARRAY_PARTITION variable=V complete // clang-format on unsigned long long int idx = 0, idx1 = 0; XF_SNAME(WORDWIDTH_SRC) UVPacked0, UVPacked1, UPacked, VPacked; rowloop: for (i = 0; i < (height >> 1); i++) { /* * Reading the plane interleaved U and V data from streams and packing them in * pixel interleaved * and writing out to UV stream */ // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=rTC max=rTC // clang-format on columnloop: for (j = 0; j < (width >> (1 + XF_BITSHIFT(NPC))); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=cTC max=cTC // clang-format on UPacked = _u.read(idx); VPacked = _v.read(idx++); xfExtractPixels(U, UPacked, 0); xfExtractPixels(V, VPacked, 0); // Packing with alternative U and V values for Pixel interleaving #define AU_CVT_STEP 16 ap_uint<4> off = (1 << XF_BITSHIFT(NPC)) >> 1; ap_uint<4> k; int l; for (k = 0, l = 0; k < ((1 << XF_BITSHIFT(NPC)) >> 1); k++, l += AU_CVT_STEP) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS UNROLL // clang-format on UVPacked0.range(l + AU_CVT_STEP - 1, l) = (U[k]) | ((ap_uint<16>)V[k] << (8)); UVPacked1.range(l + AU_CVT_STEP - 1, l) = (U[k + off]) | ((ap_uint<16>)V[k + off] << (8)); } _uv.write(idx1++, UVPacked0); _uv.write(idx1++, UVPacked1); } } } // KernIyuv2Yuv4 template void KernIyuv2Yuv4_ro(xf::cv::Mat& _in_u, xf::cv::Mat& _in_v, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image, uint16_t height, uint16_t width) { XF_TNAME(SRC_T, NPC) arr[COLS >> XF_BITSHIFT(NPC)]; XF_TNAME(SRC_T, NPC) arr1[COLS >> XF_BITSHIFT(NPC)]; hls::stream<XF_SNAME(WORDWIDTH)> inter_u, inter_v; // clang-format off #pragma HLS stream variable=&inter_u depth=COLS/2 #pragma HLS stream variable=&inter_v
depth=COLS/2 // clang-format on unsigned long long int idx = 0, idx1 = 0; XF_PTNAME(XF_8UP) U[16], V[16]; // clang-format off #pragma HLS ARRAY_PARTITION variable=U complete #pragma HLS ARRAY_PARTITION variable=V complete // clang-format on XF_SNAME(WORDWIDTH) IUPacked, IVPacked, UPacked0, VPacked0, UPacked1, VPacked1; rowloop: for (int i = 0; i < ((height >> 2) << 1); i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=rTC max=rTC // clang-format on columnloop: for (int j = 0, k = 0; j < ((width >> XF_BITSHIFT(NPC)) >> 1); j++, k += 2) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=cTC max=cTC // clang-format on IUPacked = _in_u.read(idx); IVPacked = _in_v.read(idx++); xfExtractPixels(U, IUPacked, 0); xfExtractPixels(V, IVPacked, 0); #define AU_CVT_STEP 16 int off = 1 << (2); // (1 << NPC) >> 1; for (int k = 0, l = 0; k < (1 << (2)); k++, l += AU_CVT_STEP) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS UNROLL // clang-format on UPacked0.range(l + AU_CVT_STEP - 1, l) = (U[k]) | ((ap_uint<16>)U[k] << (8)); VPacked0.range(l + AU_CVT_STEP - 1, l) = (V[k]) | ((ap_uint<16>)V[k] << (8)); UPacked1.range(l + AU_CVT_STEP - 1, l) = (U[k + off]) | ((ap_uint<16>)U[k + off] << (8)); VPacked1.range(l + AU_CVT_STEP - 1, l) = (V[k + off]) | ((ap_uint<16>)V[k + off] << (8)); } _u_image.write((((i * 2)) * (_u_image.cols >> XF_BITSHIFT(NPC))) + k, UPacked0); _v_image.write((((i * 2)) * (_v_image.cols >> XF_BITSHIFT(NPC))) + k, VPacked0); _u_image.write((((i * 2)) * (_u_image.cols >> XF_BITSHIFT(NPC))) + k + 1, UPacked1); _v_image.write((((i * 2)) * (_v_image.cols >> XF_BITSHIFT(NPC))) + k + 1, VPacked1); inter_u.write(UPacked0); inter_v.write(VPacked0); inter_u.write(UPacked1); inter_v.write(VPacked1); } for (int j = 0; j < (_u_image.cols >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline // clang-format on _u_image.write((((i * 2) + 1) * (_u_image.cols >> XF_BITSHIFT(NPC))) 
+ j, inter_u.read()); _v_image.write((((i * 2) + 1) * (_u_image.cols >> XF_BITSHIFT(NPC))) + j, inter_v.read()); } } } // KernNv122Iyuv template void KernNv122Iyuv_ro(xf::cv::Mat& _uv, xf::cv::Mat& _u, xf::cv::Mat& _v, uint16_t height, uint16_t width) { XF_PTNAME(XF_8UP) UV0[16], UV1[16]; // clang-format off #pragma HLS ARRAY_PARTITION variable=UV0 complete #pragma HLS ARRAY_PARTITION variable=UV1 complete // clang-format on unsigned long long int idx = 0, idx1 = 0; XF_SNAME(WORDWIDTH_DST) UPacked, VPacked; XF_SNAME(WORDWIDTH_SRC) UVPacked0, UVPacked1; ap_uint<13> i, j; rowloop: for (i = 0; i < (height >> 1); i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (j = 0; j < ((width >> XF_BITSHIFT(NPC)) >> 1); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on UVPacked0 = _uv.read(idx++); UVPacked1 = _uv.read(idx++); xfExtractPixels(UV0, UVPacked0, 0); xfExtractPixels(UV1, UVPacked1, 0); // Packing the U and V by picking even indices for U and odd indices for V #define AU_CVT_STEP 16 int sft = 1 << (XF_BITSHIFT(NPC) + 2); int l; ap_uint<9> k; for (int k = 0, l = 0; k < (1 << (XF_BITSHIFT(NPC))); k += 4, l += AU_CVT_STEP) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS UNROLL // clang-format on VPacked.range(l + AU_CVT_STEP - 1, l) = (UV0[k + 1]) | ((ap_uint<16>)UV0[k + 3] << (8)); UPacked.range(l + AU_CVT_STEP - 1, l) = (UV0[k]) | ((ap_uint<16>)UV0[k + 2] << (8)); VPacked.range(l + sft + AU_CVT_STEP - 1, l + sft) = (UV1[k + 1]) | ((ap_uint<16>)UV1[k + 3] << (8)); UPacked.range(l + sft + AU_CVT_STEP - 1, l + sft) = (UV1[k]) | ((ap_uint<16>)UV1[k + 2] << (8)); } _u.write(idx1, UPacked); _v.write(idx1++, VPacked); } } } // KernNv122Rgba template void KernNv122Rgba_ro(xf::cv::Mat& in_y, xf::cv::Mat& in_uv, xf::cv::Mat& rgba, uint16_t height, uint16_t width) { // width=width>>NPC; XF_PTNAME(XF_8UP)
RGB[64], Ybuf[16], UVbuf[16]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=Ybuf complete #pragma HLS ARRAY_PARTITION variable=UVbuf complete // clang-format on hls::stream<XF_SNAME(WORDWIDTH_UV)> UVStream; // clang-format off #pragma HLS STREAM variable=&UVStream depth=COLS // clang-format on XF_SNAME(WORDWIDTH_Y) YPacked; XF_SNAME(WORDWIDTH_UV) UVPacked; XF_SNAME(WORDWIDTH_DST) PackedPixels; uint8_t Y00, Y01; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; unsigned long long int uv_idx = 0, out_idx = 0; int8_t U, V; bool evenRow = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YPacked = in_y.read(i * width + j); xfExtractPixels(Ybuf, YPacked, 0); if (evenRow) { UVPacked = in_uv.read(uv_idx++); UVStream.write(UVPacked); } else // Keep a copy of UV row data in stream to use for oddrow UVPacked = UVStream.read(); xfExtractPixels(UVbuf, UVPacked, 0); for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on // Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0; // Y01 = (Ybuf[(k<<1)+1] > 16) ?
(Ybuf[(k<<1)+1] - 16) : 0; if ((Ybuf[k << 1] > 16)) { Y00 = (Ybuf[k << 1] - 16); } else { Y00 = 0; } if ((Ybuf[(k << 1) + 1] > 16)) { Y01 = (Ybuf[(k << 1) + 1] - 16); } else { Y01 = 0; } U = UVbuf[k << 1] - 128; V = UVbuf[(k << 1) + 1] - 128; V2Rtemp = V * (short int)V2R; U2Gtemp = (short int)U2G * U; V2Gtemp = (short int)V2G * V; U2Btemp = U * (short int)U2B; // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U if (PLANES == 4) { RGB[(k << 3) + 0] = CalculateR(Y00, V2Rtemp, V); // R0 RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k << 3) + 3] = 255; // A RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1 RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B0 RGB[(k << 3) + 7] = 255; // A } else { RGB[(k * 6) + 0] = CalculateR(Y00, V2Rtemp, V); // R0 RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k * 6) + 2] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k * 6) + 3] = CalculateR(Y01, V2Rtemp, V); // R1 RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k * 6) + 5] = CalculateB(Y01, U2Btemp, U); // B0 } } PackedPixels = PackRGBAPixels(RGB); rgba.write(out_idx++, PackedPixels); } evenRow = evenRow ? 
false : true; } // if(height & 1) // { // for(int i = 0; i < (width>>NPC); i++) // { //#pragma HLS LOOP_TRIPCOUNT min=TC max=TC // UVStream.read(); // } // } } // KernNv122bgr template void KernNv122bgr_ro(xf::cv::Mat& in_y, xf::cv::Mat& in_uv, xf::cv::Mat& rgba, uint16_t height, uint16_t width) { // width=width>>NPC; XF_PTNAME(XF_8UP) RGB[64], Ybuf[16], UVbuf[16]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=Ybuf complete #pragma HLS ARRAY_PARTITION variable=UVbuf complete // clang-format on hls::stream<XF_SNAME(WORDWIDTH_UV)> UVStream; // clang-format off #pragma HLS STREAM variable=&UVStream depth=COLS // clang-format on XF_SNAME(WORDWIDTH_Y) YPacked; XF_SNAME(WORDWIDTH_UV) UVPacked; XF_SNAME(WORDWIDTH_DST) PackedPixels; uint8_t Y00, Y01; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; unsigned long long int uv_idx = 0, out_idx = 0; int8_t U, V; bool evenRow = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YPacked = in_y.read(i * width + j); xfExtractPixels(Ybuf, YPacked, 0); if (evenRow) { UVPacked = in_uv.read(uv_idx++); UVStream.write(UVPacked); } else // Keep a copy of UV row data in stream to use for oddrow UVPacked = UVStream.read(); xfExtractPixels(UVbuf, UVPacked, 0); for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on // Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0; // Y01 = (Ybuf[(k<<1)+1] > 16) ?
(Ybuf[(k<<1)+1] - 16) : 0; if ((Ybuf[k << 1] > 16)) { Y00 = (Ybuf[k << 1] - 16); } else { Y00 = 0; } if ((Ybuf[(k << 1) + 1] > 16)) { Y01 = (Ybuf[(k << 1) + 1] - 16); } else { Y01 = 0; } U = UVbuf[k << 1] - 128; V = UVbuf[(k << 1) + 1] - 128; V2Rtemp = V * (short int)V2R; U2Gtemp = (short int)U2G * U; V2Gtemp = (short int)V2G * V; U2Btemp = U * (short int)U2B; // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U if (PLANES == 4) { RGB[(k << 3) + 0] = CalculateR(Y00, V2Rtemp, V); // R0 RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k << 3) + 3] = 255; // A RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1 RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B0 RGB[(k << 3) + 7] = 255; // A } else { RGB[(k * 6) + 0] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k * 6) + 2] = CalculateR(Y00, V2Rtemp, V); // R0 RGB[(k * 6) + 3] = CalculateB(Y01, U2Btemp, U); // B0 RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k * 6) + 5] = CalculateR(Y01, V2Rtemp, V); // R1 } } PackedPixels = PackRGBAPixels(RGB); rgba.write(out_idx++, PackedPixels); } evenRow = evenRow ? 
false : true; } // if(height & 1) // { // for(int i = 0; i < (width>>NPC); i++) // { //#pragma HLS LOOP_TRIPCOUNT min=TC max=TC // UVStream.read(); // } // } } // KernNv122Yuv4 template void KernNv122Yuv4_ro(xf::cv::Mat& _uv, xf::cv::Mat& _u, xf::cv::Mat& _v, uint16_t height, uint16_t width) { XF_PTNAME(XF_8UP) UV[16]; // clang-format off #pragma HLS ARRAY_PARTITION variable=UV complete // clang-format on ap_uint<13> i, j; XF_SNAME(WORDWIDTH_UV) UPacked; XF_SNAME(WORDWIDTH_DST) VPacked, UVPacked; XF_SNAME(WORDWIDTH_DST) arr_UPacked[COLS >> (XF_BITSHIFT(NPC))], arr_VPacked[COLS >> (XF_BITSHIFT(NPC))]; unsigned long long int idx = 0, idx1 = 0; rowloop: for (i = 0; i < (height >> 1); i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on UVPacked = _uv.read(idx1++); xfExtractPixels(UV, UVPacked, 0); #define AU_CVT_STEP 16 for (int k = 0, l = 0; k < (1 << (XF_BITSHIFT(NPC))); k += 2, l += AU_CVT_STEP) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS UNROLL // clang-format on VPacked.range(l + AU_CVT_STEP - 1, l) = (UV[k + 1]) | ((ap_uint<16>)UV[k + 1] << (8)); UPacked.range(l + AU_CVT_STEP - 1, l) = (UV[k]) | ((ap_uint<16>)UV[k] << (8)); } _u.write(((i * 2) * (_u.cols >> XF_BITSHIFT(NPC))) + j, UPacked); _v.write(((i * 2) * (_v.cols >> XF_BITSHIFT(NPC))) + j, VPacked); arr_UPacked[j] = UPacked; arr_VPacked[j] = VPacked; } for (j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { _u.write((((i * 2) + 1) * (_u.cols >> XF_BITSHIFT(NPC))) + j, arr_UPacked[j]); _v.write((((i * 2) + 1) * (_v.cols >> XF_BITSHIFT(NPC))) + j, arr_VPacked[j]); } } } // KernNv212Iyuv template void KernNv212Iyuv_ro(xf::cv::Mat& in_uv, xf::cv::Mat& u_out, xf::cv::Mat& v_out, uint16_t height, uint16_t width) { XF_PTNAME(XF_8UP) VU0[16], VU1[16]; // 
clang-format off #pragma HLS ARRAY_PARTITION variable=VU0 complete #pragma HLS ARRAY_PARTITION variable=VU1 complete // clang-format on ap_uint<13> i, j; XF_SNAME(WORDWIDTH_DST) UPacked, VPacked; XF_SNAME(WORDWIDTH_SRC) VUPacked0, VUPacked1; unsigned long long int idx = 0, idx1 = 0; int l; ap_uint<4> k; rowloop: for (i = 0; i < (height >> 1); i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (j = 0; j < ((width >> XF_BITSHIFT(NPC)) >> 1); j++) { // reading UV pixel interleaved data and writing them into // UStream and VStream // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on VUPacked0 = in_uv.read(idx++); VUPacked1 = in_uv.read(idx++); xfExtractPixels(VU0, VUPacked0, 0); xfExtractPixels(VU1, VUPacked1, 0); #define AU_CVT_STEP 16 int sft = 1 << (XF_BITSHIFT(NPC) + 2); for (k = 0, l = 0; k < (1 << (XF_BITSHIFT(NPC))); k += 4, l += AU_CVT_STEP) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS UNROLL // clang-format on UPacked.range(l + AU_CVT_STEP - 1, l) = (VU0[k + 1]) | ((ap_uint<16>)VU0[k + 3] << (8)); VPacked.range(l + AU_CVT_STEP - 1, l) = (VU0[k]) | ((ap_uint<16>)VU0[k + 2] << (8)); UPacked.range(l + sft + AU_CVT_STEP - 1, l + sft) = (VU1[k + 1]) | ((ap_uint<16>)VU1[k + 3] << (8)); VPacked.range(l + sft + AU_CVT_STEP - 1, l + sft) = (VU1[k]) | ((ap_uint<16>)VU1[k + 2] << (8)); } u_out.write(idx1, UPacked); v_out.write(idx1, VPacked); idx1++; } } /* if((height>>1)& 0x1) { // Writing 0's to fill the stream if the UV plane width is odd for(int i = 0; i < ((width>>XF_BITSHIFT(NPC))>>1); i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on u_out.write(idx1,0); v_out.write(idx1++,0); } }*/ } // template void // KernNv212bgr_ro(xf::cv::Mat & // in_y,xf::cv::Mat & in_uv,xf::cv::Mat & rgba,uint16_t // height,uint16_t width) //{ // XF_PTNAME(XF_8UP) RGB[64],Ybuf[16],UVbuf[16]; 
//#pragma HLS ARRAY_PARTITION variable=RGB complete //#pragma HLS ARRAY_PARTITION variable=Ybuf complete //#pragma HLS ARRAY_PARTITION variable=UVbuf complete // ap_uint<13> i,j; // unsigned long long int in_idx=0,out_idx=0; // int k; // hls::stream UVStream; //#pragma HLS STREAM variable=&UVStream depth=COLS // XF_SNAME(WORDWIDTH_Y) YPacked; XF_SNAME(WORDWIDTH_UV) UVPacked; // XF_SNAME(WORDWIDTH_DST) PackedPixels; // uint8_t Y00, Y01; // int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; // int8_t U, V; // bool evenRow = true; // rowloop: // for( i = 0; i < height; i++) // { //#pragma HLS LOOP_FLATTEN off //#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // columnloop: // for( j = 0; j < width; j++) // { //#pragma HLS pipeline //#pragma HLS LOOP_TRIPCOUNT min=TC max=TC // YPacked = in_y.read(i*width+j); // xfExtractPixels(Ybuf, YPacked, // 0); // if(evenRow) // { // UVPacked = in_uv.read(in_idx++); // UVStream.write(UVPacked); // } // else // Keep a copy of UV row data in stream to use for // oddrow // { // UVPacked = UVStream.read(); // } // // xfExtractPixels(UVbuf, // UVPacked, // 0); // for( k = 0; k < (1<>1; k++) // { //#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC //#pragma HLS unroll // //Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0; // //Y01 = (Ybuf[(k<<1)+1] > 16) ? 
//(Ybuf[(k<<1)+1]-16) //: // 0; // // if((Ybuf[k<<1] > 16)) // { // Y00 = (Ybuf[k<<1]-16); // } // else // { // Y00 = 0; // } // // if((Ybuf[(k<<1)+1] > 16)) // { // Y01 = (Ybuf[(k<<1)+1]-16); // } // else // { // Y01 = 0; // } // // V = UVbuf[k<<1] - 128; // U = UVbuf[(k<<1)+1] - 128; // // V2Rtemp = V * (short int)V2R; // U2Gtemp = (short int)U2G * U; // V2Gtemp = (short int)V2G * V; // U2Btemp = U * (short int)U2B; // // // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + // 0.596*V // // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y //- // 0.813*V - 0.391*U // // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + // 0.018*U // // RGB[(k*6) + 0] = // CalculateB(Y00,U2Btemp,U); // RGB[(k*6) + 1] = // CalculateG(Y00,U2Gtemp,V2Gtemp); //G0 // RGB[(k*6) + 2] = // CalculateR(Y00,V2Rtemp,V); // RGB[(k*6) + 3] = // CalculateB(Y01,U2Btemp,U); // RGB[(k*6) + 4] = // CalculateG(Y01,U2Gtemp,V2Gtemp); //G1 // RGB[(k*6) + 5] = // CalculateR(Y01,V2Rtemp,V); // // } // // PackedPixels = PackRGBAPixels(RGB); // rgba.write(out_idx++,PackedPixels); // } // evenRow = evenRow ? 
false : true; // } //} template void KernNv212Rgba_ro(xf::cv::Mat& in_y, xf::cv::Mat& in_uv, xf::cv::Mat& rgba, uint16_t height, uint16_t width) { XF_PTNAME(XF_8UP) RGB[64], Ybuf[16], UVbuf[16]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=Ybuf complete #pragma HLS ARRAY_PARTITION variable=UVbuf complete // clang-format on ap_uint<13> i, j; unsigned long long int in_idx = 0, out_idx = 0; int k; hls::stream UVStream; // clang-format off #pragma HLS STREAM variable=&UVStream depth=COLS // clang-format on XF_SNAME(WORDWIDTH_Y) YPacked; XF_SNAME(WORDWIDTH_UV) UVPacked; XF_SNAME(WORDWIDTH_DST) PackedPixels; uint8_t Y00, Y01; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t U, V; bool evenRow = true; rowloop: for (i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YPacked = in_y.read(i * width + j); xfExtractPixels(Ybuf, YPacked, 0); if (evenRow) { UVPacked = in_uv.read(in_idx++); UVStream.write(UVPacked); } else // Keep a copy of UV row data in stream to use for oddrow UVPacked = UVStream.read(); xfExtractPixels(UVbuf, UVPacked, 0); for (k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on // Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0; // Y01 = (Ybuf[(k<<1)+1] > 16) ? 
(Ybuf[(k<<1)+1]-16) : 0; if ((Ybuf[k << 1] > 16)) { Y00 = (Ybuf[k << 1] - 16); } else { Y00 = 0; } if ((Ybuf[(k << 1) + 1] > 16)) { Y01 = (Ybuf[(k << 1) + 1] - 16); } else { Y01 = 0; } V = UVbuf[k << 1] - 128; U = UVbuf[(k << 1) + 1] - 128; V2Rtemp = V * (short int)V2R; U2Gtemp = (short int)U2G * U; V2Gtemp = (short int)V2G * V; U2Btemp = U * (short int)U2B; // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U if (PLANES == 4) { RGB[(k << 3) + 0] = CalculateR(Y00, V2Rtemp, V); // R0 RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k << 3) + 3] = 255; // A RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1 RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B0 RGB[(k << 3) + 7] = 255; // A } else { RGB[(k * 6) + 0] = CalculateR(Y00, V2Rtemp, V); // R0 RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k * 6) + 2] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k * 6) + 3] = CalculateR(Y01, V2Rtemp, V); // R1 RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k * 6) + 5] = CalculateB(Y01, U2Btemp, U); // B0 } } PackedPixels = PackRGBAPixels(RGB); rgba.write(out_idx++, PackedPixels); } evenRow = evenRow ? 
false : true; } // if(height & 1) // { // for( i = 0; i < (width>>XF_BITSHIFT(NPC)); i++) // { //#pragma HLS LOOP_TRIPCOUNT min=TC max=TC // UVStream.read(); // } // } } template void KernNv212bgr_ro(xf::cv::Mat& in_y, xf::cv::Mat& in_uv, xf::cv::Mat& rgba, uint16_t height, uint16_t width) { XF_PTNAME(XF_8UP) RGB[64], Ybuf[16], UVbuf[16]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=Ybuf complete #pragma HLS ARRAY_PARTITION variable=UVbuf complete // clang-format on ap_uint<13> i, j; unsigned long long int in_idx = 0, out_idx = 0; int k; hls::stream UVStream; // clang-format off #pragma HLS STREAM variable=&UVStream depth=COLS // clang-format on XF_SNAME(WORDWIDTH_Y) YPacked; XF_SNAME(WORDWIDTH_UV) UVPacked; XF_SNAME(WORDWIDTH_DST) PackedPixels; uint8_t Y00, Y01; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t U, V; bool evenRow = true; rowloop: for (i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YPacked = in_y.read(i * width + j); xfExtractPixels(Ybuf, YPacked, 0); if (evenRow) { UVPacked = in_uv.read(in_idx++); UVStream.write(UVPacked); } else // Keep a copy of UV row data in stream to use for oddrow UVPacked = UVStream.read(); xfExtractPixels(UVbuf, UVPacked, 0); for (k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on // Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0; // Y01 = (Ybuf[(k<<1)+1] > 16) ? 
(Ybuf[(k<<1)+1]-16) : 0; if ((Ybuf[k << 1] > 16)) { Y00 = (Ybuf[k << 1] - 16); } else { Y00 = 0; } if ((Ybuf[(k << 1) + 1] > 16)) { Y01 = (Ybuf[(k << 1) + 1] - 16); } else { Y01 = 0; } V = UVbuf[k << 1] - 128; U = UVbuf[(k << 1) + 1] - 128; V2Rtemp = V * (short int)V2R; U2Gtemp = (short int)U2G * U; V2Gtemp = (short int)V2G * V; U2Btemp = U * (short int)U2B; // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U // if(PLANES==4) // { // RGB[(k<<3) + 0] = // CalculateR(Y00,V2Rtemp,V); // RGB[(k<<3) + 1] = // CalculateG(Y00,U2Gtemp,V2Gtemp); //G0 // RGB[(k<<3) + 2] = // CalculateB(Y00,U2Btemp,U); // RGB[(k<<3) + 3] = 255; // RGB[(k<<3) + 4] = // CalculateR(Y01,V2Rtemp,V); // RGB[(k<<3) + 5] = // CalculateG(Y01,U2Gtemp,V2Gtemp); //G1 // RGB[(k<<3) + 6] = // CalculateB(Y01,U2Btemp,U); // RGB[(k<<3) + 7] = 255; // } // else // { RGB[(k * 6) + 0] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k * 6) + 2] = CalculateR(Y00, V2Rtemp, V); // R0 RGB[(k * 6) + 3] = CalculateB(Y01, U2Btemp, U); // B0 RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k * 6) + 5] = CalculateR(Y01, V2Rtemp, V); // R1 // } } PackedPixels = PackRGBAPixels(RGB); rgba.write(out_idx++, PackedPixels); } evenRow = evenRow ? 
false : true; } // if(height & 1) // { // for( i = 0; i < (width>>XF_BITSHIFT(NPC)); i++) // { //#pragma HLS LOOP_TRIPCOUNT min=TC max=TC // UVStream.read(); // } // } } // KernNv212Yuv4 template void KernNv212Yuv4_ro(xf::cv::Mat& _vu, xf::cv::Mat& _u, xf::cv::Mat& _v, uint16_t height, uint16_t width) { XF_PTNAME(XF_8UP) VUbuf[16]; // clang-format off #pragma HLS ARRAY_PARTITION variable=VUbuf complete // clang-format on XF_SNAME(WORDWIDTH_DST) UPacked, VPacked; XF_SNAME(WORDWIDTH_VU) VUPacked; XF_SNAME(WORDWIDTH_DST) arr_UPacked[COLS >> (XF_BITSHIFT(NPC))], arr_VPacked[COLS >> (XF_BITSHIFT(NPC))]; ap_uint<13> i, j; ap_uint<4> k; unsigned long long int idx = 0, idx1 = 0; int l; rowloop: for (i = 0; i < (height >> 1); i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on VUPacked = _vu.read(idx1++); xfExtractPixels(VUbuf, VUPacked, 0); #define AU_CVT_STEP 16 for (k = 0, l = 0; k < (1 << (XF_BITSHIFT(NPC))); k += 2, l += AU_CVT_STEP) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS UNROLL // clang-format on UPacked.range(l + AU_CVT_STEP - 1, l) = (VUbuf[k + 1]) | ((ap_uint<16>)VUbuf[k + 1] << (8)); VPacked.range(l + AU_CVT_STEP - 1, l) = (VUbuf[k]) | ((ap_uint<16>)VUbuf[k] << (8)); } //_u.write(idx,UPacked); //_v.write(idx++,VPacked); _u.write(((i * 2) * (_u.cols >> XF_BITSHIFT(NPC))) + j, UPacked); _v.write(((i * 2) * (_v.cols >> XF_BITSHIFT(NPC))) + j, VPacked); arr_UPacked[j] = UPacked; arr_VPacked[j] = VPacked; } for (j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { _u.write((((i * 2) + 1) * (_u.cols >> XF_BITSHIFT(NPC))) + j, arr_UPacked[j]); _v.write((((i * 2) + 1) * (_v.cols >> XF_BITSHIFT(NPC))) + j, arr_VPacked[j]); } } } // KernYuyv2Rgba template void KernYuyv2Rgba_ro(xf::cv::Mat& yuyv, xf::cv::Mat& rgba, 
uint16_t height, uint16_t width) { ap_uint8_t RGB[64]; XF_PTNAME(XF_8UP) YUVbuf[32]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=YUVbuf complete // clang-format on XF_SNAME(WORDWIDTH_DST) PackedPixels; XF_SNAME(WORDWIDTH_SRC) YUVPacked; unsigned long long int idx = 0; uint8_t Y00, Y01; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t U, V; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YUVPacked = yuyv.read(i * width + j); ExtractUYVYPixels(YUVPacked, YUVbuf); for (int k = 0; k < (XF_NPIXPERCYCLE(NPC) >> 1); k++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC // clang-format on // Y00 = (YUVbuf[(k<<2)] > 16) ? (YUVbuf[(k<<2)]-16) : 0; if (YUVbuf[(k << 2)] > 16) { Y00 = (YUVbuf[(k << 2)] - 16); } else { Y00 = 0; } U = YUVbuf[(k << 2) + 1] - 128; // Y01 = (YUVbuf[(k<<2)+2] > 16) ? 
(YUVbuf[(k<<2)+2]-16) : 0; if (YUVbuf[(k << 2) + 2] > 16) { Y01 = YUVbuf[(k << 2) + 2] - 16; } else { Y01 = 0; } V = YUVbuf[(k << 2) + 3] - 128; V2Rtemp = V * (short int)V2R; U2Gtemp = (short int)U2G * U; V2Gtemp = (short int)V2G * V; U2Btemp = U * (short int)U2B; if (PLANES == 4) { RGB[(k << 3)] = CalculateR(Y00, V2Rtemp, V); // R0 RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k << 3) + 3] = 255; // A RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1 RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B0 RGB[(k << 3) + 7] = 255; // A } else { RGB[(k * 6)] = CalculateR(Y00, V2Rtemp, V); // R0 RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k * 6) + 2] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k * 6) + 3] = CalculateR(Y01, V2Rtemp, V); // R1 RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k * 6) + 5] = CalculateB(Y01, U2Btemp, U); // B0 } } PackedPixels = PackRGBAPixels(RGB); rgba.write(idx++, PackedPixels); } } } template void KernYuyv2bgr_ro(xf::cv::Mat& yuyv, xf::cv::Mat& rgba, uint16_t height, uint16_t width) { ap_uint8_t RGB[64]; XF_PTNAME(XF_8UP) YUVbuf[32]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=YUVbuf complete // clang-format on XF_SNAME(WORDWIDTH_DST) PackedPixels; XF_SNAME(WORDWIDTH_SRC) YUVPacked; unsigned long long int idx = 0; uint8_t Y00, Y01; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t U, V; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YUVPacked = yuyv.read(i * width + j); ExtractUYVYPixels(YUVPacked, YUVbuf); for (int k = 0; k < (XF_NPIXPERCYCLE(NPC) 
>> 1); k++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC // clang-format on // Y00 = (YUVbuf[(k<<2)] > 16) ? (YUVbuf[(k<<2)]-16) : 0; if (YUVbuf[(k << 2)] > 16) { Y00 = (YUVbuf[(k << 2)] - 16); } else { Y00 = 0; } U = YUVbuf[(k << 2) + 1] - 128; // Y01 = (YUVbuf[(k<<2)+2] > 16) ? (YUVbuf[(k<<2)+2]-16) : 0; if (YUVbuf[(k << 2) + 2] > 16) { Y01 = YUVbuf[(k << 2) + 2] - 16; } else { Y01 = 0; } V = YUVbuf[(k << 2) + 3] - 128; V2Rtemp = V * (short int)V2R; U2Gtemp = (short int)U2G * U; V2Gtemp = (short int)V2G * V; U2Btemp = U * (short int)U2B; RGB[(k * 6)] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k * 6) + 2] = CalculateR(Y00, V2Rtemp, V); // R0 RGB[(k * 6) + 3] = CalculateB(Y01, U2Btemp, U); // B0 RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k * 6) + 5] = CalculateR(Y01, V2Rtemp, V); // R1 } PackedPixels = PackRGBAPixels(RGB); rgba.write(idx++, PackedPixels); } } } template void KernYuyv2Nv12_ro(xf::cv::Mat& _yuyv, xf::cv::Mat& y_plane, xf::cv::Mat& uv_plane, uint16_t height, uint16_t width) { XF_PTNAME(XF_8UP) Ybuf[16], UVbuf[16], YUVbuf[32]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Ybuf complete #pragma HLS ARRAY_PARTITION variable=UVbuf complete #pragma HLS ARRAY_PARTITION variable=YUVbuf complete // clang-format on XF_SNAME(WORDWIDTH_SRC) YUVPacked; XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked; unsigned long long idx = 0, idx1 = 0; bool evenRow = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YUVPacked = _yuyv.read(i * width + j); ExtractUYVYPixels(YUVPacked, YUVbuf); for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // filling the Ybuf and UVbuf in the format required for NV12 // clang-format 
off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on Ybuf[(k << 1)] = YUVbuf[(k << 2)]; Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 2]; if (evenRow) { UVbuf[(k << 1)] = YUVbuf[(k << 2) + 1]; UVbuf[(k << 1) + 1] = YUVbuf[(k << 2) + 3]; } } YPacked = PackPixels(Ybuf); y_plane.write(idx++, YPacked); if (evenRow) { UVPacked = PackPixels(UVbuf); uv_plane.write(idx1++, UVPacked); } } evenRow = evenRow ? false : true; } } template void KernYuyv2Nv21_ro(xf::cv::Mat& _yuyv, xf::cv::Mat& y_plane, xf::cv::Mat& uv_plane, uint16_t height, uint16_t width) { XF_PTNAME(XF_8UP) Ybuf[16], UVbuf[16], YUVbuf[32]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Ybuf complete #pragma HLS ARRAY_PARTITION variable=UVbuf complete #pragma HLS ARRAY_PARTITION variable=YUVbuf complete // clang-format on XF_SNAME(WORDWIDTH_SRC) YUVPacked; XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked; unsigned long long idx = 0, idx1 = 0; bool evenRow = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YUVPacked = _yuyv.read(i * width + j); ExtractUYVYPixels(YUVPacked, YUVbuf); for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // filling the Ybuf and UVbuf in the format required for NV12 // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on Ybuf[(k << 1)] = YUVbuf[(k << 2)]; Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 2]; if (evenRow) { UVbuf[(k << 1) + 1] = YUVbuf[(k << 2) + 1]; UVbuf[(k << 1)] = YUVbuf[(k << 2) + 3]; } } YPacked = PackPixels(Ybuf); y_plane.write(idx++, YPacked); if (evenRow) { UVPacked = PackPixels(UVbuf); uv_plane.write(idx1++, UVPacked); } } evenRow = evenRow ? 
false : true; } } template void KernYuyv2Iyuv_ro(xf::cv::Mat& _yuyv, xf::cv::Mat& _y, xf::cv::Mat& _u, xf::cv::Mat& _v, uint16_t height, uint16_t width) { uint16_t i, j, k, l; ap_uint8_t Ybuf[16], Ubuf[16], Vbuf[16], YUVbuf[32]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Ybuf complete #pragma HLS ARRAY_PARTITION variable=Ubuf complete #pragma HLS ARRAY_PARTITION variable=Vbuf complete #pragma HLS ARRAY_PARTITION variable=YUVbuf complete // clang-format on unsigned long long int idx = 0, idx1 = 0; XF_SNAME(WORDWIDTH_SRC) YUVPacked; XF_SNAME(WORDWIDTH_DST) YPacked0, UPacked, VPacked; uint8_t offset; bool evenRow = true, evenBlock = true; offset = (1 << XF_BITSHIFT(NPC)) >> 1; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YUVPacked = _yuyv.read(i * width + j); ExtractUYVYPixels(YUVPacked, YUVbuf); for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on Ybuf[(k << 1)] = YUVbuf[(k << 2)]; Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 2]; if (evenRow) { if (evenBlock) { Ubuf[k] = YUVbuf[(k << 2) + 1]; Vbuf[k] = YUVbuf[(k << 2) + 3]; } else { Ubuf[k + offset] = YUVbuf[(k << 2) + 1]; Vbuf[k + offset] = YUVbuf[(k << 2) + 3]; } } } YPacked0 = PackPixels(Ybuf); _y.write(idx++, YPacked0); if (evenRow & !evenBlock) { UPacked = PackPixels(Ubuf); VPacked = PackPixels(Vbuf); _u.write(idx1, UPacked); _v.write(idx1++, VPacked); } evenBlock = evenBlock ? false : true; } evenRow = evenRow ? 
false : true; } } // KernUyvy2Iyuv template void KernUyvy2Iyuv_ro(xf::cv::Mat& _uyvy, xf::cv::Mat& y_plane, xf::cv::Mat& u_plane, xf::cv::Mat& v_plane, uint16_t height, uint16_t width) { ap_uint8_t Ybuf[16], Ubuf[16], Vbuf[16], YUVbuf[32]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Ybuf complete #pragma HLS ARRAY_PARTITION variable=Ubuf complete #pragma HLS ARRAY_PARTITION variable=Vbuf complete #pragma HLS ARRAY_PARTITION variable=YUVbuf complete // clang-format on XF_SNAME(WORDWIDTH_SRC) YUVPacked; XF_SNAME(WORDWIDTH_DST) YPacked0, UPacked, VPacked; uint8_t offset; unsigned long long int idx = 0, idx1 = 0; bool evenRow = true, evenBlock = true; offset = (1 << XF_BITSHIFT(NPC)) >> 1; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YUVPacked = _uyvy.read(i * width + j); ExtractUYVYPixels(YUVPacked, YUVbuf); for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on Ybuf[(k << 1)] = YUVbuf[(k << 2) + 1]; Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 3]; if (evenRow) { if (evenBlock) { Ubuf[k] = YUVbuf[(k << 2)]; Vbuf[k] = YUVbuf[(k << 2) + 2]; } else { Ubuf[k + offset] = YUVbuf[(k << 2)]; Vbuf[k + offset] = YUVbuf[(k << 2) + 2]; } } } YPacked0 = PackPixels(Ybuf); y_plane.write(idx1++, YPacked0); if (evenRow & !evenBlock) { UPacked = PackPixels(Ubuf); VPacked = PackPixels(Vbuf); u_plane.write(idx, UPacked); v_plane.write(idx++, VPacked); } evenBlock = evenBlock ? false : true; } evenRow = evenRow ? 
false : true; } } template void KernUyvy2Nv12_ro(xf::cv::Mat& _uyvy, xf::cv::Mat& y_plane, xf::cv::Mat& uv_plane, uint16_t height, uint16_t width) { ap_uint8_t Ybuf[16], UVbuf[16], YUVbuf[32]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Ybuf complete #pragma HLS ARRAY_PARTITION variable=UVbuf complete #pragma HLS ARRAY_PARTITION variable=YUVbuf complete // clang-format on XF_SNAME(WORDWIDTH_SRC) YUVPacked; XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked; unsigned long long int idx = 0, idx1 = 0; bool evenRow = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YUVPacked = _uyvy.read(i * width + j); ExtractUYVYPixels(YUVPacked, YUVbuf); // filling the Ybuf and UVbuf in the format required for NV12 for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on Ybuf[(k << 1)] = YUVbuf[(k << 2) + 1]; Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 3]; if (evenRow) { UVbuf[(k << 1)] = YUVbuf[(k << 2)]; UVbuf[(k << 1) + 1] = YUVbuf[(k << 2) + 2]; } } YPacked = PackPixels(Ybuf); y_plane.write(idx++, YPacked); if (evenRow) { UVPacked = PackPixels(UVbuf); uv_plane.write(idx1++, UVPacked); } } evenRow = evenRow ? 
false : true; } } // KernUyvy2Nv21 template void KernUyvy2Nv21_ro(xf::cv::Mat& _uyvy, xf::cv::Mat& y_plane, xf::cv::Mat& uv_plane, uint16_t height, uint16_t width) { ap_uint8_t Ybuf[16], UVbuf[16], YUVbuf[32]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Ybuf complete #pragma HLS ARRAY_PARTITION variable=UVbuf complete #pragma HLS ARRAY_PARTITION variable=YUVbuf complete // clang-format on XF_SNAME(WORDWIDTH_SRC) YUVPacked; XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked; unsigned long long int idx = 0, idx1 = 0; bool evenRow = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on YUVPacked = _uyvy.read(i * width + j); ExtractUYVYPixels(YUVPacked, YUVbuf); // filling the Ybuf and UVbuf in the format required for NV12 for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on Ybuf[(k << 1)] = YUVbuf[(k << 2) + 1]; Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 3]; if (evenRow) { UVbuf[(k << 1)] = YUVbuf[(k << 2) + 2]; UVbuf[(k << 1) + 1] = YUVbuf[(k << 2)]; } } YPacked = PackPixels(Ybuf); y_plane.write(idx++, YPacked); if (evenRow) { UVPacked = PackPixels(UVbuf); uv_plane.write(idx1++, UVPacked); } } evenRow = evenRow ? 
false : true; } } template void KernUyvy2Rgb_ro(xf::cv::Mat& uyvy, xf::cv::Mat& rgba, uint16_t height, uint16_t width) { uint16_t i, j, k; XF_PTNAME(XF_8UP) RGB[64], YUVbuf[32]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=YUVbuf complete // clang-format on XF_SNAME(WORDWIDTH_DST) PackedPixels; XF_SNAME(WORDWIDTH_SRC) YUVPacked; uint8_t Y00, Y01; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t U, V; unsigned long long int idx = 0, out_idx = 0; rowloop: for (i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS #pragma HLS LOOP_FLATTEN off // clang-format on columnloop: for (j = 0; j < width; j++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on YUVPacked = uyvy.read(idx++); ExtractUYVYPixels(YUVPacked, YUVbuf); for (k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on U = YUVbuf[(k << 2)] - 128; // Y00 = (YUVbuf[(k<<2) + 1] > 16) ? (YUVbuf[(k<<2) + 1] - 16):0; if (YUVbuf[(k << 2) + 1] > 16) { Y00 = (YUVbuf[(k << 2) + 1] - 16); } else { Y00 = 0; } V = YUVbuf[(k << 2) + 2] - 128; // Y01 = (YUVbuf[(k<<2) + 3] > 16) ? 
(YUVbuf[(k<<2) + 3] - 16):0; if ((YUVbuf[(k << 2) + 3] > 16)) { Y01 = (YUVbuf[(k << 2) + 3] - 16); } else { Y01 = 0; } V2Rtemp = V * (short int)V2R; U2Gtemp = (short int)U2G * U; V2Gtemp = (short int)V2G * V; U2Btemp = U * (short int)U2B; RGB[(k * 6)] = CalculateR(Y00, V2Rtemp, V); // G0 RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k * 6) + 2] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k * 6) + 3] = CalculateR(Y01, V2Rtemp, V); // R1 RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k * 6) + 5] = CalculateB(Y01, U2Btemp, U); // B0 } PackedPixels = PackRGBAPixels(RGB); rgba.write(out_idx++, PackedPixels); } } } template void KernUyvy2bgr_ro(xf::cv::Mat& uyvy, xf::cv::Mat& rgba, uint16_t height, uint16_t width) { uint16_t i, j, k; XF_PTNAME(XF_8UP) RGB[64], YUVbuf[32]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=YUVbuf complete // clang-format on XF_SNAME(WORDWIDTH_DST) PackedPixels; XF_SNAME(WORDWIDTH_SRC) YUVPacked; uint8_t Y00, Y01; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t U, V; unsigned long long int idx = 0, out_idx = 0; rowloop: for (i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS #pragma HLS LOOP_FLATTEN off // clang-format on columnloop: for (j = 0; j < width; j++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on YUVPacked = uyvy.read(idx++); ExtractUYVYPixels(YUVPacked, YUVbuf); for (k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on U = YUVbuf[(k << 2)] - 128; // Y00 = (YUVbuf[(k<<2) + 1] > 16) ? (YUVbuf[(k<<2) + 1] - 16):0; if (YUVbuf[(k << 2) + 1] > 16) { Y00 = (YUVbuf[(k << 2) + 1] - 16); } else { Y00 = 0; } V = YUVbuf[(k << 2) + 2] - 128; // Y01 = (YUVbuf[(k<<2) + 3] > 16) ? 
(YUVbuf[(k<<2) + 3] - 16):0; if ((YUVbuf[(k << 2) + 3] > 16)) { Y01 = (YUVbuf[(k << 2) + 3] - 16); } else { Y01 = 0; } V2Rtemp = V * (short int)V2R; U2Gtemp = (short int)U2G * U; V2Gtemp = (short int)V2G * V; U2Btemp = U * (short int)U2B; RGB[(k * 6)] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k * 6) + 2] = CalculateR(Y00, V2Rtemp, V); // G0 RGB[(k * 6) + 3] = CalculateB(Y01, U2Btemp, U); // B0 RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k * 6) + 5] = CalculateR(Y01, V2Rtemp, V); // R1 } PackedPixels = PackRGBAPixels(RGB); rgba.write(out_idx++, PackedPixels); } } } template void KernUyvy2Rgba_ro(xf::cv::Mat& uyvy, xf::cv::Mat& rgba, uint16_t height, uint16_t width) { uint16_t i, j, k; XF_PTNAME(XF_8UP) RGB[64], YUVbuf[32]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=YUVbuf complete // clang-format on XF_SNAME(WORDWIDTH_DST) PackedPixels; XF_SNAME(WORDWIDTH_SRC) YUVPacked; uint8_t Y00, Y01; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t U, V; unsigned long long int idx = 0, out_idx = 0; rowloop: for (i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS #pragma HLS LOOP_FLATTEN off // clang-format on columnloop: for (j = 0; j < width; j++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on YUVPacked = uyvy.read(idx++); ExtractUYVYPixels(YUVPacked, YUVbuf); for (k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on U = YUVbuf[(k << 2)] - 128; // Y00 = (YUVbuf[(k<<2) + 1] > 16) ? (YUVbuf[(k<<2) + 1] - 16):0; if (YUVbuf[(k << 2) + 1] > 16) { Y00 = (YUVbuf[(k << 2) + 1] - 16); } else { Y00 = 0; } V = YUVbuf[(k << 2) + 2] - 128; // Y01 = (YUVbuf[(k<<2) + 3] > 16) ? 
(YUVbuf[(k<<2) + 3] - 16):0; if ((YUVbuf[(k << 2) + 3] > 16)) { Y01 = (YUVbuf[(k << 2) + 3] - 16); } else { Y01 = 0; } V2Rtemp = V * (short int)V2R; U2Gtemp = (short int)U2G * U; V2Gtemp = (short int)V2G * V; U2Btemp = U * (short int)U2B; RGB[(k << 3)] = CalculateR(Y00, V2Rtemp, V); // G0 RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0 RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0 RGB[(k << 3) + 3] = 255; RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1 RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1 RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B0 RGB[(k << 3) + 7] = 255; } PackedPixels = PackRGBAPixels(RGB); rgba.write(out_idx++, PackedPixels); } } } /******************************************************************************** * Color Conversion APIs *******************************************************************************/ template void xFRgba2Yuv4(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernRgba2Yuv4(_src, _y_image, _u_image, _v_image, height, width); } else { KernRgba2Yuv4_ro> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _u_image, _v_image, height, width); } } template void rgba2yuv4(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC4) && " RGBA image Type must be XF_8UC4"); assert((DST_T == XF_8UC1) && " Y, U, V image Type must be XF_8UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGBA image rows and cols should be less than ROWS, COLS"); assert(((_src.cols == _y_image.cols) && (_src.rows == _y_image.rows)) && "RGBA and Y plane dimensions mismatch"); assert(((_src.cols == _u_image.cols) && (_src.rows == _u_image.rows)) && "RGBA and U plane dimensions mismatch"); assert(((_src.cols == 
_v_image.cols) && (_src.rows == _v_image.rows)) && "RGBA and V plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xFRgba2Yuv4( _src, _y_image, _u_image, _v_image, _src.rows, _src.cols); } template void KernRgb2Iyuv(xf::cv::Mat& _rgba, xf::cv::Mat& _y, xf::cv::Mat& _u, xf::cv::Mat& _v, uint16_t height, uint16_t width) { ap_uint<24> rgba; uint8_t y, u, v; bool evenRow = true, evenBlock = true; unsigned long long int idx = 0, idx1 = 0; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on rgba = _rgba.read(i * width + j); uint8_t r = rgba.range(7, 0); uint8_t g = rgba.range(15, 8); uint8_t b = rgba.range(23, 16); y = CalculateY(r, g, b); if (evenRow) { if (evenBlock) { u = CalculateU(r, g, b); v = CalculateV(r, g, b); } } _y.write(idx++, y); if (evenRow & !evenBlock) { _u.write(idx1, u); _v.write(idx1++, v); } evenBlock = evenBlock ? false : true; } evenRow = evenRow ? 
false : true; } } template void xFRgb2Iyuv(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == XF_NPPC1) { KernRgb2Iyuv( _src, _y_image, _u_image, _v_image, height, width); } else { KernRgba2Iyuv_ro> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _u_image, _v_image, height, width); } } template void rgb2iyuv(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3"); assert((DST_T == XF_8UC1) && " Y, U, V image Type must be XF_8UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS"); assert(((_src.cols == _y_image.cols) && (_src.rows == _y_image.rows)) && "RGB and Y plane dimensions mismatch"); assert(((_src.cols == _u_image.cols) && (_src.rows == (_u_image.rows << 2))) && "RGB and U plane dimensions mismatch"); assert(((_src.cols == _v_image.cols) && (_src.rows == (_v_image.rows << 2))) && "RGB and V plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xFRgb2Iyuv( _src, _y_image, _u_image, _v_image, _src.rows, _src.cols); } template void xFRgba2Iyuv(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == XF_NPPC1) { KernRgba2Iyuv( _src, _y_image, _u_image, _v_image, height, width); } else { KernRgba2Iyuv_ro> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _u_image, _v_image, height, width); } } template void rgba2iyuv(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef 
__SYNTHESIS__ assert((SRC_T == XF_8UC4) && " RGBA image Type must be XF_8UC3"); assert((DST_T == XF_8UC1) && " Y, U, V image Type must be XF_8UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGBA image rows and cols should be less than ROWS, COLS"); assert(((_src.cols == _y_image.cols) && (_src.rows == _y_image.rows)) && "RGBA and Y plane dimensions mismatch"); assert(((_src.cols == _u_image.cols) && (_src.rows == (_u_image.rows << 2))) && "RGBA and U plane dimensions mismatch"); assert(((_src.cols == _v_image.cols) && (_src.rows == (_v_image.rows << 2))) && "RGBA and V plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xFRgba2Iyuv( _src, _y_image, _u_image, _v_image, _src.rows, _src.cols); } // auRgba2Iyuv // template void xFRgba2Nv21(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernRgba2Nv21( _src, _y, _uv, height, width); } else { KernRgba2Nv21_ro> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height, width); } } template void rgba2nv21(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC4) && " RGBA image Type must be XF_8UC3"); assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2"); assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and RGBA plane dimensions mismatch"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and VU planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the VU " "plane for multipixel parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) 
&& " 1,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same "); } #endif xFRgba2Nv21(_src, _y, _uv, _src.rows, _src.cols); } template void xFRgba2Nv12(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernRgba2Nv12( _src, _y, _uv, height, width); } else { KernRgba2Nv12_ro> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height, width); } } template void rgba2nv12(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC4) && " RGBA image Type must be XF_8UC3"); assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2"); assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and RGBA plane dimensions mismatch"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same "); } #endif xFRgba2Nv12(_src, _y, _uv, _src.rows, _src.cols); } // auRgba2Nv21 template void xFIyuv2Rgba(xf::cv::Mat& src_y, xf::cv::Mat& src_u, xf::cv::Mat& src_v, xf::cv::Mat& _dst0, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if ((NPC == XF_NPPC8)) { KernIyuv2Rgba_ro> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(src_y, src_u, src_v, _dst0, height, width); } else { KernIyuv2Rgba> XF_BITSHIFT(NPC))>( src_y, src_u, src_v, _dst0, height, width); } } 
template void iyuv2rgba(xf::cv::Mat& src_y, xf::cv::Mat& src_u, xf::cv::Mat& src_v, xf::cv::Mat& _dst0) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC1) && " Y, U, V images Type must be XF_8UC1"); assert((DST_T == XF_8UC4) && " RGBA image Type must be XF_8UC4"); assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((src_y.cols == (_dst0.cols)) && (src_y.rows == _dst0.rows)) && "Y plane and RGBA dimensions mismatch"); assert(((src_u.cols == (_dst0.cols)) && (src_u.rows == (_dst0.rows >> 2))) && "U plane and RGBA dimensions mismatch"); assert(((src_v.cols == (_dst0.cols)) && (src_v.rows == (_dst0.rows >> 2))) && "V plane and RGBA dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xFIyuv2Rgba( src_y, src_u, src_v, _dst0, src_y.rows, src_y.cols); } // Iyuv2Rgba template void xFIyuv2Yuv4(xf::cv::Mat& src_y, xf::cv::Mat& src_u, xf::cv::Mat& src_v, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image, uint16_t height, uint16_t width) { if (NPC == XF_NPPC8) { // clang-format off #pragma HLS DATAFLOW // clang-format on KernIyuv2Yuv4_ro> XF_BITSHIFT(NPC)) >> 1), ((1 << XF_BITSHIFT(NPC)) >> 1)>(src_u, src_v, _u_image, _v_image, height, width); write_y_ro> XF_BITSHIFT(NPC))>(src_y, _y_image, height, width); } else if (NPC == XF_NPPC1) { // clang-format off #pragma HLS DATAFLOW // clang-format on KernIyuv2Yuv4> 1), ((COLS >> XF_BITSHIFT(NPC)) >> 1)>( src_u, src_v, _u_image, _v_image, height, width); write_y> XF_BITSHIFT(NPC)), ROWS>(src_y, _y_image, height, width); } } template void iyuv2yuv4(xf::cv::Mat& src_y, xf::cv::Mat& src_u, xf::cv::Mat& src_v, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC1) && " Y, U, V images Type must be 
XF_8UC1"); assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((src_y.cols == (_y_image.cols)) && (src_y.rows == _y_image.rows)) && "input and ouput Y planes dimensions mismatch"); assert(((src_u.cols == (_u_image.cols)) && (src_u.rows == (_u_image.rows >> 2))) && "input and ouput U dimensions mismatch"); assert(((src_v.cols == (_v_image.cols)) && (src_v.rows == (_v_image.rows >> 2))) && "input and ouput V dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xFIyuv2Yuv4(src_y, src_u, src_v, _y_image, _u_image, _v_image, src_y.rows, src_y.cols); } template void xFIyuv2Nv12(xf::cv::Mat& src_y, xf::cv::Mat& src_u, xf::cv::Mat& src_v, xf::cv::Mat& _y_image, xf::cv::Mat& _uv_image, uint16_t height, uint16_t width) { if (NPC == XF_NPPC8) { // clang-format off #pragma HLS DATAFLOW // clang-format on KernIyuv2Nv12_ro> 1), ((COLS >> XF_BITSHIFT(NPC)) >> 1), ((1 << XF_BITSHIFT(NPC)) >> 1)>(src_u, src_v, _uv_image, height, width); write_y_ro> XF_BITSHIFT(NPC))>(src_y, _y_image, height, width); } else { // clang-format off #pragma HLS DATAFLOW // clang-format on KernIyuv2Nv12> 1), ((COLS >> XF_BITSHIFT(NPC)) >> 1)>(src_u, src_v, _uv_image, height, width); write_y> XF_BITSHIFT(NPC)), (ROWS >> 1)>(src_y, _y_image, height, width); } } template void iyuv2nv12(xf::cv::Mat& src_y, xf::cv::Mat& src_u, xf::cv::Mat& src_v, xf::cv::Mat& _y_image, xf::cv::Mat& _uv_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC1) && " Y, U, V images Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2"); assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((src_y.cols == (_y_image.cols)) && (src_y.rows == _y_image.rows)) && "input and ouput Y planes dimensions mismatch"); assert(((src_y.cols == 
(src_u.cols)) && (src_y.rows == (src_u.rows << 2))) && "Y and U dimensions mismatch"); assert(((src_y.cols == (src_v.cols)) && (src_y.rows == (src_v.rows << 2))) && "Y and V dimensions mismatch"); assert(((src_y.cols == (_uv_image.cols << 1)) && (src_y.rows == (_uv_image.rows << 1))) && "input and ouput Y planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same "); } #endif xFIyuv2Nv12( src_y, src_u, src_v, _y_image, _uv_image, src_y.rows, src_y.cols); } template void xFNv122Iyuv(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image, uint16_t height, uint16_t width) { if (NPC == XF_NPPC8) { // clang-format off #pragma HLS DATAFLOW // clang-format on KernNv122Iyuv_ro> XF_BITSHIFT(NPC)) >> 1), ((1 << XF_BITSHIFT(NPC)) >> 2)>(src_uv, _u_image, _v_image, height, width); write_y_ro> XF_BITSHIFT(NPC))>(src_y, _y_image, height, width); } else { // clang-format off #pragma HLS DATAFLOW // clang-format on KernNv122Iyuv> XF_BITSHIFT(NPC)) >> 1)>(src_uv, _u_image, _v_image, height, width); write_y> XF_BITSHIFT(NPC)), (ROWS >> XF_BITSHIFT(NPC))>( src_y, _y_image, height, width); } } // Nv122Iyuv template void nv122iyuv(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC1) && " Y,U,V image Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2"); assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) && "Y and UV 
planes dimensions mismatch"); assert(((src_y.cols == _y_image.cols) && (src_y.rows == _y_image.rows)) && "Input and Outut Y planes dimensions mismatch"); assert(((src_y.cols == _u_image.cols) && (src_y.rows == (_u_image.rows << 2))) && "U, Y planes dimensions mismatch"); assert(((src_y.cols == _v_image.cols) && (src_y.rows == (_v_image.rows << 2))) && "V, Y planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same "); } #endif xFNv122Iyuv( src_y, src_uv, _y_image, _u_image, _v_image, src_y.rows, src_y.cols); } template void xFNv122Rgba(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _dst0, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernNv122Rgba( src_y, src_uv, _dst0, height, width); } else { KernNv122Rgba_ro> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width); } } // Nv122Rgba template void nv122rgba(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _dst0) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2"); assert((DST_T == XF_8UC4) && " RGBA image Type must be XF_8UC4"); assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and RGBA Aplane dimensions mismatch"); assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) && "Y and UV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel 
parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same "); } #endif xFNv122Rgba(src_y, src_uv, _dst0, src_y.rows, src_y.cols); } template void xFNv122Yuv4(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image, uint16_t height, uint16_t width) { // assert(( (in_uv.cols == (u_out.cols)) && (in_uv.rows == //(u_out.rows>>1))) // && "UV plane and U plane dimensions mismatch"); // assert(( (in_uv.cols == (v_out.cols)) && (in_uv.rows == //(v_out.rows>>1))) // && "UV plane and V plane dimensions mismatch"); if (NPC == XF_NPPC8) { // clang-format off #pragma HLS DATAFLOW // clang-format on KernNv122Yuv4_ro> XF_BITSHIFT(NPC)), ((1 << (XF_BITSHIFT(NPC))) >> 1)>(src_uv, _u_image, _v_image, height, width); write_y_ro> XF_BITSHIFT(NPC))>(src_y, _y_image, height, width); } else { // clang-format off #pragma HLS DATAFLOW // clang-format on KernNv122Yuv4> XF_BITSHIFT(NPC))>( src_uv, _u_image, _v_image, height, width); write_y> XF_BITSHIFT(NPC)), ROWS>(src_y, _y_image, height, width); } } // auNv122Yuv4 template void nv122yuv4(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on xFNv122Yuv4( src_y, src_uv, _y_image, _u_image, _v_image, src_y.rows, src_y.cols); } template void xFNv212Iyuv(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image, uint16_t height, uint16_t width) { if (NPC == XF_NPPC8) { // clang-format off #pragma HLS DATAFLOW // clang-format on KernNv212Iyuv_ro> XF_BITSHIFT(NPC)) >> 1), ((1 << XF_BITSHIFT(NPC)) >> 2)>(src_uv, _u_image, _v_image, height, width); write_y_ro> XF_BITSHIFT(NPC))>(src_y, _y_image, height, width); } else { // clang-format off #pragma HLS 
DATAFLOW // clang-format on KernNv212Iyuv> XF_BITSHIFT(NPC)) >> 1)>(src_uv, _u_image, _v_image, height, width); write_y> XF_BITSHIFT(NPC)), ROWS>(src_y, _y_image, height, width); } } // Nv212Iyuv template void nv212iyuv(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC1) && " Y,U,V image Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2"); assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) && "Y and VU planes dimensions mismatch"); assert(((src_y.cols == _y_image.cols) && (src_y.rows == _y_image.rows)) && "Input and Outut Y planes dimensions mismatch"); assert(((src_y.cols == _u_image.cols) && (src_y.rows == (_u_image.rows << 2))) && "U, Y planes dimensions mismatch"); assert(((src_y.cols == _v_image.cols) && (src_y.rows == (_v_image.rows << 2))) && "V, Y planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the VU " "plane for multipixel parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same "); } #endif xFNv212Iyuv( src_y, src_uv, _y_image, _u_image, _v_image, src_y.rows, src_y.cols); } template void xFNv212Rgba(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _dst0, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernNv212Rgba( src_y, src_uv, _dst0, height, width); } else { KernNv212Rgba_ro> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width); } } // Nv212Rgba template void nv212rgba(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _dst0) 
{ // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2"); assert((DST_T == XF_8UC4) && " RGBA image Type must be XF_8UC4"); assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and RGBA Aplane dimensions mismatch"); assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) && "Y and VU planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the VU " "plane for multipixel parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same "); } #endif xFNv212Rgba(src_y, src_uv, _dst0, src_y.rows, src_y.cols); } template void xFNv212Yuv4(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image, uint16_t height, uint16_t width) { if (NPC == XF_NPPC8) { // clang-format off #pragma HLS DATAFLOW // clang-format on KernNv212Yuv4_ro> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(src_uv, _u_image, _v_image, height, width); write_y_ro> XF_BITSHIFT(NPC))>(src_y, _y_image, height, width); } else { // clang-format off #pragma HLS DATAFLOW // clang-format on KernNv212Yuv4> XF_BITSHIFT(NPC))>( src_uv, _u_image, _v_image, height, width); write_y> XF_BITSHIFT(NPC)), ROWS>(src_y, _y_image, height, width); } } // auNv212Yuv4 template void nv212yuv4(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC1) && "Y plane Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && "UV plane 
Type must be XF_8UC2"); assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_y_image.cols == src_y.cols) && (_y_image.rows == src_y.rows)) && "Y planes dimensions mismatch"); assert(((_u_image.cols == src_y.cols) && (_u_image.rows == src_y.rows)) && "Y and U planes dimensions mismatch"); assert(((_v_image.cols == src_y.cols) && (_v_image.rows == src_y.rows)) && "Y and V planes dimensions mismatch"); assert((((src_uv.cols << 1) == src_y.cols) && ((src_uv.rows << 1) == src_y.rows)) && "Y and V planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same "); } #endif xFNv212Yuv4( src_y, src_uv, _y_image, _u_image, _v_image, src_y.rows, src_y.cols); } template void xFUyvy2Iyuv(xf::cv::Mat& uyvy, xf::cv::Mat& y_plane, xf::cv::Mat& u_plane, xf::cv::Mat& v_plane, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == XF_NPPC8) { KernUyvy2Iyuv_ro> 1) >> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(uyvy, y_plane, u_plane, v_plane, height, width); } else { KernUyvy2Iyuv> 1) >> XF_BITSHIFT(NPC))>( uyvy, y_plane, u_plane, v_plane, height, width); } } template void uyvy2iyuv(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1"); assert((DST_T == XF_8UC1) && " Y, U, V planes Type must be XF_8UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " UYVY image rows and cols should be less than ROWS, COLS"); assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and UYVY planes dimensions 
mismatch"); assert(((_u_image.cols == _src.cols) && ((_u_image.rows << 2) == _src.rows)) && "U and UYVY planes dimensions mismatch"); assert(((_v_image.cols == _src.cols) && ((_v_image.rows << 2) == _src.rows)) && "U and UYVY planes dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1, 8 pixel parallelism is supported "); #endif xFUyvy2Iyuv( _src, _y_image, _u_image, _v_image, _src.rows, _src.cols); } // Uyvy2Nv12 template void xFUyvy2Nv12(xf::cv::Mat& uyvy, xf::cv::Mat& y_plane, xf::cv::Mat& uv_plane, uint16_t height, uint16_t width) { /* assert(( (uyvy.cols == (y_plane.cols<<1)) && (uyvy.rows == y_plane.rows)) && "UYVY and Y plane dimensions mismatch"); assert(( (uyvy.cols == (uv_plane.cols<<1)) && (uyvy.rows == (uv_plane.rows<<1))) && "UYVY and UV plane dimensions mismatch");*/ width = width >> XF_BITSHIFT(NPC); if (NPC == XF_NPPC1) { // clang-format off #pragma HLS DATAFLOW // clang-format on KernUyvy2Nv12> 1) >> XF_BITSHIFT(NPC))>(uyvy, y_plane, uv_plane, height, width); } else { KernUyvy2Nv12_ro> 1) >> XF_BITSHIFT(NPC)), ((1 << NPC) >> 1)>(uyvy, y_plane, uv_plane, height, width); } } template void uyvy2nv12(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _uv_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1"); assert((Y_T == XF_8UC1) && " Y plane Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2"); assert(((_y_image.rows <= ROWS) && (_y_image.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_y_image.cols == (_uv_image.cols << 1)) && (_y_image.rows == (_uv_image.rows << 1))) && "Y and UV planes dimensions mismatch"); assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and UYVY planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel 
parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same "); } #endif xFUyvy2Nv12(_src, _y_image, _uv_image, _src.rows, _src.cols); } template void xFUyvy2Rgba(xf::cv::Mat& _src, xf::cv::Mat& _dst, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernUyvy2Rgba> 1) >> XF_BITSHIFT(NPC))>( _src, _dst, height, width); } else { KernUyvy2Rgba_ro> 1) >> XF_BITSHIFT(NPC)), (1 << XF_BITSHIFT(NPC) >> 1)>(_src, _dst, height, width); } } template void uyvy2rgba(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1"); assert((DST_T == XF_8UC4) && " RGBA plane Type must be XF_8UC4"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGBA and UYVY planes dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xFUyvy2Rgba( _src, _dst, _src.rows, _src.cols); } // Yuyv2Iyuv template void xFYuyv2Iyuv(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == XF_NPPC8) { KernYuyv2Iyuv_ro> 1) >> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _u_image, _v_image, height, width); } else { KernYuyv2Iyuv> 1) >> XF_BITSHIFT(NPC))>( _src, _y_image, _u_image, _v_image, height, width); } } template void yuyv2iyuv(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " YUYV plane 
Type must be XF_16UC1"); assert((DST_T == XF_8UC1) && " Y, U, V planes Type must be XF_8UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " YUYV image rows and cols should be less than ROWS, COLS"); assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and UYVY planes dimensions mismatch"); assert(((_u_image.cols == _src.cols) && ((_u_image.rows << 2) == _src.rows)) && "U and UYVY planes dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1, 8 pixel parallelism is supported "); #endif xFYuyv2Iyuv( _src, _y_image, _u_image, _v_image, _src.rows, _src.cols); } // Yuyv2Nv12 template void xFYuyv2Nv12(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _uv_image, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == XF_NPPC1) { KernYuyv2Nv12> 1) >> XF_BITSHIFT(NPC))>(_src, _y_image, _uv_image, height, width); } else { KernYuyv2Nv12_ro> 1) >> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _uv_image, height, width); } } template void yuyv2nv12(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _uv_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " YUYV plane Type must be XF_16UC1"); assert((Y_T == XF_8UC1) && " Y plane Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2"); assert(((_y_image.rows <= ROWS) && (_y_image.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_y_image.cols == (_uv_image.cols << 1)) && (_y_image.rows == (_uv_image.rows << 1))) && "Y and UV planes dimensions mismatch"); assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and YUYV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same "); 
} #endif xFYuyv2Nv12(_src, _y_image, _uv_image, _src.rows, _src.cols); } // Yuyv2Rgba template void xFYuyv2Rgba(xf::cv::Mat& _src, xf::cv::Mat& _dst, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernYuyv2Rgba> 1) >> XF_BITSHIFT(NPC))>( _src, _dst, height, width); } else { KernYuyv2Rgba_ro> 1) >> XF_BITSHIFT(NPC)), ((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _dst, height, width); } } template void yuyv2rgba(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " YUYV plane Type must be XF_16UC1"); assert((DST_T == XF_8UC4) && " RGBA plane Type must be XF_8UC4"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " YUYV image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "YUYV and RGBA planes dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xFYuyv2Rgba( _src, _dst, _src.rows, _src.cols); } template void xFRgb2Nv12(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernRgba2Nv12( _src, _y, _uv, height, width); } else { KernRgba2Nv12_ro> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height, width); } } template void rgb2nv12(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv) { // clang-format off #pragma HLS INLINE OFF // clang-format on assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3"); assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2"); assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and RGB plane dimensions mismatch"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == 
(_uv.rows << 1))) && "Y and UV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same "); } xFRgb2Nv12(_src, _y, _uv, _src.rows, _src.cols); } // template void KernRgb2Nv21(xf::cv::Mat & _rgba, // xf::cv::Mat & _y, // xf::cv::Mat & _vu,uint16_t height,uint16_t width) //{ // width=width>>XF_BITSHIFT(NPC); // XF_SNAME(XF_32UW) rgba; // unsigned long long int idx=0,idx1=0; // uint8_t y, u, v; // bool evenRow = true, evenBlock = true; // // RowLoop: // for(int i = 0; i < height; i++) // { //#pragma HLS LOOP_FLATTEN off //#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // ColLoop: // for(int j = 0; j < width; j++) // { //#pragma HLS pipeline //#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // rgba = _rgba.read(i*width+j); // uint8_t r = rgba.range(7,0); // uint8_t g = rgba.range(15,8); // uint8_t b = rgba.range(23,16); // // y = CalculateY(r, g, b); // if(evenRow) // { // u = CalculateU(r, g, b); // v = CalculateV(r, g, b); // } // _y.write(idx++,y); // if(evenRow) // { // if((j & 0x01)==0) // _vu.write(idx1++,v | ((uint16_t)u << // 8)); // } // } // evenRow = evenRow ? 
false : true; // } //} template void xFRgb2Nv21(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernRgba2Nv21( _src, _y, _uv, height, width); } else { KernRgba2Nv21_ro> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height, width); } } template void rgb2nv21(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3"); assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2"); assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and RGB plane dimensions mismatch"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same "); } #endif xFRgb2Nv21(_src, _y, _uv, _src.rows, _src.cols); } template void KernRgb2Yuv4(xf::cv::Mat& _rgba, xf::cv::Mat& _y, xf::cv::Mat& _u, xf::cv::Mat& _v, uint16_t height, uint16_t width) { XF_SNAME(XF_32UW) rgba; uint8_t y, u, v; unsigned long long int idx = 0; RowLoop: for (int i = 0; i < height; ++i) { // clang-format off #pragma HLS LOOP_FLATTEN OFF #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (int j = 0; j < width; ++j) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS #pragma HLS PIPELINE // clang-format on rgba = _rgba.read(i * width + j); y = CalculateY(rgba.range(7, 0), rgba.range(15, 8), rgba.range(23, 16)); u = CalculateU(rgba.range(7, 0), rgba.range(15, 8), rgba.range(23, 16)); v = 
CalculateV(rgba.range(7, 0), rgba.range(15, 8), rgba.range(23, 16)); _y.write(idx, y); _u.write(idx, u); _v.write(idx++, v); } } } template void xFRgb2Yuv4(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image, uint16_t height, uint16_t width) { // assert(( (rgba.cols == y_plane.cols) && (rgba.rows == y_plane.rows)) // && "RGBA and Y plane dimensions mismatch"); // assert(( (rgba.cols == u_plane.cols) && (rgba.rows == u_plane.rows)) // && "RGBA and U plane dimensions mismatch"); // assert(( (rgba.cols == v_plane.cols) && (rgba.rows == v_plane.rows)) // && "RGBA and V plane dimensions mismatch"); width = width >> (XF_BITSHIFT(NPC)); if (NPC == 1) { KernRgb2Yuv4(_src, _y_image, _u_image, _v_image, height, width); } else { KernRgba2Yuv4_ro> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _u_image, _v_image, height, width); } } template void rgb2yuv4(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _u_image, xf::cv::Mat& _v_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3"); assert((DST_T == XF_8UC1) && " Y, U, V image Type must be XF_8UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS"); assert(((_src.cols == _y_image.cols) && (_src.rows == _y_image.rows)) && "RGB and Y plane dimensions mismatch"); assert(((_src.cols == _u_image.cols) && (_src.rows == _u_image.rows)) && "RGB and U plane dimensions mismatch"); assert(((_src.cols == _v_image.cols) && (_src.rows == _v_image.rows)) && "RGB and V plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xFRgb2Yuv4( _src, _y_image, _u_image, _v_image, _src.rows, _src.cols); } template void KernUyvy2Rgb(xf::cv::Mat& _uyvy, xf::cv::Mat& _rgba, uint16_t height, uint16_t width) { XF_SNAME(WORDWIDTH_DST) rgba; 
XF_SNAME(WORDWIDTH_SRC) uyvy; XF_SNAME(WORDWIDTH_SRC) uy; XF_SNAME(WORDWIDTH_SRC) vy; unsigned long long int idx = 0; XF_PTNAME(XF_8UP) r, g, b; int8_t y1, y2, u, v; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS #pragma HLS LOOP_FLATTEN off // clang-format on ColLoop: for (int j = 0; j < width; j += 2) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on // uyvy = _uyvy.read(); uy = _uyvy.read(i * width + j); vy = _uyvy.read(i * width + j + 1); u = (uint8_t)uy.range(7, 0) - 128; /* if(uyvy.range(15,8) > 16) y1 = (uint8_t)uyvy.range(15,8) - 16; else y1 = 0;*/ y1 = (uy.range(15, 8) > 16) ? ((uint8_t)uy.range(15, 8) - 16) : 0; v = (uint8_t)vy.range(7, 0) - 128; /* if(uyvy.range(31,24) > 16) y2 = ((uint8_t)uyvy.range(31,24) - 16); else y2 = 0;*/ y2 = (vy.range(15, 8) > 16) ? ((uint8_t)vy.range(15, 8) - 16) : 0; V2Rtemp = v * (short int)V2R; U2Gtemp = (short int)U2G * u; V2Gtemp = (short int)V2G * v; U2Btemp = u * (short int)U2B; r = CalculateR(y1, V2Rtemp, v); g = CalculateG(y1, U2Gtemp, V2Gtemp); b = CalculateB(y1, U2Btemp, u); rgba = ((ap_uint24_t)r) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)b << 16); _rgba.write(idx, rgba); idx++; r = CalculateR(y2, V2Rtemp, v); g = CalculateG(y2, U2Gtemp, V2Gtemp); b = CalculateB(y2, U2Btemp, u); rgba = ((ap_uint24_t)r) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)b << 16); _rgba.write(idx, rgba); idx++; } } } template void xFUyvy2Rgb(xf::cv::Mat& _src, xf::cv::Mat& _dst, uint16_t height, uint16_t width) { /* assert(( (uyvy.cols == (rgba.cols<<1)) && (uyvy.rows == rgba.rows)) && "UYVY and RGBA plane dimensions mismatch");*/ width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernUyvy2Rgb> 1) >> XF_BITSHIFT(NPC))>( _src, _dst, height, width); } else { KernUyvy2Rgb_ro> 1) >> XF_BITSHIFT(NPC)), ((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _dst, height, width); } } template void 
uyvy2rgb(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1"); assert((DST_T == XF_8UC3) && " RGB plane Type must be XF_8UC3"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " UYVY image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "UYVY and RGB planes dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xFUyvy2Rgb(_src, _dst, _src.rows, _src.cols); } template void KernYuyv2Rgb(xf::cv::Mat& _yuyv, xf::cv::Mat& _rgba, uint16_t height, uint16_t width) { XF_SNAME(WORDWIDTH_DST) rgba; XF_SNAME(WORDWIDTH_SRC) yu, yv; XF_PTNAME(XF_8UP) r, g, b; int8_t y1, y2, u, v; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; unsigned long long int idx = 0; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS #pragma HLS LOOP_FLATTEN off // clang-format on ColLoop: for (int j = 0; j < width; j += 2) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on yu = _yuyv.read(i * width + j); yv = _yuyv.read(i * width + j + 1); u = (uint8_t)yu.range(15, 8) - 128; y1 = (yu.range(7, 0) > 16) ? ((uint8_t)yu.range(7, 0) - 16) : 0; v = (uint8_t)yv.range(15, 8) - 128; y2 = (yv.range(7, 0) > 16) ? 
((uint8_t)yv.range(7, 0) - 16) : 0; V2Rtemp = v * (short int)V2R; U2Gtemp = (short int)U2G * u; V2Gtemp = (short int)V2G * v; U2Btemp = u * (short int)U2B; r = CalculateR(y1, V2Rtemp, v); g = CalculateG(y1, U2Gtemp, V2Gtemp); b = CalculateB(y1, U2Btemp, u); rgba = ((ap_uint24_t)r) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)b << 16); _rgba.write(idx++, rgba); r = CalculateR(y2, V2Rtemp, v); g = CalculateG(y2, U2Gtemp, V2Gtemp); b = CalculateB(y2, U2Btemp, u); rgba = ((ap_uint24_t)r) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)b << 16); _rgba.write(idx++, rgba); } } } // Yuyv2Rgba template void xFYuyv2Rgb(xf::cv::Mat& _src, xf::cv::Mat& _dst, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernYuyv2Rgb> 1) >> XF_BITSHIFT(NPC))>( _src, _dst, height, width); } else { KernYuyv2Rgba_ro> 1) >> XF_BITSHIFT(NPC)), ((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _dst, height, width); } } template void yuyv2rgb(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " YUYV plane Type must be XF_16UC1"); assert((DST_T == XF_8UC3) && " RGB plane Type must be XF_8UC3"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " YUYV image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "YUYV and RGB planes dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xFYuyv2Rgb(_src, _dst, _src.rows, _src.cols); } template void KernIyuv2Rgb(xf::cv::Mat& _y, xf::cv::Mat& _u, xf::cv::Mat& _v, xf::cv::Mat& _rgba, uint16_t height, uint16_t width) { ap_uint<13> i, j; hls::stream uStream, vStream; // clang-format off #pragma HLS STREAM variable=&uStream depth=TC #pragma HLS STREAM variable=&vStream depth=TC // clang-format on XF_SNAME(WORDWIDTH_SRC) yPacked, uPacked, vPacked; XF_SNAME(WORDWIDTH_DST) 
rgba; unsigned long long int idx = 0, idx1 = 0; uint8_t y1, y2; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t u, v; bool evenRow = true, evenBlock = true; RowLoop: for (i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on yPacked = _y.read(i * width + j); // dummy1 = dst1.read(); // dummy2 = dst2.read(); ap_uint k1; if (evenBlock) { if (evenRow) { uPacked = _u.read(idx); uStream.write(uPacked); vPacked = _v.read(idx++); vStream.write(vPacked); } else { /* Copy of the U and V values are pushed into stream to be used for * next row */ uPacked = uStream.read(); vPacked = vStream.read(); } k1 = 0; } else { k1 = NPC / 2; } ap_uint k; bool evenPixel = true; for (k = 0; k < NPC; k++) { // clang-format off #pragma HLS UNROLL // clang-format on y1 = (uint8_t)yPacked.range((8 * k + 7), 8 * k) > 16 ? (uint8_t)yPacked.range((8 * k + 7), 8 * k) - 16 : 0; u = (uint8_t)uPacked.range((8 * k1 + 7), 8 * k1) - 128; v = (uint8_t)vPacked.range((8 * k1 + 7), 8 * k1) - 128; if (evenPixel == false) { k1 = k1 + 1; evenPixel = true; } else { evenPixel = false; } V2Rtemp = v * (short int)V2R; U2Gtemp = (short int)U2G * u; V2Gtemp = (short int)V2G * v; U2Btemp = u * (short int)U2B; // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U rgba.range((24 * k + 7), (24 * k)) = CalculateR(y1, V2Rtemp, v); // R rgba.range((24 * k + 15), (24 * k + 8)) = CalculateG(y1, U2Gtemp, V2Gtemp); // G rgba.range((24 * k + 23), (24 * k + 16)) = CalculateB(y1, U2Btemp, u); // B } _rgba.write(idx1++, rgba); evenBlock = evenBlock ? false : true; } evenRow = evenRow ? 
false : true; } } template void xFIyuv2Rgb(xf::cv::Mat& src_y, xf::cv::Mat& src_u, xf::cv::Mat& src_v, xf::cv::Mat& _dst0, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); KernIyuv2Rgb> XF_BITSHIFT(NPC))>( src_y, src_u, src_v, _dst0, height, width); } template void iyuv2rgb(xf::cv::Mat& src_y, xf::cv::Mat& src_u, xf::cv::Mat& src_v, xf::cv::Mat& _dst0) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC1) && " Y,U,V planes Type must be XF_8UC1"); assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3"); assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and RGB plane dimensions mismatch"); assert(((src_y.cols == src_u.cols) && (src_y.rows == (src_u.rows << 2))) && "Y and U planes dimensions mismatch"); assert(((src_y.cols == src_v.cols) && (src_y.rows == (src_v.rows << 2))) && "Y and U planes dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xFIyuv2Rgb( src_y, src_u, src_v, _dst0, src_y.rows, src_y.cols); } template void KernNv122bgr(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& _rgba, uint16_t height, uint16_t width) { unsigned long long int idx = 0, idx1 = 0; hls::stream uvStream; // clang-format off #pragma HLS STREAM variable=&uvStream depth=COLS // clang-format on XF_SNAME(WORDWIDTH_Y) yPacked; XF_SNAME(WORDWIDTH_UV) uvPacked; XF_SNAME(WORDWIDTH_DST) rgba; uint8_t y1, y2; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t u, v; bool evenRow = true, evenBlock = true; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on 
yPacked = _y.read(i * width + j); if (evenRow) { if (evenBlock) { uvPacked = _uv.read(idx++); uvStream.write(uvPacked); } } else { // Keep a copy of UV row data in stream to use for oddrow if (evenBlock) { uvPacked = uvStream.read(); } } // auExtractPixels(UVbuf, UVPacked, 0); uint8_t t = yPacked.range(7, 0); y1 = t > 16 ? t - 16 : 0; v = (uint8_t)uvPacked.range(15, 8) - 128; u = (uint8_t)uvPacked.range(7, 0) - 128; V2Rtemp = v * (short int)V2R; U2Gtemp = (short int)U2G * u; V2Gtemp = (short int)V2G * v; U2Btemp = u * (short int)U2B; // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U rgba.range(23, 16) = CalculateR(y1, V2Rtemp, v); // R rgba.range(15, 8) = CalculateG(y1, U2Gtemp, V2Gtemp); // G rgba.range(7, 0) = CalculateB(y1, U2Btemp, u); // B // PackedPixels = // PackRGBAPixels(RGB); _rgba.write(idx1++, rgba); evenBlock = evenBlock ? false : true; } evenRow = evenRow ? 
false : true; } if (height & 1) { for (int i = 0; i < width; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on uvStream.read(); } } }
// NOTE(review): in this listing the `template` headers, the xf::cv::Mat
// template arguments and the explicit template arguments at the kernel call
// sites are missing (they look stripped by the documentation extraction).
// They are kept exactly as found; restore them from the original header
// before building.

// Runs the NV12 -> BGR conversion on the given images.
//   src_y, src_uv : Y plane and interleaved chroma plane
//   _dst0         : BGR image passed to the kernel as destination
//   height, width : image size; width is right-shifted by XF_BITSHIFT(NPC)
//                   before being passed on
// NPC == 1 selects KernNv122bgr, any other NPC selects KernNv122bgr_ro.
template void xFNv122bgr(xf::cv::Mat& src_y,
                         xf::cv::Mat& src_uv,
                         xf::cv::Mat& _dst0,
                         uint16_t height,
                         uint16_t width) {
    width = width >> XF_BITSHIFT(NPC);
    if (NPC == 1) {
        KernNv122bgr( src_y, src_uv, _dst0, height, width);
    } else {
        KernNv122bgr_ro> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width);
    }
}

// Public entry point of the NV12 -> BGR conversion.
// In non-synthesis builds the asserts check the image types, that the Y plane
// fits within ROWS x COLS, that the BGR image matches the Y plane, that the
// chroma plane has half the Y plane's rows and cols, and that the NPC / NPC_UV
// combination is valid. The conversion itself is delegated to xFNv122bgr with
// the Y plane's rows and cols.
template void nv122bgr(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _dst0) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1");
    assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2");
    assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
    assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) &&
           " Y image rows and cols should be less than ROWS, COLS");
    assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and BGR plane dimensions mismatch");
    assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
           "Y and VU planes dimensions mismatch");
    if (NPC != XF_NPPC1) {
        assert((NPC == (NPC_UV * 2)) &&
               " NPC of Y plane must be double the VU "
               "plane for multipixel parallelism ");
    } else {
        // Each value is compared explicitly: a chained `a == b == c` would
        // compare the bool result of `a == b` with `c`.
        assert(((NPC == XF_NPPC1) && (NPC_UV == XF_NPPC1)) && " Both NPC,NPC_UV values must be same ");
    }
#endif
    xFNv122bgr(src_y, src_uv, _dst0, src_y.rows, src_y.cols);
}
template void KernNv122Rgb(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& _rgba, uint16_t height, uint16_t width) { unsigned long long int idx = 0, idx1 = 0; hls::stream uvStream; // clang-format off #pragma HLS STREAM variable=&uvStream depth=COLS // clang-format on XF_SNAME(WORDWIDTH_Y) yPacked; XF_SNAME(WORDWIDTH_UV) uvPacked; XF_SNAME(WORDWIDTH_DST) rgba; uint8_t y1, y2; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t u, v; bool evenRow = true, evenBlock = true; 
RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on yPacked = _y.read(i * width + j); if (evenRow) { if (evenBlock) { uvPacked = _uv.read(idx++); uvStream.write(uvPacked); } } else { // Keep a copy of UV row data in stream to use for oddrow if (evenBlock) { uvPacked = uvStream.read(); } } // auExtractPixels(UVbuf, UVPacked, 0); uint8_t t = yPacked.range(7, 0); y1 = t > 16 ? t - 16 : 0; v = (uint8_t)uvPacked.range(15, 8) - 128; u = (uint8_t)uvPacked.range(7, 0) - 128; V2Rtemp = v * (short int)V2R; U2Gtemp = (short int)U2G * u; V2Gtemp = (short int)V2G * v; U2Btemp = u * (short int)U2B; // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U rgba.range(7, 0) = CalculateR(y1, V2Rtemp, v); // R rgba.range(15, 8) = CalculateG(y1, U2Gtemp, V2Gtemp); // G rgba.range(23, 16) = CalculateB(y1, U2Btemp, u); // B // PackedPixels = // PackRGBAPixels(RGB); _rgba.write(idx1++, rgba); evenBlock = evenBlock ? false : true; } evenRow = evenRow ? 
false : true; } if (height & 1) { for (int i = 0; i < width; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on uvStream.read(); } } }
// NOTE(review): in this listing the `template` headers, the xf::cv::Mat
// template arguments and the explicit template arguments at the kernel call
// sites are missing (they look stripped by the documentation extraction).
// They are kept exactly as found; restore them from the original header
// before building.

// Runs the NV12 -> RGB conversion on the given images.
//   src_y, src_uv : Y plane and interleaved chroma plane
//   _dst0         : RGB image passed to the kernel as destination
//   height, width : image size; width is right-shifted by XF_BITSHIFT(NPC)
//                   before being passed on
// NPC == 1 selects KernNv122Rgb, any other NPC selects KernNv122Rgba_ro.
template void xFNv122Rgb(xf::cv::Mat& src_y,
                         xf::cv::Mat& src_uv,
                         xf::cv::Mat& _dst0,
                         uint16_t height,
                         uint16_t width) {
    width = width >> XF_BITSHIFT(NPC);
    if (NPC == 1) {
        KernNv122Rgb( src_y, src_uv, _dst0, height, width);
    } else {
        KernNv122Rgba_ro> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width);
    }
}

// Public entry point of the NV12 -> RGB conversion.
// In non-synthesis builds the asserts check the image types, that the Y plane
// fits within ROWS x COLS, that the RGB image matches the Y plane, that the
// chroma plane has half the Y plane's rows and cols, and that the NPC / NPC_UV
// combination is valid. The conversion itself is delegated to xFNv122Rgb with
// the Y plane's rows and cols.
template void nv122rgb(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _dst0) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1");
    assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
    assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
    assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) &&
           " Y image ROWS and COLS should be less than ROWS, COLS");
    assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and RGB plane dimensions mismatch");
    assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
           "Y and UV planes dimensions mismatch");
    if (NPC != XF_NPPC1) {
        assert((NPC == (NPC_UV * 2)) &&
               " NPC of Y plane must be double the UV "
               "plane for multipixel parallelism ");
    } else {
        // Each value is compared explicitly: a chained `a == b == c` would
        // compare the bool result of `a == b` with `c`.
        assert(((NPC == XF_NPPC1) && (NPC_UV == XF_NPPC1)) && " Both NPC,NPC_UV values must be same ");
    }
#endif
    xFNv122Rgb(src_y, src_uv, _dst0, src_y.rows, src_y.cols);
}
template void KernNv212Rgb(xf::cv::Mat& _y, xf::cv::Mat& _vu, xf::cv::Mat& _rgba, uint16_t height, uint16_t width) { hls::stream vuStream; // clang-format off #pragma HLS STREAM variable=&vuStream depth=COLS // clang-format on XF_SNAME(WORDWIDTH_Y) yPacked; XF_SNAME(WORDWIDTH_VU) vuPacked; unsigned long long int idx = 0, idx1 = 0; XF_SNAME(WORDWIDTH_DST) rgba; ap_uint<13> i, j; uint8_t y1, y2; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t u, v; bool evenRow = true, 
evenBlock = true; RowLoop: for (i = 0; i < (height); i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on yPacked = _y.read(i * width + j); // auExtractPixels(Ybuf, YPacked, 0); if (evenRow) { if (evenBlock) { vuPacked = _vu.read(idx++); vuStream.write(vuPacked); } } else { // Keep a copy of UV row data in stream to use for oddrow if (evenBlock) { vuPacked = vuStream.read(); } } // auExtractPixels(UVbuf, UVPacked, 0); uint8_t t = yPacked.range(7, 0); y1 = t > 16 ? t - 16 : 0; u = (uint8_t)vuPacked.range(15, 8) - 128; v = (uint8_t)vuPacked.range(7, 0) - 128; V2Rtemp = v * (short int)V2R; U2Gtemp = (short int)U2G * u; V2Gtemp = (short int)V2G * v; U2Btemp = u * (short int)U2B; // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U rgba.range(7, 0) = CalculateR(y1, V2Rtemp, v); // R rgba.range(15, 8) = CalculateG(y1, U2Gtemp, V2Gtemp); // G rgba.range(23, 16) = CalculateB(y1, U2Btemp, u); // B // PackedPixels = // PackRGBAPixels(RGB); _rgba.write(idx1++, rgba); evenBlock = evenBlock ? false : true; } evenRow = evenRow ? 
false : true; } if (height & 1) { for (i = 0; i < width; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on vuStream.read(); } } }
// NOTE(review): in this listing the `template` headers, the xf::cv::Mat
// template arguments and the explicit template arguments at the kernel call
// sites are missing (they look stripped by the documentation extraction).
// They are kept exactly as found; restore them from the original header
// before building.

// Runs the NV21 -> RGB conversion on the given images.
//   src_y, src_uv : Y plane and interleaved chroma plane
//   _dst0         : RGB image passed to the kernel as destination
//   height, width : image size; width is right-shifted by XF_BITSHIFT(NPC)
//                   before being passed on
// NPC == 1 selects KernNv212Rgb, any other NPC selects KernNv212Rgba_ro.
template void xFNv212Rgb(xf::cv::Mat& src_y,
                         xf::cv::Mat& src_uv,
                         xf::cv::Mat& _dst0,
                         uint16_t height,
                         uint16_t width) {
    width = width >> XF_BITSHIFT(NPC);
    if (NPC == 1) {
        KernNv212Rgb( src_y, src_uv, _dst0, height, width);
    } else {
        KernNv212Rgba_ro> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width);
    }
}

// Public entry point of the NV21 -> RGB conversion.
// In non-synthesis builds the asserts check the image types, that the Y plane
// fits within ROWS x COLS, that the RGB image matches the Y plane, that the
// chroma plane has half the Y plane's rows and cols, and that the NPC / NPC_UV
// combination is valid. The conversion itself is delegated to xFNv212Rgb with
// the Y plane's rows and cols.
template void nv212rgb(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _dst0) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1");
    assert((UV_T == XF_8UC2) && " vu image Type must be XF_8UC2");
    assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
    assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) &&
           " Y image ROWS and COLS should be less than ROWS, COLS");
    assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and RGB plane dimensions mismatch");
    assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
           "Y and VU planes dimensions mismatch");
    if (NPC != XF_NPPC1) {
        assert((NPC == (NPC_UV * 2)) &&
               " NPC of Y plane must be double the UV "
               "plane for multipixel parallelism ");
    } else {
        // Each value is compared explicitly: a chained `a == b == c` would
        // compare the bool result of `a == b` with `c`.
        assert(((NPC == XF_NPPC1) && (NPC_UV == XF_NPPC1)) && " Both NPC values must be same ");
    }
#endif
    xFNv212Rgb(src_y, src_uv, _dst0, src_y.rows, src_y.cols);
}
template void KernNv212bgr(xf::cv::Mat& _y, xf::cv::Mat& _vu, xf::cv::Mat& _rgba, uint16_t height, uint16_t width) { hls::stream vuStream; // clang-format off #pragma HLS STREAM variable=&vuStream depth=COLS // clang-format on XF_SNAME(WORDWIDTH_Y) yPacked; XF_SNAME(WORDWIDTH_VU) vuPacked; unsigned long long int idx = 0, idx1 = 0; XF_SNAME(WORDWIDTH_DST) rgba; ap_uint<13> i, j; uint8_t y1, y2; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; int8_t u, v; bool evenRow = true, evenBlock = 
true; RowLoop: for (i = 0; i < (height); i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on yPacked = _y.read(i * width + j); // auExtractPixels(Ybuf, YPacked, 0); if (evenRow) { if (evenBlock) { vuPacked = _vu.read(idx++); vuStream.write(vuPacked); } } else { // Keep a copy of UV row data in stream to use for oddrow if (evenBlock) { vuPacked = vuStream.read(); } } // auExtractPixels(UVbuf, UVPacked, 0); uint8_t t = yPacked.range(7, 0); y1 = t > 16 ? t - 16 : 0; u = (uint8_t)vuPacked.range(15, 8) - 128; v = (uint8_t)vuPacked.range(7, 0) - 128; V2Rtemp = v * (short int)V2R; U2Gtemp = (short int)U2G * u; V2Gtemp = (short int)V2G * v; U2Btemp = u * (short int)U2B; // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U rgba.range(23, 16) = CalculateR(y1, V2Rtemp, v); // R rgba.range(15, 8) = CalculateG(y1, U2Gtemp, V2Gtemp); // G rgba.range(7, 0) = CalculateB(y1, U2Btemp, u); // B // PackedPixels = // PackRGBAPixels(RGB); _rgba.write(idx1++, rgba); evenBlock = evenBlock ? false : true; } evenRow = evenRow ? 
false : true; } if (height & 1) { for (i = 0; i < width; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on vuStream.read(); } } }
// NOTE(review): in this listing the `template` headers, the xf::cv::Mat
// template arguments and the explicit template arguments at the kernel call
// sites are missing (they look stripped by the documentation extraction).
// They are kept exactly as found; restore them from the original header
// before building.

// Runs the NV21 -> BGR conversion on the given images.
//   src_y, src_uv : Y plane and interleaved chroma plane
//   _dst0         : BGR image passed to the kernel as destination
//   height, width : image size; width is right-shifted by XF_BITSHIFT(NPC)
//                   before being passed on
// NPC == 1 selects KernNv212bgr, any other NPC selects KernNv212bgr_ro.
template void xFNv212bgr(xf::cv::Mat& src_y,
                         xf::cv::Mat& src_uv,
                         xf::cv::Mat& _dst0,
                         uint16_t height,
                         uint16_t width) {
    width = width >> XF_BITSHIFT(NPC);
    if (NPC == 1) {
        KernNv212bgr(src_y, src_uv, _dst0, height, width);
    } else {
        KernNv212bgr_ro> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width);
    }
}

// Public entry point of the NV21 -> BGR conversion.
// In non-synthesis builds the asserts check the image types, that the Y plane
// fits within ROWS x COLS, that the BGR image matches the Y plane, that the
// chroma plane has half the Y plane's rows and cols, and that the NPC / NPC_UV
// combination is valid. The conversion itself is delegated to xFNv212bgr with
// the Y plane's rows and cols.
template void nv212bgr(xf::cv::Mat& src_y, xf::cv::Mat& src_uv, xf::cv::Mat& _dst0) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1");
    assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2");
    assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
    assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) &&
           " Y image rows and cols should be less than ROWS, COLS");
    assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and BGR plane dimensions mismatch");
    assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
           "Y and VU planes dimensions mismatch");
    if (NPC != XF_NPPC1) {
        assert((NPC == (NPC_UV * 2)) &&
               " NPC of Y plane must be double the VU "
               "plane for multipixel parallelism ");
    } else {
        // Each value is compared explicitly: a chained `a == b == c` would
        // compare the bool result of `a == b` with `c`.
        assert(((NPC == XF_NPPC1) && (NPC_UV == XF_NPPC1)) && " Both NPC,NPC_UV values must be same ");
    }
#endif
    xFNv212bgr(src_y, src_uv, _dst0, src_y.rows, src_y.cols);
}
template void xfrgb2gray(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { XF_CTUNAME(SRC_T, NPC) RGB[XF_CHANNELS(SRC_T, NPC) * XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete // clang-format on XF_TNAME(SRC_T, NPC) RGB_packed; //=0; XF_CTUNAME(DST_T, NPC) GRAY[XF_NPIXPERCYCLE(NPC)]; //=0; XF_TNAME(DST_T, NPC) Gray_packed; rowloop: for (ap_uint<13> i = 0; i < height; i++) { // clang-format off 
#pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on RGB_packed = src.read(i * (width >> XF_BITSHIFT(NPC)) + j); ExtractUYVYPixels(RGB_packed, RGB); for (ap_uint<13> k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) { // clang-format off #pragma HLS UNROLL // clang-format on GRAY[k] = CalculateGRAY(RGB[offset], RGB[offset + 1], RGB[offset + 2]); Gray_packed.range((k * XF_DTPIXELDEPTH(DST_T, NPC) + (XF_DTPIXELDEPTH(DST_T, NPC) - 1)), k * XF_DTPIXELDEPTH(DST_T, NPC)) = GRAY[k]; } dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, Gray_packed); } } } template void rgb2gray(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3"); assert((DST_T == XF_8UC1) && " GRAY image Type must be XF_8UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and GRAY plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xfrgb2gray> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols); } template void xfbgr2gray(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { XF_CTUNAME(SRC_T, NPC) RGB[XF_CHANNELS(SRC_T, NPC) * XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete // clang-format on XF_TNAME(SRC_T, NPC) RGB_packed; //=0; XF_CTUNAME(DST_T, NPC) GRAY[XF_NPIXPERCYCLE(NPC)]; //=0; XF_TNAME(DST_T, NPC) Gray_packed; rowloop: for (ap_uint<13> i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT 
min=ROWS max=ROWS // clang-format on columnloop: for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on RGB_packed = src.read(i * (width >> XF_BITSHIFT(NPC)) + j); ExtractUYVYPixels(RGB_packed, RGB); for (ap_uint<13> k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) { // clang-format off #pragma HLS UNROLL // clang-format on GRAY[k] = CalculateGRAY(RGB[offset + 2], RGB[offset + 1], RGB[offset]); Gray_packed.range((k * XF_DTPIXELDEPTH(DST_T, NPC) + (XF_DTPIXELDEPTH(DST_T, NPC) - 1)), k * XF_DTPIXELDEPTH(DST_T, NPC)) = GRAY[k]; } dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, Gray_packed); } } } template void bgr2gray(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3"); assert((DST_T == XF_8UC1) && " GRAY image Type must be XF_8UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " BGR image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and GRAY plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xfbgr2gray> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols); } template void xfgray2rgb(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { XF_DTUNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)]; XF_TNAME(DST_T, NPC) RGB_packed; XF_TNAME(SRC_T, NPC) GRAY_packed; XF_TNAME(SRC_T, NPC) GRAY[XF_NPIXPERCYCLE(NPC)]; rowloop: for (ap_uint<13> i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT 
min=COLS max=COLS // clang-format on GRAY_packed = src.read(i * (width >> XF_BITSHIFT(NPC)) + j); for (int k = 0; k < XF_NPIXPERCYCLE(NPC); k++) { // clang-format off #pragma HLS UNROLL // clang-format on GRAY[k] = GRAY_packed.range(k * (XF_PIXELWIDTH(SRC_T, NPC)) + XF_PIXELWIDTH(SRC_T, NPC) - 1, k * XF_PIXELWIDTH(SRC_T, NPC)); RGB[k].range(7, 0) = GRAY[k]; RGB[k].range(15, 8) = GRAY[k]; RGB[k].range(23, 16) = GRAY[k]; RGB_packed.range(k * (XF_PIXELWIDTH(DST_T, NPC)) + XF_PIXELWIDTH(DST_T, NPC) - 1, k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k]; } dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed); } } } template void gray2rgb(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC1) && " GRAY image Type must be XF_8UC1"); assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " GRAY image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and GRAY plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xfgray2rgb> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols); } template void xfgray2bgr(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { XF_DTUNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)]; XF_TNAME(DST_T, NPC) RGB_packed; XF_TNAME(SRC_T, NPC) GRAY_packed; XF_TNAME(SRC_T, NPC) GRAY[XF_NPIXPERCYCLE(NPC)]; rowloop: for (ap_uint<13> i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on GRAY_packed = src.read(i * (width >> XF_BITSHIFT(NPC)) + j); for (int k = 0; k < 
XF_NPIXPERCYCLE(NPC); k++) { // clang-format off #pragma HLS UNROLL // clang-format on GRAY[k] = GRAY_packed.range(k * (XF_PIXELWIDTH(SRC_T, NPC)) + XF_PIXELWIDTH(SRC_T, NPC) - 1, k * XF_PIXELWIDTH(SRC_T, NPC)); RGB[k].range(7, 0) = GRAY[k]; RGB[k].range(15, 8) = GRAY[k]; RGB[k].range(23, 16) = GRAY[k]; RGB_packed.range(k * (XF_PIXELWIDTH(DST_T, NPC)) + XF_PIXELWIDTH(DST_T, NPC) - 1, k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k]; } dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed); } } }
// NOTE(review): in this listing the `template` headers, the xf::cv::Mat
// template arguments and the explicit template arguments at the kernel call
// sites are missing (they look stripped by the documentation extraction).
// They are kept exactly as found; restore them from the original header
// before building.

// Public entry point of the GRAY -> BGR conversion.
// In non-synthesis builds the asserts check the image types, that the source
// fits within ROWS x COLS, that source and destination have equal dimensions
// and that NPC is XF_NPPC1 or XF_NPPC8. The conversion itself is delegated to
// xfgray2bgr with the source image's rows and cols.
template void gray2bgr(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC1) && " GRAY image Type must be XF_8UC1");
    assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " GRAY image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and GRAY plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfgray2bgr> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}

// RGB -> XYZ kernel.
// For every packed source word the channel bytes are unpacked into RGB[],
// each pixel's (R, G, B) triple is turned into X, Y and Z with Calculate_X /
// Calculate_Y / Calculate_Z, and the results are packed (X in the lowest
// XF_DTPIXELDEPTH bits, then Y, then Z) into the destination word written at
// the same index.
//   src, dst      : source and destination images
//   height, width : image size in pixels; each row is processed as
//                   width >> XF_BITSHIFT(NPC) packed words
template void xfrgb2xyz(xf::cv::Mat& src,
                        xf::cv::Mat& dst,
                        unsigned short int height,
                        unsigned short int width) {
    // Three channel entries for each of the XF_NPIXPERCYCLE(NPC) pixels of a
    // word: the pixel loop below reads RGB[offset .. offset + 2] with offset
    // advancing by 3 per pixel, so a fixed size of 3 is only enough for one
    // pixel per word.
    ap_uint<8> RGB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=RGB complete
    // clang-format on
    XF_TNAME(SRC_T, NPC) RGB_packed = 0;
    XF_TNAME(DST_T, NPC) XYZ_packed = 0;
    XF_DTUNAME(DST_T, NPC) XYZ[XF_NPIXPERCYCLE(NPC)];
    XF_TNAME(DST_T, NPC) X, Y, Z;

rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
    // clang-format on
    columnloop:
        // Iterate over the packed words of the row; the bound matches the
        // row stride used in the read/write indices below.
        for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
            #pragma HLS pipeline
            #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
            // clang-format on
            RGB_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
            ExtractUYVYPixels(RGB_packed, RGB);
            for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
// clang-format off
                #pragma HLS UNROLL
                // clang-format on
                X = Calculate_X(RGB[offset], RGB[offset + 1], RGB[offset + 2]);
                Y = Calculate_Y(RGB[offset], RGB[offset + 1], RGB[offset + 2]);
                Z = Calculate_Z(RGB[offset], RGB[offset + 1], RGB[offset + 2]);

                XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = X;
                XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = Y;
                XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = Z;

                XYZ_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
                                 k * XF_PIXELWIDTH(DST_T, NPC)) = XYZ[k];
            }
            dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, XYZ_packed);
        }
    }
}

// Public entry point of the RGB -> XYZ conversion.
// In non-synthesis builds the asserts check the image types, that the source
// fits within ROWS x COLS, that source and destination have equal dimensions
// and that NPC is XF_NPPC1 or XF_NPPC8. The conversion itself is delegated to
// xfrgb2xyz with the source image's rows and cols.
template void rgb2xyz(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " XYZ image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " RGB image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and XYZ plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfrgb2xyz> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}

// BGR -> XYZ kernel.
// Same processing as the RGB variant, except that the unpacked channel bytes
// are passed to Calculate_X / Calculate_Y / Calculate_Z in reversed order
// (offset + 2, offset + 1, offset).
//   src, dst      : source and destination images
//   height, width : image size in pixels; each row is processed as
//                   width >> XF_BITSHIFT(NPC) packed words
template void xfbgr2xyz(xf::cv::Mat& src,
                        xf::cv::Mat& dst,
                        unsigned short int height,
                        unsigned short int width) {
    // Three channel entries for each of the XF_NPIXPERCYCLE(NPC) pixels of a
    // word: the pixel loop below reads RGB[offset .. offset + 2] with offset
    // advancing by 3 per pixel, so a fixed size of 3 is only enough for one
    // pixel per word.
    ap_uint<8> RGB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=RGB complete
    // clang-format on
    XF_TNAME(SRC_T, NPC) RGB_packed = 0;
    XF_TNAME(DST_T, NPC) XYZ_packed = 0;
    XF_DTUNAME(DST_T, NPC) XYZ[XF_NPIXPERCYCLE(NPC)];
    XF_TNAME(DST_T, NPC) X, Y, Z;

rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
    // clang-format on
    columnloop:
        // Iterate over the packed words of the row; the bound matches the
        // row stride used in the read/write indices below.
        for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
            #pragma HLS pipeline
            #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
            // clang-format on
            RGB_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
            ExtractUYVYPixels(RGB_packed, RGB);
            for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
// clang-format off
                #pragma HLS UNROLL
                // clang-format on
                X = Calculate_X(RGB[offset + 2], RGB[offset + 1], RGB[offset]);
                Y = Calculate_Y(RGB[offset + 2], RGB[offset + 1], RGB[offset]);
                Z = Calculate_Z(RGB[offset + 2], RGB[offset + 1], RGB[offset]);

                XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = X;
                XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = Y;
                XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = Z;

                XYZ_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
                                 k * XF_PIXELWIDTH(DST_T, NPC)) = XYZ[k];
            }
            dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, XYZ_packed);
        }
    }
}

// Public entry point of the BGR -> XYZ conversion.
// In non-synthesis builds the asserts check the image types, that the source
// fits within ROWS x COLS, that source and destination have equal dimensions
// and that NPC is XF_NPPC1 or XF_NPPC8. The conversion itself is delegated to
// xfbgr2xyz with the source image's rows and cols.
template void bgr2xyz(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " XYZ image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " BGR image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and XYZ plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfbgr2xyz> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template void xfxyz2rgb(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { XF_CTUNAME(SRC_T, NPC) XYZ[3 * XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS 
ARRAY_PARTITION variable=XYZ complete
    // clang-format on
    XF_TNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
    XF_TNAME(DST_T, NPC) XYZ_packed = 0, RGB_packed = 0;
    XF_TNAME(DST_T, NPC) R, G, B;
rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        // clang-format on
    columnloop:
        // One packed word is read, converted and written back per iteration.
        for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
            #pragma HLS pipeline
            #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
            // clang-format on
            XYZ_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
            ExtractUYVYPixels(XYZ_packed, XYZ);
            for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
// clang-format off
                #pragma HLS UNROLL
                // clang-format on
                R = Calculate_R(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
                G = Calculate_G(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
                B = Calculate_B(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
                // R goes into the lowest field, then G, then B.
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = R;
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = G;
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = B;
                RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
                                 k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
            }
            dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, RGB_packed);
        }
    }
}

// xyz2rgb: public entry point for the XYZ -> RGB conversion.
// In non-synthesis builds it asserts the image types, the size bounds, matching source and
// destination dimensions and the supported pixel parallelism before calling xfxyz2rgb.
// NOTE(review): the template parameter list and the explicit template arguments of the call
// appear to be missing from this listing -- confirm against the original header.
template void xyz2rgb(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " XYZ image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " XYZ image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and XYZ plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfxyz2rgb>
(XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}

// xfxyz2bgr: XYZ -> BGR kernel.
// For every packed word of src it unpacks the bytes with ExtractUYVYPixels, computes R, G and B
// with Calculate_R/Calculate_G/Calculate_B and writes the repacked word to dst at the same
// word index. Unlike the RGB variant, B is stored in the lowest field and R in the highest.
//   src, dst : input and output images
//   height   : number of rows to process
//   width    : number of pixels per row; (width >> XF_BITSHIFT(NPC)) words are processed per row
// NOTE(review): the template parameter list appears to be missing from this listing.
template void xfxyz2bgr(xf::cv::Mat& src,
                        xf::cv::Mat& dst,
                        unsigned short int height,
                        unsigned short int width) {
    XF_CTUNAME(SRC_T, NPC) XYZ[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=XYZ complete
    // clang-format on
    XF_TNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
    XF_TNAME(DST_T, NPC) XYZ_packed = 0, RGB_packed = 0;
    XF_TNAME(DST_T, NPC) R, G, B;
rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        // clang-format on
    columnloop:
        for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
            #pragma HLS pipeline
            #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
            // clang-format on
            XYZ_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
            ExtractUYVYPixels(XYZ_packed, XYZ);
            for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
// clang-format off
                #pragma HLS UNROLL
                // clang-format on
                R = Calculate_R(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
                G = Calculate_G(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
                B = Calculate_B(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
                // B goes into the lowest field, then G, then R.
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = B;
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = G;
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = R;
                RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
                                 k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
            }
            dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, RGB_packed);
        }
    }
}

// xyz2bgr: public entry point for the XYZ -> BGR conversion; asserts the image types and size
// bounds in non-synthesis builds (the remaining checks and the kernel call follow this span).
template void xyz2bgr(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " XYZ image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " XYZ image rows and
cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and XYZ plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xfxyz2bgr> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols); } template void xfrgb2ycrcb(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { XF_CTUNAME(SRC_T, NPC) RGB[3 * XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete // clang-format on XF_DTUNAME(DST_T, NPC) YCRCB[XF_NPIXPERCYCLE(NPC)]; XF_TNAME(SRC_T, NPC) RGB_packed = 0; XF_TNAME(DST_T, NPC) YCRCB_packed = 0; XF_TNAME(DST_T, NPC) Y, CR, CB; rowloop: for (ap_uint<13> i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (ap_uint<13> j = 0; j> XF_BITSHIFT(NPC); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on RGB_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j); ExtractUYVYPixels(RGB_packed, RGB); for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) { Y = CalculateGRAY(RGB[offset], RGB[offset + 1], RGB[offset + 2]); CR = Calculate_CR(RGB[offset], Y); CB = Calculate_CB(RGB[offset + 2], Y); YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = Y; YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = CR; YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = CB; YCRCB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1), k * XF_PIXELWIDTH(DST_T, NPC)) = YCRCB[k]; } dst.write((i * width >> XF_BITSHIFT(NPC)) + j, YCRCB_packed); } } } template void rgb2ycrcb(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " 
RGB image Type must be XF_8UC3"); assert((DST_T == XF_8UC3) && " YCrCb image Type must be XF_8UC3"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and YCrCb plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported "); #endif xfrgb2ycrcb> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols); } template void xfbgr2ycrcb(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { XF_CTUNAME(SRC_T, NPC) RGB[3 * XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete // clang-format on XF_DTUNAME(DST_T, NPC) YCRCB[XF_NPIXPERCYCLE(NPC)]; XF_TNAME(SRC_T, NPC) RGB_packed = 0; XF_TNAME(DST_T, NPC) YCRCB_packed = 0; XF_TNAME(DST_T, NPC) Y, CR, CB; rowloop: for (ap_uint<13> i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on RGB_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j); ExtractUYVYPixels(RGB_packed, RGB); for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) { Y = CalculateGRAY(RGB[offset + 2], RGB[offset + 1], RGB[offset]); CR = Calculate_CR(RGB[offset + 2], Y); CB = Calculate_CB(RGB[offset], Y); YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = Y; YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = CR; YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = CB; YCRCB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1), k * XF_PIXELWIDTH(DST_T, NPC)) = YCRCB[k]; } dst.write((i * width >> XF_BITSHIFT(NPC)) + j, 
YCRCB_packed);
        }
    }
}

// bgr2ycrcb: public entry point for the BGR -> YCrCb conversion.
// In non-synthesis builds it asserts the image types, the size bounds, matching source and
// destination dimensions and the supported pixel parallelism, then forwards the whole image
// (_src.rows x _src.cols) to xfbgr2ycrcb.
// NOTE(review): the template parameter list and the explicit template arguments of the call
// appear to be missing from this listing -- confirm against the original header.
template void bgr2ycrcb(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " YCrCb image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " BGR image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and YCrCb plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfbgr2ycrcb> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}

// xfycrcb2rgb: YCrCb -> RGB kernel (the body continues past this span).
// Each packed word is unpacked with ExtractUYVYPixels and converted with
// Calculate_Ycrcb2R/Calculate_Ycrcb2G/Calculate_Ycrcb2B.
//   src, dst : input and output images
//   height   : number of rows to process
//   width    : number of pixels per row
template void xfycrcb2rgb(xf::cv::Mat& src,
                          xf::cv::Mat& dst,
                          unsigned short int height,
                          unsigned short int width) {
    XF_CTUNAME(SRC_T, NPC) YCRCB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=YCRCB complete
    // clang-format on
    XF_TNAME(SRC_T, NPC) YCRCB_packed = 0;
    XF_TNAME(DST_T, NPC) RGB_packed = 0;
    XF_TNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
    XF_TNAME(DST_T, NPC) Y, R, B, G;
rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        // clang-format on
    columnloop:
        for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
            #pragma HLS pipeline
            #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
            // clang-format on
            YCRCB_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
            ExtractUYVYPixels(YCRCB_packed, YCRCB);
            for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
                R = Calculate_Ycrcb2R(YCRCB[offset], YCRCB[offset + 1]);
                G = Calculate_Ycrcb2G(YCRCB[offset], YCRCB[offset + 1], YCRCB[offset + 2]);
                B = Calculate_Ycrcb2B(YCRCB[offset], YCRCB[offset + 2]);
                // R goes into the lowest field, then G, then B.
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = R;
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = G;
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = B;
                RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
                                 k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
            }
            dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed);
        }
    }
}

// ycrcb2rgb: public entry point for the YCrCb -> RGB conversion.
// In non-synthesis builds it asserts the image types, the size bounds, matching source and
// destination dimensions and the supported pixel parallelism, then forwards the whole image
// (_src.rows x _src.cols) to xfycrcb2rgb.
// NOTE(review): the template parameter list and the explicit template arguments of the call
// appear to be missing from this listing -- confirm against the original header.
template void ycrcb2rgb(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " YCrCb image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " YCrCb image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and YCrCb plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfycrcb2rgb> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}

// xfycrcb2bgr: YCrCb -> BGR kernel (the body continues past this span).
// Each packed word is unpacked with ExtractUYVYPixels and converted with
// Calculate_Ycrcb2R/Calculate_Ycrcb2G/Calculate_Ycrcb2B.
//   src, dst : input and output images
//   height   : number of rows to process
//   width    : number of pixels per row
template void xfycrcb2bgr(xf::cv::Mat& src,
                          xf::cv::Mat& dst,
                          unsigned short int height,
                          unsigned short int width) {
    XF_CTUNAME(SRC_T, NPC) YCRCB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=YCRCB complete
    // clang-format on
    XF_TNAME(SRC_T, NPC) YCRCB_packed = 0;
    XF_TNAME(DST_T, NPC) RGB_packed = 0;
    XF_TNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
    XF_TNAME(DST_T, NPC) Y, R, B, G;
rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        // clang-format on
    columnloop:
        for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
            #pragma HLS pipeline
            #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
            // clang-format on
            YCRCB_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
            ExtractUYVYPixels(YCRCB_packed, YCRCB);
            for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
                R = Calculate_Ycrcb2R(YCRCB[offset], YCRCB[offset + 1]);
                G =
Calculate_Ycrcb2G(YCRCB[offset], YCRCB[offset + 1], YCRCB[offset + 2]);
                B = Calculate_Ycrcb2B(YCRCB[offset], YCRCB[offset + 2]);
                // B goes into the lowest field, then G, then R.
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = B;
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = G;
                RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = R;
                RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
                                 k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
            }
            dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed);
        }
    }
}

// ycrcb2bgr: public entry point for the YCrCb -> BGR conversion.
// In non-synthesis builds it asserts the image types, the size bounds, matching source and
// destination dimensions and the supported pixel parallelism, then forwards the whole image
// (_src.rows x _src.cols) to xfycrcb2bgr.
// NOTE(review): the template parameter list and the explicit template arguments of the call
// appear to be missing from this listing -- confirm against the original header.
template void ycrcb2bgr(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " YCrCb image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " YCrCb image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and YCrCb plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfycrcb2bgr> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}

// xfrgb2hls: RGB -> HLS kernel (the body continues past this span).
//   src, dst : input and output images
//   height   : number of rows to process
//   width    : number of pixels per row
template void xfrgb2hls(xf::cv::Mat& src,
                        xf::cv::Mat& dst,
                        unsigned short int height,
                        unsigned short int width) {
    XF_CTUNAME(SRC_T, NPC) RGB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=RGB complete
    // clang-format on
    XF_TNAME(SRC_T, NPC) RGB_packed = 0;
    XF_TNAME(DST_T, NPC) HSV_packed = 0;
    XF_TNAME(DST_T, NPC) HSV[XF_NPIXPERCYCLE(NPC)];
    XF_CTUNAME(SRC_T, NPC) r, g, b;
    XF_CTUNAME(SRC_T, NPC) Vmax, Vmin;
    // int Vmax=0,Vmin=0;
    int consta;
    int sub;
    int two_L = 0;
    int inv_sub = 0;
    int less_if = 0;
    short int depth = XF_PIXELWIDTH(DST_T, NPC) / XF_CHANNELS(SRC_T, NPC);
    int inv_add = 0;
    int S = 0;
    int k = 0;
rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS
LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on RGB_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j); ExtractUYVYPixels(RGB_packed, RGB); for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) { r = RGB[offset], g = RGB[offset + 1], b = RGB[offset + 2]; Vmax = b; Vmin = b; if ((g > r) && (g > b)) { Vmax = g; } else if ((r > b)) { Vmax = r; } if ((g < r) && (g < b)) { Vmin = g; } else if ((r < b)) { Vmin = r; } short int v_add = (Vmax + Vmin); short int v_sub = (Vmax - Vmin); two_L = (Vmax + Vmin); int h = 0; if (two_L < 255) { inv_add = ((255 << 12) / (v_add)); less_if = (v_sub * inv_add + (1 << (11))) >> 12; S = less_if; } else { if (Vmax == Vmin) { S = 0; } else { int inv_sub = ((255 << 12) / ((2 * 255) - v_add)); int less_if = (v_sub * inv_sub + (1 << (11))) >> 12; S = less_if; } } sub = (Vmax == b) ? (r - g) : (Vmax == g) ? (b - r) : (g - b); consta = (Vmax == b) ? 240 : (Vmax == g) ? 
120 : 0;
                // Hue: 0 when all channels are equal, otherwise the base angle plus a scaled
                // channel difference (15 fractional bits), wrapped into a non-negative value.
                if (Vmax == Vmin) {
                    h = 0;
                } else {
                    inv_sub = ((1 << 15) / (v_sub));
                    h = consta + ((60 * sub * inv_sub) >> 15);
                    if (h < 0) {
                        h += 360;
                    }
                }
                // Output fields: halved hue, rounded (Vmax + Vmin) / 2, then saturation.
                HSV[k].range(7, 0) = (h >> 1);
                HSV[k].range(15, 8) = (unsigned char)((two_L + 1) >> 1);
                HSV[k].range(23, 16) = S;
                HSV_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
                                 k * XF_PIXELWIDTH(DST_T, NPC)) = HSV[k];
            }
            dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, HSV_packed);
        }
    }
}

// rgb2hls: public entry point for the RGB -> HLS conversion.
// In non-synthesis builds it asserts the image types, the size bounds, matching source and
// destination dimensions and the supported pixel parallelism, then forwards the whole image
// (_src.rows x _src.cols) to xfrgb2hls.
// NOTE(review): the template parameter list and the explicit template arguments of the call
// appear to be missing from this listing -- confirm against the original header.
template void rgb2hls(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " HLS image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " RGB image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and HLS plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfrgb2hls> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}

// xfbgr2hls: BGR -> HLS kernel (the body continues past this span).
//   src, dst : input and output images
//   height   : number of rows to process
//   width    : number of pixels per row
template void xfbgr2hls(xf::cv::Mat& src,
                        xf::cv::Mat& dst,
                        unsigned short int height,
                        unsigned short int width) {
    XF_CTUNAME(SRC_T, NPC) RGB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=RGB complete
    // clang-format on
    XF_TNAME(SRC_T, NPC) RGB_packed = 0;
    XF_TNAME(DST_T, NPC) HSV_packed = 0;
    XF_TNAME(DST_T, NPC) HSV[XF_NPIXPERCYCLE(NPC)];
    XF_CTUNAME(SRC_T, NPC) r, g, b;
    XF_CTUNAME(SRC_T, NPC) Vmax, Vmin;
    // int Vmax=0,Vmin=0;
    int consta;
    int sub;
    int two_L = 0;
    int inv_sub = 0;
    int less_if = 0;
    short int depth = XF_PIXELWIDTH(DST_T, NPC) / XF_CHANNELS(SRC_T, NPC);
    int inv_add = 0;
    int S = 0;
    int k = 0;
rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        // clang-format on
    columnloop:
        for (ap_uint<13> j = 0; j < (width >>
XF_BITSHIFT(NPC)); j++) {
// clang-format off
            #pragma HLS pipeline
            #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
            // clang-format on
            // Row stride is the per-row word count (the bound of j). The previous form
            // "(i * width >> XF_BITSHIFT(NPC))" shifted the product and disagreed with that
            // stride whenever width is not a multiple of the pixels per word.
            RGB_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
            ExtractUYVYPixels(RGB_packed, RGB);
            for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
                // Channel order in the unpacked bytes is b, g, r.
                b = RGB[offset], g = RGB[offset + 1], r = RGB[offset + 2];

                // Largest and smallest of the three channels.
                Vmax = b;
                Vmin = b;
                if ((g > r) && (g > b)) {
                    Vmax = g;
                } else if ((r > b)) {
                    Vmax = r;
                }
                if ((g < r) && (g < b)) {
                    Vmin = g;
                } else if ((r < b)) {
                    Vmin = r;
                }

                short int v_add = (Vmax + Vmin);
                short int v_sub = (Vmax - Vmin);
                two_L = (Vmax + Vmin);
                int h = 0;

                // Saturation in fixed point (12 fractional bits, rounded).
                // Pixels with Vmax == Vmin are handled first: their saturation is 0, and for a
                // black pixel v_add is 0, so the division below must not be reached (it used to
                // divide by zero). For every other Vmax == Vmin input the old path also
                // produced 0, because v_sub is 0.
                if (Vmax == Vmin) {
                    S = 0;
                } else if (two_L < 255) {
                    inv_add = ((255 << 12) / (v_add));
                    less_if = (v_sub * inv_add + (1 << (11))) >> 12;
                    S = less_if;
                } else {
                    // (2 * 255) - v_add cannot be 0 here: that would require Vmax == Vmin == 255.
                    int inv_sub = ((255 << 12) / ((2 * 255) - v_add));
                    int less_if = (v_sub * inv_sub + (1 << (11))) >> 12;
                    S = less_if;
                }

                // Channel difference and base angle are selected by which channel is the maximum.
                sub = (Vmax == b) ? (r - g) : (Vmax == g) ? (b - r) : (g - b);
                consta = (Vmax == b) ? 240 : (Vmax == g) ?
120 : 0;
                // Hue: 0 when all channels are equal, otherwise the base angle plus a scaled
                // channel difference (15 fractional bits), wrapped into a non-negative value.
                if (Vmax == Vmin) {
                    h = 0;
                } else {
                    inv_sub = ((1 << 15) / (v_sub));
                    h = consta + ((60 * sub * inv_sub) >> 15);
                    if (h < 0) {
                        h += 360;
                    }
                }
                // Output fields: halved hue, rounded (Vmax + Vmin) / 2, then saturation.
                HSV[k].range(7, 0) = (h >> 1);
                HSV[k].range(15, 8) = (unsigned char)((two_L + 1) >> 1);
                HSV[k].range(23, 16) = S;
                HSV_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
                                 k * XF_PIXELWIDTH(DST_T, NPC)) = HSV[k];
            }
            dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, HSV_packed);
        }
    }
}

// bgr2hls: public entry point for the BGR -> HLS conversion.
// In non-synthesis builds it asserts the image types, the size bounds, matching source and
// destination dimensions and the supported pixel parallelism, then forwards the whole image
// (_src.rows x _src.cols) to xfbgr2hls.
// NOTE(review): the template parameter list and the explicit template arguments of the call
// appear to be missing from this listing -- confirm against the original header.
template void bgr2hls(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " HLS image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " BGR image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and HLS plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfbgr2hls> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}

// xfhls2rgb: HLS -> RGB kernel (the body continues past this span).
//   src, dst : input and output images
//   height   : number of rows to process
//   width    : number of pixels per row
template void xfhls2rgb(xf::cv::Mat& src,
                        xf::cv::Mat& dst,
                        unsigned short int height,
                        unsigned short int width) {
    XF_CTUNAME(SRC_T, NPC) HLS[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=HLS complete
    // clang-format on
    XF_DTUNAME(SRC_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
    XF_TNAME(SRC_T, NPC) HLS_packed = 0;
    XF_TNAME(DST_T, NPC) RGB_packed = 0;
    short int depth = XF_PIXELWIDTH(DST_T, NPC) / XF_CHANNELS(SRC_T, NPC);
    unsigned long int r = 0;
    unsigned long int g = 0;
    unsigned long int b = 0;
    XF_CTUNAME(SRC_T, NPC) H, L, S;
    ap_fixed<28, 9> tab[4];
    ap_fixed<28, 9> p1, p2;
    // Fixed-point scale factors applied to H and S below.
    ap_ufixed<20, 1, AP_RND> hscale = 0.0333333333333333333;
    ap_ufixed<20, 1, AP_RND> s_scale = 0.0039215686274509803921568627451f;
rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS
max=ROWS
        // clang-format on
    columnloop:
        for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
            #pragma HLS pipeline
            #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
            // clang-format on
            HLS_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j);
            ExtractUYVYPixels(HLS_packed, HLS);
            for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
                H = HLS[offset], L = HLS[offset + 1], S = HLS[offset + 2];
                // Zero saturation: all three output channels take the L value.
                if (S == 0)
                    b = g = r = L;
                else {
                    static const int sector_data[][3] = {{1, 3, 0}, {1, 0, 2}, {3, 0, 1},
                                                         {0, 2, 1}, {0, 1, 3}, {2, 1, 0}};
                    ap_fixed<28, 9> mul_scl = s_scale * S;
                    if (2 * L <= 255) {
                        p2 = L + L * mul_scl;
                    } else {
                        p2 = L + S - ((L * mul_scl));
                    }
                    p1 = 2 * L - p2;
                    // Integer part and fractional part of the scaled hue.
                    unsigned char H_scl = (unsigned char)H * hscale;
                    ap_fixed<28, 9> h_fix = H * hscale - H_scl;
                    // NOTE(review): the listing appears to be truncated on the next line -- the
                    // text between the comment and the final dst.write(...) arguments is
                    // missing. Restore it from the original header.
                    if (H_scl >= 6) // for hrange=180, 0> XF_BITSHIFT(NPC)) + j, RGB_packed);
        }
    }
}

// hls2rgb: public entry point for the HLS -> RGB conversion.
// In non-synthesis builds it asserts the image types, the size bounds, matching source and
// destination dimensions and the supported pixel parallelism, then forwards the whole image
// (_src.rows x _src.cols) to xfhls2rgb.
// NOTE(review): the template parameter list and the explicit template arguments of the call
// appear to be missing from this listing -- confirm against the original header.
template void hls2rgb(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " HLS image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " HLS image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and HLS plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfhls2rgb> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}

// xfhls2bgr: HLS -> BGR kernel (the body continues past this span).
//   src, dst : input and output images
//   height   : number of rows to process
//   width    : number of pixels per row
template void xfhls2bgr(xf::cv::Mat& src,
                        xf::cv::Mat& dst,
                        unsigned short int height,
                        unsigned short int width) {
    XF_CTUNAME(SRC_T, NPC) HLS[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=HLS complete
    // clang-format on
    XF_DTUNAME(SRC_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
    XF_TNAME(SRC_T, NPC) HLS_packed = 0;
    XF_TNAME(DST_T, NPC) RGB_packed = 0;
    short int depth = XF_PIXELWIDTH(DST_T, NPC) /
XF_CHANNELS(SRC_T, NPC);
    unsigned long int r = 0;
    unsigned long int g = 0;
    unsigned long int b = 0;
    XF_CTUNAME(SRC_T, NPC) H, L, S;
    ap_fixed<28, 9> tab[4];
    ap_fixed<28, 9> p1, p2;
    // Fixed-point scale factors applied to H and S below.
    ap_ufixed<20, 1, AP_RND> hscale = 0.0333333333333333333;
    ap_ufixed<20, 1, AP_RND> s_scale = 0.0039215686274509803921568627451f;
rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        // clang-format on
    columnloop:
        for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
            #pragma HLS pipeline
            #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
            // clang-format on
            HLS_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j);
            ExtractUYVYPixels(HLS_packed, HLS);
            for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
                H = HLS[offset], L = HLS[offset + 1], S = HLS[offset + 2];
                // Zero saturation: all three output channels take the L value.
                if (S == 0)
                    b = g = r = L;
                else {
                    static const int sector_data[][3] = {{1, 3, 0}, {1, 0, 2}, {3, 0, 1},
                                                         {0, 2, 1}, {0, 1, 3}, {2, 1, 0}};
                    ap_fixed<28, 9> mul_scl = s_scale * S;
                    if (2 * L <= 255) {
                        p2 = L + L * mul_scl;
                    } else {
                        p2 = L + S - ((L * mul_scl));
                    }
                    p1 = 2 * L - p2;
                    // Integer part and fractional part of the scaled hue.
                    unsigned char H_scl = (unsigned char)H * hscale;
                    ap_fixed<28, 9> h_fix = H * hscale - H_scl;
                    // NOTE(review): the listing appears to be truncated on the next line -- the
                    // text between the comment and the final dst.write(...) arguments is
                    // missing. Restore it from the original header.
                    if (H_scl >= 6) // for hrange=180, 0> XF_BITSHIFT(NPC)) + j, RGB_packed);
        }
    }
}

// hls2bgr: public entry point for the HLS -> BGR conversion.
// In non-synthesis builds it asserts the image types, the size bounds, matching source and
// destination dimensions and the supported pixel parallelism before calling xfhls2bgr.
// NOTE(review): the template parameter list and the explicit template arguments of the call
// appear to be missing from this listing -- confirm against the original header.
template void hls2bgr(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " HLS image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " HLS image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and HLS plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfhls2bgr> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows,
_src.cols);
}

// xfhsv2rgb: HSV -> RGB kernel.
// Each packed word is read from src and unpacked with ExtractUYVYPixels into H, S and V bytes.
//   src, dst : input and output images
//   height   : number of rows to process
//   width    : number of pixels per row
// NOTE(review): this listing is damaged for this function -- the template parameter list, the
// column-loop condition and most of the per-pixel body are missing. Restore them from the
// original header before relying on this text.
template void xfhsv2rgb(xf::cv::Mat& src,
                        xf::cv::Mat& dst,
                        unsigned short int height,
                        unsigned short int width) {
    XF_CTUNAME(SRC_T, NPC) HSV[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=HSV complete
    // clang-format on
    XF_DTUNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
    XF_TNAME(SRC_T, NPC) HSV_packed = 0;
    XF_TNAME(DST_T, NPC) RGB_packed = 0;
    XF_CTUNAME(SRC_T, NPC) H, S, V;
    unsigned long int r = 0;
    unsigned long int g = 0;
    unsigned long int b = 0;
    ap_fixed<28, 9> tab[4];
    ap_fixed<28, 9> p1, p2;
    // Fixed-point scale factors applied to H and S below.
    ap_ufixed<20, 1, AP_RND> hscale = 0.0333333333333333333;
    ap_ufixed<20, 1, AP_RND> s_scale = 0.0039215686274509803921568627451;
rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        // clang-format on
    columnloop:
        // NOTE(review): loop condition is damaged in this listing; sibling kernels use
        // "j < (width >> XF_BITSHIFT(NPC))" -- confirm.
        for (ap_uint<13> j = 0; j> XF_BITSHIFT(NPC); j++) {
// clang-format off
            #pragma HLS pipeline
            #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
            // clang-format on
            HSV_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j);
            ExtractUYVYPixels(HSV_packed, HSV);
            for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
                H = HSV[offset], S = HSV[offset + 1], V = HSV[offset + 2];
                static const int sector_data[][3] = {{1, 3, 0}, {1, 0, 2}, {3, 0, 1},
                                                     {0, 2, 1}, {0, 1, 3}, {2, 1, 0}};
                ap_fixed<28, 9> mul_scl = s_scale * S;
                // Integer part and fractional part of the scaled hue.
                unsigned char H_scl = (unsigned char)H * hscale;
                ap_fixed<28, 9> h_fix = H * hscale - H_scl;
                // NOTE(review): the listing appears to be truncated on the next line.
                if (H_scl >= 6) // for hrange=180, 0> XF_BITSHIFT(NPC)) + j, RGB_packed);
        }
    }
}

// hsv2rgb: public entry point for the HSV -> RGB conversion; asserts the image types and size
// bounds in non-synthesis builds (the remaining checks and the kernel call follow this span).
template void hsv2rgb(xf::cv::Mat& _src, xf::cv::Mat& _dst) {
// clang-format off
    #pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
    assert((SRC_T == XF_8UC3) && " HSV image Type must be XF_8UC3");
    assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
    assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) &&
           " HSV image rows and cols should be less than ROWS, COLS");
    assert(((_dst.cols == _src.cols) && (_dst.rows ==
_src.rows)) && "RGB and HSV plane dimensions mismatch");
    assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
    xfhsv2rgb> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}

// xfhsv2bgr: HSV -> BGR kernel.
// Each packed word is read from src and unpacked with ExtractUYVYPixels into H, S and V bytes.
//   src, dst : input and output images
//   height   : number of rows to process
//   width    : number of pixels per row
// NOTE(review): this listing is damaged for this function -- the template parameter list, the
// column-loop condition and most of the per-pixel body are missing. Restore them from the
// original header before relying on this text.
template void xfhsv2bgr(xf::cv::Mat& src,
                        xf::cv::Mat& dst,
                        unsigned short int height,
                        unsigned short int width) {
    XF_CTUNAME(SRC_T, NPC) HSV[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=HSV complete
    // clang-format on
    XF_DTUNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=RGB complete
    // clang-format on
    XF_TNAME(SRC_T, NPC) HSV_packed = 0;
    XF_TNAME(DST_T, NPC) RGB_packed = 0;
    XF_CTUNAME(SRC_T, NPC) H, S, V;
    unsigned long int r = 0;
    unsigned long int g = 0;
    unsigned long int b = 0;
    ap_fixed<28, 9> tab[4];
// clang-format off
    #pragma HLS ARRAY_PARTITION variable=tab complete
    // clang-format on
    ap_fixed<28, 9> p1, p2;
    // Fixed-point scale factors applied to H and S below.
    ap_ufixed<20, 1, AP_RND> hscale = 0.0333333333333333333;
    ap_ufixed<20, 1, AP_RND> s_scale = 0.0039215686274509803921568627451;
rowloop:
    for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
        #pragma HLS LOOP_FLATTEN off
        #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
        // clang-format on
    columnloop:
        // NOTE(review): loop condition is damaged in this listing; sibling kernels use
        // "j < (width >> XF_BITSHIFT(NPC))" -- confirm.
        for (ap_uint<13> j = 0; j> XF_BITSHIFT(NPC); j++) {
// clang-format off
            #pragma HLS pipeline
            #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
            // clang-format on
            HSV_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j);
            ExtractUYVYPixels(HSV_packed, HSV);
            for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
                H = HSV[offset], S = HSV[offset + 1], V = HSV[offset + 2];
                static const int sector_data[][3] = {{1, 3, 0}, {1, 0, 2}, {3, 0, 1},
                                                     {0, 2, 1}, {0, 1, 3}, {2, 1, 0}};
                ap_fixed<28, 9> mul_scl = s_scale * S;
                // Integer part and fractional part of the scaled hue.
                unsigned char H_scl = (unsigned char)H * hscale;
                ap_fixed<28, 9> h_fix = H * hscale - H_scl;
                // NOTE(review): the listing appears to be truncated on the next line.
                if (H_scl >= 6) // for hrange=180, 0> XF_BITSHIFT(NPC)) + j, RGB_packed);
        }
    }
}

// hsv2bgr: public entry point for the HSV -> BGR conversion (its parameter list and body
// continue past this span).
template void hsv2bgr(xf::cv::Mat& _src,
xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " HSV image Type must be XF_8UC3"); assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " HSV image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and HSV plane dimensions mismatch"); // assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism // is supported "); #endif xfhsv2bgr> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols); } template void xfrgb2uyvy(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { // XF_PTNAME(XF_8UP) Y[],U,V; XF_PTNAME(XF_8UP) Y[XF_NPIXPERCYCLE(NPC)]; XF_PTNAME(XF_8UP) U[XF_NPIXPERCYCLE(NPC)]; XF_PTNAME(XF_8UP) V[XF_NPIXPERCYCLE(NPC)]; ap_uint<24> RGB1[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Y complete #pragma HLS ARRAY_PARTITION variable=U complete #pragma HLS ARRAY_PARTITION variable=V complete #pragma HLS ARRAY_PARTITION variable=RGB1 complete // clang-format on unsigned long long int idx = 0, idx1 = 0; XF_SNAME(WORDWIDTH_SRC) Packed_rgb1; XF_PTNAME(XF_DEPTH(DST_T, NPC)) UYPacked, VYPacked, packed_uyvy[XF_NPIXPERCYCLE(NPC)]; XF_SNAME(WORDWIDTH_DST) val_dst = 0; uint8_t offset = 0; uint16_t shift = 0; bool evencol = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on evencol = true; columnloop: for (int j = 0; j < (width >> (XF_BITSHIFT(NPC))); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on val_dst = 0; // evencol=true; Packed_rgb1 = src.read(idx++); xfExtractPixels(RGB1, Packed_rgb1, 0); shift = 0; for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) { // clang-format off #pragma HLS 
LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on // Y0[l] = // CalculateY(RGB1[offset+0].range(7,0), // RGB1[offset+0].range(15,8), // RGB1[offset+0].range(23,16)); Y[l] = CalculateY(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16)); if (evencol) { U[l / 2] = CalculateU(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16)); V[l / 2] = CalculateV(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16)); // U[l] = CalculateU(RGB1[offset+0].range(7,0), // RGB1[offset+0].range(15,8), // RGB1[offset+0].range(23,16)); V[l] // = // CalculateV(RGB1[offset+0].range(7,0), // RGB1[offset+0].range(15,8), RGB1[offset+0].range(23,16)); UYPacked.range(7, 0) = U[l / 2]; UYPacked.range(15, 8) = Y[l]; packed_uyvy[l] = UYPacked; } else { VYPacked.range(7, 0) = V[l / 2]; VYPacked.range(15, 8) = Y[l]; packed_uyvy[l] = VYPacked; } // val_dst.range(l*8+) = packed_uyvy[l]; xfPackPixels(&packed_uyvy[l], val_dst, 0, 1, shift); evencol = evencol ? false : true; } dst.write(idx1++, val_dst); } } } template void rgb2uyvy(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3"); assert((DST_T == XF_16UC1) && " UYVY image Type must be XF_16UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and UYVY plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xfrgb2uyvy> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols); } template void xfrgb2yuyv(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { // XF_PTNAME(XF_8UP) Y[],U,V; XF_PTNAME(XF_8UP) Y[XF_NPIXPERCYCLE(NPC)]; XF_PTNAME(XF_8UP) 
U[XF_NPIXPERCYCLE(NPC)]; XF_PTNAME(XF_8UP) V[XF_NPIXPERCYCLE(NPC)]; ap_uint<24> RGB1[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Y complete #pragma HLS ARRAY_PARTITION variable=U complete #pragma HLS ARRAY_PARTITION variable=V complete #pragma HLS ARRAY_PARTITION variable=RGB1 complete // clang-format on unsigned long long int idx = 0, idx1 = 0; XF_SNAME(WORDWIDTH_SRC) Packed_rgb1; XF_PTNAME(XF_DEPTH(DST_T, NPC)) YUPacked, YVPacked, packed_yuyv[XF_NPIXPERCYCLE(NPC)]; XF_SNAME(WORDWIDTH_DST) val_dst = 0; uint8_t offset = 0; uint16_t shift = 0; bool evencol = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on evencol = true; columnloop: for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on val_dst = 0; Packed_rgb1 = src.read(idx++); xfExtractPixels(RGB1, Packed_rgb1, 0); shift = 0; for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on Y[l] = CalculateY(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16)); if (evencol) { U[l / 2] = CalculateU(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16)); V[l / 2] = CalculateV(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16)); YUPacked.range(7, 0) = Y[l]; YUPacked.range(15, 8) = U[l / 2]; packed_yuyv[l] = YUPacked; } else { YVPacked.range(7, 0) = Y[l]; YVPacked.range(15, 8) = V[l / 2]; packed_yuyv[l] = YVPacked; } xfPackPixels(&packed_yuyv[l], val_dst, 0, 1, shift); evencol = evencol ? 
false : true; } dst.write(idx1++, val_dst); } } } template void rgb2yuyv(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3"); assert((DST_T == XF_16UC1) && " YUYV image Type must be XF_16UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and YUYV plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xfrgb2yuyv> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols); } template void xfrgb2bgr(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { ap_uint<24> RGB[XF_NPIXPERCYCLE(NPC)], BGR[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=BGR complete // clang-format on unsigned long long int idx = 0, idx1 = 0; XF_TNAME(SRC_T, NPC) Packed_rgb1; XF_TNAME(DST_T, NPC) val_dst = 0; uint8_t offset = 0; uint16_t shift = 0; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on val_dst = 0; Packed_rgb1 = src.read(idx++); for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on RGB[l] = Packed_rgb1(l * 24 + 23, l * 24); BGR[l].range(23, 16) = RGB[l].range(7, 0); BGR[l].range(15, 8) = RGB[l].range(15, 8); BGR[l].range(7, 0) = RGB[l].range(23, 16); val_dst.range(l * 24 + 23, l * 24) = 
BGR[l]; } dst.write(idx1++, val_dst); } } } template void rgb2bgr(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3"); assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and RGB plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xfrgb2bgr> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols); } template void xfnv122uyvy(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { // assert(); hls::stream uvStream; // clang-format off #pragma HLS STREAM variable=&uvStream depth=COLS/2 // clang-format on XF_SNAME(WORDWIDTH_Y) yPacked; XF_SNAME(WORDWIDTH_UV) uvPacked; XF_SNAME(WORDWIDTH_DST) uyvyPacked; unsigned long long int y_idx = 0, uv_idx = 0, out_idx = 0; uint8_t y; int8_t u, v; bool evenRow = true, evenBlock = true, evenPix = true; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on yPacked = _y.read(y_idx++); uyvyPacked = 0; if (evenRow) { if (evenBlock) { uvPacked = _uv.read(uv_idx++); uvStream.write(uvPacked); } } else { // Keep a copy of UV row data in stream to use for oddrow if (evenBlock) { uvPacked = uvStream.read(); } } for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) { uint8_t y = yPacked.range(l * 8 + 7, l * 8 + 0); if (evenPix) { v = 
(uint8_t)uvPacked.range((l / 2) * 16 + 15, (l / 2) * 16 + 8); u = (uint8_t)uvPacked.range((l / 2) * 16 + 7, (l / 2) * 16 + 0); uyvyPacked.range(l * 16 + 7, l * 16 + 0) = u; uyvyPacked.range(l * 16 + 15, l * 16 + 8) = y; } else { uyvyPacked.range(l * 16 + 7, l * 16 + 0) = v; uyvyPacked.range(l * 16 + 15, l * 16 + 8) = y; } evenPix = evenPix ? false : true; } dst.write(out_idx++, uyvyPacked); evenBlock = ((XF_NPIXPERCYCLE(NPC)) != 1) ? true : evenBlock ? false : true; } evenRow = evenRow ? false : true; } if (height & 1) { for (int i = 0; i < width; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on uvStream.read(); } } } template void nv122uyvy(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_Y == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((SRC_UV == XF_8UC2) && " UV image Type must be XF_8UC2"); assert((DST_T == XF_16UC1) && " UYVY image Type must be XF_16UC1"); assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_y.cols == _dst.cols) && (_y.rows == _dst.rows)) && "Y and UYVY plane dimensions mismatch"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same "); } #endif xfnv122uyvy> (XF_NPIXPERCYCLE(NPC)))>(_y, _uv, _dst, _y.rows, _y.cols); } template void xfnv212uyvy(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { // assert(); hls::stream uvStream; // clang-format off #pragma HLS STREAM variable=&uvStream depth=COLS/2 // clang-format on XF_SNAME(WORDWIDTH_Y) yPacked; XF_SNAME(WORDWIDTH_UV) uvPacked; XF_SNAME(WORDWIDTH_DST) 
uyvyPacked; unsigned long long int y_idx = 0, uv_idx = 0, out_idx = 0; uint8_t y; int8_t u, v; bool evenRow = true, evenBlock = true, evenPix = true; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on yPacked = _y.read(y_idx++); uyvyPacked = 0; if (evenRow) { if (evenBlock) { uvPacked = _uv.read(uv_idx++); uvStream.write(uvPacked); } } else { // Keep a copy of UV row data in stream to use for oddrow if (evenBlock) { uvPacked = uvStream.read(); } } for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) { y = yPacked.range(l * 8 + 7, l * 8 + 0); if (evenPix) { u = (uint8_t)uvPacked.range((l / 2) * 16 + 15, (l / 2) * 16 + 8); v = (uint8_t)uvPacked.range((l / 2) * 16 + 7, (l / 2) * 16 + 0); uyvyPacked.range(l * 16 + 7, l * 16 + 0) = u; uyvyPacked.range(l * 16 + 15, l * 16 + 8) = y; } else { uyvyPacked.range(l * 16 + 7, l * 16 + 0) = v; uyvyPacked.range(l * 16 + 15, l * 16 + 8) = y; } evenPix = evenPix ? false : true; } dst.write(out_idx++, uyvyPacked); evenBlock = ((XF_NPIXPERCYCLE(NPC)) != 1) ? true : evenBlock ? false : true; } evenRow = evenRow ? 
false : true; } if (height & 1) { for (int i = 0; i < width; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on uvStream.read(); } } } template void nv212uyvy(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_Y == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((SRC_UV == XF_8UC2) && " UV image Type must be XF_8UC2"); assert((DST_T == XF_16UC1) && " UYVY image Type must be XF_16UC1"); assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_y.cols == _dst.cols) && (_y.rows == _dst.rows)) && "Y and UYVY plane dimensions mismatch"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same "); } #endif xfnv212uyvy> (XF_NPIXPERCYCLE(NPC))))>(_y, _uv, _dst, _y.rows, _y.cols); } template void xfnv122yuyv(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { // assert(); hls::stream uvStream; // clang-format off #pragma HLS STREAM variable=&uvStream depth=COLS/2 // clang-format on XF_SNAME(WORDWIDTH_Y) yPacked; XF_SNAME(WORDWIDTH_UV) uvPacked; XF_SNAME(WORDWIDTH_DST) uyvyPacked; unsigned long long int y_idx = 0, uv_idx = 0, out_idx = 0; uint8_t y; int8_t u, v; bool evenRow = true, evenBlock = true, evenPix = true; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on yPacked = 
_y.read(y_idx++); uyvyPacked = 0; if (evenRow) { if (evenBlock) { uvPacked = _uv.read(uv_idx++); uvStream.write(uvPacked); } } else { // Keep a copy of UV row data in stream to use for oddrow if (evenBlock) { uvPacked = uvStream.read(); } } for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) { y = yPacked.range(l * 8 + 7, l * 8 + 0); if (evenPix) { v = (uint8_t)uvPacked.range((l / 2) * 16 + 15, (l / 2) * 16 + 8); u = (uint8_t)uvPacked.range((l / 2) * 16 + 7, (l / 2) * 16 + 0); uyvyPacked.range(l * 16 + 7, l * 16 + 0) = y; uyvyPacked.range(l * 16 + 15, l * 16 + 8) = u; } else { uyvyPacked.range(l * 16 + 7, l * 16 + 0) = y; uyvyPacked.range(l * 16 + 15, l * 16 + 8) = v; } evenPix = evenPix ? false : true; } dst.write(out_idx++, uyvyPacked); evenBlock = ((XF_NPIXPERCYCLE(NPC)) != 1) ? true : evenBlock ? false : true; } evenRow = evenRow ? false : true; } if (height & 1) { for (int i = 0; i < width; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on uvStream.read(); } } } template void nv122yuyv(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_Y == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((SRC_UV == XF_8UC2) && " UV image Type must be XF_8UC2"); assert((DST_T == XF_16UC1) && " YUYV image Type must be XF_16UC1"); assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_y.cols == _dst.cols) && (_y.rows == _dst.rows)) && "Y and Yuyv plane dimensions mismatch"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same "); } #endif xfnv122yuyv> (XF_NPIXPERCYCLE(NPC)))>(_y, _uv, _dst, _y.rows, 
_y.cols); } template void xfnv212yuyv(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { // assert(); hls::stream uvStream; // clang-format off #pragma HLS STREAM variable=&uvStream depth=COLS/2 // clang-format on XF_SNAME(WORDWIDTH_Y) yPacked; XF_SNAME(WORDWIDTH_UV) uvPacked; XF_SNAME(WORDWIDTH_DST) uyvyPacked; unsigned long long int y_idx = 0, uv_idx = 0, out_idx = 0; uint8_t y; int8_t u, v; bool evenRow = true, evenBlock = true, evenPix = true; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on yPacked = _y.read(y_idx++); uyvyPacked = 0; if (evenRow) { if (evenBlock) { uvPacked = _uv.read(uv_idx++); uvStream.write(uvPacked); } } else { // Keep a copy of UV row data in stream to use for oddrow if (evenBlock) { uvPacked = uvStream.read(); } } for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) { y = yPacked.range(l * 8 + 7, l * 8 + 0); if (evenPix) { u = (uint8_t)uvPacked.range((l / 2) * 16 + 15, (l / 2) * 16 + 8); v = (uint8_t)uvPacked.range((l / 2) * 16 + 7, (l / 2) * 16 + 0); uyvyPacked.range(l * 16 + 7, l * 16 + 0) = y; uyvyPacked.range(l * 16 + 15, l * 16 + 8) = u; } else { uyvyPacked.range(l * 16 + 7, l * 16 + 0) = y; uyvyPacked.range(l * 16 + 15, l * 16 + 8) = v; } evenPix = evenPix ? false : true; } dst.write(out_idx++, uyvyPacked); evenBlock = ((XF_NPIXPERCYCLE(NPC)) != 1) ? true : evenBlock ? false : true; } evenRow = evenRow ? 
false : true; } if (height & 1) { for (int i = 0; i < width; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on uvStream.read(); } } } template void nv212yuyv(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_Y == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((SRC_UV == XF_8UC2) && " VU image Type must be XF_8UC2"); assert((DST_T == XF_16UC1) && " YUYV image Type must be XF_16UC1"); assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_y.cols == _dst.cols) && (_y.rows == _dst.rows)) && "Y and Yuyv plane dimensions mismatch"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and VU planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the VU " "plane for multipixel parallelism "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same "); } #endif xfnv212yuyv> (XF_NPIXPERCYCLE(NPC))))>(_y, _uv, _dst, _y.rows, _y.cols); } template void xfbgr2uyvy(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { // XF_PTNAME(XF_8UP) Y[],U,V; XF_PTNAME(XF_8UP) Y[XF_NPIXPERCYCLE(NPC)]; XF_PTNAME(XF_8UP) U[XF_NPIXPERCYCLE(NPC)]; XF_PTNAME(XF_8UP) V[XF_NPIXPERCYCLE(NPC)]; ap_uint<24> RGB1[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Y complete #pragma HLS ARRAY_PARTITION variable=U complete #pragma HLS ARRAY_PARTITION variable=V complete #pragma HLS ARRAY_PARTITION variable=RGB1 complete // clang-format on unsigned long long int idx = 0, idx1 = 0; XF_SNAME(WORDWIDTH_SRC) Packed_rgb1; XF_PTNAME(XF_DEPTH(DST_T, NPC)) UYPacked, VYPacked, packed_uyvy[XF_NPIXPERCYCLE(NPC)]; XF_SNAME(WORDWIDTH_DST) val_dst = 0; uint8_t offset = 0; uint16_t shift = 0; bool evencol = true; rowloop: for (int i = 0; i 
< height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on evencol = true; columnloop: for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on val_dst = 0; // evencol=true; Packed_rgb1 = src.read(idx++); xfExtractPixels(RGB1, Packed_rgb1, 0); shift = 0; for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on // Y0[l] = // CalculateY(RGB1[offset+0].range(7,0), // RGB1[offset+0].range(15,8), // RGB1[offset+0].range(23,16)); Y[l] = CalculateY(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0)); if (evencol) { U[l / 2] = CalculateU(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0)); V[l / 2] = CalculateV(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0)); // U[l] = // CalculateU(RGB1[offset+0].range(7,0), // RGB1[offset+0].range(15,8), // RGB1[offset+0].range(23,16)); V[l] // = // CalculateV(RGB1[offset+0].range(7,0), // RGB1[offset+0].range(15,8), RGB1[offset+0].range(23,16)); UYPacked.range(7, 0) = U[l / 2]; UYPacked.range(15, 8) = Y[l]; packed_uyvy[l] = UYPacked; } else { VYPacked.range(7, 0) = V[l / 2]; VYPacked.range(15, 8) = Y[l]; packed_uyvy[l] = VYPacked; } // val_dst.range() = packed_uyvy[l]; xfPackPixels(&packed_uyvy[l], val_dst, 0, 1, shift); evencol = evencol ? 
false : true; } dst.write(idx1++, val_dst); } } } template void bgr2uyvy(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3"); assert((DST_T == XF_16UC1) && " UYVY image Type must be XF_16UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " BGR image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and UYVY plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xfbgr2uyvy> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols); } template void xfbgr2yuyv(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { // XF_PTNAME(XF_8UP) Y[],U,V; XF_PTNAME(XF_8UP) Y[XF_NPIXPERCYCLE(NPC)]; XF_PTNAME(XF_8UP) U[XF_NPIXPERCYCLE(NPC)]; XF_PTNAME(XF_8UP) V[XF_NPIXPERCYCLE(NPC)]; ap_uint<24> RGB1[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=Y complete #pragma HLS ARRAY_PARTITION variable=U complete #pragma HLS ARRAY_PARTITION variable=V complete #pragma HLS ARRAY_PARTITION variable=RGB1 complete // clang-format on unsigned long long int idx = 0, idx1 = 0; XF_SNAME(WORDWIDTH_SRC) Packed_rgb1; XF_PTNAME(XF_DEPTH(DST_T, NPC)) YUPacked, YVPacked, packed_yuyv[XF_NPIXPERCYCLE(NPC)]; XF_SNAME(WORDWIDTH_DST) val_dst = 0; uint8_t offset = 0; uint16_t shift = 0; bool evencol = true; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on evencol = true; columnloop: for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on val_dst = 0; Packed_rgb1 = src.read(idx++); 
xfExtractPixels(RGB1, Packed_rgb1, 0); shift = 0; for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on Y[l] = CalculateY(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0)); if (evencol) { U[l / 2] = CalculateU(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0)); V[l / 2] = CalculateV(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0)); YUPacked.range(7, 0) = Y[l]; YUPacked.range(15, 8) = U[l / 2]; packed_yuyv[l] = YUPacked; } else { YVPacked.range(7, 0) = Y[l]; YVPacked.range(15, 8) = V[l / 2]; packed_yuyv[l] = YVPacked; } xfPackPixels(&packed_yuyv[l], val_dst, 0, 1, shift); evencol = evencol ? false : true; } dst.write(idx1++, val_dst); } } } template void bgr2yuyv(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3"); assert((DST_T == XF_16UC1) && " YUYV image Type must be XF_16UC1"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " BGR image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and YUYV plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xfbgr2yuyv> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols); } template void xfbgr2rgb(xf::cv::Mat& src, xf::cv::Mat& dst, unsigned short int height, unsigned short int width) { ap_uint<24> RGB[XF_NPIXPERCYCLE(NPC)], BGR[XF_NPIXPERCYCLE(NPC)]; // clang-format off #pragma HLS ARRAY_PARTITION variable=RGB complete #pragma HLS ARRAY_PARTITION variable=BGR complete // clang-format on unsigned long long int idx = 0, idx1 = 0; XF_SNAME(WORDWIDTH_SRC) Packed_rgb1; XF_SNAME(WORDWIDTH_DST) val_dst = 0; uint8_t offset = 0; 
uint16_t shift = 0; rowloop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on columnloop: for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=TC max=TC // clang-format on val_dst = 0; Packed_rgb1 = src.read(idx++); for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC #pragma HLS unroll // clang-format on xfExtractPixels(BGR, Packed_rgb1, 0); RGB[l].range(23, 16) = BGR[l].range(7, 0); RGB[l].range(15, 8) = BGR[l].range(15, 8); RGB[l].range(7, 0) = BGR[l].range(23, 16); val_dst.range(l * XF_PIXELWIDTH(SRC_T, NPC) + XF_PIXELWIDTH(SRC_T, NPC) - 1, l * XF_PIXELWIDTH(SRC_T, NPC)) = RGB[l]; } dst.write(idx1++, val_dst); } } } template void bgr2rgb(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3"); assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " BGR image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and RGB plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xfbgr2rgb> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols); } template void Kernbgr2Nv12(xf::cv::Mat& _rgba, xf::cv::Mat& _y, xf::cv::Mat& _uv, uint16_t height, uint16_t width) { unsigned long long int idx = 0, idx1 = 0; ap_uint<32> rgba; ap_uint<16> val1; uint8_t y, u, v; bool evenRow = true, evenBlock = true; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // 
clang-format on ColLoop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on rgba = _rgba.read(i * width + j); uint8_t b = rgba.range(7, 0); uint8_t g = rgba.range(15, 8); uint8_t r = rgba.range(23, 16); y = CalculateY(r, g, b); if (evenRow) { u = CalculateU(r, g, b); v = CalculateV(r, g, b); } _y.write(idx++, y); if (evenRow) { if ((j & 0x01) == 0) //{ _uv.write(idx1++, u | (uint16_t)v << 8); //_uv.write(v); //} // _uv.write(u | (uint16_t)v << 8); } } evenRow = evenRow ? false : true; } } template void xFbgr2Nv12(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { Kernbgr2Nv12( _src, _y, _uv, height, width); } else { Kernbgr2Nv12_ro> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height, width); } } template void bgr2nv12(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3"); assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2"); assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and BGR plane dimensions mismatch"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same "); } #endif xFbgr2Nv12(_src, _y, _uv, _src.rows, _src.cols); } template void Kernbgr2Nv21(xf::cv::Mat& _rgba, xf::cv::Mat& _y, xf::cv::Mat& _vu, uint16_t height, uint16_t width) { width = width >> 
XF_BITSHIFT(NPC); XF_SNAME(XF_32UW) rgba; unsigned long long int idx = 0, idx1 = 0; uint8_t y, u, v; bool evenRow = true, evenBlock = true; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (int j = 0; j < width; j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on rgba = _rgba.read(i * width + j); uint8_t b = rgba.range(7, 0); uint8_t g = rgba.range(15, 8); uint8_t r = rgba.range(23, 16); y = CalculateY(r, g, b); if (evenRow) { u = CalculateU(r, g, b); v = CalculateV(r, g, b); } _y.write(idx++, y); if (evenRow) { if ((j & 0x01) == 0) _vu.write(idx1++, v | ((uint16_t)u << 8)); } } evenRow = evenRow ? false : true; } } template void xFbgr2Nv21(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { Kernbgr2Nv21( _src, _y, _uv, height, width); } else { Kernbgr2Nv21_ro> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height, width); } } template void bgr2nv21(xf::cv::Mat& _src, xf::cv::Mat& _y, xf::cv::Mat& _uv) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3"); assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2"); assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS"); assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and BGR plane dimensions mismatch"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC 
values must be same "); } #endif xFbgr2Nv21(_src, _y, _uv, _src.rows, _src.cols); } template void KernYuyv2bgr(xf::cv::Mat& _yuyv, xf::cv::Mat& _rgba, uint16_t height, uint16_t width) { XF_SNAME(WORDWIDTH_DST) rgba; XF_SNAME(WORDWIDTH_SRC) yu, yv; XF_PTNAME(XF_8UP) r, g, b; int8_t y1, y2, u, v; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; unsigned long long int idx = 0; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS #pragma HLS LOOP_FLATTEN off // clang-format on ColLoop: for (int j = 0; j < width; j += 2) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on yu = _yuyv.read(i * width + j); yv = _yuyv.read(i * width + j + 1); u = (uint8_t)yu.range(15, 8) - 128; y1 = (yu.range(7, 0) > 16) ? ((uint8_t)yu.range(7, 0) - 16) : 0; v = (uint8_t)yv.range(15, 8) - 128; y2 = (yv.range(7, 0) > 16) ? ((uint8_t)yv.range(7, 0) - 16) : 0; V2Rtemp = v * (short int)V2R; U2Gtemp = (short int)U2G * u; V2Gtemp = (short int)V2G * v; U2Btemp = u * (short int)U2B; r = CalculateR(y1, V2Rtemp, v); g = CalculateG(y1, U2Gtemp, V2Gtemp); b = CalculateB(y1, U2Btemp, u); rgba = ((ap_uint24_t)b) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)r << 16); _rgba.write(idx++, rgba); r = CalculateR(y2, V2Rtemp, v); g = CalculateG(y2, U2Gtemp, V2Gtemp); b = CalculateB(y2, U2Btemp, u); rgba = ((ap_uint24_t)b) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)r << 16); _rgba.write(idx++, rgba); } } } // Yuyv2Rgba template void xFYuyv2bgr(xf::cv::Mat& _src, xf::cv::Mat& _dst, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernYuyv2bgr> 1) >> XF_BITSHIFT(NPC))>( _src, _dst, height, width); } else { KernYuyv2bgr_ro> 1) >> XF_BITSHIFT(NPC)), ((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _dst, height, width); } } template void yuyv2bgr(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) 
&& " YUYV plane Type must be XF_16UC1"); assert((DST_T == XF_8UC3) && " BGR plane Type must be XF_8UC3"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " YUYV image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "YUYV and BGR planes dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xFYuyv2bgr(_src, _dst, _src.rows, _src.cols); } template void KernUyvy2bgr(xf::cv::Mat& _uyvy, xf::cv::Mat& _rgba, uint16_t height, uint16_t width) { XF_SNAME(WORDWIDTH_DST) rgba; XF_SNAME(WORDWIDTH_SRC) uyvy; XF_SNAME(WORDWIDTH_SRC) uy; XF_SNAME(WORDWIDTH_SRC) vy; unsigned long long int idx = 0; XF_PTNAME(XF_8UP) r, g, b; int8_t y1, y2, u, v; int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS #pragma HLS LOOP_FLATTEN off // clang-format on ColLoop: for (int j = 0; j < width; j += 2) { // clang-format off #pragma HLS LOOP_TRIPCOUNT min=TC max=TC #pragma HLS pipeline // clang-format on // uyvy = _uyvy.read(); uy = _uyvy.read(i * width + j); vy = _uyvy.read(i * width + j + 1); u = (uint8_t)uy.range(7, 0) - 128; /* if(uyvy.range(15,8) > 16) y1 = (uint8_t)uyvy.range(15,8) - 16; else y1 = 0;*/ y1 = (uy.range(15, 8) > 16) ? ((uint8_t)uy.range(15, 8) - 16) : 0; v = (uint8_t)vy.range(7, 0) - 128; /* if(uyvy.range(31,24) > 16) y2 = ((uint8_t)uyvy.range(31,24) - 16); else y2 = 0;*/ y2 = (vy.range(15, 8) > 16) ? 
((uint8_t)vy.range(15, 8) - 16) : 0; V2Rtemp = v * (short int)V2R; U2Gtemp = (short int)U2G * u; V2Gtemp = (short int)V2G * v; U2Btemp = u * (short int)U2B; r = CalculateR(y1, V2Rtemp, v); g = CalculateG(y1, U2Gtemp, V2Gtemp); b = CalculateB(y1, U2Btemp, u); rgba = ((ap_uint24_t)b) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)r << 16); _rgba.write(idx, rgba); idx++; r = CalculateR(y2, V2Rtemp, v); g = CalculateG(y2, U2Gtemp, V2Gtemp); b = CalculateB(y2, U2Btemp, u); rgba = ((ap_uint24_t)b) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)r << 16); _rgba.write(idx, rgba); idx++; } } } template void xFUyvy2bgr(xf::cv::Mat& _src, xf::cv::Mat& _dst, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == 1) { KernUyvy2bgr> 1) >> XF_BITSHIFT(NPC))>( _src, _dst, height, width); } else { KernUyvy2bgr_ro> 1) >> XF_BITSHIFT(NPC)), ((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _dst, height, width); } } template void uyvy2bgr(xf::cv::Mat& _src, xf::cv::Mat& _dst) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1"); assert((DST_T == XF_8UC3) && " BGR plane Type must be XF_8UC3"); assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " UYVY image rows and cols should be less than ROWS, COLS"); assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "UYVY and BGR planes dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xFUyvy2bgr(_src, _dst, _src.rows, _src.cols); } // Yuyv2Nv12 template void xFYuyv2Nv21(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _uv_image, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == XF_NPPC1) { KernYuyv2Nv21> 1) >> XF_BITSHIFT(NPC))>(_src, _y_image, _uv_image, height, width); } else { KernYuyv2Nv21_ro> 1) >> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, 
_uv_image, height, width); } } template void yuyv2nv21(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _uv_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " YUYV plane Type must be XF_16UC1"); assert((Y_T == XF_8UC1) && " Y plane Type must be XF_8UC1"); assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2"); assert(((_y_image.rows <= ROWS) && (_y_image.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_y_image.cols == (_uv_image.cols << 1)) && (_y_image.rows == (_uv_image.rows << 1))) && "Y and UV planes dimensions mismatch"); assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and YUYV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same "); } #endif xFYuyv2Nv21(_src, _y_image, _uv_image, _src.rows, _src.cols); } // Yuyv2nv21 // Uyvy2Nv21 template void xFUyvy2Nv21(xf::cv::Mat& uyvy, xf::cv::Mat& y_plane, xf::cv::Mat& uv_plane, uint16_t height, uint16_t width) { width = width >> XF_BITSHIFT(NPC); if (NPC == XF_NPPC1) { // clang-format off #pragma HLS DATAFLOW // clang-format on KernUyvy2Nv21> 1) >> XF_BITSHIFT(NPC))>(uyvy, y_plane, uv_plane, height, width); } else { KernUyvy2Nv21_ro> 1) >> XF_BITSHIFT(NPC)), ((1 << NPC) >> 1)>(uyvy, y_plane, uv_plane, height, width); } } template void uyvy2nv21(xf::cv::Mat& _src, xf::cv::Mat& _y_image, xf::cv::Mat& _uv_image) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1"); assert((Y_T == XF_8UC1) && " Y plane Type must be XF_8UC1"); assert((UV_T == 
XF_8UC2) && " UV image Type must be XF_8UC2"); assert(((_y_image.rows <= ROWS) && (_y_image.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_y_image.cols == (_uv_image.cols << 1)) && (_y_image.rows == (_uv_image.rows << 1))) && "Y and UV planes dimensions mismatch"); assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and UYVY planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same "); } #endif xFUyvy2Nv21(_src, _y_image, _uv_image, _src.rows, _src.cols); } template void xfnv122nv21(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& out_y, xf::cv::Mat& out_uv, unsigned short int height, unsigned short int width) { // assert(); XF_SNAME(WORDWIDTH_Y) yPacked = 0; XF_SNAME(WORDWIDTH_UV) uvPacked[8], vuPacked[8], packed_Data = 0, val_dst = 0; unsigned long long int y_idx = 0, uv_idx = 0; unsigned long long int outUV_idx = 0; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (int j = 0; j> XF_BITSHIFT(NPC); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on yPacked = _y.read(i * (width >> XF_BITSHIFT(NPC)) + j); out_y.write(y_idx++, yPacked); if (i < _uv.rows && j < (_uv.cols >> XF_BITSHIFT(NPC_UV))) { packed_Data = _uv.read(uv_idx++); } for (int l = 0; l < (XF_NPIXPERCYCLE(NPC_UV)); l++) { // clang-format off #pragma HLS unroll // clang-format on uvPacked[l] = packed_Data(l * 16 + 15, l * 16); vuPacked[l].range(15, 8) = uvPacked[l].range(7, 0); vuPacked[l].range(7, 0) = uvPacked[l].range(15, 8); 
val_dst.range(l * 16 + 15, l * 16) = vuPacked[l]; } if (i < _uv.rows && j < (_uv.cols >> XF_BITSHIFT(NPC_UV))) { out_uv.write(outUV_idx++, val_dst); } } } } template void nv122nv21(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& out_y, xf::cv::Mat& out_uv) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_Y == XF_8UC1) && " Y plane Type must be XF_8UC1"); assert((SRC_UV == XF_8UC2) && " UV image Type must be XF_8UC2"); assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same "); } #endif xfnv122nv21> (XF_NPIXPERCYCLE(NPC))))>(_y, _uv, out_y, out_uv, _y.rows, _y.cols); } template void nv212nv12(xf::cv::Mat& _y, xf::cv::Mat& _uv, xf::cv::Mat& out_y, xf::cv::Mat& out_uv) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_Y == XF_8UC1) && " Y plane Type must be XF_8UC1"); assert((SRC_UV == XF_8UC2) && " VU image Type must be XF_8UC2"); assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and VU planes dimensions mismatch"); if (NPC != XF_NPPC1) { assert((NPC == (NPC_UV * 2)) && " NPC of Y plane must be double the UV " "plane for multipixel parallelism "); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); } else { assert((NPC == NPC_UV == XF_NPPC1) 
&& " Both NPC, NPC_UV values must be same "); } #endif xfnv122nv21> (XF_NPIXPERCYCLE(NPC))))>(_y, _uv, out_y, out_uv, _y.rows, _y.cols); } template void xfuyvy2yuyv(xf::cv::Mat& uyvy, xf::cv::Mat& yuyv, unsigned short int height, unsigned short int width) { // assert(); XF_SNAME(WORDWIDTH_SRC) uy = 0; XF_SNAME(WORDWIDTH_DST) yu[8], uyPacked[8], packed_Data = 0, val_dst = 0; unsigned long long int y_idx = 0, uv_idx = 0; unsigned long long int outUV_idx = 0; RowLoop: for (int i = 0; i < height; i++) { // clang-format off #pragma HLS LOOP_FLATTEN off #pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS // clang-format on ColLoop: for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) { // clang-format off #pragma HLS pipeline #pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS // clang-format on uy = uyvy.read(i * (width >> XF_BITSHIFT(NPC)) + j); for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) { // clang-format off #pragma HLS unroll // clang-format on uyPacked[l] = uy(l * 16 + 15, l * 16); yu[l].range(15, 8) = uyPacked[l].range(7, 0); yu[l].range(7, 0) = uyPacked[l].range(15, 8); val_dst.range(l * 16 + 15, l * 16) = yu[l]; } yuyv.write(outUV_idx++, val_dst); } } } template void uyvy2yuyv(xf::cv::Mat& uyvy, xf::cv::Mat& yuyv) { // clang-format off #pragma HLS INLINE OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " UYVY image Type must be XF_16UC1"); assert((DST_T == XF_16UC1) && " YUYV image Type must be XF_16UC1"); assert(((yuyv.rows <= ROWS) && (yuyv.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((yuyv.cols == uyvy.cols) && (yuyv.rows == uyvy.rows)) && "YUYV and UYVY plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xfuyvy2yuyv> (XF_NPIXPERCYCLE(NPC))))>(uyvy, yuyv, uyvy.rows, uyvy.cols); } template void yuyv2uyvy(xf::cv::Mat& yuyv, xf::cv::Mat& uyvy) { // clang-format off #pragma HLS INLINE 
OFF // clang-format on #ifndef __SYNTHESIS__ assert((SRC_T == XF_16UC1) && " YUYV image Type must be XF_16UC1"); assert((DST_T == XF_16UC1) && " UYVY image Type must be XF_16UC1"); assert(((yuyv.rows <= ROWS) && (yuyv.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS"); assert(((yuyv.cols == uyvy.cols) && (yuyv.rows == uyvy.rows)) && "YUYV and UYVY plane dimensions mismatch"); assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) && " 1,2,4,8 pixel parallelism is supported "); #endif xfuyvy2yuyv> (XF_NPIXPERCYCLE(NPC))))>(yuyv, uyvy, uyvy.rows, uyvy.cols); } } // namespace cv } // namespace xf #endif