Program Listing for File xf_cvt_color.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/imgproc/xf_cvt_color.hpp
)
/*
* Copyright 2019 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _XF_CVT_COLOR_HPP_
#define _XF_CVT_COLOR_HPP_
#include "../common/xf_common.hpp"
#include "hls_stream.h"
#include "xf_cvt_color_1.hpp"
#include "xf_cvt_color_utils.hpp"
#include <assert.h>
namespace xf {
namespace cv {
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int TC>
void write_y_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& out_y,
uint16_t height,
uint16_t width) {
XF_SNAME(WORDWIDTH_SRC) tmp;
unsigned long long int idx = 0;
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
#pragma HLS LOOP_FLATTEN off
// clang-format on
for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
tmp = src_y.read(i * (width >> XF_BITSHIFT(NPC)) + j);
out_y.write(idx++, tmp);
}
}
}
// KernRgba2Yuv4
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int PLANES,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int TC,
int iTC>
void KernRgba2Yuv4_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst1,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst2,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst3,
uint16_t height,
uint16_t width) {
// width=width>>NPC;
XF_PTNAME(XF_8UP) Y0[16], U[16], V[16];
uint8_t RGB[64];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Y0 complete
#pragma HLS ARRAY_PARTITION variable=U complete
#pragma HLS ARRAY_PARTITION variable=V complete
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
unsigned long long int y_idx = 0, u_idx = 0, v_idx = 0;
XF_SNAME(WORDWIDTH_SRC) PackedPixels;
XF_SNAME(WORDWIDTH_DST) YPacked, UPacked, VPacked;
uint8_t offset;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS PIPELINE
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
PackedPixels = src.read(i * width + j);
ExtractRGBAPixels<WORDWIDTH_SRC>(PackedPixels, RGB);
// Converting from RGBA to YUV4
// Y = (0.257 * R) + (0.504 * G) + (0.098 * B) + 16
// U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128
// V = (0.439 * R) - (0.368 * G) - (0.071 * B) + 128
for (int l = 0; l<(1 << XF_BITSHIFT(NPC))>> 1; l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
// clang-format on
//#pragma HLS unroll
if (PLANES == 4) {
offset = l << 3;
Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
Y0[(l << 1) + 1] = CalculateY(RGB[offset + 4], RGB[offset + 5], RGB[offset + 6]);
U[(l << 1)] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
U[(l << 1) + 1] = CalculateU(RGB[offset + 4], RGB[offset + 5], RGB[offset + 6]);
V[(l << 1)] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
V[(l << 1) + 1] = CalculateV(RGB[offset + 4], RGB[offset + 5], RGB[offset + 6]);
} else {
offset = l * 6;
Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
Y0[(l << 1) + 1] = CalculateY(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]);
U[(l << 1)] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
U[(l << 1) + 1] = CalculateU(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]);
V[(l << 1)] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
V[(l << 1) + 1] = CalculateV(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]);
}
}
YPacked = PackPixels<WORDWIDTH_DST>(Y0);
UPacked = PackPixels<WORDWIDTH_DST>(U);
VPacked = PackPixels<WORDWIDTH_DST>(V);
dst1.write(y_idx++, YPacked);
dst2.write(u_idx++, UPacked);
dst3.write(v_idx++, VPacked);
}
}
}
// KernRgba2Iyuv
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int PLANES,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int ROWS_U,
int ROWS_V,
int TC,
int iTC>
void KernRgba2Iyuv_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& rgba,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& u_plane,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& v_plane,
uint16_t height,
uint16_t width) {
ap_uint8_t Y0[16], U[16], V[16];
uint8_t RGB[64];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Y0 complete
#pragma HLS ARRAY_PARTITION variable=U complete
#pragma HLS ARRAY_PARTITION variable=V complete
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
unsigned long long int y_idx = 0, out_idx = 0, out_idx1 = 0;
XF_SNAME(WORDWIDTH_SRC) PackedPixels;
XF_SNAME(WORDWIDTH_DST) YPacked, UPacked, VPacked;
uint8_t Ycount = 0, UVcount = 0;
int offset;
uchar_t UVoffset_ind, l;
ap_uint<13> i, j;
UVoffset_ind = (1 << XF_BITSHIFT(NPC)) >> 1;
bool evenRow = true, evenBlock = true;
rowloop:
for (i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
PackedPixels = rgba.read(i * width + j);
ExtractRGBAPixels<WORDWIDTH_SRC>(PackedPixels, RGB);
for (l = 0; l<(1 << XF_BITSHIFT(NPC))>> 1; l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
if (PLANES == 4) {
offset = l << 3;
Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
Y0[(l << 1) + 1] = CalculateY(RGB[offset + 4], RGB[offset + 5], RGB[offset + 6]);
} else {
offset = l * 6;
Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
Y0[(l << 1) + 1] = CalculateY(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]);
}
if (evenRow) // As Sampling rate is 2, Calculating U and V components
// only for even rows
{
/* 128 is added to U and V values to make them always positive and in
* studio range 16-240 */
if (evenBlock) {
U[l] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
V[l] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
} else {
U[UVoffset_ind + l] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
V[UVoffset_ind + l] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
}
}
}
YPacked = PackPixels<WORDWIDTH_DST>(Y0);
y_plane.write(y_idx++, YPacked);
if (evenRow & !evenBlock) {
UPacked = PackPixels<WORDWIDTH_DST>(U);
VPacked = PackPixels<WORDWIDTH_DST>(V);
u_plane.write(out_idx++, UPacked);
v_plane.write(out_idx1++, VPacked);
}
evenBlock = evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
// if(((ROWS+1)>>1) & 0x1)
// { // Filling the empty region with zeros, when the height is
// multiple
// of 2 but not a multiple of 4
// for( i = 0; i < width; i++)
// {
//#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// u_plane.write(0);
// v_plane.write(0);
// }
// }
}
// KernRgba2Nv12
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int PLANES,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int TC,
int iTC>
void KernRgba2Nv12_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& rgba,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& uv_plane,
uint16_t height,
uint16_t width) {
// width=width>>NPC;
XF_PTNAME(XF_8UP) Y0[16], UV[16];
uint8_t RGB[64];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Y0 complete
#pragma HLS ARRAY_PARTITION variable=UV complete
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_SNAME(WORDWIDTH_SRC) PackedPixels;
XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked;
unsigned long long int idx = 0, idx1 = 0;
uint8_t offset;
bool evenRow = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
PackedPixels = rgba.read(i * width + j);
ExtractRGBAPixels<WORDWIDTH_SRC>(PackedPixels, RGB);
for (int l = 0; l<(1 << XF_BITSHIFT(NPC))>> 1; l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
if (PLANES == 4) {
offset = l << 3;
Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
Y0[(l << 1) + 1] = CalculateY(RGB[offset + 4], RGB[offset + 5], RGB[offset + 6]);
} else {
offset = l * 6;
Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
Y0[(l << 1) + 1] = CalculateY(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]);
}
if (evenRow) {
UV[l << 1] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
UV[(l << 1) + 1] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
}
}
YPacked = PackPixels<WORDWIDTH_Y>(Y0);
y_plane.write(idx++, YPacked);
if (evenRow) {
UVPacked = PackPixels<WORDWIDTH_UV>(UV);
uv_plane.write(idx1++, UVPacked);
}
}
evenRow = evenRow ? false : true;
}
}
// KernRgba2Nv12
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int PLANES,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int TC,
int iTC>
void Kernbgr2Nv12_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& rgba,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& uv_plane,
uint16_t height,
uint16_t width) {
// width=width>>NPC;
XF_PTNAME(XF_8UP) Y0[16], UV[16];
uint8_t RGB[64];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Y0 complete
#pragma HLS ARRAY_PARTITION variable=UV complete
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_SNAME(WORDWIDTH_SRC) PackedPixels;
XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked;
unsigned long long int idx = 0, idx1 = 0;
uint8_t offset;
bool evenRow = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
PackedPixels = rgba.read(i * width + j);
ExtractRGBAPixels<WORDWIDTH_SRC>(PackedPixels, RGB);
for (int l = 0; l<(1 << XF_BITSHIFT(NPC))>> 1; l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
if (PLANES == 4) {
// offset = l << 3;
// Y0[(l<<1)] = CalculateY(RGB[offset+0],
// RGB[offset+1], RGB[offset+2]);
// Y0[(l<<1)+1] = CalculateY(RGB[offset+4],
// RGB[offset+5], RGB[offset+6]);
} else {
offset = l * 6;
Y0[(l << 1)] = CalculateY(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]);
Y0[(l << 1) + 1] = CalculateY(RGB[offset + 5], RGB[offset + 4], RGB[offset + 3]);
}
if (evenRow) {
UV[l << 1] = CalculateU(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]);
UV[(l << 1) + 1] = CalculateV(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]);
}
}
YPacked = PackPixels<WORDWIDTH_Y>(Y0);
y_plane.write(idx++, YPacked);
if (evenRow) {
UVPacked = PackPixels<WORDWIDTH_UV>(UV);
uv_plane.write(idx1++, UVPacked);
}
}
evenRow = evenRow ? false : true;
}
}
// KernRgba2Nv21
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int PLANES,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_VU,
int TC,
int iTC>
void KernRgba2Nv21_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& rgba,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& vu_plane,
uint16_t height,
uint16_t width) {
// width=width>>NPC;
uint16_t i, j, k, l;
ap_uint8_t Y0[16], VU[16];
uint8_t RGB[64];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Y0 complete
#pragma HLS ARRAY_PARTITION variable=VU complete
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_SNAME(WORDWIDTH_SRC) PackedPixels;
XF_SNAME(WORDWIDTH_Y) YPacked, VUPacked;
uint8_t offset;
unsigned long long int idx = 0, idx1 = 0;
bool evenRow = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
PackedPixels = (XF_SNAME(WORDWIDTH_SRC))rgba.read(i * width + j);
ExtractRGBAPixels<WORDWIDTH_SRC>(PackedPixels, RGB);
for (int l = 0; l<(1 << XF_BITSHIFT(NPC))>> 1; l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
if (PLANES == 4) {
offset = l << 3;
Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
Y0[(l << 1) + 1] = CalculateY(RGB[offset + 4], RGB[offset + 5], RGB[offset + 6]);
} else {
offset = l * 6;
Y0[(l << 1)] = CalculateY(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
Y0[(l << 1) + 1] = CalculateY(RGB[offset + 3], RGB[offset + 4], RGB[offset + 5]);
}
if (evenRow) {
VU[(l << 1)] = CalculateV(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
VU[(l << 1) + 1] = CalculateU(RGB[offset + 0], RGB[offset + 1], RGB[offset + 2]);
}
}
YPacked = PackPixels<WORDWIDTH_Y>(Y0);
y_plane.write(idx++, YPacked);
if (evenRow) {
VUPacked = PackPixels<WORDWIDTH_Y>(VU);
vu_plane.write(idx1++, VUPacked);
}
}
evenRow = evenRow ? false : true;
}
}
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int PLANES,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_VU,
int TC,
int iTC>
void Kernbgr2Nv21_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& rgba,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& vu_plane,
uint16_t height,
uint16_t width) {
// width=width>>NPC;
uint16_t i, j, k, l;
ap_uint8_t Y0[16], VU[16];
uint8_t RGB[64];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Y0 complete
#pragma HLS ARRAY_PARTITION variable=VU complete
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_SNAME(WORDWIDTH_SRC) PackedPixels;
XF_SNAME(WORDWIDTH_Y) YPacked, VUPacked;
uint8_t offset;
unsigned long long int idx = 0, idx1 = 0;
bool evenRow = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
PackedPixels = (XF_SNAME(WORDWIDTH_SRC))rgba.read(i * width + j);
ExtractRGBAPixels<WORDWIDTH_SRC>(PackedPixels, RGB);
for (int l = 0; l<(1 << XF_BITSHIFT(NPC))>> 1; l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
if (PLANES == 4) {
// offset = l << 3;
// Y0[(l<<1)] = CalculateY(RGB[offset+0],
// RGB[offset+1], RGB[offset+2]);
// Y0[(l<<1)+1] = CalculateY(RGB[offset+4],
// RGB[offset+5], RGB[offset+6]);
} else {
offset = l * 6;
Y0[(l << 1)] = CalculateY(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]);
Y0[(l << 1) + 1] = CalculateY(RGB[offset + 5], RGB[offset + 4], RGB[offset + 3]);
}
if (evenRow) {
VU[(l << 1)] = CalculateV(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]);
VU[(l << 1) + 1] = CalculateU(RGB[offset + 2], RGB[offset + 1], RGB[offset + 0]);
}
}
YPacked = PackPixels<WORDWIDTH_Y>(Y0);
y_plane.write(idx++, YPacked);
if (evenRow) {
VUPacked = PackPixels<WORDWIDTH_Y>(VU);
vu_plane.write(idx1++, VUPacked);
}
}
evenRow = evenRow ? false : true;
}
}
// KernIyuv2Rgba
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void KernIyuv2Rgba_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& in_y,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& in_u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& in_v,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _rgba,
uint16_t height,
uint16_t width) {
// width=width>>NPC;
// ap_uint<13> i,j,k;
// uchar_t k;
XF_PTNAME(XF_8UP) RGB[64], Ybuf[16], Ubuf[16], Vbuf[16];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=Ybuf complete
#pragma HLS ARRAY_PARTITION variable=Ubuf complete
#pragma HLS ARRAY_PARTITION variable=Vbuf complete
// clang-format on
hls::stream<XF_SNAME(WORDWIDTH_SRC)> UStream, VStream;
// clang-format off
#pragma HLS STREAM variable=&UStream depth=COLS
#pragma HLS STREAM variable=&VStream depth=COLS
// clang-format on
XF_SNAME(WORDWIDTH_SRC) YPacked, UPacked, VPacked;
XF_SNAME(WORDWIDTH_DST) PackedPixels;
unsigned long long int idx = 0, out_idx = 0;
uint8_t Y00, Y01;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t U, V;
uint8_t offset;
bool evenRow = true, evenBlock = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YPacked = in_y.read(i * width + j);
xfExtractPixels<NPC, WORDWIDTH_SRC, XF_8UP>(Ybuf, YPacked, 0);
if (evenBlock) {
if (evenRow) {
UPacked = in_u.read(idx);
UStream.write(UPacked);
VPacked = in_v.read(idx++);
VStream.write(VPacked);
} else {
/* Copy of the U and V values are pushed into stream to be used for
* next row */
UPacked = UStream.read();
VPacked = VStream.read();
}
xfExtractPixels<NPC, WORDWIDTH_SRC, XF_8UP>(Ubuf, UPacked, 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, XF_8UP>(Vbuf, VPacked, 0);
offset = 0;
} else {
offset = (1 << XF_BITSHIFT(NPC)) >> 1;
}
for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) { // Y00 and Y01 have a U and V values in common
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
// Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0;
// Y01 = (Ybuf[(k<<1) + 1] > 16) ? (Ybuf[(k<<1)+1]-16) : 0;
if ((Ybuf[k << 1] > 16)) {
Y00 = (Ybuf[k << 1] - 16);
} else {
Y00 = 0;
}
if ((Ybuf[(k << 1) + 1] > 16)) {
Y01 = (Ybuf[(k << 1) + 1] - 16);
} else {
Y01 = 0;
}
U = Ubuf[k + offset] - 128;
V = Vbuf[k + offset] - 128;
V2Rtemp = V * (short int)V2R;
U2Gtemp = (short int)U2G * U;
V2Gtemp = (short int)V2G * V;
U2Btemp = U * (short int)U2B;
// R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V
// G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U
// B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U
RGB[(k << 3)] = CalculateR(Y00, V2Rtemp, V); // R0
RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k << 3) + 3] = 255; // A
RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1
RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B1
RGB[(k << 3) + 7] = 255; // A
}
PackedPixels = PackRGBAPixels<WORDWIDTH_DST>(RGB);
_rgba.write(out_idx++, PackedPixels);
evenBlock = evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
}
// KernIyuv2Nv12
template <int SRC_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_UV,
int rTC,
int cTC,
int iTC>
void KernIyuv2Nv12_ro(xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _v,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
uint16_t height,
uint16_t width) {
ap_uint<13> i, j;
XF_PTNAME(XF_8UP) U[16], V[16];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=U complete
#pragma HLS ARRAY_PARTITION variable=V complete
// clang-format on
unsigned long long int idx = 0, idx1 = 0;
XF_SNAME(WORDWIDTH_SRC) UVPacked0, UVPacked1, UPacked, VPacked;
rowloop:
for (i = 0; i<height>> 1; i++) {
/*
* Reading the plane interleaved U and V data from streams and packing them in
* pixel interleaved
* and writing out to UV stream
*/
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=rTC max=rTC
// clang-format on
columnloop:
for (j = 0; j < (width >> (1 + XF_BITSHIFT(NPC))); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=cTC max=cTC
// clang-format on
UPacked = _u.read(idx);
VPacked = _v.read(idx++);
xfExtractPixels<NPC, WORDWIDTH_SRC, XF_8UP>(U, UPacked, 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, XF_8UP>(V, VPacked, 0);
// Packing with alternative U and V values for Pixel interleaving
#define AU_CVT_STEP 16
ap_uint<4> off = (1 << XF_BITSHIFT(NPC)) >> 1;
ap_uint<4> k;
int l;
for (k = 0, l = 0; k < ((1 << XF_BITSHIFT(NPC)) >> 1); k++, l += AU_CVT_STEP) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS UNROLL
// clang-format on
UVPacked0.range(l + AU_CVT_STEP - 1, l) = (U[k]) | ((ap_uint<16>)V[k] << (8));
UVPacked1.range(l + AU_CVT_STEP - 1, l) = (U[k + off]) | ((ap_uint<16>)V[k + off] << (8));
}
_uv.write(idx1++, UVPacked0);
_uv.write(idx1++, UVPacked1);
}
}
}
// KernIyuv2Yuv4
template <int SRC_T, int ROWS, int COLS, int NPC, int WORDWIDTH, int rTC, int cTC, int iTC>
void KernIyuv2Yuv4_ro(xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _in_u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _in_v,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _u_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _v_image,
uint16_t height,
uint16_t width) {
XF_TNAME(SRC_T, NPC) arr[COLS >> XF_BITSHIFT(NPC)];
XF_TNAME(SRC_T, NPC) arr1[COLS >> XF_BITSHIFT(NPC)];
hls::stream<XF_TNAME(SRC_T, NPC)> inter_u, inter_v;
// clang-format off
#pragma HLS stream variable=&inter_u depth=COLS/2
#pragma HLS stream variable=&inter_v depth=COLS/2
// clang-format on
unsigned long long int idx = 0, idx1 = 0;
XF_PTNAME(XF_8UP) U[16], V[16];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=U complete
#pragma HLS ARRAY_PARTITION variable=V complete
// clang-format on
XF_SNAME(WORDWIDTH)
IUPacked, IVPacked, UPacked0, VPacked0, UPacked1, VPacked1;
rowloop:
for (int i = 0; i < ((height >> 2) << 1); i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=rTC max=rTC
// clang-format on
columnloop:
for (int j = 0, k = 0; j < ((width >> XF_BITSHIFT(NPC)) >> 1); j++, k += 2) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=cTC max=cTC
// clang-format on
IUPacked = _in_u.read(idx);
IVPacked = _in_v.read(idx++);
xfExtractPixels<NPC, WORDWIDTH, XF_8UP>(U, IUPacked, 0);
xfExtractPixels<NPC, WORDWIDTH, XF_8UP>(V, IVPacked, 0);
#define AU_CVT_STEP 16
int off = 1 << (2); // (1 << NPC) >> 1;
for (int k = 0, l = 0; k < (1 << (2)); k++, l += AU_CVT_STEP) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS UNROLL
// clang-format on
UPacked0.range(l + AU_CVT_STEP - 1, l) = (U[k]) | ((ap_uint<16>)U[k] << (8));
VPacked0.range(l + AU_CVT_STEP - 1, l) = (V[k]) | ((ap_uint<16>)V[k] << (8));
UPacked1.range(l + AU_CVT_STEP - 1, l) = (U[k + off]) | ((ap_uint<16>)U[k + off] << (8));
VPacked1.range(l + AU_CVT_STEP - 1, l) = (V[k + off]) | ((ap_uint<16>)V[k + off] << (8));
}
_u_image.write((((i * 2)) * (_u_image.cols >> XF_BITSHIFT(NPC))) + k, UPacked0);
_v_image.write((((i * 2)) * (_v_image.cols >> XF_BITSHIFT(NPC))) + k, VPacked0);
_u_image.write((((i * 2)) * (_u_image.cols >> XF_BITSHIFT(NPC))) + k + 1, UPacked1);
_v_image.write((((i * 2)) * (_v_image.cols >> XF_BITSHIFT(NPC))) + k + 1, VPacked1);
inter_u.write(UPacked0);
inter_v.write(VPacked0);
inter_u.write(UPacked1);
inter_v.write(VPacked1);
}
for (int j = 0; j < (_u_image.cols >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
// clang-format on
_u_image.write((((i * 2) + 1) * (_u_image.cols >> XF_BITSHIFT(NPC))) + j, inter_u.read());
_v_image.write((((i * 2) + 1) * (_u_image.cols >> XF_BITSHIFT(NPC))) + j, inter_v.read());
}
}
}
// KernNv122Iyuv
template <int SRC_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int TC,
int iTC>
void KernNv122Iyuv_ro(xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _v,
uint16_t height,
uint16_t width) {
XF_PTNAME(XF_8UP) UV0[16], UV1[16];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=UV0 complete
#pragma HLS ARRAY_PARTITION variable=UV1 complete
// clang-format on
unsigned long long int idx = 0, idx1 = 0;
XF_SNAME(WORDWIDTH_DST) UPacked, VPacked;
XF_SNAME(WORDWIDTH_SRC) UVPacked0, UVPacked1;
ap_uint<13> i, j;
rowloop:
for (i = 0; i < (height >> 1); i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (j = 0; j < ((width >> XF_BITSHIFT(NPC)) >> 1); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
UVPacked0 = _uv.read(idx++);
UVPacked1 = _uv.read(idx++);
xfExtractPixels<NPC, WORDWIDTH_SRC, XF_8UP>(UV0, UVPacked0, 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, XF_8UP>(UV1, UVPacked1, 0);
// Packing the U and V by picking even indeces for U and odd indeces for V
#define AU_CVT_STEP 16
int sft = 1 << (XF_BITSHIFT(NPC) + 2);
int l;
ap_uint<9> k;
for (int k = 0, l = 0; k < (1 << (XF_BITSHIFT(NPC))); k += 4, l += AU_CVT_STEP) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS UNROLL
// clang-format on
VPacked.range(l + AU_CVT_STEP - 1, l) = (UV0[k + 1]) | ((ap_uint<16>)UV0[k + 3] << (8));
UPacked.range(l + AU_CVT_STEP - 1, l) = (UV0[k]) | ((ap_uint<16>)UV0[k + 2] << (8));
VPacked.range(l + sft + AU_CVT_STEP - 1, l + sft) = (UV1[k + 1]) | ((ap_uint<16>)UV1[k + 3] << (8));
UPacked.range(l + sft + AU_CVT_STEP - 1, l + sft) = (UV1[k]) | ((ap_uint<16>)UV1[k + 2] << (8));
}
_u.write(idx1, UPacked);
_v.write(idx1++, VPacked);
}
}
}
// KernNv122Rgba
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int PLANES,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST,
int TC,
int iTC>
void KernNv122Rgba_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& in_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& in_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& rgba,
uint16_t height,
uint16_t width) {
// width=width>>NPC;
XF_PTNAME(XF_8UP) RGB[64], Ybuf[16], UVbuf[16];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=Ybuf complete
#pragma HLS ARRAY_PARTITION variable=UVbuf complete
// clang-format on
hls::stream<XF_SNAME(WORDWIDTH_UV)> UVStream;
// clang-format off
#pragma HLS STREAM variable=&UVStream depth=COLS
// clang-format on
XF_SNAME(WORDWIDTH_Y) YPacked;
XF_SNAME(WORDWIDTH_UV) UVPacked;
XF_SNAME(WORDWIDTH_DST) PackedPixels;
uint8_t Y00, Y01;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
unsigned long long int uv_idx = 0, out_idx = 0;
int8_t U, V;
bool evenRow = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YPacked = in_y.read(i * width + j);
xfExtractPixels<NPC, WORDWIDTH_Y, XF_8UP>(Ybuf, YPacked, 0);
if (evenRow) {
UVPacked = in_uv.read(uv_idx++);
UVStream.write(UVPacked);
} else // Keep a copy of UV row data in stream to use for oddrow
UVPacked = UVStream.read();
xfExtractPixels<NPC, WORDWIDTH_UV, XF_8UP>(UVbuf, UVPacked, 0);
for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
// Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0;
// Y01 = (Ybuf[(k<<1)+1] > 16) ? (Ybuf[(k<<1)+1] - 16) : 0;
if ((Ybuf[k << 1] > 16)) {
Y00 = (Ybuf[k << 1] - 16);
} else {
Y00 = 0;
}
if ((Ybuf[(k << 1) + 1] > 16)) {
Y01 = (Ybuf[(k << 1) + 1] - 16);
} else {
Y01 = 0;
}
U = UVbuf[k << 1] - 128;
V = UVbuf[(k << 1) + 1] - 128;
V2Rtemp = V * (short int)V2R;
U2Gtemp = (short int)U2G * U;
V2Gtemp = (short int)V2G * V;
U2Btemp = U * (short int)U2B;
// R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V
// G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U
// B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U
if (PLANES == 4) {
RGB[(k << 3) + 0] = CalculateR(Y00, V2Rtemp, V); // R0
RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k << 3) + 3] = 255; // A
RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1
RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B0
RGB[(k << 3) + 7] = 255; // A
} else {
RGB[(k * 6) + 0] = CalculateR(Y00, V2Rtemp, V); // R0
RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k * 6) + 2] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k * 6) + 3] = CalculateR(Y01, V2Rtemp, V); // R1
RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k * 6) + 5] = CalculateB(Y01, U2Btemp, U); // B0
}
}
PackedPixels = PackRGBAPixels<WORDWIDTH_DST>(RGB);
rgba.write(out_idx++, PackedPixels);
}
evenRow = evenRow ? false : true;
}
// if(height & 1)
// {
// for(int i = 0; i < (width>>NPC); i++)
// {
//#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// UVStream.read();
// }
// }
}
// KernNv122Rgba
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int PLANES,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST,
int TC,
int iTC>
void KernNv122bgr_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& in_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& in_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& rgba,
uint16_t height,
uint16_t width) {
// width=width>>NPC;
XF_PTNAME(XF_8UP) RGB[64], Ybuf[16], UVbuf[16];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=Ybuf complete
#pragma HLS ARRAY_PARTITION variable=UVbuf complete
// clang-format on
hls::stream<XF_SNAME(WORDWIDTH_UV)> UVStream;
// clang-format off
#pragma HLS STREAM variable=&UVStream depth=COLS
// clang-format on
XF_SNAME(WORDWIDTH_Y) YPacked;
XF_SNAME(WORDWIDTH_UV) UVPacked;
XF_SNAME(WORDWIDTH_DST) PackedPixels;
uint8_t Y00, Y01;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
unsigned long long int uv_idx = 0, out_idx = 0;
int8_t U, V;
bool evenRow = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YPacked = in_y.read(i * width + j);
xfExtractPixels<NPC, WORDWIDTH_Y, XF_8UP>(Ybuf, YPacked, 0);
if (evenRow) {
UVPacked = in_uv.read(uv_idx++);
UVStream.write(UVPacked);
} else // Keep a copy of UV row data in stream to use for oddrow
UVPacked = UVStream.read();
xfExtractPixels<NPC, WORDWIDTH_UV, XF_8UP>(UVbuf, UVPacked, 0);
for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
// Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0;
// Y01 = (Ybuf[(k<<1)+1] > 16) ? (Ybuf[(k<<1)+1] - 16) : 0;
if ((Ybuf[k << 1] > 16)) {
Y00 = (Ybuf[k << 1] - 16);
} else {
Y00 = 0;
}
if ((Ybuf[(k << 1) + 1] > 16)) {
Y01 = (Ybuf[(k << 1) + 1] - 16);
} else {
Y01 = 0;
}
U = UVbuf[k << 1] - 128;
V = UVbuf[(k << 1) + 1] - 128;
V2Rtemp = V * (short int)V2R;
U2Gtemp = (short int)U2G * U;
V2Gtemp = (short int)V2G * V;
U2Btemp = U * (short int)U2B;
// R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V
// G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U
// B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U
if (PLANES == 4) {
RGB[(k << 3) + 0] = CalculateR(Y00, V2Rtemp, V); // R0
RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k << 3) + 3] = 255; // A
RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1
RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B0
RGB[(k << 3) + 7] = 255; // A
} else {
RGB[(k * 6) + 0] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k * 6) + 2] = CalculateR(Y00, V2Rtemp, V); // R0
RGB[(k * 6) + 3] = CalculateB(Y01, U2Btemp, U); // B0
RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k * 6) + 5] = CalculateR(Y01, V2Rtemp, V); // R1
}
}
PackedPixels = PackRGBAPixels<WORDWIDTH_DST>(RGB);
rgba.write(out_idx++, PackedPixels);
}
evenRow = evenRow ? false : true;
}
// if(height & 1)
// {
// for(int i = 0; i < (width>>NPC); i++)
// {
//#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// UVStream.read();
// }
// }
}
// KernNv122Yuv4
template <int SRC_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_UV,
int WORDWIDTH_DST,
int TC,
int iTC>
void KernNv122Yuv4_ro(xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _u,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _v,
uint16_t height,
uint16_t width) {
XF_PTNAME(XF_8UP) UV[16];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=UV complete
// clang-format on
ap_uint<13> i, j;
XF_SNAME(WORDWIDTH_UV) UPacked;
XF_SNAME(WORDWIDTH_DST) VPacked, UVPacked;
XF_SNAME(WORDWIDTH_DST)
arr_UPacked[COLS >> (XF_BITSHIFT(NPC))], arr_VPacked[COLS >> (XF_BITSHIFT(NPC))];
unsigned long long int idx = 0, idx1 = 0;
rowloop:
for (i = 0; i < (height >> 1); i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
UVPacked = _uv.read(idx1++);
xfExtractPixels<NPC, WORDWIDTH_DST, XF_8UP>(UV, UVPacked, 0);
#define AU_CVT_STEP 16
for (int k = 0, l = 0; k < (1 << (XF_BITSHIFT(NPC))); k += 2, l += AU_CVT_STEP) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS UNROLL
// clang-format on
VPacked.range(l + AU_CVT_STEP - 1, l) = (UV[k + 1]) | ((ap_uint<16>)UV[k + 1] << (8));
UPacked.range(l + AU_CVT_STEP - 1, l) = (UV[k]) | ((ap_uint<16>)UV[k] << (8));
}
_u.write(((i * 2) * (_u.cols >> XF_BITSHIFT(NPC))) + j, UPacked);
_v.write(((i * 2) * (_v.cols >> XF_BITSHIFT(NPC))) + j, VPacked);
arr_UPacked[j] = UPacked;
arr_VPacked[j] = VPacked;
}
for (j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
_u.write((((i * 2) + 1) * (_u.cols >> XF_BITSHIFT(NPC))) + j, arr_UPacked[j]);
_v.write((((i * 2) + 1) * (_v.cols >> XF_BITSHIFT(NPC))) + j, arr_VPacked[j]);
}
}
}
// KernNv212Iyuv
template <int SRC_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int TC,
int iTC>
void KernNv212Iyuv_ro(xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& in_uv,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& u_out,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& v_out,
uint16_t height,
uint16_t width) {
XF_PTNAME(XF_8UP) VU0[16], VU1[16];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=VU0 complete
#pragma HLS ARRAY_PARTITION variable=VU1 complete
// clang-format on
ap_uint<13> i, j;
XF_SNAME(WORDWIDTH_DST) UPacked, VPacked;
XF_SNAME(WORDWIDTH_SRC) VUPacked0, VUPacked1;
unsigned long long int idx = 0, idx1 = 0;
int l;
ap_uint<4> k;
rowloop:
for (i = 0; i < (height >> 1); i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (j = 0; j < ((width >> XF_BITSHIFT(NPC)) >> 1);
j++) { // reading UV pixel interleaved data and writing them into
// UStream and VStream
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
VUPacked0 = in_uv.read(idx++);
VUPacked1 = in_uv.read(idx++);
xfExtractPixels<NPC, WORDWIDTH_SRC, XF_8UP>(VU0, VUPacked0, 0);
xfExtractPixels<NPC, WORDWIDTH_SRC, XF_8UP>(VU1, VUPacked1, 0);
#define AU_CVT_STEP 16
int sft = 1 << (XF_BITSHIFT(NPC) + 2);
for (k = 0, l = 0; k < (1 << (XF_BITSHIFT(NPC))); k += 4, l += AU_CVT_STEP) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS UNROLL
// clang-format on
UPacked.range(l + AU_CVT_STEP - 1, l) = (VU0[k + 1]) | ((ap_uint<16>)VU0[k + 3] << (8));
VPacked.range(l + AU_CVT_STEP - 1, l) = (VU0[k]) | ((ap_uint<16>)VU0[k + 2] << (8));
UPacked.range(l + sft + AU_CVT_STEP - 1, l + sft) = (VU1[k + 1]) | ((ap_uint<16>)VU1[k + 3] << (8));
VPacked.range(l + sft + AU_CVT_STEP - 1, l + sft) = (VU1[k]) | ((ap_uint<16>)VU1[k + 2] << (8));
}
u_out.write(idx1, UPacked);
v_out.write(idx1, VPacked);
idx1++;
}
}
/* if((height>>1)& 0x1)
{
// Writing 0's to fill the stream if the UV plane width is odd
for(int i = 0; i < ((width>>XF_BITSHIFT(NPC))>>1); i++)
{
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
u_out.write(idx1,0);
v_out.write(idx1++,0);
}
}*/
}
// template<int SRC_T,int UV_T,int DST_T,int ROWS, int COLS, int NPC, int
// NPC_UV,int PLANES,int WORDWIDTH_Y, int
// WORDWIDTH_UV, int WORDWIDTH_DST, int TC, int iTC> void
// KernNv212bgr_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC> &
// in_y,xf::cv::Mat<UV_T, ROWS/2, COLS/2, NPC_UV> & in_uv,xf::cv::Mat<DST_T,
// ROWS, COLS, NPC> & rgba,uint16_t
// height,uint16_t width)
//{
// XF_PTNAME(XF_8UP) RGB[64],Ybuf[16],UVbuf[16];
//#pragma HLS ARRAY_PARTITION variable=RGB complete
//#pragma HLS ARRAY_PARTITION variable=Ybuf complete
//#pragma HLS ARRAY_PARTITION variable=UVbuf complete
// ap_uint<13> i,j;
// unsigned long long int in_idx=0,out_idx=0;
// int k;
// hls::stream<XF_SNAME(WORDWIDTH_UV)> UVStream;
//#pragma HLS STREAM variable=&UVStream depth=COLS
// XF_SNAME(WORDWIDTH_Y) YPacked; XF_SNAME(WORDWIDTH_UV) UVPacked;
// XF_SNAME(WORDWIDTH_DST) PackedPixels;
// uint8_t Y00, Y01;
// int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
// int8_t U, V;
// bool evenRow = true;
// rowloop:
// for( i = 0; i < height; i++)
// {
//#pragma HLS LOOP_FLATTEN off
//#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// columnloop:
// for( j = 0; j < width; j++)
// {
//#pragma HLS pipeline
//#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// YPacked = in_y.read(i*width+j);
// xfExtractPixels<NPC, WORDWIDTH_Y, XF_8UP>(Ybuf, YPacked,
// 0);
// if(evenRow)
// {
// UVPacked = in_uv.read(in_idx++);
// UVStream.write(UVPacked);
// }
// else // Keep a copy of UV row data in stream to use for
// oddrow
// {
// UVPacked = UVStream.read();
// }
//
// xfExtractPixels<NPC, WORDWIDTH_UV, XF_8UP>(UVbuf,
// UVPacked,
// 0);
// for( k = 0; k < (1<<XF_BITSHIFT(NPC))>>1; k++)
// {
//#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
//#pragma HLS unroll
// //Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0;
// //Y01 = (Ybuf[(k<<1)+1] > 16) ?
//(Ybuf[(k<<1)+1]-16)
//:
// 0;
//
// if((Ybuf[k<<1] > 16))
// {
// Y00 = (Ybuf[k<<1]-16);
// }
// else
// {
// Y00 = 0;
// }
//
// if((Ybuf[(k<<1)+1] > 16))
// {
// Y01 = (Ybuf[(k<<1)+1]-16);
// }
// else
// {
// Y01 = 0;
// }
//
// V = UVbuf[k<<1] - 128;
// U = UVbuf[(k<<1)+1] - 128;
//
// V2Rtemp = V * (short int)V2R;
// U2Gtemp = (short int)U2G * U;
// V2Gtemp = (short int)V2G * V;
// U2Btemp = U * (short int)U2B;
//
// // R = 1.164*Y + 1.596*V = Y + 0.164*Y + V +
// 0.596*V
// // G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y
//-
// 0.813*V - 0.391*U
// // B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U +
// 0.018*U
//
// RGB[(k*6) + 0] =
// CalculateB(Y00,U2Btemp,U);
// RGB[(k*6) + 1] =
// CalculateG(Y00,U2Gtemp,V2Gtemp); //G0
// RGB[(k*6) + 2] =
// CalculateR(Y00,V2Rtemp,V);
// RGB[(k*6) + 3] =
// CalculateB(Y01,U2Btemp,U);
// RGB[(k*6) + 4] =
// CalculateG(Y01,U2Gtemp,V2Gtemp); //G1
// RGB[(k*6) + 5] =
// CalculateR(Y01,V2Rtemp,V);
//
// }
//
// PackedPixels = PackRGBAPixels<WORDWIDTH_DST>(RGB);
// rgba.write(out_idx++,PackedPixels);
// }
// evenRow = evenRow ? false : true;
// }
//}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int PLANES,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST,
int TC,
int iTC>
void KernNv212Rgba_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& in_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& in_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& rgba,
uint16_t height,
uint16_t width) {
XF_PTNAME(XF_8UP) RGB[64], Ybuf[16], UVbuf[16];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=Ybuf complete
#pragma HLS ARRAY_PARTITION variable=UVbuf complete
// clang-format on
ap_uint<13> i, j;
unsigned long long int in_idx = 0, out_idx = 0;
int k;
hls::stream<XF_SNAME(WORDWIDTH_UV)> UVStream;
// clang-format off
#pragma HLS STREAM variable=&UVStream depth=COLS
// clang-format on
XF_SNAME(WORDWIDTH_Y) YPacked;
XF_SNAME(WORDWIDTH_UV) UVPacked;
XF_SNAME(WORDWIDTH_DST) PackedPixels;
uint8_t Y00, Y01;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t U, V;
bool evenRow = true;
rowloop:
for (i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YPacked = in_y.read(i * width + j);
xfExtractPixels<NPC, WORDWIDTH_Y, XF_8UP>(Ybuf, YPacked, 0);
if (evenRow) {
UVPacked = in_uv.read(in_idx++);
UVStream.write(UVPacked);
} else // Keep a copy of UV row data in stream to use for oddrow
UVPacked = UVStream.read();
xfExtractPixels<NPC, WORDWIDTH_UV, XF_8UP>(UVbuf, UVPacked, 0);
for (k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
// Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0;
// Y01 = (Ybuf[(k<<1)+1] > 16) ? (Ybuf[(k<<1)+1]-16) : 0;
if ((Ybuf[k << 1] > 16)) {
Y00 = (Ybuf[k << 1] - 16);
} else {
Y00 = 0;
}
if ((Ybuf[(k << 1) + 1] > 16)) {
Y01 = (Ybuf[(k << 1) + 1] - 16);
} else {
Y01 = 0;
}
V = UVbuf[k << 1] - 128;
U = UVbuf[(k << 1) + 1] - 128;
V2Rtemp = V * (short int)V2R;
U2Gtemp = (short int)U2G * U;
V2Gtemp = (short int)V2G * V;
U2Btemp = U * (short int)U2B;
// R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V
// G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U
// B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U
if (PLANES == 4) {
RGB[(k << 3) + 0] = CalculateR(Y00, V2Rtemp, V); // R0
RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k << 3) + 3] = 255; // A
RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1
RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B0
RGB[(k << 3) + 7] = 255; // A
} else {
RGB[(k * 6) + 0] = CalculateR(Y00, V2Rtemp, V); // R0
RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k * 6) + 2] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k * 6) + 3] = CalculateR(Y01, V2Rtemp, V); // R1
RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k * 6) + 5] = CalculateB(Y01, U2Btemp, U); // B0
}
}
PackedPixels = PackRGBAPixels<WORDWIDTH_DST>(RGB);
rgba.write(out_idx++, PackedPixels);
}
evenRow = evenRow ? false : true;
}
// if(height & 1)
// {
// for( i = 0; i < (width>>XF_BITSHIFT(NPC)); i++)
// {
//#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// UVStream.read();
// }
// }
}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int PLANES,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST,
int TC,
int iTC>
void KernNv212bgr_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& in_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& in_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& rgba,
uint16_t height,
uint16_t width) {
XF_PTNAME(XF_8UP) RGB[64], Ybuf[16], UVbuf[16];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=Ybuf complete
#pragma HLS ARRAY_PARTITION variable=UVbuf complete
// clang-format on
ap_uint<13> i, j;
unsigned long long int in_idx = 0, out_idx = 0;
int k;
hls::stream<XF_SNAME(WORDWIDTH_UV)> UVStream;
// clang-format off
#pragma HLS STREAM variable=&UVStream depth=COLS
// clang-format on
XF_SNAME(WORDWIDTH_Y) YPacked;
XF_SNAME(WORDWIDTH_UV) UVPacked;
XF_SNAME(WORDWIDTH_DST) PackedPixels;
uint8_t Y00, Y01;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t U, V;
bool evenRow = true;
rowloop:
for (i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YPacked = in_y.read(i * width + j);
xfExtractPixels<NPC, WORDWIDTH_Y, XF_8UP>(Ybuf, YPacked, 0);
if (evenRow) {
UVPacked = in_uv.read(in_idx++);
UVStream.write(UVPacked);
} else // Keep a copy of UV row data in stream to use for oddrow
UVPacked = UVStream.read();
xfExtractPixels<NPC, WORDWIDTH_UV, XF_8UP>(UVbuf, UVPacked, 0);
for (k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
// Y00 = (Ybuf[k<<1] > 16) ? (Ybuf[k<<1]-16) : 0;
// Y01 = (Ybuf[(k<<1)+1] > 16) ? (Ybuf[(k<<1)+1]-16) : 0;
if ((Ybuf[k << 1] > 16)) {
Y00 = (Ybuf[k << 1] - 16);
} else {
Y00 = 0;
}
if ((Ybuf[(k << 1) + 1] > 16)) {
Y01 = (Ybuf[(k << 1) + 1] - 16);
} else {
Y01 = 0;
}
V = UVbuf[k << 1] - 128;
U = UVbuf[(k << 1) + 1] - 128;
V2Rtemp = V * (short int)V2R;
U2Gtemp = (short int)U2G * U;
V2Gtemp = (short int)V2G * V;
U2Btemp = U * (short int)U2B;
// R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V
// G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U
// B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U
// if(PLANES==4)
// {
// RGB[(k<<3) + 0] =
// CalculateR(Y00,V2Rtemp,V);
// RGB[(k<<3) + 1] =
// CalculateG(Y00,U2Gtemp,V2Gtemp); //G0
// RGB[(k<<3) + 2] =
// CalculateB(Y00,U2Btemp,U);
// RGB[(k<<3) + 3] = 255;
// RGB[(k<<3) + 4] =
// CalculateR(Y01,V2Rtemp,V);
// RGB[(k<<3) + 5] =
// CalculateG(Y01,U2Gtemp,V2Gtemp); //G1
// RGB[(k<<3) + 6] =
// CalculateB(Y01,U2Btemp,U);
// RGB[(k<<3) + 7] = 255;
// }
// else
// {
RGB[(k * 6) + 0] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k * 6) + 2] = CalculateR(Y00, V2Rtemp, V); // R0
RGB[(k * 6) + 3] = CalculateB(Y01, U2Btemp, U); // B0
RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k * 6) + 5] = CalculateR(Y01, V2Rtemp, V); // R1
// }
}
PackedPixels = PackRGBAPixels<WORDWIDTH_DST>(RGB);
rgba.write(out_idx++, PackedPixels);
}
evenRow = evenRow ? false : true;
}
// if(height & 1)
// {
// for( i = 0; i < (width>>XF_BITSHIFT(NPC)); i++)
// {
//#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// UVStream.read();
// }
// }
}
// KernNv212Yuv4
template <int SRC_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_VU,
int WORDWIDTH_DST,
int TC,
int iTC>
void KernNv212Yuv4_ro(xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _vu,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _u,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _v,
uint16_t height,
uint16_t width) {
XF_PTNAME(XF_8UP) VUbuf[16];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=VUbuf complete
// clang-format on
XF_SNAME(WORDWIDTH_DST) UPacked, VPacked;
XF_SNAME(WORDWIDTH_VU) VUPacked;
XF_SNAME(WORDWIDTH_DST)
arr_UPacked[COLS >> (XF_BITSHIFT(NPC))], arr_VPacked[COLS >> (XF_BITSHIFT(NPC))];
ap_uint<13> i, j;
ap_uint<4> k;
unsigned long long int idx = 0, idx1 = 0;
int l;
rowloop:
for (i = 0; i < (height >> 1); i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
VUPacked = _vu.read(idx1++);
xfExtractPixels<NPC, WORDWIDTH_VU, XF_8UP>(VUbuf, VUPacked, 0);
#define AU_CVT_STEP 16
for (k = 0, l = 0; k < (1 << (XF_BITSHIFT(NPC))); k += 2, l += AU_CVT_STEP) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS UNROLL
// clang-format on
UPacked.range(l + AU_CVT_STEP - 1, l) = (VUbuf[k + 1]) | ((ap_uint<16>)VUbuf[k + 1] << (8));
VPacked.range(l + AU_CVT_STEP - 1, l) = (VUbuf[k]) | ((ap_uint<16>)VUbuf[k] << (8));
}
//_u.write(idx,UPacked);
//_v.write(idx++,VPacked);
_u.write(((i * 2) * (_u.cols >> XF_BITSHIFT(NPC))) + j, UPacked);
_v.write(((i * 2) * (_v.cols >> XF_BITSHIFT(NPC))) + j, VPacked);
arr_UPacked[j] = UPacked;
arr_VPacked[j] = VPacked;
}
for (j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
_u.write((((i * 2) + 1) * (_u.cols >> XF_BITSHIFT(NPC))) + j, arr_UPacked[j]);
_v.write((((i * 2) + 1) * (_v.cols >> XF_BITSHIFT(NPC))) + j, arr_VPacked[j]);
}
}
}
// KernYuyv2Rgba
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int PLANES,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int TC,
int iTC>
void KernYuyv2Rgba_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& yuyv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& rgba,
uint16_t height,
uint16_t width) {
ap_uint8_t RGB[64];
XF_PTNAME(XF_8UP) YUVbuf[32];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=YUVbuf complete
// clang-format on
XF_SNAME(WORDWIDTH_DST) PackedPixels;
XF_SNAME(WORDWIDTH_SRC) YUVPacked;
unsigned long long int idx = 0;
uint8_t Y00, Y01;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t U, V;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YUVPacked = yuyv.read(i * width + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(YUVPacked, YUVbuf);
for (int k = 0; k < (XF_NPIXPERCYCLE(NPC) >> 1); k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
// clang-format on
// Y00 = (YUVbuf[(k<<2)] > 16) ? (YUVbuf[(k<<2)]-16) : 0;
if (YUVbuf[(k << 2)] > 16) {
Y00 = (YUVbuf[(k << 2)] - 16);
} else {
Y00 = 0;
}
U = YUVbuf[(k << 2) + 1] - 128;
// Y01 = (YUVbuf[(k<<2)+2] > 16) ? (YUVbuf[(k<<2)+2]-16) : 0;
if (YUVbuf[(k << 2) + 2] > 16) {
Y01 = YUVbuf[(k << 2) + 2] - 16;
} else {
Y01 = 0;
}
V = YUVbuf[(k << 2) + 3] - 128;
V2Rtemp = V * (short int)V2R;
U2Gtemp = (short int)U2G * U;
V2Gtemp = (short int)V2G * V;
U2Btemp = U * (short int)U2B;
if (PLANES == 4) {
RGB[(k << 3)] = CalculateR(Y00, V2Rtemp, V); // R0
RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k << 3) + 3] = 255; // A
RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1
RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B0
RGB[(k << 3) + 7] = 255; // A
} else {
RGB[(k * 6)] = CalculateR(Y00, V2Rtemp, V); // R0
RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k * 6) + 2] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k * 6) + 3] = CalculateR(Y01, V2Rtemp, V); // R1
RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k * 6) + 5] = CalculateB(Y01, U2Btemp, U); // B0
}
}
PackedPixels = PackRGBAPixels<WORDWIDTH_DST>(RGB);
rgba.write(idx++, PackedPixels);
}
}
}
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int PLANES,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int TC,
int iTC>
void KernYuyv2bgr_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& yuyv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& rgba,
uint16_t height,
uint16_t width) {
ap_uint8_t RGB[64];
XF_PTNAME(XF_8UP) YUVbuf[32];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=YUVbuf complete
// clang-format on
XF_SNAME(WORDWIDTH_DST) PackedPixels;
XF_SNAME(WORDWIDTH_SRC) YUVPacked;
unsigned long long int idx = 0;
uint8_t Y00, Y01;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t U, V;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YUVPacked = yuyv.read(i * width + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(YUVPacked, YUVbuf);
for (int k = 0; k < (XF_NPIXPERCYCLE(NPC) >> 1); k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
// clang-format on
// Y00 = (YUVbuf[(k<<2)] > 16) ? (YUVbuf[(k<<2)]-16) : 0;
if (YUVbuf[(k << 2)] > 16) {
Y00 = (YUVbuf[(k << 2)] - 16);
} else {
Y00 = 0;
}
U = YUVbuf[(k << 2) + 1] - 128;
// Y01 = (YUVbuf[(k<<2)+2] > 16) ? (YUVbuf[(k<<2)+2]-16) : 0;
if (YUVbuf[(k << 2) + 2] > 16) {
Y01 = YUVbuf[(k << 2) + 2] - 16;
} else {
Y01 = 0;
}
V = YUVbuf[(k << 2) + 3] - 128;
V2Rtemp = V * (short int)V2R;
U2Gtemp = (short int)U2G * U;
V2Gtemp = (short int)V2G * V;
U2Btemp = U * (short int)U2B;
RGB[(k * 6)] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k * 6) + 2] = CalculateR(Y00, V2Rtemp, V); // R0
RGB[(k * 6) + 3] = CalculateB(Y01, U2Btemp, U); // B0
RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k * 6) + 5] = CalculateR(Y01, V2Rtemp, V); // R1
}
PackedPixels = PackRGBAPixels<WORDWIDTH_DST>(RGB);
rgba.write(idx++, PackedPixels);
}
}
}
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int TC,
int iTC>
void KernYuyv2Nv12_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _yuyv,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& uv_plane,
uint16_t height,
uint16_t width) {
XF_PTNAME(XF_8UP) Ybuf[16], UVbuf[16], YUVbuf[32];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Ybuf complete
#pragma HLS ARRAY_PARTITION variable=UVbuf complete
#pragma HLS ARRAY_PARTITION variable=YUVbuf complete
// clang-format on
XF_SNAME(WORDWIDTH_SRC) YUVPacked;
XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked;
unsigned long long idx = 0, idx1 = 0;
bool evenRow = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YUVPacked = _yuyv.read(i * width + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(YUVPacked, YUVbuf);
for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1;
k++) { // filling the Ybuf and UVbuf in the format required for NV12
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
Ybuf[(k << 1)] = YUVbuf[(k << 2)];
Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 2];
if (evenRow) {
UVbuf[(k << 1)] = YUVbuf[(k << 2) + 1];
UVbuf[(k << 1) + 1] = YUVbuf[(k << 2) + 3];
}
}
YPacked = PackPixels<WORDWIDTH_Y>(Ybuf);
y_plane.write(idx++, YPacked);
if (evenRow) {
UVPacked = PackPixels<WORDWIDTH_UV>(UVbuf);
uv_plane.write(idx1++, UVPacked);
}
}
evenRow = evenRow ? false : true;
}
}
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int TC,
int iTC>
void KernYuyv2Nv21_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _yuyv,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& uv_plane,
uint16_t height,
uint16_t width) {
XF_PTNAME(XF_8UP) Ybuf[16], UVbuf[16], YUVbuf[32];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Ybuf complete
#pragma HLS ARRAY_PARTITION variable=UVbuf complete
#pragma HLS ARRAY_PARTITION variable=YUVbuf complete
// clang-format on
XF_SNAME(WORDWIDTH_SRC) YUVPacked;
XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked;
unsigned long long idx = 0, idx1 = 0;
bool evenRow = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YUVPacked = _yuyv.read(i * width + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(YUVPacked, YUVbuf);
for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1;
k++) { // filling the Ybuf and UVbuf in the format required for NV12
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
Ybuf[(k << 1)] = YUVbuf[(k << 2)];
Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 2];
if (evenRow) {
UVbuf[(k << 1) + 1] = YUVbuf[(k << 2) + 1];
UVbuf[(k << 1)] = YUVbuf[(k << 2) + 3];
}
}
YPacked = PackPixels<WORDWIDTH_Y>(Ybuf);
y_plane.write(idx++, YPacked);
if (evenRow) {
UVPacked = PackPixels<WORDWIDTH_UV>(UVbuf);
uv_plane.write(idx1++, UVPacked);
}
}
evenRow = evenRow ? false : true;
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void KernYuyv2Iyuv_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _yuyv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _u,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _v,
uint16_t height,
uint16_t width) {
uint16_t i, j, k, l;
ap_uint8_t Ybuf[16], Ubuf[16], Vbuf[16], YUVbuf[32];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Ybuf complete
#pragma HLS ARRAY_PARTITION variable=Ubuf complete
#pragma HLS ARRAY_PARTITION variable=Vbuf complete
#pragma HLS ARRAY_PARTITION variable=YUVbuf complete
// clang-format on
unsigned long long int idx = 0, idx1 = 0;
XF_SNAME(WORDWIDTH_SRC) YUVPacked;
XF_SNAME(WORDWIDTH_DST) YPacked0, UPacked, VPacked;
uint8_t offset;
bool evenRow = true, evenBlock = true;
offset = (1 << XF_BITSHIFT(NPC)) >> 1;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YUVPacked = _yuyv.read(i * width + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(YUVPacked, YUVbuf);
for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
Ybuf[(k << 1)] = YUVbuf[(k << 2)];
Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 2];
if (evenRow) {
if (evenBlock) {
Ubuf[k] = YUVbuf[(k << 2) + 1];
Vbuf[k] = YUVbuf[(k << 2) + 3];
} else {
Ubuf[k + offset] = YUVbuf[(k << 2) + 1];
Vbuf[k + offset] = YUVbuf[(k << 2) + 3];
}
}
}
YPacked0 = PackPixels<WORDWIDTH_DST>(Ybuf);
_y.write(idx++, YPacked0);
if (evenRow & !evenBlock) {
UPacked = PackPixels<WORDWIDTH_DST>(Ubuf);
VPacked = PackPixels<WORDWIDTH_DST>(Vbuf);
_u.write(idx1, UPacked);
_v.write(idx1++, VPacked);
}
evenBlock = evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
}
// KernUyvy2Iyuv
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void KernUyvy2Iyuv_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _uyvy,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& u_plane,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& v_plane,
uint16_t height,
uint16_t width) {
ap_uint8_t Ybuf[16], Ubuf[16], Vbuf[16], YUVbuf[32];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Ybuf complete
#pragma HLS ARRAY_PARTITION variable=Ubuf complete
#pragma HLS ARRAY_PARTITION variable=Vbuf complete
#pragma HLS ARRAY_PARTITION variable=YUVbuf complete
// clang-format on
XF_SNAME(WORDWIDTH_SRC) YUVPacked;
XF_SNAME(WORDWIDTH_DST) YPacked0, UPacked, VPacked;
uint8_t offset;
unsigned long long int idx = 0, idx1 = 0;
bool evenRow = true, evenBlock = true;
offset = (1 << XF_BITSHIFT(NPC)) >> 1;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YUVPacked = _uyvy.read(i * width + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(YUVPacked, YUVbuf);
for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
Ybuf[(k << 1)] = YUVbuf[(k << 2) + 1];
Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 3];
if (evenRow) {
if (evenBlock) {
Ubuf[k] = YUVbuf[(k << 2)];
Vbuf[k] = YUVbuf[(k << 2) + 2];
} else {
Ubuf[k + offset] = YUVbuf[(k << 2)];
Vbuf[k + offset] = YUVbuf[(k << 2) + 2];
}
}
}
YPacked0 = PackPixels<WORDWIDTH_DST>(Ybuf);
y_plane.write(idx1++, YPacked0);
if (evenRow & !evenBlock) {
UPacked = PackPixels<WORDWIDTH_DST>(Ubuf);
VPacked = PackPixels<WORDWIDTH_DST>(Vbuf);
u_plane.write(idx, UPacked);
v_plane.write(idx++, VPacked);
}
evenBlock = evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
}
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int TC,
int iTC>
void KernUyvy2Nv12_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _uyvy,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& uv_plane,
uint16_t height,
uint16_t width) {
ap_uint8_t Ybuf[16], UVbuf[16], YUVbuf[32];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Ybuf complete
#pragma HLS ARRAY_PARTITION variable=UVbuf complete
#pragma HLS ARRAY_PARTITION variable=YUVbuf complete
// clang-format on
XF_SNAME(WORDWIDTH_SRC) YUVPacked;
XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked;
unsigned long long int idx = 0, idx1 = 0;
bool evenRow = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YUVPacked = _uyvy.read(i * width + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(YUVPacked, YUVbuf);
// filling the Ybuf and UVbuf in the format required for NV12
for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
Ybuf[(k << 1)] = YUVbuf[(k << 2) + 1];
Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 3];
if (evenRow) {
UVbuf[(k << 1)] = YUVbuf[(k << 2)];
UVbuf[(k << 1) + 1] = YUVbuf[(k << 2) + 2];
}
}
YPacked = PackPixels<WORDWIDTH_Y>(Ybuf);
y_plane.write(idx++, YPacked);
if (evenRow) {
UVPacked = PackPixels<WORDWIDTH_Y>(UVbuf);
uv_plane.write(idx1++, UVPacked);
}
}
evenRow = evenRow ? false : true;
}
}
// KernUyvy2Nv21
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int TC,
int iTC>
void KernUyvy2Nv21_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _uyvy,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& uv_plane,
uint16_t height,
uint16_t width) {
ap_uint8_t Ybuf[16], UVbuf[16], YUVbuf[32];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Ybuf complete
#pragma HLS ARRAY_PARTITION variable=UVbuf complete
#pragma HLS ARRAY_PARTITION variable=YUVbuf complete
// clang-format on
XF_SNAME(WORDWIDTH_SRC) YUVPacked;
XF_SNAME(WORDWIDTH_Y) YPacked, UVPacked;
unsigned long long int idx = 0, idx1 = 0;
bool evenRow = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
YUVPacked = _uyvy.read(i * width + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(YUVPacked, YUVbuf);
// filling the Ybuf and UVbuf in the format required for NV12
for (int k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
Ybuf[(k << 1)] = YUVbuf[(k << 2) + 1];
Ybuf[(k << 1) + 1] = YUVbuf[(k << 2) + 3];
if (evenRow) {
UVbuf[(k << 1)] = YUVbuf[(k << 2) + 2];
UVbuf[(k << 1) + 1] = YUVbuf[(k << 2)];
}
}
YPacked = PackPixels<WORDWIDTH_Y>(Ybuf);
y_plane.write(idx++, YPacked);
if (evenRow) {
UVPacked = PackPixels<WORDWIDTH_Y>(UVbuf);
uv_plane.write(idx1++, UVPacked);
}
}
evenRow = evenRow ? false : true;
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void KernUyvy2Rgb_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& uyvy,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& rgba,
uint16_t height,
uint16_t width) {
uint16_t i, j, k;
XF_PTNAME(XF_8UP) RGB[64], YUVbuf[32];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=YUVbuf complete
// clang-format on
XF_SNAME(WORDWIDTH_DST) PackedPixels;
XF_SNAME(WORDWIDTH_SRC) YUVPacked;
uint8_t Y00, Y01;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t U, V;
unsigned long long int idx = 0, out_idx = 0;
rowloop:
for (i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
#pragma HLS LOOP_FLATTEN off
// clang-format on
columnloop:
for (j = 0; j < width; j++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
YUVPacked = uyvy.read(idx++);
ExtractUYVYPixels<WORDWIDTH_SRC>(YUVPacked, YUVbuf);
for (k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
U = YUVbuf[(k << 2)] - 128;
// Y00 = (YUVbuf[(k<<2) + 1] > 16) ? (YUVbuf[(k<<2) + 1] - 16):0;
if (YUVbuf[(k << 2) + 1] > 16) {
Y00 = (YUVbuf[(k << 2) + 1] - 16);
} else {
Y00 = 0;
}
V = YUVbuf[(k << 2) + 2] - 128;
// Y01 = (YUVbuf[(k<<2) + 3] > 16) ? (YUVbuf[(k<<2) + 3] - 16):0;
if ((YUVbuf[(k << 2) + 3] > 16)) {
Y01 = (YUVbuf[(k << 2) + 3] - 16);
} else {
Y01 = 0;
}
V2Rtemp = V * (short int)V2R;
U2Gtemp = (short int)U2G * U;
V2Gtemp = (short int)V2G * V;
U2Btemp = U * (short int)U2B;
RGB[(k * 6)] = CalculateR(Y00, V2Rtemp, V); // G0
RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k * 6) + 2] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k * 6) + 3] = CalculateR(Y01, V2Rtemp, V); // R1
RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k * 6) + 5] = CalculateB(Y01, U2Btemp, U); // B0
}
PackedPixels = PackRGBAPixels<WORDWIDTH_DST>(RGB);
rgba.write(out_idx++, PackedPixels);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void KernUyvy2bgr_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& uyvy,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& rgba,
uint16_t height,
uint16_t width) {
uint16_t i, j, k;
XF_PTNAME(XF_8UP) RGB[64], YUVbuf[32];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=YUVbuf complete
// clang-format on
XF_SNAME(WORDWIDTH_DST) PackedPixels;
XF_SNAME(WORDWIDTH_SRC) YUVPacked;
uint8_t Y00, Y01;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t U, V;
unsigned long long int idx = 0, out_idx = 0;
rowloop:
for (i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
#pragma HLS LOOP_FLATTEN off
// clang-format on
columnloop:
for (j = 0; j < width; j++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
YUVPacked = uyvy.read(idx++);
ExtractUYVYPixels<WORDWIDTH_SRC>(YUVPacked, YUVbuf);
for (k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
U = YUVbuf[(k << 2)] - 128;
// Y00 = (YUVbuf[(k<<2) + 1] > 16) ? (YUVbuf[(k<<2) + 1] - 16):0;
if (YUVbuf[(k << 2) + 1] > 16) {
Y00 = (YUVbuf[(k << 2) + 1] - 16);
} else {
Y00 = 0;
}
V = YUVbuf[(k << 2) + 2] - 128;
// Y01 = (YUVbuf[(k<<2) + 3] > 16) ? (YUVbuf[(k<<2) + 3] - 16):0;
if ((YUVbuf[(k << 2) + 3] > 16)) {
Y01 = (YUVbuf[(k << 2) + 3] - 16);
} else {
Y01 = 0;
}
V2Rtemp = V * (short int)V2R;
U2Gtemp = (short int)U2G * U;
V2Gtemp = (short int)V2G * V;
U2Btemp = U * (short int)U2B;
RGB[(k * 6)] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k * 6) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k * 6) + 2] = CalculateR(Y00, V2Rtemp, V); // G0
RGB[(k * 6) + 3] = CalculateB(Y01, U2Btemp, U); // B0
RGB[(k * 6) + 4] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k * 6) + 5] = CalculateR(Y01, V2Rtemp, V); // R1
}
PackedPixels = PackRGBAPixels<WORDWIDTH_DST>(RGB);
rgba.write(out_idx++, PackedPixels);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void KernUyvy2Rgba_ro(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& uyvy,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& rgba,
uint16_t height,
uint16_t width) {
uint16_t i, j, k;
XF_PTNAME(XF_8UP) RGB[64], YUVbuf[32];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=YUVbuf complete
// clang-format on
XF_SNAME(WORDWIDTH_DST) PackedPixels;
XF_SNAME(WORDWIDTH_SRC) YUVPacked;
uint8_t Y00, Y01;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t U, V;
unsigned long long int idx = 0, out_idx = 0;
rowloop:
for (i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
#pragma HLS LOOP_FLATTEN off
// clang-format on
columnloop:
for (j = 0; j < width; j++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
YUVPacked = uyvy.read(idx++);
ExtractUYVYPixels<WORDWIDTH_SRC>(YUVPacked, YUVbuf);
for (k = 0; k<(1 << XF_BITSHIFT(NPC))>> 1; k++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
U = YUVbuf[(k << 2)] - 128;
// Y00 = (YUVbuf[(k<<2) + 1] > 16) ? (YUVbuf[(k<<2) + 1] - 16):0;
if (YUVbuf[(k << 2) + 1] > 16) {
Y00 = (YUVbuf[(k << 2) + 1] - 16);
} else {
Y00 = 0;
}
V = YUVbuf[(k << 2) + 2] - 128;
// Y01 = (YUVbuf[(k<<2) + 3] > 16) ? (YUVbuf[(k<<2) + 3] - 16):0;
if ((YUVbuf[(k << 2) + 3] > 16)) {
Y01 = (YUVbuf[(k << 2) + 3] - 16);
} else {
Y01 = 0;
}
V2Rtemp = V * (short int)V2R;
U2Gtemp = (short int)U2G * U;
V2Gtemp = (short int)V2G * V;
U2Btemp = U * (short int)U2B;
RGB[(k << 3)] = CalculateR(Y00, V2Rtemp, V); // G0
RGB[(k << 3) + 1] = CalculateG(Y00, U2Gtemp, V2Gtemp); // G0
RGB[(k << 3) + 2] = CalculateB(Y00, U2Btemp, U); // B0
RGB[(k << 3) + 3] = 255;
RGB[(k << 3) + 4] = CalculateR(Y01, V2Rtemp, V); // R1
RGB[(k << 3) + 5] = CalculateG(Y01, U2Gtemp, V2Gtemp); // G1
RGB[(k << 3) + 6] = CalculateB(Y01, U2Btemp, U); // B0
RGB[(k << 3) + 7] = 255;
}
PackedPixels = PackRGBAPixels<WORDWIDTH_DST>(RGB);
rgba.write(out_idx++, PackedPixels);
}
}
}
/********************************************************************************
* Color Conversion APIs
*******************************************************************************/
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFRgba2Yuv4(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _u_image,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _v_image,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernRgba2Yuv4<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST>(_src, _y_image, _u_image, _v_image,
height, width);
} else {
KernRgba2Yuv4_ro<SRC_T, DST_T, ROWS, COLS, NPC, XF_CHANNELS(SRC_T, NPC), WORDWIDTH_SRC, WORDWIDTH_DST,
(COLS >> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _u_image, _v_image,
height, width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void rgba2yuv4(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _u_image,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _v_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC4) && " RGBA image Type must be XF_8UC4");
assert((DST_T == XF_8UC1) && " Y, U, V image Type must be XF_8UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGBA image rows and cols should be less than ROWS, COLS");
assert(((_src.cols == _y_image.cols) && (_src.rows == _y_image.rows)) && "RGBA and Y plane dimensions mismatch");
assert(((_src.cols == _u_image.cols) && (_src.rows == _u_image.rows)) && "RGBA and U plane dimensions mismatch");
assert(((_src.cols == _v_image.cols) && (_src.rows == _v_image.rows)) && "RGBA and V plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xFRgba2Yuv4<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(
_src, _y_image, _u_image, _v_image, _src.rows, _src.cols);
}
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int ROWS_U,
int ROWS_V>
void KernRgb2Iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _rgba,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _u,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _v,
uint16_t height,
uint16_t width) {
ap_uint<24> rgba;
uint8_t y, u, v;
bool evenRow = true, evenBlock = true;
unsigned long long int idx = 0, idx1 = 0;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
rgba = _rgba.read(i * width + j);
uint8_t r = rgba.range(7, 0);
uint8_t g = rgba.range(15, 8);
uint8_t b = rgba.range(23, 16);
y = CalculateY(r, g, b);
if (evenRow) {
if (evenBlock) {
u = CalculateU(r, g, b);
v = CalculateV(r, g, b);
}
}
_y.write(idx++, y);
if (evenRow & !evenBlock) {
_u.write(idx1, u);
_v.write(idx1++, v);
}
evenBlock = evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
}
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int ROWS_U,
int ROWS_V>
void xFRgb2Iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _u_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _v_image,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == XF_NPPC1) {
KernRgb2Iyuv<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ROWS_U, ROWS_V>(
_src, _y_image, _u_image, _v_image, height, width);
} else {
KernRgba2Iyuv_ro<SRC_T, DST_T, ROWS, COLS, NPC, XF_CHANNELS(SRC_T, NPC), WORDWIDTH_SRC, WORDWIDTH_DST, ROWS_U,
ROWS_V, (COLS >> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _u_image,
_v_image, height, width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 0>
void rgb2iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _u_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _v_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert((DST_T == XF_8UC1) && " Y, U, V image Type must be XF_8UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS");
assert(((_src.cols == _y_image.cols) && (_src.rows == _y_image.rows)) && "RGB and Y plane dimensions mismatch");
assert(((_src.cols == _u_image.cols) && (_src.rows == (_u_image.rows << 2))) &&
"RGB and U plane dimensions mismatch");
assert(((_src.cols == _v_image.cols) && (_src.rows == (_v_image.rows << 2))) &&
"RGB and V plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xFRgb2Iyuv<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC), ROWS / 4, ROWS / 4>(
_src, _y_image, _u_image, _v_image, _src.rows, _src.cols);
}
template <int SRC_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int WORDWIDTH_SRC,
int WORDWIDTH_DST,
int ROWS_U,
int ROWS_V>
void xFRgba2Iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _u_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _v_image,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == XF_NPPC1) {
KernRgba2Iyuv<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ROWS_U, ROWS_V>(
_src, _y_image, _u_image, _v_image, height, width);
} else {
KernRgba2Iyuv_ro<SRC_T, DST_T, ROWS, COLS, NPC, XF_CHANNELS(SRC_T, NPC), WORDWIDTH_SRC, WORDWIDTH_DST, ROWS_U,
ROWS_V, (COLS >> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _u_image,
_v_image, height, width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 0>
void rgba2iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _u_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _v_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC4) && " RGBA image Type must be XF_8UC3");
assert((DST_T == XF_8UC1) && " Y, U, V image Type must be XF_8UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGBA image rows and cols should be less than ROWS, COLS");
assert(((_src.cols == _y_image.cols) && (_src.rows == _y_image.rows)) && "RGBA and Y plane dimensions mismatch");
assert(((_src.cols == _u_image.cols) && (_src.rows == (_u_image.rows << 2))) &&
"RGBA and U plane dimensions mismatch");
assert(((_src.cols == _v_image.cols) && (_src.rows == (_v_image.rows << 2))) &&
"RGBA and V plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xFRgba2Iyuv<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC), ROWS / 4, ROWS / 4>(
_src, _y_image, _u_image, _v_image, _src.rows, _src.cols);
}
// auRgba2Iyuv
//
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV>
void xFRgba2Nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernRgba2Nv21<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV>(
_src, _y, _uv, height, width);
} else {
KernRgba2Nv21_ro<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(SRC_T, NPC), WORDWIDTH_SRC, WORDWIDTH_Y,
WORDWIDTH_UV, (COLS >> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height,
width);
}
}
template <int SRC_T, int Y_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void rgba2nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC4) && " RGBA image Type must be XF_8UC3");
assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2");
assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and RGBA plane dimensions mismatch");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and VU planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the VU "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
#endif
xFRgba2Nv21<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(Y_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV)>(_src, _y, _uv, _src.rows, _src.cols);
}
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV>
void xFRgba2Nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernRgba2Nv12<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV>(
_src, _y, _uv, height, width);
} else {
KernRgba2Nv12_ro<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(SRC_T, NPC), WORDWIDTH_SRC, WORDWIDTH_Y,
WORDWIDTH_UV, (COLS >> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height,
width);
}
}
template <int SRC_T, int Y_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void rgba2nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC4) && " RGBA image Type must be XF_8UC3");
assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and RGBA plane dimensions mismatch");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
#endif
xFRgba2Nv12<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(Y_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV)>(_src, _y, _uv, _src.rows, _src.cols);
}
// auRgba2Nv21
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFIyuv2Rgba(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_v,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if ((NPC == XF_NPPC8)) {
KernIyuv2Rgba_ro<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC)),
(1 << (XF_BITSHIFT(NPC) + 1))>(src_y, src_u, src_v, _dst0, height, width);
} else {
KernIyuv2Rgba<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC))>(
src_y, src_u, src_v, _dst0, height, width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void iyuv2rgba(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_v,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y, U, V images Type must be XF_8UC1");
assert((DST_T == XF_8UC4) && " RGBA image Type must be XF_8UC4");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((src_y.cols == (_dst0.cols)) && (src_y.rows == _dst0.rows)) && "Y plane and RGBA dimensions mismatch");
assert(((src_u.cols == (_dst0.cols)) && (src_u.rows == (_dst0.rows >> 2))) &&
"U plane and RGBA dimensions mismatch");
assert(((src_v.cols == (_dst0.cols)) && (src_v.rows == (_dst0.rows >> 2))) &&
"V plane and RGBA dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xFIyuv2Rgba<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(
src_y, src_u, src_v, _dst0, src_y.rows, src_y.cols);
}
// Iyuv2Rgba
template <int SRC_T, int ROWS, int COLS, int NPC, int WORDWIDTH>
void xFIyuv2Yuv4(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_v,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _u_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _v_image,
uint16_t height,
uint16_t width) {
if (NPC == XF_NPPC8) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernIyuv2Yuv4_ro<SRC_T, ROWS, COLS, NPC, WORDWIDTH, (ROWS << 1), ((COLS >> XF_BITSHIFT(NPC)) >> 1),
((1 << XF_BITSHIFT(NPC)) >> 1)>(src_u, src_v, _u_image, _v_image, height, width);
write_y_ro<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH, (COLS >> XF_BITSHIFT(NPC))>(src_y, _y_image, height,
width);
} else if (NPC == XF_NPPC1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernIyuv2Yuv4<SRC_T, ROWS, COLS, NPC, WORDWIDTH, (ROWS >> 1), ((COLS >> XF_BITSHIFT(NPC)) >> 1)>(
src_u, src_v, _u_image, _v_image, height, width);
write_y<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH, (COLS >> XF_BITSHIFT(NPC)), ROWS>(src_y, _y_image, height,
width);
}
}
template <int SRC_T, int ROWS, int COLS, int NPC = 1>
void iyuv2yuv4(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_v,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _u_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _v_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y, U, V images Type must be XF_8UC1");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((src_y.cols == (_y_image.cols)) && (src_y.rows == _y_image.rows)) &&
"input and ouput Y planes dimensions mismatch");
assert(((src_u.cols == (_u_image.cols)) && (src_u.rows == (_u_image.rows >> 2))) &&
"input and ouput U dimensions mismatch");
assert(((src_v.cols == (_v_image.cols)) && (src_v.rows == (_v_image.rows >> 2))) &&
"input and ouput V dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xFIyuv2Yuv4<SRC_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC)>(src_y, src_u, src_v, _y_image, _u_image, _v_image,
src_y.rows, src_y.cols);
}
template <int SRC_T, int UV_T, int ROWS, int COLS, int NPC, int NPC_UV, int WORDWIDTH_SRC, int WORDWIDTH_UV>
void xFIyuv2Nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_v,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv_image,
uint16_t height,
uint16_t width) {
if (NPC == XF_NPPC8) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernIyuv2Nv12_ro<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_UV, (ROWS >> 1),
((COLS >> XF_BITSHIFT(NPC)) >> 1), ((1 << XF_BITSHIFT(NPC)) >> 1)>(src_u, src_v, _uv_image,
height, width);
write_y_ro<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH_SRC, (COLS >> XF_BITSHIFT(NPC))>(src_y, _y_image, height,
width);
} else {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernIyuv2Nv12<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_UV, (ROWS >> 1),
((COLS >> XF_BITSHIFT(NPC)) >> 1)>(src_u, src_v, _uv_image, height, width);
write_y<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH_SRC, (COLS >> XF_BITSHIFT(NPC)), (ROWS >> 1)>(src_y, _y_image,
height, width);
}
}
template <int SRC_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void iyuv2nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_v,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y, U, V images Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((src_y.cols == (_y_image.cols)) && (src_y.rows == _y_image.rows)) &&
"input and ouput Y planes dimensions mismatch");
assert(((src_y.cols == (src_u.cols)) && (src_y.rows == (src_u.rows << 2))) && "Y and U dimensions mismatch");
assert(((src_y.cols == (src_v.cols)) && (src_y.rows == (src_v.rows << 2))) && "Y and V dimensions mismatch");
assert(((src_y.cols == (_uv_image.cols << 1)) && (src_y.rows == (_uv_image.rows << 1))) &&
"input and ouput Y planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
#endif
xFIyuv2Nv12<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(UV_T, NPC_UV)>(
src_y, src_u, src_v, _y_image, _uv_image, src_y.rows, src_y.cols);
}
template <int SRC_T, int UV_T, int ROWS, int COLS, int NPC, int NPC_UV, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFNv122Iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _u_image,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _v_image,
uint16_t height,
uint16_t width) {
if (NPC == XF_NPPC8) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernNv122Iyuv_ro<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_DST,
((COLS >> XF_BITSHIFT(NPC)) >> 1), ((1 << XF_BITSHIFT(NPC)) >> 2)>(src_uv, _u_image, _v_image,
height, width);
write_y_ro<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC))>(src_y, _y_image, height,
width);
} else {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernNv122Iyuv<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_DST,
((COLS >> XF_BITSHIFT(NPC)) >> 1)>(src_uv, _u_image, _v_image, height, width);
write_y<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC)), (ROWS >> XF_BITSHIFT(NPC))>(
src_y, _y_image, height, width);
}
}
// Nv122Iyuv
template <int SRC_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv122iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _u_image,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _v_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y,U,V image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
"Y and UV planes dimensions mismatch");
assert(((src_y.cols == _y_image.cols) && (src_y.rows == _y_image.rows)) &&
"Input and Outut Y planes dimensions mismatch");
assert(((src_y.cols == _u_image.cols) && (src_y.rows == (_u_image.rows << 2))) &&
"U, Y planes dimensions mismatch");
assert(((src_y.cols == _v_image.cols) && (src_y.rows == (_v_image.rows << 2))) &&
"V, Y planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
#endif
xFNv122Iyuv<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(UV_T, NPC_UV), XF_WORDWIDTH(SRC_T, NPC)>(
src_y, src_uv, _y_image, _u_image, _v_image, src_y.rows, src_y.cols);
}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST>
void xFNv122Rgba(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernNv122Rgba<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_Y, WORDWIDTH_UV, WORDWIDTH_DST>(
src_y, src_uv, _dst0, height, width);
} else {
KernNv122Rgba_ro<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(DST_T, NPC), XF_WORDWIDTH(SRC_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV), XF_WORDWIDTH(DST_T, NPC), (COLS >> XF_BITSHIFT(NPC)),
((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width);
}
}
// Nv122Rgba
template <int SRC_T, int UV_T, int DST_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv122rgba(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert((DST_T == XF_8UC4) && " RGBA image Type must be XF_8UC4");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and RGBA Aplane dimensions mismatch");
assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
"Y and UV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same ");
}
#endif
xFNv122Rgba<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(UV_T, NPC_UV),
XF_WORDWIDTH(DST_T, NPC)>(src_y, src_uv, _dst0, src_y.rows, src_y.cols);
}
template <int SRC_T, int UV_T, int ROWS, int COLS, int NPC, int NPC_UV, int WORDWIDTH_UV, int WORDWIDTH_DST>
void xFNv122Yuv4(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _u_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _v_image,
uint16_t height,
uint16_t width) {
// assert(( (in_uv.cols == (u_out.cols)) && (in_uv.rows ==
//(u_out.rows>>1)))
// && "UV plane and U plane dimensions mismatch");
// assert(( (in_uv.cols == (v_out.cols)) && (in_uv.rows ==
//(v_out.rows>>1)))
// && "UV plane and V plane dimensions mismatch");
if (NPC == XF_NPPC8) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernNv122Yuv4_ro<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_UV, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC)),
((1 << (XF_BITSHIFT(NPC))) >> 1)>(src_uv, _u_image, _v_image, height, width);
write_y_ro<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC))>(src_y, _y_image, height,
width);
} else {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernNv122Yuv4<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_UV, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC))>(
src_uv, _u_image, _v_image, height, width);
write_y<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC)), ROWS>(src_y, _y_image, height,
width);
}
}
// auNv122Yuv4
template <int SRC_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv122yuv4(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _u_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _v_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
xFNv122Yuv4<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(UV_T, NPC_UV), XF_WORDWIDTH(SRC_T, NPC)>(
src_y, src_uv, _y_image, _u_image, _v_image, src_y.rows, src_y.cols);
}
template <int SRC_T, int UV_T, int ROWS, int COLS, int NPC, int NPC_UV, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFNv212Iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _u_image,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _v_image,
uint16_t height,
uint16_t width) {
if (NPC == XF_NPPC8) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernNv212Iyuv_ro<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_DST,
((COLS >> XF_BITSHIFT(NPC)) >> 1), ((1 << XF_BITSHIFT(NPC)) >> 2)>(src_uv, _u_image, _v_image,
height, width);
write_y_ro<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC))>(src_y, _y_image, height,
width);
} else {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernNv212Iyuv<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_DST,
((COLS >> XF_BITSHIFT(NPC)) >> 1)>(src_uv, _u_image, _v_image, height, width);
write_y<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC)), ROWS>(src_y, _y_image, height,
width);
}
}
// Nv212Iyuv
template <int SRC_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv212iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _u_image,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _v_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y,U,V image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
"Y and VU planes dimensions mismatch");
assert(((src_y.cols == _y_image.cols) && (src_y.rows == _y_image.rows)) &&
"Input and Outut Y planes dimensions mismatch");
assert(((src_y.cols == _u_image.cols) && (src_y.rows == (_u_image.rows << 2))) &&
"U, Y planes dimensions mismatch");
assert(((src_y.cols == _v_image.cols) && (src_y.rows == (_v_image.rows << 2))) &&
"V, Y planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the VU "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
#endif
xFNv212Iyuv<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(UV_T, NPC_UV), XF_WORDWIDTH(SRC_T, NPC)>(
src_y, src_uv, _y_image, _u_image, _v_image, src_y.rows, src_y.cols);
}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST>
void xFNv212Rgba(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernNv212Rgba<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_Y, WORDWIDTH_UV, WORDWIDTH_DST>(
src_y, src_uv, _dst0, height, width);
} else {
KernNv212Rgba_ro<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(DST_T, NPC), XF_WORDWIDTH(SRC_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV), XF_WORDWIDTH(DST_T, NPC), (COLS >> XF_BITSHIFT(NPC)),
((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width);
}
}
// Nv212Rgba
template <int SRC_T, int UV_T, int DST_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv212rgba(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2");
assert((DST_T == XF_8UC4) && " RGBA image Type must be XF_8UC4");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and RGBA Aplane dimensions mismatch");
assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
"Y and VU planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the VU "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
#endif
xFNv212Rgba<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(DST_T, NPC), XF_WORDWIDTH(UV_T, NPC_UV),
XF_WORDWIDTH(DST_T, NPC)>(src_y, src_uv, _dst0, src_y.rows, src_y.cols);
}
template <int SRC_T, int UV_T, int ROWS, int COLS, int NPC, int NPC_UV, int WORDWIDTH_UV, int WORDWIDTH_DST>
void xFNv212Yuv4(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _u_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _v_image,
uint16_t height,
uint16_t width) {
if (NPC == XF_NPPC8) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernNv212Yuv4_ro<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_UV, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC)),
((1 << XF_BITSHIFT(NPC)) >> 1)>(src_uv, _u_image, _v_image, height, width);
write_y_ro<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC))>(src_y, _y_image, height,
width);
} else {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernNv212Yuv4<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_UV, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC))>(
src_uv, _u_image, _v_image, height, width);
write_y<SRC_T, SRC_T, ROWS, COLS, NPC, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC)), ROWS>(src_y, _y_image, height,
width);
}
}
// auNv212Yuv4
template <int SRC_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv212yuv4(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _u_image,
xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _v_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && "Y plane Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && "UV plane Type must be XF_8UC2");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((_y_image.cols == src_y.cols) && (_y_image.rows == src_y.rows)) && "Y planes dimensions mismatch");
assert(((_u_image.cols == src_y.cols) && (_u_image.rows == src_y.rows)) && "Y and U planes dimensions mismatch");
assert(((_v_image.cols == src_y.cols) && (_v_image.rows == src_y.rows)) && "Y and V planes dimensions mismatch");
assert((((src_uv.cols << 1) == src_y.cols) && ((src_uv.rows << 1) == src_y.rows)) &&
"Y and V planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same ");
}
#endif
xFNv212Yuv4<SRC_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(UV_T, NPC_UV), XF_WORDWIDTH(SRC_T, NPC)>(
src_y, src_uv, _y_image, _u_image, _v_image, src_y.rows, src_y.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFUyvy2Iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& uyvy,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& u_plane,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& v_plane,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == XF_NPPC8) {
KernUyvy2Iyuv_ro<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC)),
((1 << XF_BITSHIFT(NPC)) >> 1)>(uyvy, y_plane, u_plane, v_plane, height, width);
} else {
KernUyvy2Iyuv<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC))>(
uyvy, y_plane, u_plane, v_plane, height, width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void uyvy2iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _u_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _v_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1");
assert((DST_T == XF_8UC1) && " Y, U, V planes Type must be XF_8UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " UYVY image rows and cols should be less than ROWS, COLS");
assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and UYVY planes dimensions mismatch");
assert(((_u_image.cols == _src.cols) && ((_u_image.rows << 2) == _src.rows)) &&
"U and UYVY planes dimensions mismatch");
assert(((_v_image.cols == _src.cols) && ((_v_image.rows << 2) == _src.rows)) &&
"U and UYVY planes dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1, 8 pixel parallelism is supported ");
#endif
xFUyvy2Iyuv<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(
_src, _y_image, _u_image, _v_image, _src.rows, _src.cols);
}
// Uyvy2Nv12
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV>
void xFUyvy2Nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& uyvy,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& uv_plane,
uint16_t height,
uint16_t width) {
/* assert(( (uyvy.cols == (y_plane.cols<<1)) && (uyvy.rows ==
y_plane.rows))
&& "UYVY and Y plane dimensions mismatch");
assert(( (uyvy.cols == (uv_plane.cols<<1)) && (uyvy.rows ==
(uv_plane.rows<<1)))
&& "UYVY and UV plane dimensions mismatch");*/
width = width >> XF_BITSHIFT(NPC);
if (NPC == XF_NPPC1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernUyvy2Nv12<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV,
((COLS >> 1) >> XF_BITSHIFT(NPC))>(uyvy, y_plane, uv_plane, height, width);
} else {
KernUyvy2Nv12_ro<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV,
((COLS >> 1) >> XF_BITSHIFT(NPC)), ((1 << NPC) >> 1)>(uyvy, y_plane, uv_plane, height, width);
}
}
template <int SRC_T, int Y_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void uyvy2nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1");
assert((Y_T == XF_8UC1) && " Y plane Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert(((_y_image.rows <= ROWS) && (_y_image.cols <= COLS)) &&
" Y image rows and cols should be less than ROWS, COLS");
assert(((_y_image.cols == (_uv_image.cols << 1)) && (_y_image.rows == (_uv_image.rows << 1))) &&
"Y and UV planes dimensions mismatch");
assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and UYVY planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same ");
}
#endif
xFUyvy2Nv12<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(Y_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV)>(_src, _y_image, _uv_image, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFUyvy2Rgba(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernUyvy2Rgba<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC))>(
_src, _dst, height, width);
} else {
KernUyvy2Rgba_ro<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC)),
(1 << XF_BITSHIFT(NPC) >> 1)>(_src, _dst, height, width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void uyvy2rgba(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1");
assert((DST_T == XF_8UC4) && " RGBA plane Type must be XF_8UC4");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGBA and UYVY planes dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xFUyvy2Rgba<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(
_src, _dst, _src.rows, _src.cols);
}
// Yuyv2Iyuv
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFYuyv2Iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _u_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _v_image,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == XF_NPPC8) {
KernYuyv2Iyuv_ro<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC)),
((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _u_image, _v_image, height, width);
} else {
KernYuyv2Iyuv<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC))>(
_src, _y_image, _u_image, _v_image, height, width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void yuyv2iyuv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _u_image,
xf::cv::Mat<DST_T, ROWS / 4, COLS, NPC>& _v_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " YUYV plane Type must be XF_16UC1");
assert((DST_T == XF_8UC1) && " Y, U, V planes Type must be XF_8UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " YUYV image rows and cols should be less than ROWS, COLS");
assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and UYVY planes dimensions mismatch");
assert(((_u_image.cols == _src.cols) && ((_u_image.rows << 2) == _src.rows)) &&
"U and UYVY planes dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1, 8 pixel parallelism is supported ");
#endif
xFYuyv2Iyuv<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(
_src, _y_image, _u_image, _v_image, _src.rows, _src.cols);
}
// Yuyv2Nv12
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV>
void xFYuyv2Nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv_image,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == XF_NPPC1) {
KernYuyv2Nv12<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV,
((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _y_image, _uv_image, height, width);
} else {
KernYuyv2Nv12_ro<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV,
((COLS >> 1) >> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _uv_image,
height, width);
}
}
template <int SRC_T, int Y_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void yuyv2nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " YUYV plane Type must be XF_16UC1");
assert((Y_T == XF_8UC1) && " Y plane Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert(((_y_image.rows <= ROWS) && (_y_image.cols <= COLS)) &&
" Y image rows and cols should be less than ROWS, COLS");
assert(((_y_image.cols == (_uv_image.cols << 1)) && (_y_image.rows == (_uv_image.rows << 1))) &&
"Y and UV planes dimensions mismatch");
assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and YUYV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same ");
}
#endif
xFYuyv2Nv12<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(Y_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV)>(_src, _y_image, _uv_image, _src.rows, _src.cols);
}
// Yuyv2Rgba
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFYuyv2Rgba(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernYuyv2Rgba<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC))>(
_src, _dst, height, width);
} else {
KernYuyv2Rgba_ro<SRC_T, DST_T, ROWS, COLS, NPC, XF_CHANNELS(DST_T, NPC), WORDWIDTH_SRC, WORDWIDTH_DST,
((COLS >> 1) >> XF_BITSHIFT(NPC)), ((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _dst, height,
width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void yuyv2rgba(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " YUYV plane Type must be XF_16UC1");
assert((DST_T == XF_8UC4) && " RGBA plane Type must be XF_8UC4");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " YUYV image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "YUYV and RGBA planes dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xFYuyv2Rgba<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(
_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV>
void xFRgb2Nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernRgba2Nv12<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV>(
_src, _y, _uv, height, width);
} else {
KernRgba2Nv12_ro<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(SRC_T, NPC), WORDWIDTH_SRC, WORDWIDTH_Y,
WORDWIDTH_UV, (COLS >> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height,
width);
}
}
template <int SRC_T, int Y_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void rgb2nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and RGB plane dimensions mismatch");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
xFRgb2Nv12<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(Y_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV)>(_src, _y, _uv, _src.rows, _src.cols);
}
// template<int SRC_T, int Y_T, int UV_T,int ROWS, int COLS, int NPC, int
// WORDWIDTH_SRC, int WORDWIDTH_Y, int
// WORDWIDTH_VU> void KernRgb2Nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC> & _rgba,
// xf::cv::Mat<Y_T, ROWS, COLS, NPC> & _y,
// xf::cv::Mat<UV_T, ROWS/2, COLS/2, NPC> & _vu,uint16_t height,uint16_t width)
//{
// width=width>>XF_BITSHIFT(NPC);
// XF_SNAME(XF_32UW) rgba;
// unsigned long long int idx=0,idx1=0;
// uint8_t y, u, v;
// bool evenRow = true, evenBlock = true;
//
// RowLoop:
// for(int i = 0; i < height; i++)
// {
//#pragma HLS LOOP_FLATTEN off
//#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// ColLoop:
// for(int j = 0; j < width; j++)
// {
//#pragma HLS pipeline
//#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// rgba = _rgba.read(i*width+j);
// uint8_t r = rgba.range(7,0);
// uint8_t g = rgba.range(15,8);
// uint8_t b = rgba.range(23,16);
//
// y = CalculateY(r, g, b);
// if(evenRow)
// {
// u = CalculateU(r, g, b);
// v = CalculateV(r, g, b);
// }
// _y.write(idx++,y);
// if(evenRow)
// {
// if((j & 0x01)==0)
// _vu.write(idx1++,v | ((uint16_t)u <<
// 8));
// }
// }
// evenRow = evenRow ? false : true;
// }
//}
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV>
void xFRgb2Nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernRgba2Nv21<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV>(
_src, _y, _uv, height, width);
} else {
KernRgba2Nv21_ro<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(SRC_T, NPC), WORDWIDTH_SRC, WORDWIDTH_Y,
WORDWIDTH_UV, (COLS >> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height,
width);
}
}
template <int SRC_T, int Y_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void rgb2nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and RGB plane dimensions mismatch");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same ");
}
#endif
xFRgb2Nv21<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(Y_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV)>(_src, _y, _uv, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void KernRgb2Yuv4(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _rgba,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _u,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _v,
uint16_t height,
uint16_t width) {
XF_SNAME(XF_32UW) rgba;
uint8_t y, u, v;
unsigned long long int idx = 0;
RowLoop:
for (int i = 0; i < height; ++i) {
// clang-format off
#pragma HLS LOOP_FLATTEN OFF
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j < width; ++j) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
#pragma HLS PIPELINE
// clang-format on
rgba = _rgba.read(i * width + j);
y = CalculateY(rgba.range(7, 0), rgba.range(15, 8), rgba.range(23, 16));
u = CalculateU(rgba.range(7, 0), rgba.range(15, 8), rgba.range(23, 16));
v = CalculateV(rgba.range(7, 0), rgba.range(15, 8), rgba.range(23, 16));
_y.write(idx, y);
_u.write(idx, u);
_v.write(idx++, v);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFRgb2Yuv4(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _u_image,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _v_image,
uint16_t height,
uint16_t width) {
// assert(( (rgba.cols == y_plane.cols) && (rgba.rows == y_plane.rows))
// && "RGBA and Y plane dimensions mismatch");
// assert(( (rgba.cols == u_plane.cols) && (rgba.rows == u_plane.rows))
// && "RGBA and U plane dimensions mismatch");
// assert(( (rgba.cols == v_plane.cols) && (rgba.rows == v_plane.rows))
// && "RGBA and V plane dimensions mismatch");
width = width >> (XF_BITSHIFT(NPC));
if (NPC == 1) {
KernRgb2Yuv4<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST>(_src, _y_image, _u_image, _v_image,
height, width);
} else {
KernRgba2Yuv4_ro<SRC_T, DST_T, ROWS, COLS, NPC, XF_CHANNELS(SRC_T, NPC), WORDWIDTH_SRC, WORDWIDTH_DST,
(COLS >> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _u_image, _v_image,
height, width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void rgb2yuv4(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _u_image,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _v_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert((DST_T == XF_8UC1) && " Y, U, V image Type must be XF_8UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS");
assert(((_src.cols == _y_image.cols) && (_src.rows == _y_image.rows)) && "RGB and Y plane dimensions mismatch");
assert(((_src.cols == _u_image.cols) && (_src.rows == _u_image.rows)) && "RGB and U plane dimensions mismatch");
assert(((_src.cols == _v_image.cols) && (_src.rows == _v_image.rows)) && "RGB and V plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xFRgb2Yuv4<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(
_src, _y_image, _u_image, _v_image, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void KernUyvy2Rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _uyvy,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _rgba,
uint16_t height,
uint16_t width) {
XF_SNAME(WORDWIDTH_DST) rgba;
XF_SNAME(WORDWIDTH_SRC) uyvy;
XF_SNAME(WORDWIDTH_SRC) uy;
XF_SNAME(WORDWIDTH_SRC) vy;
unsigned long long int idx = 0;
XF_PTNAME(XF_8UP) r, g, b;
int8_t y1, y2, u, v;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
#pragma HLS LOOP_FLATTEN off
// clang-format on
ColLoop:
for (int j = 0; j < width; j += 2) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
// uyvy = _uyvy.read();
uy = _uyvy.read(i * width + j);
vy = _uyvy.read(i * width + j + 1);
u = (uint8_t)uy.range(7, 0) - 128;
/* if(uyvy.range(15,8) > 16)
y1 = (uint8_t)uyvy.range(15,8) - 16;
else
y1 = 0;*/
y1 = (uy.range(15, 8) > 16) ? ((uint8_t)uy.range(15, 8) - 16) : 0;
v = (uint8_t)vy.range(7, 0) - 128;
/* if(uyvy.range(31,24) > 16)
y2 = ((uint8_t)uyvy.range(31,24) - 16);
else
y2 = 0;*/
y2 = (vy.range(15, 8) > 16) ? ((uint8_t)vy.range(15, 8) - 16) : 0;
V2Rtemp = v * (short int)V2R;
U2Gtemp = (short int)U2G * u;
V2Gtemp = (short int)V2G * v;
U2Btemp = u * (short int)U2B;
r = CalculateR(y1, V2Rtemp, v);
g = CalculateG(y1, U2Gtemp, V2Gtemp);
b = CalculateB(y1, U2Btemp, u);
rgba = ((ap_uint24_t)r) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)b << 16);
_rgba.write(idx, rgba);
idx++;
r = CalculateR(y2, V2Rtemp, v);
g = CalculateG(y2, U2Gtemp, V2Gtemp);
b = CalculateB(y2, U2Btemp, u);
rgba = ((ap_uint24_t)r) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)b << 16);
_rgba.write(idx, rgba);
idx++;
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFUyvy2Rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst,
uint16_t height,
uint16_t width) {
/* assert(( (uyvy.cols == (rgba.cols<<1)) && (uyvy.rows == rgba.rows))
&& "UYVY and RGBA plane dimensions mismatch");*/
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernUyvy2Rgb<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC))>(
_src, _dst, height, width);
} else {
KernUyvy2Rgb_ro<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC)),
((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _dst, height, width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void uyvy2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1");
assert((DST_T == XF_8UC3) && " RGB plane Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " UYVY image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "UYVY and RGB planes dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xFUyvy2Rgb<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(_src, _dst, _src.rows,
_src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void KernYuyv2Rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _yuyv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _rgba,
uint16_t height,
uint16_t width) {
XF_SNAME(WORDWIDTH_DST) rgba;
XF_SNAME(WORDWIDTH_SRC) yu, yv;
XF_PTNAME(XF_8UP) r, g, b;
int8_t y1, y2, u, v;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
unsigned long long int idx = 0;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
#pragma HLS LOOP_FLATTEN off
// clang-format on
ColLoop:
for (int j = 0; j < width; j += 2) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
yu = _yuyv.read(i * width + j);
yv = _yuyv.read(i * width + j + 1);
u = (uint8_t)yu.range(15, 8) - 128;
y1 = (yu.range(7, 0) > 16) ? ((uint8_t)yu.range(7, 0) - 16) : 0;
v = (uint8_t)yv.range(15, 8) - 128;
y2 = (yv.range(7, 0) > 16) ? ((uint8_t)yv.range(7, 0) - 16) : 0;
V2Rtemp = v * (short int)V2R;
U2Gtemp = (short int)U2G * u;
V2Gtemp = (short int)V2G * v;
U2Btemp = u * (short int)U2B;
r = CalculateR(y1, V2Rtemp, v);
g = CalculateG(y1, U2Gtemp, V2Gtemp);
b = CalculateB(y1, U2Btemp, u);
rgba = ((ap_uint24_t)r) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)b << 16);
_rgba.write(idx++, rgba);
r = CalculateR(y2, V2Rtemp, v);
g = CalculateG(y2, U2Gtemp, V2Gtemp);
b = CalculateB(y2, U2Btemp, u);
rgba = ((ap_uint24_t)r) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)b << 16);
_rgba.write(idx++, rgba);
}
}
}
// Yuyv2Rgba
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFYuyv2Rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernYuyv2Rgb<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC))>(
_src, _dst, height, width);
} else {
KernYuyv2Rgba_ro<SRC_T, DST_T, ROWS, COLS, NPC, XF_CHANNELS(SRC_T, NPC), WORDWIDTH_SRC, WORDWIDTH_DST,
((COLS >> 1) >> XF_BITSHIFT(NPC)), ((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _dst, height,
width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void yuyv2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " YUYV plane Type must be XF_16UC1");
assert((DST_T == XF_8UC3) && " RGB plane Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " YUYV image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "YUYV and RGB planes dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xFYuyv2Rgb<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(_src, _dst, _src.rows,
_src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void KernIyuv2Rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& _v,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _rgba,
uint16_t height,
uint16_t width) {
ap_uint<13> i, j;
hls::stream<XF_SNAME(WORDWIDTH_SRC)> uStream, vStream;
// clang-format off
#pragma HLS STREAM variable=&uStream depth=TC
#pragma HLS STREAM variable=&vStream depth=TC
// clang-format on
XF_SNAME(WORDWIDTH_SRC) yPacked, uPacked, vPacked;
XF_SNAME(WORDWIDTH_DST) rgba;
unsigned long long int idx = 0, idx1 = 0;
uint8_t y1, y2;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t u, v;
bool evenRow = true, evenBlock = true;
RowLoop:
for (i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
yPacked = _y.read(i * width + j);
// dummy1 = dst1.read();
// dummy2 = dst2.read();
ap_uint<XF_BITSHIFT(NPC) + 1> k1;
if (evenBlock) {
if (evenRow) {
uPacked = _u.read(idx);
uStream.write(uPacked);
vPacked = _v.read(idx++);
vStream.write(vPacked);
} else {
/* Copy of the U and V values are pushed into stream to be used for
* next row */
uPacked = uStream.read();
vPacked = vStream.read();
}
k1 = 0;
} else {
k1 = NPC / 2;
}
ap_uint<XF_BITSHIFT(NPC) + 1> k;
bool evenPixel = true;
for (k = 0; k < NPC; k++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
y1 = (uint8_t)yPacked.range((8 * k + 7), 8 * k) > 16 ? (uint8_t)yPacked.range((8 * k + 7), 8 * k) - 16
: 0;
u = (uint8_t)uPacked.range((8 * k1 + 7), 8 * k1) - 128;
v = (uint8_t)vPacked.range((8 * k1 + 7), 8 * k1) - 128;
if (evenPixel == false) {
k1 = k1 + 1;
evenPixel = true;
} else {
evenPixel = false;
}
V2Rtemp = v * (short int)V2R;
U2Gtemp = (short int)U2G * u;
V2Gtemp = (short int)V2G * v;
U2Btemp = u * (short int)U2B;
// R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V
// G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U
// B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U
rgba.range((24 * k + 7), (24 * k)) = CalculateR(y1, V2Rtemp, v); // R
rgba.range((24 * k + 15), (24 * k + 8)) = CalculateG(y1, U2Gtemp, V2Gtemp); // G
rgba.range((24 * k + 23), (24 * k + 16)) = CalculateB(y1, U2Btemp, u); // B
}
_rgba.write(idx1++, rgba);
evenBlock = evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFIyuv2Rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_v,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
KernIyuv2Rgb<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, (COLS >> XF_BITSHIFT(NPC))>(
src_y, src_u, src_v, _dst0, height, width);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void iyuv2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_u,
xf::cv::Mat<SRC_T, ROWS / 4, COLS, NPC>& src_v,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y,U,V planes Type must be XF_8UC1");
assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and RGB plane dimensions mismatch");
assert(((src_y.cols == src_u.cols) && (src_y.rows == (src_u.rows << 2))) && "Y and U planes dimensions mismatch");
assert(((src_y.cols == src_v.cols) && (src_y.rows == (src_v.rows << 2))) && "Y and U planes dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xFIyuv2Rgb<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(
src_y, src_u, src_v, _dst0, src_y.rows, src_y.cols);
}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST>
void KernNv122bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _rgba,
uint16_t height,
uint16_t width) {
unsigned long long int idx = 0, idx1 = 0;
hls::stream<XF_SNAME(WORDWIDTH_UV)> uvStream;
// clang-format off
#pragma HLS STREAM variable=&uvStream depth=COLS
// clang-format on
XF_SNAME(WORDWIDTH_Y) yPacked;
XF_SNAME(WORDWIDTH_UV) uvPacked;
XF_SNAME(WORDWIDTH_DST) rgba;
uint8_t y1, y2;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t u, v;
bool evenRow = true, evenBlock = true;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
yPacked = _y.read(i * width + j);
if (evenRow) {
if (evenBlock) {
uvPacked = _uv.read(idx++);
uvStream.write(uvPacked);
}
} else { // Keep a copy of UV row data in stream to use for oddrow
if (evenBlock) {
uvPacked = uvStream.read();
}
}
// auExtractPixels<NPC, WORDWIDTH_SRC,
// XF_8UP>(UVbuf, UVPacked, 0);
uint8_t t = yPacked.range(7, 0);
y1 = t > 16 ? t - 16 : 0;
v = (uint8_t)uvPacked.range(15, 8) - 128;
u = (uint8_t)uvPacked.range(7, 0) - 128;
V2Rtemp = v * (short int)V2R;
U2Gtemp = (short int)U2G * u;
V2Gtemp = (short int)V2G * v;
U2Btemp = u * (short int)U2B;
// R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V
// G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U
// B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U
rgba.range(23, 16) = CalculateR(y1, V2Rtemp, v); // R
rgba.range(15, 8) = CalculateG(y1, U2Gtemp, V2Gtemp); // G
rgba.range(7, 0) = CalculateB(y1, U2Btemp, u); // B
// PackedPixels =
// PackRGBAPixels<WORDWIDTH_DST>(RGB);
_rgba.write(idx1++, rgba);
evenBlock = evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
if (height & 1) {
for (int i = 0; i < width; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
uvStream.read();
}
}
}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST>
void xFNv122bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernNv122bgr<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_Y, WORDWIDTH_UV, WORDWIDTH_DST>(
src_y, src_uv, _dst0, height, width);
} else {
KernNv122bgr_ro<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(DST_T, NPC), XF_WORDWIDTH(SRC_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV), XF_WORDWIDTH(DST_T, NPC), (COLS >> XF_BITSHIFT(NPC)),
((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width);
}
}
template <int SRC_T, int UV_T, int DST_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv122bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2");
assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and BGR plane dimensions mismatch");
assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
"Y and VU planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the VU "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
#endif
xFNv122bgr<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(UV_T, NPC_UV),
XF_WORDWIDTH(DST_T, NPC)>(src_y, src_uv, _dst0, src_y.rows, src_y.cols);
}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST>
void KernNv122Rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _rgba,
uint16_t height,
uint16_t width) {
unsigned long long int idx = 0, idx1 = 0;
hls::stream<XF_SNAME(WORDWIDTH_UV)> uvStream;
// clang-format off
#pragma HLS STREAM variable=&uvStream depth=COLS
// clang-format on
XF_SNAME(WORDWIDTH_Y) yPacked;
XF_SNAME(WORDWIDTH_UV) uvPacked;
XF_SNAME(WORDWIDTH_DST) rgba;
uint8_t y1, y2;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t u, v;
bool evenRow = true, evenBlock = true;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
yPacked = _y.read(i * width + j);
if (evenRow) {
if (evenBlock) {
uvPacked = _uv.read(idx++);
uvStream.write(uvPacked);
}
} else { // Keep a copy of UV row data in stream to use for oddrow
if (evenBlock) {
uvPacked = uvStream.read();
}
}
// auExtractPixels<NPC, WORDWIDTH_SRC,
// XF_8UP>(UVbuf, UVPacked, 0);
uint8_t t = yPacked.range(7, 0);
y1 = t > 16 ? t - 16 : 0;
v = (uint8_t)uvPacked.range(15, 8) - 128;
u = (uint8_t)uvPacked.range(7, 0) - 128;
V2Rtemp = v * (short int)V2R;
U2Gtemp = (short int)U2G * u;
V2Gtemp = (short int)V2G * v;
U2Btemp = u * (short int)U2B;
// R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V
// G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U
// B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U
rgba.range(7, 0) = CalculateR(y1, V2Rtemp, v); // R
rgba.range(15, 8) = CalculateG(y1, U2Gtemp, V2Gtemp); // G
rgba.range(23, 16) = CalculateB(y1, U2Btemp, u); // B
// PackedPixels =
// PackRGBAPixels<WORDWIDTH_DST>(RGB);
_rgba.write(idx1++, rgba);
evenBlock = evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
if (height & 1) {
for (int i = 0; i < width; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
uvStream.read();
}
}
}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST>
void xFNv122Rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernNv122Rgb<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_Y, WORDWIDTH_UV, WORDWIDTH_DST>(
src_y, src_uv, _dst0, height, width);
} else {
KernNv122Rgba_ro<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(DST_T, NPC), XF_WORDWIDTH(SRC_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV), XF_WORDWIDTH(DST_T, NPC), (COLS >> XF_BITSHIFT(NPC)),
((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width);
}
}
template <int SRC_T, int UV_T, int DST_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv122rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and RGB plane dimensions mismatch");
assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
"Y and UV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
#endif
xFNv122Rgb<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(UV_T, NPC_UV),
XF_WORDWIDTH(DST_T, NPC)>(src_y, src_uv, _dst0, src_y.rows, src_y.cols);
}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_VU,
int WORDWIDTH_DST>
void KernNv212Rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _vu,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _rgba,
uint16_t height,
uint16_t width) {
hls::stream<XF_SNAME(WORDWIDTH_VU)> vuStream;
// clang-format off
#pragma HLS STREAM variable=&vuStream depth=COLS
// clang-format on
XF_SNAME(WORDWIDTH_Y) yPacked;
XF_SNAME(WORDWIDTH_VU) vuPacked;
unsigned long long int idx = 0, idx1 = 0;
XF_SNAME(WORDWIDTH_DST) rgba;
ap_uint<13> i, j;
uint8_t y1, y2;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t u, v;
bool evenRow = true, evenBlock = true;
RowLoop:
for (i = 0; i < (height); i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
yPacked = _y.read(i * width + j);
// auExtractPixels<NPC, WORDWIDTH_SRC,
// XF_8UP>(Ybuf, YPacked, 0);
if (evenRow) {
if (evenBlock) {
vuPacked = _vu.read(idx++);
vuStream.write(vuPacked);
}
} else { // Keep a copy of UV row data in stream to use for oddrow
if (evenBlock) {
vuPacked = vuStream.read();
}
}
// auExtractPixels<NPC, WORDWIDTH_SRC,
// XF_8UP>(UVbuf, UVPacked, 0);
uint8_t t = yPacked.range(7, 0);
y1 = t > 16 ? t - 16 : 0;
u = (uint8_t)vuPacked.range(15, 8) - 128;
v = (uint8_t)vuPacked.range(7, 0) - 128;
V2Rtemp = v * (short int)V2R;
U2Gtemp = (short int)U2G * u;
V2Gtemp = (short int)V2G * v;
U2Btemp = u * (short int)U2B;
// R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V
// G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U
// B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U
rgba.range(7, 0) = CalculateR(y1, V2Rtemp, v); // R
rgba.range(15, 8) = CalculateG(y1, U2Gtemp, V2Gtemp); // G
rgba.range(23, 16) = CalculateB(y1, U2Btemp, u); // B
// PackedPixels =
// PackRGBAPixels<WORDWIDTH_DST>(RGB);
_rgba.write(idx1++, rgba);
evenBlock = evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
if (height & 1) {
for (i = 0; i < width; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
vuStream.read();
}
}
}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST>
void xFNv212Rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernNv212Rgb<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_Y, WORDWIDTH_UV, WORDWIDTH_DST>(
src_y, src_uv, _dst0, height, width);
} else {
KernNv212Rgba_ro<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(DST_T, NPC), XF_WORDWIDTH(SRC_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV), XF_WORDWIDTH(DST_T, NPC), (COLS >> XF_BITSHIFT(NPC)),
((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width);
}
}
template <int SRC_T, int UV_T, int DST_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv212rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " vu image Type must be XF_8UC2");
assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and RGB plane dimensions mismatch");
assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
"Y and VU planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same ");
}
#endif
xFNv212Rgb<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(UV_T, NPC),
XF_WORDWIDTH(DST_T, NPC)>(src_y, src_uv, _dst0, src_y.rows, src_y.cols);
}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int PLANES,
int WORDWIDTH_Y,
int WORDWIDTH_VU,
int WORDWIDTH_DST>
void KernNv212bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _vu,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _rgba,
uint16_t height,
uint16_t width) {
hls::stream<XF_SNAME(WORDWIDTH_VU)> vuStream;
// clang-format off
#pragma HLS STREAM variable=&vuStream depth=COLS
// clang-format on
XF_SNAME(WORDWIDTH_Y) yPacked;
XF_SNAME(WORDWIDTH_VU) vuPacked;
unsigned long long int idx = 0, idx1 = 0;
XF_SNAME(WORDWIDTH_DST) rgba;
ap_uint<13> i, j;
uint8_t y1, y2;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
int8_t u, v;
bool evenRow = true, evenBlock = true;
RowLoop:
for (i = 0; i < (height); i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
yPacked = _y.read(i * width + j);
// auExtractPixels<NPC, WORDWIDTH_SRC,
// XF_8UP>(Ybuf, YPacked, 0);
if (evenRow) {
if (evenBlock) {
vuPacked = _vu.read(idx++);
vuStream.write(vuPacked);
}
} else { // Keep a copy of UV row data in stream to use for oddrow
if (evenBlock) {
vuPacked = vuStream.read();
}
}
// auExtractPixels<NPC, WORDWIDTH_SRC,
// XF_8UP>(UVbuf, UVPacked, 0);
uint8_t t = yPacked.range(7, 0);
y1 = t > 16 ? t - 16 : 0;
u = (uint8_t)vuPacked.range(15, 8) - 128;
v = (uint8_t)vuPacked.range(7, 0) - 128;
V2Rtemp = v * (short int)V2R;
U2Gtemp = (short int)U2G * u;
V2Gtemp = (short int)V2G * v;
U2Btemp = u * (short int)U2B;
// R = 1.164*Y + 1.596*V = Y + 0.164*Y + V + 0.596*V
// G = 1.164*Y - 0.813*V - 0.391*U = Y + 0.164*Y - 0.813*V - 0.391*U
// B = 1.164*Y + 2.018*U = Y + 0.164 + 2*U + 0.018*U
rgba.range(23, 16) = CalculateR(y1, V2Rtemp, v); // R
rgba.range(15, 8) = CalculateG(y1, U2Gtemp, V2Gtemp); // G
rgba.range(7, 0) = CalculateB(y1, U2Btemp, u); // B
// PackedPixels =
// PackRGBAPixels<WORDWIDTH_DST>(RGB);
_rgba.write(idx1++, rgba);
evenBlock = evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
if (height & 1) {
for (i = 0; i < width; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
vuStream.read();
}
}
}
template <int SRC_T,
int UV_T,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST>
void xFNv212bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernNv212bgr<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(DST_T, NPC), WORDWIDTH_Y, WORDWIDTH_UV,
WORDWIDTH_DST>(src_y, src_uv, _dst0, height, width);
} else {
KernNv212bgr_ro<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(DST_T, NPC), XF_WORDWIDTH(SRC_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV), XF_WORDWIDTH(DST_T, NPC), (COLS >> XF_BITSHIFT(NPC)),
((1 << XF_BITSHIFT(NPC)) >> 1)>(src_y, src_uv, _dst0, height, width);
}
}
template <int SRC_T, int UV_T, int DST_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv212bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src_y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& src_uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst0) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2");
assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert(((src_y.rows <= ROWS) && (src_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((src_y.cols == _dst0.cols) && (src_y.rows == _dst0.rows)) && "Y and BGR plane dimensions mismatch");
assert(((src_y.cols == (src_uv.cols << 1)) && (src_y.rows == (src_uv.rows << 1))) &&
"Y and VU planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the VU "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
#endif
xFNv212bgr<SRC_T, UV_T, DST_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(UV_T, NPC_UV),
XF_WORDWIDTH(DST_T, NPC)>(src_y, src_uv, _dst0, src_y.rows, src_y.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfrgb2gray(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) RGB[XF_CHANNELS(SRC_T, NPC) * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_TNAME(SRC_T, NPC) RGB_packed; //=0;
XF_CTUNAME(DST_T, NPC) GRAY[XF_NPIXPERCYCLE(NPC)]; //=0;
XF_TNAME(DST_T, NPC) Gray_packed;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
RGB_packed = src.read(i * (width >> XF_BITSHIFT(NPC)) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(RGB_packed, RGB);
for (ap_uint<13> k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
GRAY[k] = CalculateGRAY(RGB[offset], RGB[offset + 1], RGB[offset + 2]);
Gray_packed.range((k * XF_DTPIXELDEPTH(DST_T, NPC) + (XF_DTPIXELDEPTH(DST_T, NPC) - 1)),
k * XF_DTPIXELDEPTH(DST_T, NPC)) = GRAY[k];
}
dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, Gray_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void rgb2gray(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert((DST_T == XF_8UC1) && " GRAY image Type must be XF_8UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and GRAY plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfrgb2gray<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfbgr2gray(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) RGB[XF_CHANNELS(SRC_T, NPC) * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_TNAME(SRC_T, NPC) RGB_packed; //=0;
XF_CTUNAME(DST_T, NPC) GRAY[XF_NPIXPERCYCLE(NPC)]; //=0;
XF_TNAME(DST_T, NPC) Gray_packed;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
RGB_packed = src.read(i * (width >> XF_BITSHIFT(NPC)) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(RGB_packed, RGB);
for (ap_uint<13> k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
GRAY[k] = CalculateGRAY(RGB[offset + 2], RGB[offset + 1], RGB[offset]);
Gray_packed.range((k * XF_DTPIXELDEPTH(DST_T, NPC) + (XF_DTPIXELDEPTH(DST_T, NPC) - 1)),
k * XF_DTPIXELDEPTH(DST_T, NPC)) = GRAY[k];
}
dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, Gray_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void bgr2gray(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert((DST_T == XF_8UC1) && " GRAY image Type must be XF_8UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " BGR image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and GRAY plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfbgr2gray<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfgray2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_DTUNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(DST_T, NPC) RGB_packed;
XF_TNAME(SRC_T, NPC) GRAY_packed;
XF_TNAME(SRC_T, NPC) GRAY[XF_NPIXPERCYCLE(NPC)];
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
GRAY_packed = src.read(i * (width >> XF_BITSHIFT(NPC)) + j);
for (int k = 0; k < XF_NPIXPERCYCLE(NPC); k++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
GRAY[k] = GRAY_packed.range(k * (XF_PIXELWIDTH(SRC_T, NPC)) + XF_PIXELWIDTH(SRC_T, NPC) - 1,
k * XF_PIXELWIDTH(SRC_T, NPC));
RGB[k].range(7, 0) = GRAY[k];
RGB[k].range(15, 8) = GRAY[k];
RGB[k].range(23, 16) = GRAY[k];
RGB_packed.range(k * (XF_PIXELWIDTH(DST_T, NPC)) + XF_PIXELWIDTH(DST_T, NPC) - 1,
k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
}
dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void gray2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " GRAY image Type must be XF_8UC1");
assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " GRAY image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and GRAY plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfgray2rgb<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfgray2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_DTUNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(DST_T, NPC) RGB_packed;
XF_TNAME(SRC_T, NPC) GRAY_packed;
XF_TNAME(SRC_T, NPC) GRAY[XF_NPIXPERCYCLE(NPC)];
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
GRAY_packed = src.read(i * (width >> XF_BITSHIFT(NPC)) + j);
for (int k = 0; k < XF_NPIXPERCYCLE(NPC); k++) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
GRAY[k] = GRAY_packed.range(k * (XF_PIXELWIDTH(SRC_T, NPC)) + XF_PIXELWIDTH(SRC_T, NPC) - 1,
k * XF_PIXELWIDTH(SRC_T, NPC));
RGB[k].range(7, 0) = GRAY[k];
RGB[k].range(15, 8) = GRAY[k];
RGB[k].range(23, 16) = GRAY[k];
RGB_packed.range(k * (XF_PIXELWIDTH(DST_T, NPC)) + XF_PIXELWIDTH(DST_T, NPC) - 1,
k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
}
dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void gray2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC1) && " GRAY image Type must be XF_8UC1");
assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " GRAY image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and GRAY plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfgray2bgr<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfrgb2xyz(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
ap_uint<8> RGB[3];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_TNAME(SRC_T, NPC) RGB_packed = 0;
XF_TNAME(DST_T, NPC) XYZ_packed = 0;
XF_DTUNAME(DST_T, NPC) XYZ[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(DST_T, NPC) X, Y, Z;
short int depth = XF_PIXELWIDTH(DST_T, NPC) / XF_CHANNELS(SRC_T, NPC);
int k = 0;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j<width>> XF_BITSHIFT(NPC); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
RGB_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(RGB_packed, RGB);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
X = Calculate_X(RGB[offset], RGB[offset + 1], RGB[offset + 2]);
Y = Calculate_Y(RGB[offset], RGB[offset + 1], RGB[offset + 2]);
Z = Calculate_Z(RGB[offset], RGB[offset + 1], RGB[offset + 2]);
XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = X;
XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = Y;
XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = Z;
XYZ_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = XYZ[k];
}
dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, XYZ_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void rgb2xyz(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " XYZ image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and XYZ plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfrgb2xyz<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfbgr2xyz(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
ap_uint<8> RGB[3];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_TNAME(SRC_T, NPC) RGB_packed = 0;
XF_TNAME(DST_T, NPC) XYZ_packed = 0;
XF_DTUNAME(DST_T, NPC) XYZ[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(DST_T, NPC) X, Y, Z;
short int depth = XF_PIXELWIDTH(DST_T, NPC) / XF_CHANNELS(SRC_T, NPC);
int k = 0;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j<width>> XF_BITSHIFT(NPC); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
RGB_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(RGB_packed, RGB);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
X = Calculate_X(RGB[offset + 2], RGB[offset + 1], RGB[offset]);
Y = Calculate_Y(RGB[offset + 2], RGB[offset + 1], RGB[offset]);
Z = Calculate_Z(RGB[offset + 2], RGB[offset + 1], RGB[offset]);
XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = X;
XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = Y;
XYZ[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = Z;
XYZ_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = XYZ[k];
}
dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, XYZ_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void bgr2xyz(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " XYZ image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " BGR image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and XYZ plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfbgr2xyz<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfxyz2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) XYZ[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=XYZ complete
// clang-format on
XF_TNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(DST_T, NPC) XYZ_packed = 0, RGB_packed = 0;
XF_TNAME(DST_T, NPC) R, G, B;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
XYZ_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(XYZ_packed, XYZ);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
R = Calculate_R(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
G = Calculate_G(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
B = Calculate_B(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = R;
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = G;
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = B;
RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
}
dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, RGB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void xyz2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " XYZ image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " XYZ image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and XYZ plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfxyz2rgb<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfxyz2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) XYZ[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=XYZ complete
// clang-format on
XF_TNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(DST_T, NPC) XYZ_packed = 0, RGB_packed = 0;
XF_TNAME(DST_T, NPC) R, G, B;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
XYZ_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(XYZ_packed, XYZ);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
// clang-format off
#pragma HLS UNROLL
// clang-format on
R = Calculate_R(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
G = Calculate_G(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
B = Calculate_B(XYZ[offset], XYZ[offset + 1], XYZ[offset + 2]);
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = B;
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = G;
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = R;
RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
}
dst.write((i * (width >> XF_BITSHIFT(NPC))) + j, RGB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void xyz2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " XYZ image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " XYZ image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and XYZ plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfxyz2bgr<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfrgb2ycrcb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) RGB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_DTUNAME(DST_T, NPC) YCRCB[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(SRC_T, NPC) RGB_packed = 0;
XF_TNAME(DST_T, NPC) YCRCB_packed = 0;
XF_TNAME(DST_T, NPC) Y, CR, CB;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j<width>> XF_BITSHIFT(NPC); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
RGB_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(RGB_packed, RGB);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
Y = CalculateGRAY(RGB[offset], RGB[offset + 1], RGB[offset + 2]);
CR = Calculate_CR(RGB[offset], Y);
CB = Calculate_CB(RGB[offset + 2], Y);
YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = Y;
YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = CR;
YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = CB;
YCRCB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = YCRCB[k];
}
dst.write((i * width >> XF_BITSHIFT(NPC)) + j, YCRCB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void rgb2ycrcb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " YCrCb image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and YCrCb plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfrgb2ycrcb<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfbgr2ycrcb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) RGB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_DTUNAME(DST_T, NPC) YCRCB[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(SRC_T, NPC) RGB_packed = 0;
XF_TNAME(DST_T, NPC) YCRCB_packed = 0;
XF_TNAME(DST_T, NPC) Y, CR, CB;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
RGB_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(RGB_packed, RGB);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
Y = CalculateGRAY(RGB[offset + 2], RGB[offset + 1], RGB[offset]);
CR = Calculate_CR(RGB[offset + 2], Y);
CB = Calculate_CB(RGB[offset], Y);
YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = Y;
YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = CR;
YCRCB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = CB;
YCRCB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = YCRCB[k];
}
dst.write((i * width >> XF_BITSHIFT(NPC)) + j, YCRCB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void bgr2ycrcb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " YCrCb image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " BGR image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and YCrCb plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfbgr2ycrcb<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfycrcb2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) YCRCB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=YCRCB complete
// clang-format on
XF_TNAME(SRC_T, NPC) YCRCB_packed = 0;
XF_TNAME(DST_T, NPC) RGB_packed = 0;
XF_TNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(DST_T, NPC) Y, R, B, G;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
YCRCB_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(YCRCB_packed, YCRCB);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
R = Calculate_Ycrcb2R(YCRCB[offset], YCRCB[offset + 1]);
G = Calculate_Ycrcb2G(YCRCB[offset], YCRCB[offset + 1], YCRCB[offset + 2]);
B = Calculate_Ycrcb2B(YCRCB[offset], YCRCB[offset + 2]);
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = R;
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = G;
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = B;
RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
}
dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void ycrcb2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " YCrCb image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " YCrCb image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and YCrCb plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfycrcb2rgb<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfycrcb2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) YCRCB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=YCRCB complete
// clang-format on
XF_TNAME(SRC_T, NPC) YCRCB_packed = 0;
XF_TNAME(DST_T, NPC) RGB_packed = 0;
XF_TNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(DST_T, NPC) Y, R, B, G;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
YCRCB_packed = src.read((i * (width >> XF_BITSHIFT(NPC))) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(YCRCB_packed, YCRCB);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
R = Calculate_Ycrcb2R(YCRCB[offset], YCRCB[offset + 1]);
G = Calculate_Ycrcb2G(YCRCB[offset], YCRCB[offset + 1], YCRCB[offset + 2]);
B = Calculate_Ycrcb2B(YCRCB[offset], YCRCB[offset + 2]);
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) - 1), 0) = B;
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 2) - 1, XF_DTPIXELDEPTH(DST_T, NPC)) = G;
RGB[k].range((XF_DTPIXELDEPTH(DST_T, NPC) * 3) - 1, XF_DTPIXELDEPTH(DST_T, NPC) * 2) = R;
RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
}
dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void ycrcb2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " YCrCb image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " YCrCb image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and YCrCb plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfycrcb2bgr<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfrgb2hls(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) RGB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_TNAME(SRC_T, NPC) RGB_packed = 0;
XF_TNAME(DST_T, NPC) HSV_packed = 0;
XF_TNAME(DST_T, NPC) HSV[XF_NPIXPERCYCLE(NPC)];
XF_CTUNAME(SRC_T, NPC) r, g, b;
XF_CTUNAME(SRC_T, NPC) Vmax, Vmin;
// int Vmax=0,Vmin=0;
int consta;
int sub;
int two_L = 0;
int inv_sub = 0;
int less_if = 0;
short int depth = XF_PIXELWIDTH(DST_T, NPC) / XF_CHANNELS(SRC_T, NPC);
int inv_add = 0;
int S = 0;
int k = 0;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
RGB_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(RGB_packed, RGB);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
r = RGB[offset], g = RGB[offset + 1], b = RGB[offset + 2];
Vmax = b;
Vmin = b;
if ((g > r) && (g > b)) {
Vmax = g;
} else if ((r > b)) {
Vmax = r;
}
if ((g < r) && (g < b)) {
Vmin = g;
} else if ((r < b)) {
Vmin = r;
}
short int v_add = (Vmax + Vmin);
short int v_sub = (Vmax - Vmin);
two_L = (Vmax + Vmin);
int h = 0;
if (two_L < 255) {
inv_add = ((255 << 12) / (v_add));
less_if = (v_sub * inv_add + (1 << (11))) >> 12;
S = less_if;
} else {
if (Vmax == Vmin) {
S = 0;
} else {
int inv_sub = ((255 << 12) / ((2 * 255) - v_add));
int less_if = (v_sub * inv_sub + (1 << (11))) >> 12;
S = less_if;
}
}
sub = (Vmax == b) ? (r - g) : (Vmax == g) ? (b - r) : (g - b);
consta = (Vmax == b) ? 240 : (Vmax == g) ? 120 : 0;
if (Vmax == Vmin) {
h = 0;
} else {
inv_sub = ((1 << 15) / (v_sub));
h = consta + ((60 * sub * inv_sub) >> 15);
if (h < 0) {
h += 360;
}
}
HSV[k].range(7, 0) = (h >> 1);
HSV[k].range(15, 8) = (unsigned char)((two_L + 1) >> 1);
HSV[k].range(23, 16) = S;
HSV_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = HSV[k];
}
dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, HSV_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void rgb2hls(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " HLS image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and HLS plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfrgb2hls<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfbgr2hls(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) RGB[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_TNAME(SRC_T, NPC) RGB_packed = 0;
XF_TNAME(DST_T, NPC) HSV_packed = 0;
XF_TNAME(DST_T, NPC) HSV[XF_NPIXPERCYCLE(NPC)];
XF_CTUNAME(SRC_T, NPC) r, g, b;
XF_CTUNAME(SRC_T, NPC) Vmax, Vmin;
// int Vmax=0,Vmin=0;
int consta;
int sub;
int two_L = 0;
int inv_sub = 0;
int less_if = 0;
short int depth = XF_PIXELWIDTH(DST_T, NPC) / XF_CHANNELS(SRC_T, NPC);
int inv_add = 0;
int S = 0;
int k = 0;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
RGB_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(RGB_packed, RGB);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
b = RGB[offset], g = RGB[offset + 1], r = RGB[offset + 2];
Vmax = b;
Vmin = b;
if ((g > r) && (g > b)) {
Vmax = g;
} else if ((r > b)) {
Vmax = r;
}
if ((g < r) && (g < b)) {
Vmin = g;
} else if ((r < b)) {
Vmin = r;
}
short int v_add = (Vmax + Vmin);
short int v_sub = (Vmax - Vmin);
two_L = (Vmax + Vmin);
int h = 0;
if (two_L < 255) {
inv_add = ((255 << 12) / (v_add));
less_if = (v_sub * inv_add + (1 << (11))) >> 12;
S = less_if;
} else {
if (Vmax == Vmin) {
S = 0;
} else {
int inv_sub = ((255 << 12) / ((2 * 255) - v_add));
int less_if = (v_sub * inv_sub + (1 << (11))) >> 12;
S = less_if;
}
}
sub = (Vmax == b) ? (r - g) : (Vmax == g) ? (b - r) : (g - b);
consta = (Vmax == b) ? 240 : (Vmax == g) ? 120 : 0;
if (Vmax == Vmin) {
h = 0;
} else {
inv_sub = ((1 << 15) / (v_sub));
h = consta + ((60 * sub * inv_sub) >> 15);
if (h < 0) {
h += 360;
}
}
HSV[k].range(7, 0) = (h >> 1);
HSV[k].range(15, 8) = (unsigned char)((two_L + 1) >> 1);
HSV[k].range(23, 16) = S;
HSV_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = HSV[k];
}
dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, HSV_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void bgr2hls(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " HLS image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " BGR image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and HLS plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfbgr2hls<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfhls2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) HLS[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=HLS complete
// clang-format on
XF_DTUNAME(SRC_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(SRC_T, NPC) HLS_packed = 0;
XF_TNAME(DST_T, NPC) RGB_packed = 0;
short int depth = XF_PIXELWIDTH(DST_T, NPC) / XF_CHANNELS(SRC_T, NPC);
unsigned long int r = 0;
unsigned long int g = 0;
unsigned long int b = 0;
XF_CTUNAME(SRC_T, NPC) H, L, S;
ap_fixed<28, 9> tab[4];
ap_fixed<28, 9> p1, p2;
ap_ufixed<20, 1, AP_RND> hscale = 0.0333333333333333333;
ap_ufixed<20, 1, AP_RND> s_scale = 0.0039215686274509803921568627451f;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
HLS_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(HLS_packed, HLS);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
H = HLS[offset], L = HLS[offset + 1], S = HLS[offset + 2];
if (S == 0)
b = g = r = L;
else {
static const int sector_data[][3] = {{1, 3, 0}, {1, 0, 2}, {3, 0, 1},
{0, 2, 1}, {0, 1, 3}, {2, 1, 0}};
ap_fixed<28, 9> mul_scl = s_scale * S;
if (2 * L <= 255) {
p2 = L + L * mul_scl;
} else {
p2 = L + S - ((L * mul_scl));
}
p1 = 2 * L - p2;
unsigned char H_scl = (unsigned char)H * hscale;
ap_fixed<28, 9> h_fix = H * hscale - H_scl;
if (H_scl >= 6) // for hrange=180, 0<H<255, then 0<h_i<8
H_scl -= 6;
tab[0] = p2;
tab[1] = p1;
tab[2] = p2 - (p2 - p1) * (h_fix);
tab[3] = p1 + (p2 - p1) * (h_fix);
b = (tab[sector_data[H_scl][0]]);
g = (tab[sector_data[H_scl][1]]);
r = (tab[sector_data[H_scl][2]]);
}
RGB[k].range(7, 0) = (unsigned char)(r);
RGB[k].range(15, 8) = (unsigned char)(g);
RGB[k].range(23, 16) = (unsigned char)(b);
RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
}
dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void hls2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " HLS image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " HLS image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and HLS plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfhls2rgb<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfhls2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) HLS[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=HLS complete
// clang-format on
XF_DTUNAME(SRC_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(SRC_T, NPC) HLS_packed = 0;
XF_TNAME(DST_T, NPC) RGB_packed = 0;
short int depth = XF_PIXELWIDTH(DST_T, NPC) / XF_CHANNELS(SRC_T, NPC);
unsigned long int r = 0;
unsigned long int g = 0;
unsigned long int b = 0;
XF_CTUNAME(SRC_T, NPC) H, L, S;
ap_fixed<28, 9> tab[4];
ap_fixed<28, 9> p1, p2;
ap_ufixed<20, 1, AP_RND> hscale = 0.0333333333333333333;
ap_ufixed<20, 1, AP_RND> s_scale = 0.0039215686274509803921568627451f;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
HLS_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(HLS_packed, HLS);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
H = HLS[offset], L = HLS[offset + 1], S = HLS[offset + 2];
if (S == 0)
b = g = r = L;
else {
static const int sector_data[][3] = {{1, 3, 0}, {1, 0, 2}, {3, 0, 1},
{0, 2, 1}, {0, 1, 3}, {2, 1, 0}};
ap_fixed<28, 9> mul_scl = s_scale * S;
if (2 * L <= 255) {
p2 = L + L * mul_scl;
} else {
p2 = L + S - ((L * mul_scl));
}
p1 = 2 * L - p2;
unsigned char H_scl = (unsigned char)H * hscale;
ap_fixed<28, 9> h_fix = H * hscale - H_scl;
if (H_scl >= 6) // for hrange=180, 0<H<255, then 0<h_i<8
H_scl -= 6;
tab[0] = p2;
tab[1] = p1;
tab[2] = p2 - (p2 - p1) * (h_fix);
tab[3] = p1 + (p2 - p1) * (h_fix);
b = (tab[sector_data[H_scl][0]]);
g = (tab[sector_data[H_scl][1]]);
r = (tab[sector_data[H_scl][2]]);
}
RGB[k].range(7, 0) = (unsigned char)(b);
RGB[k].range(15, 8) = (unsigned char)(g);
RGB[k].range(23, 16) = (unsigned char)(r);
RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
}
dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void hls2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " HLS image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " HLS image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and HLS plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfhls2bgr<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfhsv2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) HSV[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=HSV complete
// clang-format on
XF_DTUNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
XF_TNAME(SRC_T, NPC) HSV_packed = 0;
XF_TNAME(DST_T, NPC) RGB_packed = 0;
XF_CTUNAME(SRC_T, NPC) H, S, V;
unsigned long int r = 0;
unsigned long int g = 0;
unsigned long int b = 0;
ap_fixed<28, 9> tab[4];
ap_fixed<28, 9> p1, p2;
ap_ufixed<20, 1, AP_RND> hscale = 0.0333333333333333333;
ap_ufixed<20, 1, AP_RND> s_scale = 0.0039215686274509803921568627451;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j<width>> XF_BITSHIFT(NPC); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
HSV_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(HSV_packed, HSV);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
H = HSV[offset], S = HSV[offset + 1], V = HSV[offset + 2];
static const int sector_data[][3] = {{1, 3, 0}, {1, 0, 2}, {3, 0, 1}, {0, 2, 1}, {0, 1, 3}, {2, 1, 0}};
ap_fixed<28, 9> mul_scl = s_scale * S;
unsigned char H_scl = (unsigned char)H * hscale;
ap_fixed<28, 9> h_fix = H * hscale - H_scl;
if (H_scl >= 6) // for hrange=180, 0<H<255, then 0<h_i<8
H_scl -= 6;
tab[0] = V;
tab[1] = V * (1 - mul_scl);
tab[2] = V * (1 - mul_scl * h_fix);
tab[3] = V * (1 - mul_scl + mul_scl * h_fix);
b = (tab[sector_data[H_scl][0]]);
g = (tab[sector_data[H_scl][1]]);
r = (tab[sector_data[H_scl][2]]);
RGB[k].range(7, 0) = (unsigned char)(r);
RGB[k].range(15, 8) = (unsigned char)(g);
RGB[k].range(23, 16) = (unsigned char)(b);
RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
}
dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void hsv2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " HSV image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " HSV image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and HSV plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism is supported ");
#endif
xfhsv2rgb<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfhsv2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
XF_CTUNAME(SRC_T, NPC) HSV[3 * XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=HSV complete
// clang-format on
XF_DTUNAME(DST_T, NPC) RGB[XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
// clang-format on
XF_TNAME(SRC_T, NPC) HSV_packed = 0;
XF_TNAME(DST_T, NPC) RGB_packed = 0;
XF_CTUNAME(SRC_T, NPC) H, S, V;
unsigned long int r = 0;
unsigned long int g = 0;
unsigned long int b = 0;
ap_fixed<28, 9> tab[4];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=tab complete
// clang-format on
ap_fixed<28, 9> p1, p2;
ap_ufixed<20, 1, AP_RND> hscale = 0.0333333333333333333;
ap_ufixed<20, 1, AP_RND> s_scale = 0.0039215686274509803921568627451;
rowloop:
for (ap_uint<13> i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (ap_uint<13> j = 0; j<width>> XF_BITSHIFT(NPC); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
HSV_packed = src.read((i * width >> XF_BITSHIFT(NPC)) + j);
ExtractUYVYPixels<WORDWIDTH_SRC>(HSV_packed, HSV);
for (int k = 0, offset = 0; k < XF_NPIXPERCYCLE(NPC); k++, offset += 3) {
H = HSV[offset], S = HSV[offset + 1], V = HSV[offset + 2];
static const int sector_data[][3] = {{1, 3, 0}, {1, 0, 2}, {3, 0, 1}, {0, 2, 1}, {0, 1, 3}, {2, 1, 0}};
ap_fixed<28, 9> mul_scl = s_scale * S;
unsigned char H_scl = (unsigned char)H * hscale;
ap_fixed<28, 9> h_fix = H * hscale - H_scl;
if (H_scl >= 6) // for hrange=180, 0<H<255, then 0<h_i<8
H_scl -= 6;
tab[0] = V;
tab[1] = V * (1 - mul_scl);
tab[2] = V * (1 - mul_scl * h_fix);
tab[3] = V * (1 - mul_scl + mul_scl * h_fix);
b = (tab[sector_data[H_scl][0]]);
g = (tab[sector_data[H_scl][1]]);
r = (tab[sector_data[H_scl][2]]);
RGB[k].range(7, 0) = (unsigned char)(b);
RGB[k].range(15, 8) = (unsigned char)(g);
RGB[k].range(23, 16) = (unsigned char)(r);
RGB_packed.range(k * XF_PIXELWIDTH(DST_T, NPC) + (XF_PIXELWIDTH(DST_T, NPC) - 1),
k * XF_PIXELWIDTH(DST_T, NPC)) = RGB[k];
}
dst.write(i * (width >> XF_BITSHIFT(NPC)) + j, RGB_packed);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void hsv2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " HSV image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " HSV image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and HSV plane dimensions mismatch");
// assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC8)) && " 1,8 pixel parallelism
// is supported ");
#endif
xfhsv2bgr<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void xfrgb2uyvy(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
// XF_PTNAME(XF_8UP) Y[],U,V;
XF_PTNAME(XF_8UP) Y[XF_NPIXPERCYCLE(NPC)];
XF_PTNAME(XF_8UP) U[XF_NPIXPERCYCLE(NPC)];
XF_PTNAME(XF_8UP) V[XF_NPIXPERCYCLE(NPC)];
ap_uint<24> RGB1[XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Y complete
#pragma HLS ARRAY_PARTITION variable=U complete
#pragma HLS ARRAY_PARTITION variable=V complete
#pragma HLS ARRAY_PARTITION variable=RGB1 complete
// clang-format on
unsigned long long int idx = 0, idx1 = 0;
XF_SNAME(WORDWIDTH_SRC) Packed_rgb1;
XF_PTNAME(XF_DEPTH(DST_T, NPC))
UYPacked, VYPacked, packed_uyvy[XF_NPIXPERCYCLE(NPC)];
XF_SNAME(WORDWIDTH_DST) val_dst = 0;
uint8_t offset = 0;
uint16_t shift = 0;
bool evencol = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
evencol = true;
columnloop:
for (int j = 0; j < (width >> (XF_BITSHIFT(NPC))); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
val_dst = 0;
// evencol=true;
Packed_rgb1 = src.read(idx++);
xfExtractPixels<NPC, XF_WORDWIDTH(SRC_T, NPC), XF_DEPTH(SRC_T, NPC)>(RGB1, Packed_rgb1, 0);
shift = 0;
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
// Y0[l] =
// CalculateY(RGB1[offset+0].range(7,0),
// RGB1[offset+0].range(15,8),
// RGB1[offset+0].range(23,16));
Y[l] = CalculateY(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16));
if (evencol) {
U[l / 2] = CalculateU(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16));
V[l / 2] = CalculateV(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16));
// U[l] = CalculateU(RGB1[offset+0].range(7,0),
// RGB1[offset+0].range(15,8),
// RGB1[offset+0].range(23,16)); V[l]
// =
// CalculateV(RGB1[offset+0].range(7,0),
// RGB1[offset+0].range(15,8), RGB1[offset+0].range(23,16));
UYPacked.range(7, 0) = U[l / 2];
UYPacked.range(15, 8) = Y[l];
packed_uyvy[l] = UYPacked;
} else {
VYPacked.range(7, 0) = V[l / 2];
VYPacked.range(15, 8) = Y[l];
packed_uyvy[l] = VYPacked;
}
// val_dst.range(l*8+) = packed_uyvy[l];
xfPackPixels<NPC, XF_WORDWIDTH(DST_T, NPC), XF_DEPTH(DST_T, NPC)>(&packed_uyvy[l], val_dst, 0, 1,
shift);
evencol = evencol ? false : true;
}
dst.write(idx1++, val_dst);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void rgb2uyvy(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert((DST_T == XF_16UC1) && " UYVY image Type must be XF_16UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and UYVY plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xfrgb2uyvy<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void xfrgb2yuyv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
// XF_PTNAME(XF_8UP) Y[],U,V;
XF_PTNAME(XF_8UP) Y[XF_NPIXPERCYCLE(NPC)];
XF_PTNAME(XF_8UP) U[XF_NPIXPERCYCLE(NPC)];
XF_PTNAME(XF_8UP) V[XF_NPIXPERCYCLE(NPC)];
ap_uint<24> RGB1[XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Y complete
#pragma HLS ARRAY_PARTITION variable=U complete
#pragma HLS ARRAY_PARTITION variable=V complete
#pragma HLS ARRAY_PARTITION variable=RGB1 complete
// clang-format on
unsigned long long int idx = 0, idx1 = 0;
XF_SNAME(WORDWIDTH_SRC) Packed_rgb1;
XF_PTNAME(XF_DEPTH(DST_T, NPC))
YUPacked, YVPacked, packed_yuyv[XF_NPIXPERCYCLE(NPC)];
XF_SNAME(WORDWIDTH_DST) val_dst = 0;
uint8_t offset = 0;
uint16_t shift = 0;
bool evencol = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
evencol = true;
columnloop:
for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
val_dst = 0;
Packed_rgb1 = src.read(idx++);
xfExtractPixels<NPC, XF_WORDWIDTH(SRC_T, NPC), XF_DEPTH(SRC_T, NPC)>(RGB1, Packed_rgb1, 0);
shift = 0;
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
Y[l] = CalculateY(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16));
if (evencol) {
U[l / 2] = CalculateU(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16));
V[l / 2] = CalculateV(RGB1[l].range(7, 0), RGB1[l].range(15, 8), RGB1[l].range(23, 16));
YUPacked.range(7, 0) = Y[l];
YUPacked.range(15, 8) = U[l / 2];
packed_yuyv[l] = YUPacked;
} else {
YVPacked.range(7, 0) = Y[l];
YVPacked.range(15, 8) = V[l / 2];
packed_yuyv[l] = YVPacked;
}
xfPackPixels<NPC, XF_WORDWIDTH(DST_T, NPC), XF_DEPTH(DST_T, NPC)>(&packed_yuyv[l], val_dst, 0, 1,
shift);
evencol = evencol ? false : true;
}
dst.write(idx1++, val_dst);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void rgb2yuyv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert((DST_T == XF_16UC1) && " YUYV image Type must be XF_16UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "RGB and YUYV plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xfrgb2yuyv<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void xfrgb2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
ap_uint<24> RGB[XF_NPIXPERCYCLE(NPC)], BGR[XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=BGR complete
// clang-format on
unsigned long long int idx = 0, idx1 = 0;
XF_TNAME(SRC_T, NPC) Packed_rgb1;
XF_TNAME(DST_T, NPC) val_dst = 0;
uint8_t offset = 0;
uint16_t shift = 0;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
val_dst = 0;
Packed_rgb1 = src.read(idx++);
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
RGB[l] = Packed_rgb1(l * 24 + 23, l * 24);
BGR[l].range(23, 16) = RGB[l].range(7, 0);
BGR[l].range(15, 8) = RGB[l].range(15, 8);
BGR[l].range(7, 0) = RGB[l].range(23, 16);
val_dst.range(l * 24 + 23, l * 24) = BGR[l];
}
dst.write(idx1++, val_dst);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void rgb2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " RGB image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and RGB plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xfrgb2bgr<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
((COLS >> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_Y,
int SRC_UV,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST,
int TC>
void xfnv122uyvy(xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
// assert();
hls::stream<XF_SNAME(WORDWIDTH_UV)> uvStream;
// clang-format off
#pragma HLS STREAM variable=&uvStream depth=COLS/2
// clang-format on
XF_SNAME(WORDWIDTH_Y) yPacked;
XF_SNAME(WORDWIDTH_UV) uvPacked;
XF_SNAME(WORDWIDTH_DST) uyvyPacked;
unsigned long long int y_idx = 0, uv_idx = 0, out_idx = 0;
uint8_t y;
int8_t u, v;
bool evenRow = true, evenBlock = true, evenPix = true;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
yPacked = _y.read(y_idx++);
uyvyPacked = 0;
if (evenRow) {
if (evenBlock) {
uvPacked = _uv.read(uv_idx++);
uvStream.write(uvPacked);
}
} else { // Keep a copy of UV row data in stream to use for oddrow
if (evenBlock) {
uvPacked = uvStream.read();
}
}
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) {
uint8_t y = yPacked.range(l * 8 + 7, l * 8 + 0);
if (evenPix) {
v = (uint8_t)uvPacked.range((l / 2) * 16 + 15, (l / 2) * 16 + 8);
u = (uint8_t)uvPacked.range((l / 2) * 16 + 7, (l / 2) * 16 + 0);
uyvyPacked.range(l * 16 + 7, l * 16 + 0) = u;
uyvyPacked.range(l * 16 + 15, l * 16 + 8) = y;
} else {
uyvyPacked.range(l * 16 + 7, l * 16 + 0) = v;
uyvyPacked.range(l * 16 + 15, l * 16 + 8) = y;
}
evenPix = evenPix ? false : true;
}
dst.write(out_idx++, uyvyPacked);
evenBlock = ((XF_NPIXPERCYCLE(NPC)) != 1) ? true : evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
if (height & 1) {
for (int i = 0; i < width; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
uvStream.read();
}
}
}
template <int SRC_Y, int SRC_UV, int DST_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv122uyvy(xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_Y == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((SRC_UV == XF_8UC2) && " UV image Type must be XF_8UC2");
assert((DST_T == XF_16UC1) && " UYVY image Type must be XF_16UC1");
assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((_y.cols == _dst.cols) && (_y.rows == _dst.rows)) && "Y and UYVY plane dimensions mismatch");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same ");
}
#endif
xfnv122uyvy<SRC_Y, SRC_UV, DST_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_Y, NPC), XF_WORDWIDTH(SRC_UV, NPC_UV),
XF_WORDWIDTH(DST_T, NPC), (COLS >> (XF_NPIXPERCYCLE(NPC)))>(_y, _uv, _dst, _y.rows, _y.cols);
}
template <int SRC_Y,
int SRC_UV,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST,
int TC>
void xfnv212uyvy(xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
// assert();
hls::stream<XF_SNAME(WORDWIDTH_UV)> uvStream;
// clang-format off
#pragma HLS STREAM variable=&uvStream depth=COLS/2
// clang-format on
XF_SNAME(WORDWIDTH_Y) yPacked;
XF_SNAME(WORDWIDTH_UV) uvPacked;
XF_SNAME(WORDWIDTH_DST) uyvyPacked;
unsigned long long int y_idx = 0, uv_idx = 0, out_idx = 0;
uint8_t y;
int8_t u, v;
bool evenRow = true, evenBlock = true, evenPix = true;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
yPacked = _y.read(y_idx++);
uyvyPacked = 0;
if (evenRow) {
if (evenBlock) {
uvPacked = _uv.read(uv_idx++);
uvStream.write(uvPacked);
}
} else { // Keep a copy of UV row data in stream to use for oddrow
if (evenBlock) {
uvPacked = uvStream.read();
}
}
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) {
y = yPacked.range(l * 8 + 7, l * 8 + 0);
if (evenPix) {
u = (uint8_t)uvPacked.range((l / 2) * 16 + 15, (l / 2) * 16 + 8);
v = (uint8_t)uvPacked.range((l / 2) * 16 + 7, (l / 2) * 16 + 0);
uyvyPacked.range(l * 16 + 7, l * 16 + 0) = u;
uyvyPacked.range(l * 16 + 15, l * 16 + 8) = y;
} else {
uyvyPacked.range(l * 16 + 7, l * 16 + 0) = v;
uyvyPacked.range(l * 16 + 15, l * 16 + 8) = y;
}
evenPix = evenPix ? false : true;
}
dst.write(out_idx++, uyvyPacked);
evenBlock = ((XF_NPIXPERCYCLE(NPC)) != 1) ? true : evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
if (height & 1) {
for (int i = 0; i < width; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
uvStream.read();
}
}
}
template <int SRC_Y, int SRC_UV, int DST_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv212uyvy(xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_Y == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((SRC_UV == XF_8UC2) && " UV image Type must be XF_8UC2");
assert((DST_T == XF_16UC1) && " UYVY image Type must be XF_16UC1");
assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((_y.cols == _dst.cols) && (_y.rows == _dst.rows)) && "Y and UYVY plane dimensions mismatch");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
#endif
xfnv212uyvy<SRC_Y, SRC_UV, DST_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_Y, NPC), XF_WORDWIDTH(SRC_UV, NPC_UV),
XF_WORDWIDTH(DST_T, NPC), (ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_y, _uv, _dst, _y.rows, _y.cols);
}
template <int SRC_Y,
int SRC_UV,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST,
int TC>
void xfnv122yuyv(xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
// assert();
hls::stream<XF_SNAME(WORDWIDTH_UV)> uvStream;
// clang-format off
#pragma HLS STREAM variable=&uvStream depth=COLS/2
// clang-format on
XF_SNAME(WORDWIDTH_Y) yPacked;
XF_SNAME(WORDWIDTH_UV) uvPacked;
XF_SNAME(WORDWIDTH_DST) uyvyPacked;
unsigned long long int y_idx = 0, uv_idx = 0, out_idx = 0;
uint8_t y;
int8_t u, v;
bool evenRow = true, evenBlock = true, evenPix = true;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
yPacked = _y.read(y_idx++);
uyvyPacked = 0;
if (evenRow) {
if (evenBlock) {
uvPacked = _uv.read(uv_idx++);
uvStream.write(uvPacked);
}
} else { // Keep a copy of UV row data in stream to use for oddrow
if (evenBlock) {
uvPacked = uvStream.read();
}
}
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) {
y = yPacked.range(l * 8 + 7, l * 8 + 0);
if (evenPix) {
v = (uint8_t)uvPacked.range((l / 2) * 16 + 15, (l / 2) * 16 + 8);
u = (uint8_t)uvPacked.range((l / 2) * 16 + 7, (l / 2) * 16 + 0);
uyvyPacked.range(l * 16 + 7, l * 16 + 0) = y;
uyvyPacked.range(l * 16 + 15, l * 16 + 8) = u;
} else {
uyvyPacked.range(l * 16 + 7, l * 16 + 0) = y;
uyvyPacked.range(l * 16 + 15, l * 16 + 8) = v;
}
evenPix = evenPix ? false : true;
}
dst.write(out_idx++, uyvyPacked);
evenBlock = ((XF_NPIXPERCYCLE(NPC)) != 1) ? true : evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
if (height & 1) {
for (int i = 0; i < width; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
uvStream.read();
}
}
}
template <int SRC_Y, int SRC_UV, int DST_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv122yuyv(xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_Y == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((SRC_UV == XF_8UC2) && " UV image Type must be XF_8UC2");
assert((DST_T == XF_16UC1) && " YUYV image Type must be XF_16UC1");
assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((_y.cols == _dst.cols) && (_y.rows == _dst.rows)) && "Y and Yuyv plane dimensions mismatch");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC,NPC_UV values must be same ");
}
#endif
xfnv122yuyv<SRC_Y, SRC_UV, DST_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_Y, NPC), XF_WORDWIDTH(SRC_UV, NPC_UV),
XF_WORDWIDTH(DST_T, NPC), (COLS >> (XF_NPIXPERCYCLE(NPC)))>(_y, _uv, _dst, _y.rows, _y.cols);
}
template <int SRC_Y,
int SRC_UV,
int DST_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_Y,
int WORDWIDTH_UV,
int WORDWIDTH_DST,
int TC>
void xfnv212yuyv(xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
// assert();
hls::stream<XF_SNAME(WORDWIDTH_UV)> uvStream;
// clang-format off
#pragma HLS STREAM variable=&uvStream depth=COLS/2
// clang-format on
XF_SNAME(WORDWIDTH_Y) yPacked;
XF_SNAME(WORDWIDTH_UV) uvPacked;
XF_SNAME(WORDWIDTH_DST) uyvyPacked;
unsigned long long int y_idx = 0, uv_idx = 0, out_idx = 0;
uint8_t y;
int8_t u, v;
bool evenRow = true, evenBlock = true, evenPix = true;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
yPacked = _y.read(y_idx++);
uyvyPacked = 0;
if (evenRow) {
if (evenBlock) {
uvPacked = _uv.read(uv_idx++);
uvStream.write(uvPacked);
}
} else { // Keep a copy of UV row data in stream to use for oddrow
if (evenBlock) {
uvPacked = uvStream.read();
}
}
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) {
y = yPacked.range(l * 8 + 7, l * 8 + 0);
if (evenPix) {
u = (uint8_t)uvPacked.range((l / 2) * 16 + 15, (l / 2) * 16 + 8);
v = (uint8_t)uvPacked.range((l / 2) * 16 + 7, (l / 2) * 16 + 0);
uyvyPacked.range(l * 16 + 7, l * 16 + 0) = y;
uyvyPacked.range(l * 16 + 15, l * 16 + 8) = u;
} else {
uyvyPacked.range(l * 16 + 7, l * 16 + 0) = y;
uyvyPacked.range(l * 16 + 15, l * 16 + 8) = v;
}
evenPix = evenPix ? false : true;
}
dst.write(out_idx++, uyvyPacked);
evenBlock = ((XF_NPIXPERCYCLE(NPC)) != 1) ? true : evenBlock ? false : true;
}
evenRow = evenRow ? false : true;
}
if (height & 1) {
for (int i = 0; i < width; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
uvStream.read();
}
}
}
template <int SRC_Y, int SRC_UV, int DST_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv212yuyv(xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_Y == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((SRC_UV == XF_8UC2) && " VU image Type must be XF_8UC2");
assert((DST_T == XF_16UC1) && " YUYV image Type must be XF_16UC1");
assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((_y.cols == _dst.cols) && (_y.rows == _dst.rows)) && "Y and Yuyv plane dimensions mismatch");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and VU planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the VU "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same ");
}
#endif
xfnv212yuyv<SRC_Y, SRC_UV, DST_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_Y, NPC), XF_WORDWIDTH(SRC_UV, NPC_UV),
XF_WORDWIDTH(DST_T, NPC), (ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_y, _uv, _dst, _y.rows, _y.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void xfbgr2uyvy(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
// XF_PTNAME(XF_8UP) Y[],U,V;
XF_PTNAME(XF_8UP) Y[XF_NPIXPERCYCLE(NPC)];
XF_PTNAME(XF_8UP) U[XF_NPIXPERCYCLE(NPC)];
XF_PTNAME(XF_8UP) V[XF_NPIXPERCYCLE(NPC)];
ap_uint<24> RGB1[XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Y complete
#pragma HLS ARRAY_PARTITION variable=U complete
#pragma HLS ARRAY_PARTITION variable=V complete
#pragma HLS ARRAY_PARTITION variable=RGB1 complete
// clang-format on
unsigned long long int idx = 0, idx1 = 0;
XF_SNAME(WORDWIDTH_SRC) Packed_rgb1;
XF_PTNAME(XF_DEPTH(DST_T, NPC))
UYPacked, VYPacked, packed_uyvy[XF_NPIXPERCYCLE(NPC)];
XF_SNAME(WORDWIDTH_DST) val_dst = 0;
uint8_t offset = 0;
uint16_t shift = 0;
bool evencol = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
evencol = true;
columnloop:
for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
val_dst = 0;
// evencol=true;
Packed_rgb1 = src.read(idx++);
xfExtractPixels<NPC, XF_WORDWIDTH(SRC_T, NPC), XF_DEPTH(SRC_T, NPC)>(RGB1, Packed_rgb1, 0);
shift = 0;
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
// Y0[l] =
// CalculateY(RGB1[offset+0].range(7,0),
// RGB1[offset+0].range(15,8),
// RGB1[offset+0].range(23,16));
Y[l] = CalculateY(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0));
if (evencol) {
U[l / 2] = CalculateU(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0));
V[l / 2] = CalculateV(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0));
// U[l] =
// CalculateU(RGB1[offset+0].range(7,0),
// RGB1[offset+0].range(15,8),
// RGB1[offset+0].range(23,16)); V[l]
// =
// CalculateV(RGB1[offset+0].range(7,0),
// RGB1[offset+0].range(15,8), RGB1[offset+0].range(23,16));
UYPacked.range(7, 0) = U[l / 2];
UYPacked.range(15, 8) = Y[l];
packed_uyvy[l] = UYPacked;
} else {
VYPacked.range(7, 0) = V[l / 2];
VYPacked.range(15, 8) = Y[l];
packed_uyvy[l] = VYPacked;
}
// val_dst.range() = packed_uyvy[l];
xfPackPixels<NPC, XF_WORDWIDTH(DST_T, NPC), XF_DEPTH(DST_T, NPC)>(&packed_uyvy[l], val_dst, 0, 1,
shift);
evencol = evencol ? false : true;
}
dst.write(idx1++, val_dst);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void bgr2uyvy(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert((DST_T == XF_16UC1) && " UYVY image Type must be XF_16UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " BGR image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and UYVY plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xfbgr2uyvy<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void xfbgr2yuyv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
// XF_PTNAME(XF_8UP) Y[],U,V;
XF_PTNAME(XF_8UP) Y[XF_NPIXPERCYCLE(NPC)];
XF_PTNAME(XF_8UP) U[XF_NPIXPERCYCLE(NPC)];
XF_PTNAME(XF_8UP) V[XF_NPIXPERCYCLE(NPC)];
ap_uint<24> RGB1[XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=Y complete
#pragma HLS ARRAY_PARTITION variable=U complete
#pragma HLS ARRAY_PARTITION variable=V complete
#pragma HLS ARRAY_PARTITION variable=RGB1 complete
// clang-format on
unsigned long long int idx = 0, idx1 = 0;
XF_SNAME(WORDWIDTH_SRC) Packed_rgb1;
XF_PTNAME(XF_DEPTH(DST_T, NPC))
YUPacked, YVPacked, packed_yuyv[XF_NPIXPERCYCLE(NPC)];
XF_SNAME(WORDWIDTH_DST) val_dst = 0;
uint8_t offset = 0;
uint16_t shift = 0;
bool evencol = true;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
evencol = true;
columnloop:
for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
val_dst = 0;
Packed_rgb1 = src.read(idx++);
xfExtractPixels<NPC, XF_WORDWIDTH(SRC_T, NPC), XF_DEPTH(SRC_T, NPC)>(RGB1, Packed_rgb1, 0);
shift = 0;
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
Y[l] = CalculateY(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0));
if (evencol) {
U[l / 2] = CalculateU(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0));
V[l / 2] = CalculateV(RGB1[l].range(23, 16), RGB1[l].range(15, 8), RGB1[l].range(7, 0));
YUPacked.range(7, 0) = Y[l];
YUPacked.range(15, 8) = U[l / 2];
packed_yuyv[l] = YUPacked;
} else {
YVPacked.range(7, 0) = Y[l];
YVPacked.range(15, 8) = V[l / 2];
packed_yuyv[l] = YVPacked;
}
xfPackPixels<NPC, XF_WORDWIDTH(DST_T, NPC), XF_DEPTH(DST_T, NPC)>(&packed_yuyv[l], val_dst, 0, 1,
shift);
evencol = evencol ? false : true;
}
dst.write(idx1++, val_dst);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void bgr2yuyv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert((DST_T == XF_16UC1) && " YUYV image Type must be XF_16UC1");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " BGR image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and YUYV plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xfbgr2yuyv<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC, int iTC>
void xfbgr2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& dst,
unsigned short int height,
unsigned short int width) {
ap_uint<24> RGB[XF_NPIXPERCYCLE(NPC)], BGR[XF_NPIXPERCYCLE(NPC)];
// clang-format off
#pragma HLS ARRAY_PARTITION variable=RGB complete
#pragma HLS ARRAY_PARTITION variable=BGR complete
// clang-format on
unsigned long long int idx = 0, idx1 = 0;
XF_SNAME(WORDWIDTH_SRC) Packed_rgb1;
XF_SNAME(WORDWIDTH_DST) val_dst = 0;
uint8_t offset = 0;
uint16_t shift = 0;
rowloop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
columnloop:
for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
// clang-format on
val_dst = 0;
Packed_rgb1 = src.read(idx++);
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=iTC max=iTC
#pragma HLS unroll
// clang-format on
xfExtractPixels<NPC, XF_WORDWIDTH(SRC_T, NPC), XF_DEPTH(SRC_T, NPC)>(BGR, Packed_rgb1, 0);
RGB[l].range(23, 16) = BGR[l].range(7, 0);
RGB[l].range(15, 8) = BGR[l].range(15, 8);
RGB[l].range(7, 0) = BGR[l].range(23, 16);
val_dst.range(l * XF_PIXELWIDTH(SRC_T, NPC) + XF_PIXELWIDTH(SRC_T, NPC) - 1,
l * XF_PIXELWIDTH(SRC_T, NPC)) = RGB[l];
}
dst.write(idx1++, val_dst);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void bgr2rgb(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert((DST_T == XF_8UC3) && " RGB image Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " BGR image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "BGR and RGB plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xfbgr2rgb<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC)))), XF_NPIXPERCYCLE(NPC)>(_src, _dst, _src.rows, _src.cols);
}
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV>
void Kernbgr2Nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _rgba,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
uint16_t height,
uint16_t width) {
unsigned long long int idx = 0, idx1 = 0;
ap_uint<32> rgba;
ap_uint<16> val1;
uint8_t y, u, v;
bool evenRow = true, evenBlock = true;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
rgba = _rgba.read(i * width + j);
uint8_t b = rgba.range(7, 0);
uint8_t g = rgba.range(15, 8);
uint8_t r = rgba.range(23, 16);
y = CalculateY(r, g, b);
if (evenRow) {
u = CalculateU(r, g, b);
v = CalculateV(r, g, b);
}
_y.write(idx++, y);
if (evenRow) {
if ((j & 0x01) == 0)
//{
_uv.write(idx1++, u | (uint16_t)v << 8);
//_uv.write(v);
//}
// _uv.write(u | (uint16_t)v << 8);
}
}
evenRow = evenRow ? false : true;
}
}
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV>
void xFbgr2Nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
Kernbgr2Nv12<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV>(
_src, _y, _uv, height, width);
} else {
Kernbgr2Nv12_ro<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(SRC_T, NPC), WORDWIDTH_SRC, WORDWIDTH_Y,
WORDWIDTH_UV, (COLS >> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height,
width);
}
}
template <int SRC_T, int Y_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void bgr2nv12(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and BGR plane dimensions mismatch");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same ");
}
#endif
xFbgr2Nv12<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(Y_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV)>(_src, _y, _uv, _src.rows, _src.cols);
}
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_VU>
void Kernbgr2Nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _rgba,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _vu,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
XF_SNAME(XF_32UW) rgba;
unsigned long long int idx = 0, idx1 = 0;
uint8_t y, u, v;
bool evenRow = true, evenBlock = true;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j < width; j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
rgba = _rgba.read(i * width + j);
uint8_t b = rgba.range(7, 0);
uint8_t g = rgba.range(15, 8);
uint8_t r = rgba.range(23, 16);
y = CalculateY(r, g, b);
if (evenRow) {
u = CalculateU(r, g, b);
v = CalculateV(r, g, b);
}
_y.write(idx++, y);
if (evenRow) {
if ((j & 0x01) == 0) _vu.write(idx1++, v | ((uint16_t)u << 8));
}
}
evenRow = evenRow ? false : true;
}
}
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV>
void xFbgr2Nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
Kernbgr2Nv21<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV>(
_src, _y, _uv, height, width);
} else {
Kernbgr2Nv21_ro<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_CHANNELS(SRC_T, NPC), WORDWIDTH_SRC, WORDWIDTH_Y,
WORDWIDTH_UV, (COLS >> XF_BITSHIFT(NPC)), (1 << (XF_BITSHIFT(NPC) + 1))>(_src, _y, _uv, height,
width);
}
}
template <int SRC_T, int Y_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void bgr2nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_8UC3) && " BGR image Type must be XF_8UC3");
assert((Y_T == XF_8UC1) && " Y image Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert(((_src.rows <= ROWS) && (_y.cols <= COLS)) && " Y image ROWS and COLS should be less than ROWS, COLS");
assert(((_src.cols == _y.cols) && (_src.rows == _y.rows)) && "Y and BGR plane dimensions mismatch");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC values must be same ");
}
#endif
xFbgr2Nv21<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(Y_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV)>(_src, _y, _uv, _src.rows, _src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void KernYuyv2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _yuyv,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _rgba,
uint16_t height,
uint16_t width) {
XF_SNAME(WORDWIDTH_DST) rgba;
XF_SNAME(WORDWIDTH_SRC) yu, yv;
XF_PTNAME(XF_8UP) r, g, b;
int8_t y1, y2, u, v;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
unsigned long long int idx = 0;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
#pragma HLS LOOP_FLATTEN off
// clang-format on
ColLoop:
for (int j = 0; j < width; j += 2) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
yu = _yuyv.read(i * width + j);
yv = _yuyv.read(i * width + j + 1);
u = (uint8_t)yu.range(15, 8) - 128;
y1 = (yu.range(7, 0) > 16) ? ((uint8_t)yu.range(7, 0) - 16) : 0;
v = (uint8_t)yv.range(15, 8) - 128;
y2 = (yv.range(7, 0) > 16) ? ((uint8_t)yv.range(7, 0) - 16) : 0;
V2Rtemp = v * (short int)V2R;
U2Gtemp = (short int)U2G * u;
V2Gtemp = (short int)V2G * v;
U2Btemp = u * (short int)U2B;
r = CalculateR(y1, V2Rtemp, v);
g = CalculateG(y1, U2Gtemp, V2Gtemp);
b = CalculateB(y1, U2Btemp, u);
rgba = ((ap_uint24_t)b) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)r << 16);
_rgba.write(idx++, rgba);
r = CalculateR(y2, V2Rtemp, v);
g = CalculateG(y2, U2Gtemp, V2Gtemp);
b = CalculateB(y2, U2Btemp, u);
rgba = ((ap_uint24_t)b) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)r << 16);
_rgba.write(idx++, rgba);
}
}
}
// Yuyv2Rgba
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFYuyv2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernYuyv2bgr<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC))>(
_src, _dst, height, width);
} else {
KernYuyv2bgr_ro<SRC_T, DST_T, ROWS, COLS, NPC, XF_CHANNELS(DST_T, NPC), WORDWIDTH_SRC, WORDWIDTH_DST,
((COLS >> 1) >> XF_BITSHIFT(NPC)), ((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _dst, height,
width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void yuyv2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " YUYV plane Type must be XF_16UC1");
assert((DST_T == XF_8UC3) && " BGR plane Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " YUYV image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "YUYV and BGR planes dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xFYuyv2bgr<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(_src, _dst, _src.rows,
_src.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void KernUyvy2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _uyvy,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _rgba,
uint16_t height,
uint16_t width) {
XF_SNAME(WORDWIDTH_DST) rgba;
XF_SNAME(WORDWIDTH_SRC) uyvy;
XF_SNAME(WORDWIDTH_SRC) uy;
XF_SNAME(WORDWIDTH_SRC) vy;
unsigned long long int idx = 0;
XF_PTNAME(XF_8UP) r, g, b;
int8_t y1, y2, u, v;
int32_t V2Rtemp, U2Gtemp, V2Gtemp, U2Btemp;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
#pragma HLS LOOP_FLATTEN off
// clang-format on
ColLoop:
for (int j = 0; j < width; j += 2) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=TC max=TC
#pragma HLS pipeline
// clang-format on
// uyvy = _uyvy.read();
uy = _uyvy.read(i * width + j);
vy = _uyvy.read(i * width + j + 1);
u = (uint8_t)uy.range(7, 0) - 128;
/* if(uyvy.range(15,8) > 16)
y1 = (uint8_t)uyvy.range(15,8) - 16;
else
y1 = 0;*/
y1 = (uy.range(15, 8) > 16) ? ((uint8_t)uy.range(15, 8) - 16) : 0;
v = (uint8_t)vy.range(7, 0) - 128;
/* if(uyvy.range(31,24) > 16)
y2 = ((uint8_t)uyvy.range(31,24) - 16);
else
y2 = 0;*/
y2 = (vy.range(15, 8) > 16) ? ((uint8_t)vy.range(15, 8) - 16) : 0;
V2Rtemp = v * (short int)V2R;
U2Gtemp = (short int)U2G * u;
V2Gtemp = (short int)V2G * v;
U2Btemp = u * (short int)U2B;
r = CalculateR(y1, V2Rtemp, v);
g = CalculateG(y1, U2Gtemp, V2Gtemp);
b = CalculateB(y1, U2Btemp, u);
rgba = ((ap_uint24_t)b) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)r << 16);
_rgba.write(idx, rgba);
idx++;
r = CalculateR(y2, V2Rtemp, v);
g = CalculateG(y2, U2Gtemp, V2Gtemp);
b = CalculateB(y2, U2Btemp, u);
rgba = ((ap_uint24_t)b) | ((ap_uint24_t)g << 8) | ((ap_uint24_t)r << 16);
_rgba.write(idx, rgba);
idx++;
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST>
void xFUyvy2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == 1) {
KernUyvy2bgr<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC))>(
_src, _dst, height, width);
} else {
KernUyvy2bgr_ro<SRC_T, DST_T, ROWS, COLS, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, ((COLS >> 1) >> XF_BITSHIFT(NPC)),
((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _dst, height, width);
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void uyvy2bgr(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& _dst) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1");
assert((DST_T == XF_8UC3) && " BGR plane Type must be XF_8UC3");
assert(((_src.rows <= ROWS) && (_src.cols <= COLS)) && " UYVY image rows and cols should be less than ROWS, COLS");
assert(((_dst.cols == _src.cols) && (_dst.rows == _src.rows)) && "UYVY and BGR planes dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xFUyvy2bgr<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC)>(_src, _dst, _src.rows,
_src.cols);
}
// Yuyv2Nv12
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV>
void xFYuyv2Nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv_image,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == XF_NPPC1) {
KernYuyv2Nv21<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV,
((COLS >> 1) >> XF_BITSHIFT(NPC))>(_src, _y_image, _uv_image, height, width);
} else {
KernYuyv2Nv21_ro<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV,
((COLS >> 1) >> XF_BITSHIFT(NPC)), ((1 << XF_BITSHIFT(NPC)) >> 1)>(_src, _y_image, _uv_image,
height, width);
}
}
template <int SRC_T, int Y_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void yuyv2nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " YUYV plane Type must be XF_16UC1");
assert((Y_T == XF_8UC1) && " Y plane Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " VU image Type must be XF_8UC2");
assert(((_y_image.rows <= ROWS) && (_y_image.cols <= COLS)) &&
" Y image rows and cols should be less than ROWS, COLS");
assert(((_y_image.cols == (_uv_image.cols << 1)) && (_y_image.rows == (_uv_image.rows << 1))) &&
"Y and UV planes dimensions mismatch");
assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and YUYV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same ");
}
#endif
xFYuyv2Nv21<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(Y_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV)>(_src, _y_image, _uv_image, _src.rows, _src.cols);
}
// Yuyv2nv21
// Uyvy2Nv21
template <int SRC_T,
int Y_T,
int UV_T,
int ROWS,
int COLS,
int NPC,
int NPC_UV,
int WORDWIDTH_SRC,
int WORDWIDTH_Y,
int WORDWIDTH_UV>
void xFUyvy2Nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& uyvy,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& y_plane,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& uv_plane,
uint16_t height,
uint16_t width) {
width = width >> XF_BITSHIFT(NPC);
if (NPC == XF_NPPC1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
KernUyvy2Nv21<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV,
((COLS >> 1) >> XF_BITSHIFT(NPC))>(uyvy, y_plane, uv_plane, height, width);
} else {
KernUyvy2Nv21_ro<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, WORDWIDTH_SRC, WORDWIDTH_Y, WORDWIDTH_UV,
((COLS >> 1) >> XF_BITSHIFT(NPC)), ((1 << NPC) >> 1)>(uyvy, y_plane, uv_plane, height, width);
}
}
template <int SRC_T, int Y_T, int UV_T, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void uyvy2nv21(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& _src,
xf::cv::Mat<Y_T, ROWS, COLS, NPC>& _y_image,
xf::cv::Mat<UV_T, ROWS / 2, COLS / 2, NPC_UV>& _uv_image) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " UYVY plane Type must be XF_16UC1");
assert((Y_T == XF_8UC1) && " Y plane Type must be XF_8UC1");
assert((UV_T == XF_8UC2) && " UV image Type must be XF_8UC2");
assert(((_y_image.rows <= ROWS) && (_y_image.cols <= COLS)) &&
" Y image rows and cols should be less than ROWS, COLS");
assert(((_y_image.cols == (_uv_image.cols << 1)) && (_y_image.rows == (_uv_image.rows << 1))) &&
"Y and UV planes dimensions mismatch");
assert(((_y_image.cols == _src.cols) && (_y_image.rows == _src.rows)) && "Y and UYVY planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same ");
}
#endif
xFUyvy2Nv21<SRC_T, Y_T, UV_T, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(Y_T, NPC),
XF_WORDWIDTH(UV_T, NPC_UV)>(_src, _y_image, _uv_image, _src.rows, _src.cols);
}
template <int SRC_Y, int SRC_UV, int ROWS, int COLS, int NPC, int NPC_UV, int WORDWIDTH_Y, int WORDWIDTH_UV, int TC>
void xfnv122nv21(xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& out_y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& out_uv,
unsigned short int height,
unsigned short int width) {
// assert();
XF_SNAME(WORDWIDTH_Y) yPacked = 0;
XF_SNAME(WORDWIDTH_UV) uvPacked[8], vuPacked[8], packed_Data = 0, val_dst = 0;
unsigned long long int y_idx = 0, uv_idx = 0;
unsigned long long int outUV_idx = 0;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j<width>> XF_BITSHIFT(NPC); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
yPacked = _y.read(i * (width >> XF_BITSHIFT(NPC)) + j);
out_y.write(y_idx++, yPacked);
if (i < _uv.rows && j < (_uv.cols >> XF_BITSHIFT(NPC_UV))) {
packed_Data = _uv.read(uv_idx++);
}
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC_UV)); l++) {
// clang-format off
#pragma HLS unroll
// clang-format on
uvPacked[l] = packed_Data(l * 16 + 15, l * 16);
vuPacked[l].range(15, 8) = uvPacked[l].range(7, 0);
vuPacked[l].range(7, 0) = uvPacked[l].range(15, 8);
val_dst.range(l * 16 + 15, l * 16) = vuPacked[l];
}
if (i < _uv.rows && j < (_uv.cols >> XF_BITSHIFT(NPC_UV))) {
out_uv.write(outUV_idx++, val_dst);
}
}
}
}
template <int SRC_Y, int SRC_UV, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv122nv21(xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& out_y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& out_uv) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_Y == XF_8UC1) && " Y plane Type must be XF_8UC1");
assert((SRC_UV == XF_8UC2) && " UV image Type must be XF_8UC2");
assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and UV planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same ");
}
#endif
xfnv122nv21<SRC_Y, SRC_UV, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_Y, NPC), XF_WORDWIDTH(SRC_UV, NPC_UV),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_y, _uv, out_y, out_uv, _y.rows, _y.cols);
}
template <int SRC_Y, int SRC_UV, int ROWS, int COLS, int NPC = 1, int NPC_UV = 1>
void nv212nv12(xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& _y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& _uv,
xf::cv::Mat<SRC_Y, ROWS, COLS, NPC>& out_y,
xf::cv::Mat<SRC_UV, ROWS / 2, COLS / 2, NPC_UV>& out_uv) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_Y == XF_8UC1) && " Y plane Type must be XF_8UC1");
assert((SRC_UV == XF_8UC2) && " VU image Type must be XF_8UC2");
assert(((_y.rows <= ROWS) && (_y.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((_y.cols == (_uv.cols << 1)) && (_y.rows == (_uv.rows << 1))) && "Y and VU planes dimensions mismatch");
if (NPC != XF_NPPC1) {
assert((NPC == (NPC_UV * 2)) &&
" NPC of Y plane must be double the UV "
"plane for multipixel parallelism ");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
} else {
assert((NPC == NPC_UV == XF_NPPC1) && " Both NPC, NPC_UV values must be same ");
}
#endif
xfnv122nv21<SRC_Y, SRC_UV, ROWS, COLS, NPC, NPC_UV, XF_WORDWIDTH(SRC_Y, NPC), XF_WORDWIDTH(SRC_UV, NPC_UV),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(_y, _uv, out_y, out_uv, _y.rows, _y.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void xfuyvy2yuyv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& uyvy,
xf::cv::Mat<DST_T, ROWS, COLS, NPC>& yuyv,
unsigned short int height,
unsigned short int width) {
// assert();
XF_SNAME(WORDWIDTH_SRC) uy = 0;
XF_SNAME(WORDWIDTH_DST) yu[8], uyPacked[8], packed_Data = 0, val_dst = 0;
unsigned long long int y_idx = 0, uv_idx = 0;
unsigned long long int outUV_idx = 0;
RowLoop:
for (int i = 0; i < height; i++) {
// clang-format off
#pragma HLS LOOP_FLATTEN off
#pragma HLS LOOP_TRIPCOUNT min=ROWS max=ROWS
// clang-format on
ColLoop:
for (int j = 0; j < (width >> XF_BITSHIFT(NPC)); j++) {
// clang-format off
#pragma HLS pipeline
#pragma HLS LOOP_TRIPCOUNT min=COLS max=COLS
// clang-format on
uy = uyvy.read(i * (width >> XF_BITSHIFT(NPC)) + j);
for (int l = 0; l < (XF_NPIXPERCYCLE(NPC)); l++) {
// clang-format off
#pragma HLS unroll
// clang-format on
uyPacked[l] = uy(l * 16 + 15, l * 16);
yu[l].range(15, 8) = uyPacked[l].range(7, 0);
yu[l].range(7, 0) = uyPacked[l].range(15, 8);
val_dst.range(l * 16 + 15, l * 16) = yu[l];
}
yuyv.write(outUV_idx++, val_dst);
}
}
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void uyvy2yuyv(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& uyvy, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& yuyv) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " UYVY image Type must be XF_16UC1");
assert((DST_T == XF_16UC1) && " YUYV image Type must be XF_16UC1");
assert(((yuyv.rows <= ROWS) && (yuyv.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((yuyv.cols == uyvy.cols) && (yuyv.rows == uyvy.rows)) && "YUYV and UYVY plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xfuyvy2yuyv<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(uyvy, yuyv, uyvy.rows, uyvy.cols);
}
template <int SRC_T, int DST_T, int ROWS, int COLS, int NPC = 1>
void yuyv2uyvy(xf::cv::Mat<SRC_T, ROWS, COLS, NPC>& yuyv, xf::cv::Mat<DST_T, ROWS, COLS, NPC>& uyvy) {
// clang-format off
#pragma HLS INLINE OFF
// clang-format on
#ifndef __SYNTHESIS__
assert((SRC_T == XF_16UC1) && " YUYV image Type must be XF_16UC1");
assert((DST_T == XF_16UC1) && " UYVY image Type must be XF_16UC1");
assert(((yuyv.rows <= ROWS) && (yuyv.cols <= COLS)) && " Y image rows and cols should be less than ROWS, COLS");
assert(((yuyv.cols == uyvy.cols) && (yuyv.rows == uyvy.rows)) && "YUYV and UYVY plane dimensions mismatch");
assert(((NPC == XF_NPPC1) || (NPC == XF_NPPC2) || (NPC == XF_NPPC4) || (NPC == XF_NPPC8)) &&
" 1,2,4,8 pixel parallelism is supported ");
#endif
xfuyvy2yuyv<SRC_T, DST_T, ROWS, COLS, NPC, XF_WORDWIDTH(SRC_T, NPC), XF_WORDWIDTH(DST_T, NPC),
(ROWS * (COLS >> (XF_NPIXPERCYCLE(NPC))))>(yuyv, uyvy, uyvy.rows, uyvy.cols);
}
} // namespace cv
} // namespace xf
#endif