Program Listing for File xf_structs.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/common/xf_structs.hpp
)
/*
* Copyright 2020 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _XF_STRUCTS_H_
#define _XF_STRUCTS_H_
#ifndef __cplusplus
#error C++ is needed to use this file!
#endif
#ifndef __SYNTHESIS__
#include <iostream>
#endif
#include "ap_axi_sdata.h"
#include "hls_stream.h"
#include "xf_types.hpp"
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <type_traits>
namespace xf {
namespace cv {
template <typename T>
T float2ap_uint(float val) {
T* val_out = (T*)(&val);
return *val_out;
}
template <typename T>
float ap_uint2float(T val) {
float* val_out = (float*)(&val);
return *val_out;
}
//----------------------------------------------------------------------------------------------------//
// LOCAL STEREO BLOCK MATCHING UTILITY
//----------------------------------------------------------------------------------------------------//
template <int WSIZE, int NDISP, int NDISP_UNIT>
class xFSBMState {
public:
// pre-filtering (normalization of input images)
int preFilterType; // =HLS_STEREO_BM_XSOBEL_TEST
int preFilterSize; // averaging window size: ~5x5..21x21
int preFilterCap; // the output of pre-filtering is clipped by
// [-preFilterCap,preFilterCap]
// correspondence using Sum of Absolute Difference (SAD)
int SADWindowSize; // ~5x5..21x21 // defined in macro
int minDisparity; // minimum disparity (can be negative)
int numberOfDisparities; // maximum disparity - minimum disparity (> 0)
// post-filtering
int textureThreshold; // the disparity is only computed for pixels
// with textured enough neighborhood
int uniquenessRatio; // accept the computed disparity d* only if
// SAD(d) >= SAD(d*)*(1 + uniquenessRatio/100.)
// for any d != d*+/-1 within the search range.
// int speckleWindowSize; // disparity variation window
// int speckleRange; // acceptable range of variation in window
int ndisp_unit;
int sweepFactor;
int remainder;
xFSBMState() {
preFilterType = XF_STEREO_PREFILTER_SOBEL_TYPE; // Default Sobel filter
preFilterSize = WSIZE;
preFilterCap = 31;
SADWindowSize = WSIZE;
minDisparity = 0;
numberOfDisparities = NDISP;
textureThreshold = 10;
uniquenessRatio = 15;
sweepFactor = (NDISP / NDISP_UNIT) + ((NDISP % NDISP_UNIT) != 0);
ndisp_unit = NDISP_UNIT;
remainder = NDISP_UNIT * sweepFactor - NDISP;
}
};
//----------------------------------------------------------------------------------------------------//
//----------------------------------------------------------------------------------------------------//
// Template class of Point_
//----------------------------------------------------------------------------------------------------//
template <typename T>
class Point_ {
public:
Point_();
Point_(T _x, T _y);
Point_(const Point_& pt);
~Point_();
T x, y;
};
/* Member functions of Point_ class */
template <typename T>
inline Point_<T>::Point_() {}
template <typename T>
inline Point_<T>::Point_(T _x, T _y) : x(_x), y(_y) {}
template <typename T>
inline Point_<T>::Point_(const Point_<T>& pt) : x(pt.x), y(pt.y) {}
template <typename T>
inline Point_<T>::~Point_() {}
typedef Point_<int> Point;
//----------------------------------------------------------------------------------------------------//
//----------------------------------------------------------------------------------------------------//
// Template class of Size_
//----------------------------------------------------------------------------------------------------//
template <typename T>
class Size_ {
public:
Size_();
Size_(T _width, T _height);
Size_(const Size_<T>& sz);
Size_(const Point_<T>& pt);
T area();
~Size_();
T width, height;
};
/* Member functions of Size_ class */
template <typename T>
inline Size_<T>::Size_() {}
template <typename T>
inline Size_<T>::Size_(T _width, T _height) : width(_width), height(_height) {}
template <typename T>
inline Size_<T>::Size_(const Size_<T>& sz) : width(sz.width), height(sz.height) {}
template <typename T>
inline Size_<T>::Size_(const Point_<T>& pt) : width(pt.x), height(pt.y) {}
template <typename T>
inline T Size_<T>::area() {
return width * height;
}
template <typename T>
inline Size_<T>::~Size_() {}
typedef Size_<int> Size;
//----------------------------------------------------------------------------------------------------//
//----------------------------------------------------------------------------------------------------//
// Template class of Rect_
//----------------------------------------------------------------------------------------------------//
template <typename T>
class Rect_ {
public:
Rect_();
Rect_(T _x, T _y, T _width, T _height);
Rect_(const Rect_& rect);
Rect_(const Point_<T>& pt, const Size_<T>& sz);
T area();
Size_<T> size();
Point_<T> tl(); // top-left point(inside);
Point_<T> tr(); // top-right point(outside);
Point_<T> bl(); // bottom-left point(outside);
Point_<T> br(); // bottom-right point(outside);
bool bContains(const Point_<T>& pt);
~Rect_();
T x, y, width, height;
};
/* Member functions of Rect_ class */
template <typename T>
inline Rect_<T>::Rect_() {}
template <typename T>
inline Rect_<T>::Rect_(T _x, T _y, T _width, T _height) : x(_x), y(_y), width(_width), height(_height) {}
template <typename T>
inline Rect_<T>::Rect_(const Rect_<T>& rect) : x(rect.x), y(rect.y), width(rect.width), height(rect.height) {}
template <typename T>
inline Rect_<T>::Rect_(const Point_<T>& pt, const Size_<T>& sz)
: x(pt.x), y(pt.y), width(sz.width), height(sz.height) {}
template <typename T>
inline T Rect_<T>::area() {
return width * height;
}
template <typename T>
inline Size_<T> Rect_<T>::size() {
return Size_<T>(width, height);
}
template <typename T>
inline Point_<T> Rect_<T>::tl() {
return Point_<T>(x, y);
}
template <typename T>
inline Point_<T> Rect_<T>::tr() {
return Point_<T>(x + width, y);
}
template <typename T>
inline Point_<T> Rect_<T>::bl() {
return Point_<T>(x, y + height);
}
template <typename T>
inline Point_<T> Rect_<T>::br() {
return Point_<T>(x + width, y + height);
}
template <typename T>
inline bool Rect_<T>::bContains(const Point_<T>& pt) {
return (pt.x >= x && pt.x < x + width && pt.y >= y && pt.y < y + height);
}
template <typename T>
inline Rect_<T>::~Rect_() {}
typedef Rect_<int> Rect;
//----------------------------------------------------------------------------------------------------//
//----------------------------------------------------------------------------------------------------//
// Template class of Scalar
//----------------------------------------------------------------------------------------------------//
template <int N, typename T>
class Scalar {
public:
Scalar() {
// clang-format off
#pragma HLS INLINE
#pragma HLS ARRAY_PARTITION variable=val dim=1 complete
// clang-format on
assert(N > 0);
}
Scalar(T v0) {
// clang-format off
#pragma HLS INLINE
#pragma HLS ARRAY_PARTITION variable=val dim=1 complete
// clang-format on
assert(N >= 1 && "Scalar must have enough channels for constructor.");
val[0] = v0;
}
Scalar(T v0, T v1) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=val dim=1 complete
#pragma HLS INLINE
// clang-format on
assert(N >= 2 && "Scalar must have enough channels for constructor.");
val[0] = v0;
val[1] = v1;
}
Scalar(T v0, T v1, T v2) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=val dim=1 complete
#pragma HLS INLINE
// clang-format on
assert(N >= 3 && "Scalar must have enough channels for constructor.");
val[0] = v0;
val[1] = v1;
val[2] = v2;
}
Scalar(T v0, T v1, T v2, T v3) {
// clang-format off
#pragma HLS ARRAY_PARTITION variable=val dim=1 complete
#pragma HLS INLINE
// clang-format on
assert(N >= 4 && "Scalar must have enough channels for constructor.");
val[0] = v0;
val[1] = v1;
val[2] = v2;
val[3] = v3;
}
void operator=(T value);
Scalar<N, T> operator+(T value);
Scalar<N, T> operator+(Scalar<N, T> s);
Scalar<N, T> operator-(T value);
Scalar<N, T> operator-(Scalar<N, T> s);
Scalar<N, T> operator*(T value);
Scalar<N, T> operator*(Scalar<N, T> s);
Scalar<N, T> operator/(T value);
Scalar<N, T> operator/(Scalar<N, T> s);
T val[N];
};
template <int N, typename T>
void Scalar<N, T>::operator=(T value) {
// clang-format off
#pragma HLS inline
// clang-format on
for (int k = 0; k < N; k++) {
// clang-format off
#pragma HLS unroll
// clang-format on
val[k] = value;
}
}
template <int N, typename T>
Scalar<N, T> Scalar<N, T>::operator+(T value) {
// clang-format off
#pragma HLS inline
// clang-format on
Scalar<N, T> res;
for (int k = 0; k < N; k++) {
// clang-format off
#pragma HLS unroll
// clang-format on
res.val[k] = val[k] + value;
}
return res;
}
template <int N, typename T>
Scalar<N, T> Scalar<N, T>::operator+(Scalar<N, T> s) {
// clang-format off
#pragma HLS inline
// clang-format on
Scalar<N, T> res;
for (int k = 0; k < N; k++) {
// clang-format off
#pragma HLS unroll
// clang-format on
res.val[k] = val[k] + s.val[k];
}
return res;
}
template <int N, typename T>
Scalar<N, T> Scalar<N, T>::operator-(T value) {
// clang-format off
#pragma HLS inline
// clang-format on
Scalar<N, T> res;
for (int k = 0; k < N; k++) {
// clang-format off
#pragma HLS unroll
// clang-format on
res.val[k] = val[k] - value;
}
return res;
}
template <int N, typename T>
Scalar<N, T> Scalar<N, T>::operator-(Scalar<N, T> s) {
// clang-format off
#pragma HLS inline
// clang-format on
Scalar<N, T> res;
for (int k = 0; k < N; k++) {
// clang-format off
#pragma HLS unroll
// clang-format on
res.val[k] = val[k] - s.val[k];
}
return res;
}
template <int N, typename T>
Scalar<N, T> Scalar<N, T>::operator*(T value) {
// clang-format off
#pragma HLS inline
// clang-format on
Scalar<N, T> res;
for (int k = 0; k < N; k++) {
// clang-format off
#pragma HLS unroll
// clang-format on
res.val[k] = val[k] * value;
}
return res;
}
template <int N, typename T>
Scalar<N, T> Scalar<N, T>::operator*(Scalar<N, T> s) {
// clang-format off
#pragma HLS inline
// clang-format on
Scalar<N, T> res;
for (int k = 0; k < N; k++) {
// clang-format off
#pragma HLS unroll
// clang-format on
res.val[k] = val[k] * s.val[k];
}
return res;
}
template <int N, typename T>
Scalar<N, T> Scalar<N, T>::operator/(T value) {
// clang-format off
#pragma HLS inline
// clang-format on
Scalar<N, T> res;
for (int k = 0; k < N; k++) {
// clang-format off
#pragma HLS unroll
// clang-format on
res.val[k] = val[k] / value;
}
return res;
}
template <int N, typename T>
Scalar<N, T> Scalar<N, T>::operator/(Scalar<N, T> s) {
// clang-format off
#pragma HLS inline
// clang-format on
Scalar<N, T> res;
for (int k = 0; k < N; k++) {
// clang-format off
#pragma HLS unroll
// clang-format on
res.val[k] = val[k] / s.val[k];
}
return res;
}
//----------------------------------------------------------------------------------------------------//
//----------------------------------------------------------------------------------------------------//
// Template class of Mat
//----------------------------------------------------------------------------------------------------//
#if defined(__SYNTHESIS__) && !defined(__SDA_MEM_MAP__)
static constexpr int _XFCVDEPTH_DEFAULT = 2;
#else
static constexpr int _XFCVDEPTH_DEFAULT = -1;
#endif
template <int T, int ROWS, int COLS, int NPC, int XFCVDEPTH = _XFCVDEPTH_DEFAULT>
class Mat {
public:
unsigned char allocatedFlag; // flag to mark memory allocation in this class
int rows, cols, size; // actual image size
// int cols_align_npc; // cols
// multiple
// of
// NPC
typedef XF_TNAME(T, NPC) DATATYPE;
using _DATATTYPE = typename std::conditional<
(XFCVDEPTH < 0),
DATATYPE*, // Case of Memory Mapped pointer
typename std::conditional< // Case of Stream
(XFCVDEPTH == 0),
hls::stream<DATATYPE>, // Case of default Dtream depth or user can override outside
hls::stream<DATATYPE, XFCVDEPTH> // Case of Stream depth specified
>::type>::type;
_DATATTYPE data;
Mat(); // default constructor
Mat(Size _sz);
Mat(int _rows, int _cols);
Mat(int _size, int _rows, int _cols);
Mat(int _rows, int _cols, void* _data);
Mat(const Mat&); // copy constructor
~Mat();
Mat& operator=(const Mat&); // Assignment operator
// XF_TNAME(T, XF_NPPC1) operator() (unsigned int r, unsigned int c);
// XF_CTUNAME(T, NPC) operator() (unsigned int r, unsigned int c, unsigned
// int ch);
template <int D = XFCVDEPTH, typename std::enable_if<(D < 0)>::type* = nullptr>
void alloc_data() {
#ifndef __SYNTHESIS__
data = (DATATYPE*)malloc(size * sizeof(DATATYPE));
if (data == NULL) {
fprintf(stderr, "\nFailed to allocate memory\n");
} else {
allocatedFlag = 1;
}
#endif
}
template <int D = XFCVDEPTH, typename std::enable_if<(D >= 0)>::type* = nullptr>
void alloc_data() {
// This is a stream
}
template <int D = XFCVDEPTH, typename std::enable_if<(D < 0)>::type* = nullptr>
void free_data() {
if (data != NULL) {
#ifndef __SYNTHESIS__
free(data);
#endif
}
}
template <int D = XFCVDEPTH, typename std::enable_if<(D >= 0)>::type* = nullptr>
void free_data() {}
template <int D = XFCVDEPTH, typename std::enable_if<(D < 0)>::type* = nullptr>
void copyData(const Mat& src) {
for (int i = 0; i < (rows * ((cols + NPC - 1) >> XF_BITSHIFT(NPC))); ++i) {
data[i] = src.data[i];
}
}
template <int D = XFCVDEPTH, typename std::enable_if<(D >= 0)>::type* = nullptr>
void copyData(const Mat& src) {
// This is a stream
assert(0);
}
template <int D = XFCVDEPTH, typename std::enable_if<(D < 0)>::type* = nullptr>
void assignDataPtr(void* _data) {
data = (DATATYPE*)_data;
}
template <int D = XFCVDEPTH, typename std::enable_if<(D >= 0)>::type* = nullptr>
void assignDataPtr(void* _data) {
// This is a stream
assert(0);
}
template <int D = XFCVDEPTH, typename std::enable_if<(D < 0)>::type* = nullptr>
XF_TNAME(T, NPC)
read(int index) {
return data[index];
}
template <int D = XFCVDEPTH, typename std::enable_if<(D >= 0)>::type* = nullptr>
XF_TNAME(T, NPC)
read(int index) {
return data.read();
}
float read_float(int index);
template <int D = XFCVDEPTH, typename std::enable_if<(D < 0)>::type* = nullptr>
void write(int index, XF_TNAME(T, NPC) val) {
data[index] = val;
}
template <int D = XFCVDEPTH, typename std::enable_if<(D >= 0)>::type* = nullptr>
void write(int index, XF_TNAME(T, NPC) val) {
data.write(val);
}
void write_float(int index, float val);
template <int D = XFCVDEPTH, typename std::enable_if<(D >= 0)>::type* = nullptr>
void init(int _rows, int _cols, void* _data) {
init(_rows, _cols);
copyTo(_data);
}
template <int D = XFCVDEPTH, typename std::enable_if<(D < 0)>::type* = nullptr>
void init(int _rows, int _cols, void* _data) {
init(_rows, _cols, false);
assignDataPtr(_data);
}
void init(int _rows, int _cols, bool allocate = true);
void copyTo(void* fromData);
unsigned char* copyFrom();
const int type() const;
const int depth() const;
const int channels() const;
template <int DST_T>
void convertTo(Mat<DST_T, ROWS, COLS, NPC, XFCVDEPTH>& dst, int otype, double alpha = 1, double beta = 0);
};
template <int T, int ROWS, int COLS, int NPC, int XFCVDEPTH>
const int Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::type() const {
// clang-format off
#pragma HLS inline
// clang-format on
return (T);
}
template <int T, int ROWS, int COLS, int NPC, int XFCVDEPTH>
const int Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::depth() const {
// clang-format off
#pragma HLS inline
// clang-format on
return XF_DTPIXELDEPTH(T, NPC);
}
template <int T, int ROWS, int COLS, int NPC, int XFCVDEPTH>
const int Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::channels() const {
// clang-format off
#pragma HLS inline
// clang-format on
return XF_CHANNELS(T, NPC);
}
template <int T, int ROWS, int COLS, int NPPC, int XFCVDEPTH>
inline void Mat<T, ROWS, COLS, NPPC, XFCVDEPTH>::init(int _rows, int _cols, bool allocate) {
// clang-format off
#pragma HLS inline
// clang-format on
assert((_rows > 0) && (_rows <= ROWS) && (_cols > 0) && (_cols <= COLS) &&
"The number of rows and columns must be less than the template "
"arguments.");
rows = _rows;
cols = _cols;
size = _rows * ((_cols + NPPC - 1) >> XF_BITSHIFT(NPPC));
if (allocate) {
alloc_data();
}
}
/*Copy constructor definition*/
template <int T, int ROWS, int COLS, int NPC, int XFCVDEPTH>
inline Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::Mat(const Mat& src) {
init(src.rows, src.cols);
#ifndef __SYNTHESIS__
copyData(src);
#endif
}
/*Assignment operator definition*/
template <int T, int ROWS, int COLS, int NPC, int XFCVDEPTH>
inline Mat<T, ROWS, COLS, NPC, XFCVDEPTH>& Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::operator=(const Mat& src) {
if (this == &src) {
return *this; // For self-assignment cases
}
// Cleaning up old data memory if any
free_data();
allocatedFlag = 0;
init(src.rows, src.cols);
copyData(src);
return *this;
}
template <int T, int ROWS, int COLS, int NPPC, int XFCVDEPTH>
inline Mat<T, ROWS, COLS, NPPC, XFCVDEPTH>::Mat() {
// clang-format off
#pragma HLS inline
// clang-format on
init(ROWS, COLS);
}
template <int T, int ROWS, int COLS, int NPPC, int XFCVDEPTH>
inline Mat<T, ROWS, COLS, NPPC, XFCVDEPTH>::Mat(int _rows, int _cols, void* _data) {
// clang-format off
#pragma HLS inline
// clang-format on
init(_rows, _cols, _data);
}
template <int T, int ROWS, int COLS, int NPPC, int XFCVDEPTH>
inline Mat<T, ROWS, COLS, NPPC, XFCVDEPTH>::Mat(int _rows, int _cols) {
// clang-format off
#pragma HLS inline
// clang-format on
init(_rows, _cols);
}
template <int T, int ROWS, int COLS, int NPPC, int XFCVDEPTH>
inline Mat<T, ROWS, COLS, NPPC, XFCVDEPTH>::Mat(Size _sz) {
// clang-format off
#pragma HLS inline
// clang-format on
init(_sz.height, _sz.width);
}
template <int T, int ROWS, int COLS, int NPPC, int XFCVDEPTH>
inline float Mat<T, ROWS, COLS, NPPC, XFCVDEPTH>::read_float(int index) {
union int2float {
unsigned I;
float F;
};
int2float val;
val.I = read(index).to_uint();
return val.F;
}
template <int T, int ROWS, int COLS, int NPPC, int XFCVDEPTH>
inline void Mat<T, ROWS, COLS, NPPC, XFCVDEPTH>::write_float(int index, float float_val) {
float val = float_val;
ap_uint<32>* val_out = (ap_uint<32>*)(&val);
write(index, *val_out);
}
template <int T, int ROWS, int COLS, int NPPC, int XFCVDEPTH>
inline void Mat<T, ROWS, COLS, NPPC, XFCVDEPTH>::copyTo(void* _input) {
// clang-format off
#pragma HLS inline
// clang-format on
XF_PTSNAME(T, NPPC)* input = (XF_PTSNAME(T, NPPC)*)_input;
XF_CTUNAME(T, NPPC) in_val;
int packcols = cols >> XF_BITSHIFT(NPPC); // Total columns after considering parallelism
int pixdepth = XF_PIXELWIDTH(T, NPPC); // Total bits that make up the pixel
int bitdepth = pixdepth / XF_CHANNELS(T, NPPC); // Total bits that make up each channel of the pixel
int nppc = XF_NPIXPERCYCLE(NPPC);
for (int r = 0; r < rows; r++) {
for (int c = 0; c < packcols; c++) {
for (int p = 0; p < nppc; p++) {
DATATYPE out_val;
for (int ch = 0; ch < XF_CHANNELS(T, NPPC); ch++) {
if (T == XF_32FC1) {
in_val = float2ap_uint<ap_uint<32> >(
input[XF_CHANNELS(T, NPPC) * ((r * packcols + c) * nppc + p) + ch]);
} else {
in_val = input[XF_CHANNELS(T, NPPC) * ((r * packcols + c) * nppc + p) + ch];
}
out_val.range((p * pixdepth) + (ch + 1) * bitdepth - 1, (p * pixdepth) + ch * bitdepth) = in_val;
}
write((r * packcols + c), out_val);
}
}
}
}
template <int T, int ROWS, int COLS, int NPPC, int XFCVDEPTH>
inline unsigned char* Mat<T, ROWS, COLS, NPPC, XFCVDEPTH>::copyFrom() {
// clang-format off
#pragma HLS inline
// clang-format on
// int packcols = cols >> XF_BITSHIFT(NPPC); //Total columns after
// considering parallelism
int pixdepth = XF_PIXELWIDTH(T, NPPC); // Total bits that make up the pixel
int bitdepth = pixdepth / XF_CHANNELS(T, NPPC); // Total bits that make up each channel of the pixel
int nppc = XF_NPIXPERCYCLE(NPPC);
int cv_nbytes = bitdepth / 8;
unsigned char* value =
(unsigned char*)malloc(rows * cols * (XF_CHANNELS(T, NPPC)) * (sizeof(unsigned char)) * cv_nbytes);
int xf_npc_idx = 0;
int diff_ptr = 0;
int xf_ptr = 0;
int cv_ptr = 0;
for (int r = 0; r < rows; r++) {
for (int c = 0; c < cols; c++) {
DATATYPE in_val = read(xf_ptr);
for (int ch = 0; ch < XF_CHANNELS(T, NPPC); ch++) {
for (int b = 0; b < cv_nbytes; ++b) {
value[cv_ptr++] = in_val.range((xf_npc_idx * pixdepth) + (ch * bitdepth) + (b + 1) * 8 - 1,
(xf_npc_idx * pixdepth) + (ch * bitdepth) + b * 8);
}
}
if (xf_npc_idx == nppc - 1) {
xf_npc_idx = 0;
xf_ptr++;
} else {
xf_npc_idx++;
}
}
}
return (unsigned char*)value;
}
/* Member functions of Mat class */
template <int T, int ROWS, int COLS, int NPPC, int XFCVDEPTH>
template <int DST_T>
inline void Mat<T, ROWS, COLS, NPPC, XFCVDEPTH>::convertTo(Mat<DST_T, ROWS, COLS, NPPC, XFCVDEPTH>& dst,
int otype,
double alpha,
double beta) {
assert((XF_CHANNELS(T, NPPC) == 1) && "Multi-channel images not supported");
XF_TNAME(T, NPPC) tmp_in_pix;
XF_TNAME(DST_T, NPPC) tmp_out_pix;
XF_CTUNAME(T, NPPC) in_pix;
XF_CTUNAME(DST_T, NPPC) out_pix;
int min, max;
if (DST_T == XF_8UC1) {
min = 0;
max = 255;
} else if (DST_T == XF_16UC1) {
min = 0;
max = 65535;
} else if (DST_T == XF_16SC1) {
min = -32768;
max = 32767;
} else if (DST_T == XF_32SC1) {
min = -2147483648;
max = 2147483647;
} else {
assert(1 &&
"Output image type not supoorted. XF_8UC1, XF_16UC1, XF_16SC1 "
"and XF_32SC1 are valid");
}
#define __SATCAST(X) (X >= max ? max : (X < 0 ? 0 : lround(X)))
for (int i = 0; i < rows; i++) {
for (int j = 0; j<cols>> (XF_BITSHIFT(NPPC)); j++) {
int IN_STEP = XF_PIXELDEPTH(XF_DEPTH(T, NPPC));
int OUT_STEP = XF_PIXELDEPTH(XF_DEPTH(DST_T, NPPC));
int in_shift = 0;
int out_shift = 0;
DATATYPE in_val = read((i * cols >> (XF_BITSHIFT(NPPC))) + j);
DATATYPE out_val;
for (int k = 0; k < (1 << (XF_BITSHIFT(NPPC))); k++) {
#ifdef __SDSVHLS__
in_pix = in_val.range(in_shift + IN_STEP - 1, in_shift);
#else
in_pix = in_val.chnl[k][0];
#endif
if (otype == XF_CONVERT_16U_TO_8U || otype == XF_CONVERT_16S_TO_8U || otype == XF_CONVERT_32S_TO_8U ||
otype == XF_CONVERT_32S_TO_16U || otype == XF_CONVERT_32S_TO_16S) {
float tmp = (float)(in_pix * alpha + beta);
in_pix = __SATCAST(tmp);
if (in_pix < min) in_pix = min;
if (in_pix > max) in_pix = max;
tmp_out_pix.range(out_shift + OUT_STEP - 1, out_shift) = in_pix;
} else {
if ((((XF_PTNAME(XF_DEPTH(DST_T, NPPC)))in_pix * alpha) + beta) > max) {
tmp_out_pix.range(out_shift + OUT_STEP - 1, out_shift) = max;
} else if ((((XF_PTNAME(XF_DEPTH(DST_T, NPPC)))in_pix * alpha) + beta) < min) {
tmp_out_pix.range(out_shift + OUT_STEP - 1, out_shift) = min;
} else {
tmp_out_pix.range(out_shift + OUT_STEP - 1, out_shift) = __SATCAST(in_pix * alpha + beta);
}
}
out_pix = tmp_out_pix.range(out_shift + OUT_STEP - 1, out_shift);
#ifdef __SDSVHLS__
out_val.range(out_shift + OUT_STEP - 1, out_shift) = out_pix;
#else
out_val.chnl[k][0] = out_pix;
#endif
in_shift = in_shift + IN_STEP;
out_shift = out_shift + OUT_STEP;
}
write(((i * cols >> (XF_BITSHIFT(NPPC))) + j), out_val);
}
}
}
template <int SRC_T, int ROWS, int COLS, int NPC, int XFCVDEPTH>
Mat<SRC_T, ROWS, COLS, NPC, XFCVDEPTH>::~Mat() {
#ifndef __SYNTHESIS__
if (allocatedFlag == 1) {
free_data();
}
#endif
}
//----------------------------------------------------------------------------------------------------//
// Template metaprogramming implementation of floor log2 [[
template <int N>
struct log2 {
public:
static constexpr int fvalue = 1 + (log2<N / 2>::fvalue); // floor value
static constexpr int cvalue = (N > (1 << fvalue)) ? (fvalue + 1) : fvalue; // ceiling value
};
template <>
struct log2<1> {
public:
static constexpr int fvalue = 0;
static constexpr int cvalue = 0;
};
//]]
/*
* The purpose of this class is to provide an interator over ap_uint<PTR_WIDTH>
* object array such
* that at each iter step one can get data bits corresponding to data to be
* processed per cycle.
*
*/
template <int PTR_WIDTH, int T, int ROWS, int COLS, int NPC, int XFCVDEPTH = _XFCVDEPTH_DEFAULT>
class MMIter : public Mat<T, ROWS, COLS, NPC, XFCVDEPTH> {
public:
using Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::data;
using Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::rows;
using Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::cols;
static constexpr int XF_BITS_PER_CLOCK = XF_PIXELWIDTH(T, NPC) * XF_NPIXPERCYCLE(NPC);
static constexpr int COLS_BOUND_PER_NPC = ((COLS + (XF_NPIXPERCYCLE(NPC) - 1)) >> XF_BITSHIFT(NPC));
static constexpr int LOOPBOUND = ROWS * COLS_BOUND_PER_NPC;
static constexpr int ADDRBOUND = ((ROWS * COLS * XF_PIXELWIDTH(T, NPC)) + (PTR_WIDTH - 1)) >>
(log2<PTR_WIDTH>::cvalue);
static constexpr int LAST_BLK_PXL_WIDTH =
((COLS >> XF_BITSHIFT(NPC)) == COLS_BOUND_PER_NPC)
? XF_BITS_PER_CLOCK
: XF_PIXELWIDTH(T, NPC) * (COLS - ((COLS >> XF_BITSHIFT(NPC)) << XF_BITSHIFT(NPC)));
static int cols_npc_aligned(int cols) { return ((cols + (XF_NPIXPERCYCLE(NPC) - 1)) >> XF_BITSHIFT(NPC)); }
static int last_blk_pxl_width(int cols, int cols_bound_per_npc) {
return ((cols >> XF_BITSHIFT(NPC)) == cols_bound_per_npc)
? XF_BITS_PER_CLOCK
: XF_PIXELWIDTH(T, NPC) * (cols - ((cols >> XF_BITSHIFT(NPC)) << XF_BITSHIFT(NPC)));
}
static int loopbound(int rows, int cols) { return rows * cols_npc_aligned(cols); }
int loopbound() { return rows * cols_npc_aligned(cols); }
static int addrbound(int rows, int cols) {
ap_uint<16> rows_int16 = rows;
ap_uint<16> cols_int16 = cols;
return ((rows_int16 * cols_int16 * XF_PIXELWIDTH(T, NPC)) + (PTR_WIDTH - 1)) >> (log2<PTR_WIDTH>::cvalue);
}
MMIter() : Mat<T, ROWS, COLS, NPC, XFCVDEPTH>() {}
MMIter(int _rows, int _cols) : Mat<T, ROWS, COLS, NPC, XFCVDEPTH>(_rows, _cols) {}
};
#define _MMITER MMIter<PTR_WIDTH, T, ROWS, COLS, NPC, XFCVDEPTH>
template <int PTR_WIDTH, int T, int ROWS, int COLS, int NPC, int XFCVDEPTH = _XFCVDEPTH_DEFAULT>
class MMIterIn : public _MMITER {
public:
using Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::data;
using Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::rows;
using Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::cols;
using _MMITER::XF_BITS_PER_CLOCK;
using _MMITER::ADDRBOUND;
using _MMITER::COLS_BOUND_PER_NPC;
using _MMITER::LAST_BLK_PXL_WIDTH;
using _MMITER::LOOPBOUND;
private:
static void Axi2AxiStream(ap_uint<PTR_WIDTH>* din,
hls::stream<ap_uint<PTR_WIDTH> >& dout,
ap_uint<log2<ADDRBOUND>::cvalue + 1>& addrbound) {
ap_uint<log2<ADDRBOUND>::cvalue + 1> i;
MMIterInLoop1:
for (i = 0; i < addrbound; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=ADDRBOUND
#pragma HLS PIPELINE
// clang-format on
dout.write(din[i]);
}
}
static void Axi2AxiStream(hls::stream<ap_axiu<PTR_WIDTH, 0, 0, 0> >& din,
hls::stream<ap_uint<PTR_WIDTH> >& dout,
ap_uint<log2<ADDRBOUND>::cvalue + 1>& addrbound) {
ap_uint<log2<ADDRBOUND>::cvalue + 1> i;
MMIterInLoop3:
for (i = 0; i < addrbound; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=ADDRBOUND
#pragma HLS PIPELINE
// clang-format on
ap_axiu<PTR_WIDTH, 0, 0, 0> v = din.read();
dout.write(v.data);
}
}
template <int DEPTH>
static void AxiStream2MatStream(hls::stream<ap_uint<PTR_WIDTH> >& din,
hls::stream<ap_uint<XF_BITS_PER_CLOCK>, DEPTH>& dout,
int rows,
int cols_bound_per_npc,
int last_blk_width,
int stride = -1) {
int stride_bound_per_npc, strideBased_last_blk_width;
if (stride == -1) {
stride_bound_per_npc = cols_bound_per_npc;
strideBased_last_blk_width = last_blk_width;
} else {
stride_bound_per_npc = _MMITER::cols_npc_aligned(stride);
strideBased_last_blk_width = _MMITER::last_blk_pxl_width(stride, stride_bound_per_npc);
}
int rd_cnt = 0;
int rem = 0;
ap_uint<PTR_WIDTH> val = 0;
int i;
int j = 0;
int bound = rows * stride_bound_per_npc;
MMIterInLoopRow:
for (i = 0; i < bound; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=LOOPBOUND
#pragma HLS PIPELINE
// clang-format on
bool bLast = (j == (stride_bound_per_npc - 1));
int xf_bits_per_clock = bLast ? strideBased_last_blk_width : XF_BITS_PER_CLOCK;
int ptr_width_minus = bLast ? (PTR_WIDTH - strideBased_last_blk_width) : (PTR_WIDTH - XF_BITS_PER_CLOCK);
int ptr_width_plus = bLast ? (PTR_WIDTH + strideBased_last_blk_width) : (PTR_WIDTH + XF_BITS_PER_CLOCK);
ap_uint<XF_BITS_PER_CLOCK> localbuffer = 0;
if (rem < xf_bits_per_clock) {
if (rem != 0) {
localbuffer.range(rem - 1, 0) = val.range(PTR_WIDTH - 1, (PTR_WIDTH - rem));
}
val = din.read();
rd_cnt++;
localbuffer.range((xf_bits_per_clock - 1), rem) = val.range(((xf_bits_per_clock - 1) - rem), 0);
rem = ptr_width_minus + rem;
} else {
localbuffer = val.range(((ptr_width_plus - 1) - rem), (PTR_WIDTH - rem));
rem = rem - xf_bits_per_clock;
}
bool bLast_width = (j == (cols_bound_per_npc - 1));
ap_uint<XF_BITS_PER_CLOCK> localbuffer2 = 0;
if (bLast_width == 0)
localbuffer2 = localbuffer;
else
localbuffer2 = localbuffer.range(last_blk_width - 1, 0);
if (j < cols_bound_per_npc) dout.write(localbuffer2);
j = (bLast) ? 0 : (j + 1);
}
}
template <int DEPTH>
static void MatStream2Mat(hls::stream<ap_uint<XF_BITS_PER_CLOCK>, DEPTH>& din,
ap_uint<XF_BITS_PER_CLOCK>* dout,
int rows,
int cols_bound_per_npc) {
int i;
int bound = rows * cols_bound_per_npc;
MMIterInLoop2:
for (i = 0; i < bound; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=LOOPBOUND
#pragma HLS PIPELINE
// clang-format on
dout[i] = din.read();
}
}
static void AxiStream2Mat(hls::stream<ap_uint<PTR_WIDTH> >& din,
hls::stream<ap_uint<XF_BITS_PER_CLOCK>, XFCVDEPTH>& dout,
int rows = ROWS,
int cols = COLS,
int stride = -1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
int cols_bound_per_npc = _MMITER::cols_npc_aligned(cols);
int last_blk_width = _MMITER::last_blk_pxl_width(cols, cols_bound_per_npc);
AxiStream2MatStream(din, dout, rows, cols_bound_per_npc, last_blk_width, stride);
}
static void AxiStream2Mat(hls::stream<ap_uint<PTR_WIDTH> >& din,
ap_uint<XF_BITS_PER_CLOCK>* dout,
int rows = ROWS,
int cols = COLS,
int stride = -1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
hls::stream<ap_uint<XF_BITS_PER_CLOCK> > ldata;
int cols_bound_per_npc = _MMITER::cols_npc_aligned(cols);
int last_blk_width = _MMITER::last_blk_pxl_width(cols, cols_bound_per_npc);
AxiStream2MatStream(din, ldata, rows, cols_bound_per_npc, last_blk_width, stride);
MatStream2Mat(ldata, dout, rows, cols_bound_per_npc);
}
static void Axi2Mat(ap_uint<PTR_WIDTH>* din,
hls::stream<ap_uint<XF_BITS_PER_CLOCK>, XFCVDEPTH>& dout,
int rows = ROWS,
int cols = COLS,
int stride = -1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
hls::stream<ap_uint<PTR_WIDTH> > ldata;
int cols_tmp;
if (stride == -1)
cols_tmp = cols;
else
cols_tmp = stride;
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound = _MMITER::addrbound(rows, cols_tmp);
Axi2AxiStream(din, ldata, axibound);
AxiStream2Mat(ldata, dout, rows, cols, stride);
}
static void Axi2Mat(hls::stream<ap_axiu<PTR_WIDTH, 0, 0, 0> >& din,
hls::stream<ap_uint<XF_BITS_PER_CLOCK>, XFCVDEPTH>& dout,
int rows = ROWS,
int cols = COLS) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
hls::stream<ap_uint<PTR_WIDTH> > ldata;
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound = _MMITER::addrbound(rows, cols);
Axi2AxiStream(din, ldata, axibound);
AxiStream2Mat(ldata, dout, rows, cols);
}
static void Axi2Mat(
ap_uint<PTR_WIDTH>* din, ap_uint<XF_BITS_PER_CLOCK>* dout, int rows = ROWS, int cols = COLS, int stride = -1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
hls::stream<ap_uint<PTR_WIDTH> > ldata;
int cols_tmp;
if (stride == -1)
cols_tmp = cols;
else
cols_tmp = stride;
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound = _MMITER::addrbound(rows, cols_tmp);
Axi2AxiStream(din, ldata, axibound);
AxiStream2Mat(ldata, dout, rows, cols, stride);
}
static void Axi2Mat(hls::stream<ap_axiu<PTR_WIDTH, 0, 0, 0> >& din,
ap_uint<XF_BITS_PER_CLOCK>* dout,
int rows = ROWS,
int cols = COLS) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
hls::stream<ap_uint<PTR_WIDTH> > ldata;
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound = _MMITER::addrbound(rows, cols);
Axi2AxiStream(din, ldata, axibound);
AxiStream2Mat(ldata, dout, rows, cols);
}
public:
MMIterIn(ap_uint<PTR_WIDTH>* d) : _MMITER() { Axi2Mat(d, data); }
MMIterIn(ap_uint<PTR_WIDTH>* d, int _rows, int _cols) : _MMITER(_rows, _cols) { Axi2Mat(d, data, rows, cols); }
MMIterIn(hls::stream<ap_axiu<PTR_WIDTH, 0, 0, 0> >& d) : _MMITER() { Axi2Mat(d, data); }
MMIterIn(hls::stream<ap_axiu<PTR_WIDTH, 0, 0, 0> >& d, int _rows, int _cols) : _MMITER(_rows, _cols) {
Axi2Mat(d, data, rows, cols);
}
MMIterIn(hls::stream<ap_uint<PTR_WIDTH> >& d) : _MMITER() { AxiStream2Mat(d, data); }
MMIterIn(hls::stream<ap_uint<PTR_WIDTH> >& d, int _rows, int _cols) : _MMITER(_rows, _cols) {
AxiStream2Mat(d, data, rows, cols);
}
inline static ap_uint<XF_BITS_PER_CLOCK> read(hls::stream<ap_uint<XF_BITS_PER_CLOCK> >& din, int index) {
return din.read();
}
inline static ap_uint<XF_BITS_PER_CLOCK> read(ap_uint<XF_BITS_PER_CLOCK>* din, int index) { return din[index]; }
ap_uint<XF_BITS_PER_CLOCK> read(int index) { return read(data, index); }
static void Array2xfMat(ap_uint<PTR_WIDTH>* srcPtr,
xf::cv::Mat<T, ROWS, COLS, NPC, XFCVDEPTH>& dstMat,
int stride = -1) {
Axi2Mat(srcPtr, dstMat.data, dstMat.rows, dstMat.cols, stride);
}
static void Array2xfMat(
ap_uint<PTR_WIDTH>* srcPtr, ap_uint<XF_BITS_PER_CLOCK>* dstPtr, int rows, int cols, int stride = -1) {
Axi2Mat(srcPtr, dstPtr, rows, cols, stride);
}
static void axiStrm2xfMat(hls::stream<ap_axiu<PTR_WIDTH, 0, 0, 0> >& srcPtr,
xf::cv::Mat<T, ROWS, COLS, NPC, XFCVDEPTH>& dstMat) {
Axi2Mat(srcPtr, dstMat.data, dstMat.rows, dstMat.cols);
}
};
template <int PTR_WIDTH, int T, int ROWS, int COLS, int NPC, int FILLZERO = 1, int XFCVDEPTH = _XFCVDEPTH_DEFAULT>
class MMIterOut : public _MMITER {
public:
using Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::data;
using Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::rows;
using Mat<T, ROWS, COLS, NPC, XFCVDEPTH>::cols;
using _MMITER::XF_BITS_PER_CLOCK;
using _MMITER::ADDRBOUND;
using _MMITER::COLS_BOUND_PER_NPC;
using _MMITER::LAST_BLK_PXL_WIDTH;
using _MMITER::LOOPBOUND;
private:
template <int DEPTH>
static void Mat2MatStream(ap_uint<XF_BITS_PER_CLOCK>* din,
hls::stream<ap_uint<XF_BITS_PER_CLOCK>, DEPTH>& dout,
int rows,
int cols_bound_per_npc) {
int i;
int bound = rows * cols_bound_per_npc;
MMIterOutLoop1:
for (i = 0; i < bound; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=LOOPBOUND
#pragma HLS PIPELINE
// clang-format on
dout.write(din[i]);
}
}
template <int DEPTH>
static void MatStream2AxiStream(hls::stream<ap_uint<XF_BITS_PER_CLOCK>, DEPTH>& din,
hls::stream<ap_uint<PTR_WIDTH> >& dout,
int rows,
int cols_bound_per_npc,
int last_blk_width,
int stride = -1) {
ap_uint<16> strideBased_cols_bound_per_npc;
if (stride == -1 || FILLZERO == 0) {
strideBased_cols_bound_per_npc = cols_bound_per_npc;
} else {
strideBased_cols_bound_per_npc = _MMITER::cols_npc_aligned(stride);
}
ap_uint<log2<PTR_WIDTH>::cvalue + 1> filled = 0; // valid bits remaining in current buffer
ap_uint<PTR_WIDTH> localbuffer = 0;
ap_uint<16> i;
ap_uint<16> rows_int16 = rows;
ap_uint<16> j;
MMIterOutRow:
for (i = 0; i < rows_int16; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=ROWS
// clang-format on
MMIterOutCol:
for (j = 0; j < strideBased_cols_bound_per_npc; j++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=COLS_BOUND_PER_NPC
#pragma HLS PIPELINE
// clang-format on
bool bLast = (j == (cols_bound_per_npc - 1));
int xf_bits_per_clock = bLast ? last_blk_width : XF_BITS_PER_CLOCK;
ap_uint<PTR_WIDTH> val;
if (j < cols_bound_per_npc || FILLZERO == 0)
val = din.read();
else
val = 0;
ap_uint<PTR_WIDTH> tempval = (val << filled);
localbuffer = (localbuffer | tempval);
ap_uint<log2<PTR_WIDTH>::cvalue + 1> filled_next;
if (filled < (PTR_WIDTH - xf_bits_per_clock)) {
filled_next = filled + xf_bits_per_clock;
} else if (j == cols_bound_per_npc - 1 && FILLZERO == 0) {
dout.write(localbuffer);
localbuffer = 0;
filled_next = 0;
} else {
dout.write(localbuffer);
localbuffer = (val >> (PTR_WIDTH - filled));
filled_next = filled + (xf_bits_per_clock - PTR_WIDTH);
}
filled = filled_next;
}
}
if (filled != 0) {
dout.write(localbuffer);
}
}
static void Mat2AxiStream(hls::stream<ap_uint<XF_BITS_PER_CLOCK>, XFCVDEPTH>& din,
hls::stream<ap_uint<PTR_WIDTH> >& dout,
int rows = ROWS,
int cols = COLS,
int stride = -1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
int cols_bound_per_npc = _MMITER::cols_npc_aligned(cols);
int last_blk_width = _MMITER::last_blk_pxl_width(cols, cols_bound_per_npc);
MatStream2AxiStream(din, dout, rows, cols_bound_per_npc, last_blk_width, stride);
}
static void Mat2AxiStream(ap_uint<XF_BITS_PER_CLOCK>* din,
hls::stream<ap_uint<PTR_WIDTH> >& dout,
int rows = ROWS,
int cols = COLS,
int stride = -1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
hls::stream<ap_uint<XF_BITS_PER_CLOCK> > ldata;
int cols_bound_per_npc = _MMITER::cols_npc_aligned(cols);
int last_blk_width = _MMITER::last_blk_pxl_width(cols, cols_bound_per_npc);
Mat2MatStream(din, ldata, rows, cols_bound_per_npc);
MatStream2AxiStream(ldata, dout, rows, cols_bound_per_npc, last_blk_width, stride);
}
static void AxiStream2Axi(hls::stream<ap_uint<PTR_WIDTH> >& din,
ap_uint<PTR_WIDTH>* dout,
ap_uint<log2<ADDRBOUND>::cvalue + 1>& addrbound) {
ap_uint<log2<ADDRBOUND>::cvalue + 1> i;
MMIterOutLoop2:
for (i = 0; i < addrbound; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=ADDRBOUND
#pragma HLS PIPELINE
// clang-format on
dout[i] = din.read();
}
}
static void AxiStream2Axi(hls::stream<ap_uint<PTR_WIDTH> >& din,
hls::stream<ap_axiu<PTR_WIDTH, 0, 0, 0> >& dout,
ap_uint<log2<ADDRBOUND>::cvalue + 1>& addrbound) {
ap_uint<log2<ADDRBOUND>::cvalue + 1> i;
MMIterOutLoop3:
for (i = 0; i < addrbound; i++) {
// clang-format off
#pragma HLS LOOP_TRIPCOUNT min=1 max=ADDRBOUND
#pragma HLS PIPELINE
// clang-format on
ap_axiu<PTR_WIDTH, 0, 0, 0> v;
v.data = din.read();
dout.write(v);
}
}
static void Mat2Axi(hls::stream<ap_uint<XF_BITS_PER_CLOCK>, XFCVDEPTH>& din,
ap_uint<PTR_WIDTH>* dout,
int rows = ROWS,
int cols = COLS,
int stride = -1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
int cols_tmp;
if (stride == -1)
cols_tmp = cols;
else
cols_tmp = stride;
hls::stream<ap_uint<PTR_WIDTH> > ldata;
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound = _MMITER::addrbound(rows, cols_tmp);
Mat2AxiStream(din, ldata, rows, cols, stride);
if (FILLZERO == 1)
AxiStream2Axi(ldata, dout, axibound);
else {
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound_1row = _MMITER::addrbound(1, cols);
ap_uint<log2<ADDRBOUND>::cvalue + 1> offset_1row = _MMITER::addrbound(1, stride);
for (int rowIdx = 0, offset = 0; rowIdx < rows; rowIdx++, offset += offset_1row) {
AxiStream2Axi(ldata, dout + offset, axibound_1row);
}
}
}
static void Mat2Axi(
ap_uint<XF_BITS_PER_CLOCK>* din, ap_uint<PTR_WIDTH>* dout, int rows = ROWS, int cols = COLS, int stride = -1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
int cols_tmp;
if (stride == -1)
cols_tmp = cols;
else
cols_tmp = stride;
hls::stream<ap_uint<PTR_WIDTH> > ldata;
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound = _MMITER::addrbound(rows, cols_tmp);
Mat2AxiStream(din, ldata, rows, cols, stride);
if (FILLZERO == 1)
AxiStream2Axi(ldata, dout, axibound);
else {
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound_1row = _MMITER::addrbound(1, cols);
ap_uint<log2<ADDRBOUND>::cvalue + 1> offset_1row = _MMITER::addrbound(1, stride);
for (int rowIdx = 0, offset = 0; rowIdx < rows; rowIdx++, offset += offset_1row) {
AxiStream2Axi(ldata, dout + offset, axibound_1row);
}
}
}
static void Mat2Axi(hls::stream<ap_uint<XF_BITS_PER_CLOCK>, XFCVDEPTH>& din,
hls::stream<ap_axiu<PTR_WIDTH, 0, 0, 0> >& dout,
int rows = ROWS,
int cols = COLS,
int stride = -1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
int cols_tmp;
if (stride == -1)
cols_tmp = cols;
else
cols_tmp = stride;
hls::stream<ap_uint<PTR_WIDTH> > ldata;
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound = _MMITER::addrbound(rows, cols_tmp);
Mat2AxiStream(din, ldata, rows, cols, stride);
if (FILLZERO == 1)
AxiStream2Axi(ldata, dout, axibound);
else {
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound_1row = _MMITER::addrbound(1, cols);
ap_uint<log2<ADDRBOUND>::cvalue + 1> offset_1row = _MMITER::addrbound(1, stride);
for (int rowIdx = 0, offset = 0; rowIdx < rows; rowIdx++, offset += offset_1row) {
AxiStream2Axi(ldata, dout + offset, axibound_1row);
}
}
}
static void Mat2Axi(ap_uint<XF_BITS_PER_CLOCK>* din,
hls::stream<ap_axiu<PTR_WIDTH, 0, 0, 0> >& dout,
int rows = ROWS,
int cols = COLS,
int stride = -1) {
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
int cols_tmp;
if (stride == -1)
cols_tmp = cols;
else
cols_tmp = stride;
hls::stream<ap_uint<PTR_WIDTH> > ldata;
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound = _MMITER::addrbound(rows, cols_tmp);
Mat2AxiStream(din, ldata, rows, cols, stride);
if (FILLZERO == 1)
AxiStream2Axi(ldata, dout, axibound);
else {
ap_uint<log2<ADDRBOUND>::cvalue + 1> axibound_1row = _MMITER::addrbound(1, cols);
ap_uint<log2<ADDRBOUND>::cvalue + 1> offset_1row = _MMITER::addrbound(1, stride);
for (int rowIdx = 0, offset = 0; rowIdx < rows; rowIdx++, offset += offset_1row) {
AxiStream2Axi(ldata, dout + offset, axibound_1row);
}
}
}
public:
MMIterOut() : _MMITER() {}
MMIterOut(int _rows, int _cols) : _MMITER(_rows, _cols) {}
inline static void write(hls::stream<ap_uint<XF_BITS_PER_CLOCK> >& dout,
ap_uint<XF_BITS_PER_CLOCK>& val,
int index) {
dout.write(val);
}
inline static void write(ap_uint<XF_BITS_PER_CLOCK>* dout, ap_uint<XF_BITS_PER_CLOCK>& val, int index) {
dout[index] = val;
}
void write(ap_uint<XF_BITS_PER_CLOCK>& val, int index) { write(data, val, index); }
void transfer(ap_uint<PTR_WIDTH>* dout) { Mat2Axi(data, dout); }
void transfer(ap_uint<PTR_WIDTH>* dout, int rows, int cols) { Mat2Axi(data, dout, rows, cols); }
void transfer(hls::stream<ap_axiu<PTR_WIDTH, 0, 0, 0> >& dout) { Mat2Axi(data, dout); }
void transfer(hls::stream<ap_axiu<PTR_WIDTH, 0, 0, 0> >& dout, int rows, int cols) {
Mat2Axi(data, dout, rows, cols);
}
void transfer(hls::stream<ap_uint<PTR_WIDTH> >& dout) { Mat2AxiStream(data, dout); }
void transfer(hls::stream<ap_uint<PTR_WIDTH> >& dout, int rows, int cols) { Mat2AxiStream(data, dout, rows, cols); }
static void xfMat2Array(xf::cv::Mat<T, ROWS, COLS, NPC, XFCVDEPTH>& srcMat,
ap_uint<PTR_WIDTH>* dstPtr,
int stride = -1) {
Mat2Axi(srcMat.data, dstPtr, srcMat.rows, srcMat.cols, stride);
}
static void xfMat2Array(
ap_uint<XF_BITS_PER_CLOCK>* srcPtr, ap_uint<PTR_WIDTH>* dstPtr, int rows, int cols, int stride = -1) {
Mat2Axi(srcPtr, dstPtr, rows, cols, stride);
}
static void xfMat2axiStrm(xf::cv::Mat<T, ROWS, COLS, NPC, XFCVDEPTH>& srcMat,
hls::stream<ap_uint<PTR_WIDTH> >& dstPtr) {
Mat2Axi(srcMat.data, dstPtr, srcMat.rows, srcMat.cols);
}
};
} // namespace cv
} // namespace xf
#endif // _XF_STRUCTS_H_