Program Listing for File xf_3dlut.hpp
↰ Return to documentation for file (/tmp/ws/src/vitis_common/include/imgproc/xf_3dlut.hpp)
/*
* Copyright 2020 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "hls_stream.h"
#include "ap_int.h"
#include "../common/xf_common.hpp"
namespace xf {
namespace cv {
// Largest value of an unsigned field that is `pixeldepth` bits wide, i.e. 2^pixeldepth - 1.
// The argument is parenthesised so that expressions containing operators of lower
// precedence than `<<` (e.g. `a & b`, `c ? x : y`) expand as intended.
// The shift is performed on an `int` literal, so `pixeldepth` must stay below the
// number of value bits of `int`.
#define __MAXVAL(pixeldepth) ((1 << (pixeldepth)) - 1)
// Fixed-point formats used by the 3D LUT kernel (ap_ufixed<total bits, integer bits>).
// LUT entries, normalised pixel values and fractional distances: 9 bits, 1 integer bit.
using _FIXED_LUT_TYPE = ap_ufixed<9, 1>;
// Grid coordinates and interpolation intermediates: 16 bits, 6 integer bits.
using _FIXED_PIXEL_TYPE = ap_ufixed<16, 6>;
// Rescaled output channel value: 9 bits, 8 integer bits.
using _FIXED_OUT_PIXEL_TYPE = ap_ufixed<9, 8>;
/* Eight LUT samples sitting at the corners of one interpolation cell.
 * lut3d fills the members so that the three digits of each name are the
 * +0/+1 offsets applied to the (B, G, R) grid indices, in that order:
 * e.g. P100 is the sample at [B + 1][G][R] and P001 the one at [B][G][R + 1]. */
struct _cube {
    _FIXED_LUT_TYPE P000;
    _FIXED_LUT_TYPE P001;
    _FIXED_LUT_TYPE P010;
    _FIXED_LUT_TYPE P011;
    _FIXED_LUT_TYPE P100;
    _FIXED_LUT_TYPE P101;
    _FIXED_LUT_TYPE P110;
    _FIXED_LUT_TYPE P111;
};
using cube = _cube;
/* Integer LUT grid coordinates of one pixel, one entry per colour channel. */
struct _index {
    unsigned short R; // grid index derived from the R channel
    unsigned short G; // grid index derived from the G channel
    unsigned short B; // grid index derived from the B channel
};
using pIndex = _index;
/* Linear interpolation.
 *
 * Returns val1 + val * (val2 - val1): `val` is the blend weight, so a weight
 * of 0 yields val1 and a weight of 1 yields val2. The whole expression is
 * evaluated before being converted to _FIXED_PIXEL_TYPE on return, so no
 * intermediate is narrowed to the unsigned pixel type.
 *
 * The unused template parameter T only makes this a function template, which
 * lets the definition live in this header.
 */
template <int T = 0>
_FIXED_PIXEL_TYPE interp1(_FIXED_PIXEL_TYPE val1, _FIXED_PIXEL_TYPE val2, _FIXED_PIXEL_TYPE val) {
#pragma HLS INLINE OFF
    return val1 + val * (val2 - val1);
}
/* Tri-linear interpolation over the eight corner samples of `vertix`.
 *
 * Seven interp1 calls are chained in three stages:
 *   1. dist_r blends the four corner pairs that differ in the FIRST digit
 *      of the member name (P0xx -> P1xx);
 *   2. dist_g blends the stage-1 results that differ in the THIRD digit;
 *   3. dist_b blends the two stage-2 results, which differ in the SECOND digit.
 *
 * The parameter names do not by themselves tie a weight to a colour axis:
 * callers must pass the fractional offsets in the order that matches how they
 * populated the cube digits.
 */
template <int T = 0>
_FIXED_PIXEL_TYPE interp3(cube vertix, _FIXED_PIXEL_TYPE dist_r, _FIXED_PIXEL_TYPE dist_g, _FIXED_PIXEL_TYPE dist_b) {
#pragma HLS INLINE OFF
    // Stage 1: collapse the first digit (four edges -> four values).
    const _FIXED_PIXEL_TYPE edge00 = interp1(vertix.P000, vertix.P100, dist_r);
    const _FIXED_PIXEL_TYPE edge01 = interp1(vertix.P001, vertix.P101, dist_r);
    const _FIXED_PIXEL_TYPE edge10 = interp1(vertix.P010, vertix.P110, dist_r);
    const _FIXED_PIXEL_TYPE edge11 = interp1(vertix.P011, vertix.P111, dist_r);

    // Stage 2: collapse the third digit (four values -> two values).
    const _FIXED_PIXEL_TYPE face0 = interp1(edge00, edge01, dist_g);
    const _FIXED_PIXEL_TYPE face1 = interp1(edge10, edge11, dist_g);

    // Stage 3: collapse the second digit (two values -> result).
    return interp1(face0, face1, dist_b);
}
/**
 * @brief Maps every pixel of a 3-channel image through a 3D colour look-up
 *        table, tri-linearly blending the eight LUT entries around the pixel.
 *
 * The LUT is first copied from `lut` into on-chip arrays. Entries lying on the
 * last plane of any axis (index lutdim - 1) are kept in separate "border"
 * arrays so that the main grid can be cyclically partitioned by 2 in every
 * dimension. Each pixel is then normalised to [0, 1], scaled to grid
 * coordinates, looked up / interpolated and rescaled to the pixel range.
 *
 * @tparam LUTDIM   Compile-time maximum LUT edge length; sizes the on-chip arrays.
 * @tparam SQLUTDIM Must equal LUTDIM * LUTDIM (asserted in simulation).
 * @tparam INTYPE   Input pixel type: XF_8UC3, XF_10UC3, XF_12UC3 or XF_16UC3.
 * @tparam OUTTYPE  Output pixel type, from the same set. Channels are packed
 *                  using the bit depth derived from INTYPE.
 * @tparam ROWS     Maximum image height.
 * @tparam COLS     Maximum image width.
 * @tparam NPPC     Pixels per clock; only 1 is supported (asserted in simulation).
 * @tparam URAM     Non-zero binds the LUT arrays to URAM, zero to BRAM.
 *
 * @param in_img  Input image.
 * @param lut     LUT entries as three 32-bit floats per element, read in
 *                z-major order: index = z * lutdim^2 + y * lutdim + x.
 * @param out_img Output image, written at the same linear index as each input pixel.
 * @param lutdim  Run-time LUT edge length; must not exceed LUTDIM.
 */
template <int LUTDIM, int SQLUTDIM, int INTYPE, int OUTTYPE, int ROWS, int COLS, int NPPC = 1, int URAM = 0>
void lut3d(xf::cv::Mat<INTYPE, ROWS, COLS, NPPC>& in_img,
           xf::cv::Mat<XF_32FC3, SQLUTDIM, LUTDIM, NPPC>& lut,
           xf::cv::Mat<OUTTYPE, ROWS, COLS, NPPC>& out_img,
           unsigned char lutdim) {
#ifndef __SYNTHESIS__
    assert(((COLS >= in_img.cols) && (ROWS >= in_img.rows)) &&
           "ROWS and COLS values should be greater than input image rows and columns");
    assert((lutdim <= LUTDIM) && "LUT dimensions should be greater than or equal to lutdim value");
    assert((SQLUTDIM == LUTDIM * LUTDIM) && "SQLUTDIM value should be equal to LUTDIM*LUTDIM");
    // Both the input and the output type must be supported. The checks are grouped
    // explicitly so the message string applies to the whole condition.
    assert(((INTYPE == XF_8UC3) || (INTYPE == XF_10UC3) || (INTYPE == XF_12UC3) || (INTYPE == XF_16UC3)) &&
           ((OUTTYPE == XF_8UC3) || (OUTTYPE == XF_10UC3) || (OUTTYPE == XF_12UC3) || (OUTTYPE == XF_16UC3)) &&
           "Only XF_8UC3, XF_10UC3, XF_12UC3, XF_16UC3 types are supported");
    assert((NPPC == 1) && "Only 1 pixel parallelism (NPPC=1) is supported");
#endif
#pragma HLS INLINE OFF

    // Interior LUT entries (every coordinate < lutdim - 1), indexed [z][y][x].
    _FIXED_LUT_TYPE lutGrid_r[LUTDIM - 1][LUTDIM - 1][LUTDIM - 1];
    _FIXED_LUT_TYPE lutGrid_g[LUTDIM - 1][LUTDIM - 1][LUTDIM - 1];
    _FIXED_LUT_TYPE lutGrid_b[LUTDIM - 1][LUTDIM - 1][LUTDIM - 1];

    // Border planes:
    //   *X[z][y] : x == lutdim - 1 (any y, z)
    //   *Y[z][x] : y == lutdim - 1, x < lutdim - 1 (any z)
    //   *Z[y][x] : z == lutdim - 1, x < lutdim - 1, y < lutdim - 1
    _FIXED_LUT_TYPE borderLutRX[LUTDIM][LUTDIM];
    _FIXED_LUT_TYPE borderLutRY[LUTDIM][LUTDIM - 1];
    _FIXED_LUT_TYPE borderLutRZ[LUTDIM - 1][LUTDIM - 1];

    _FIXED_LUT_TYPE borderLutBX[LUTDIM][LUTDIM];
    _FIXED_LUT_TYPE borderLutBY[LUTDIM][LUTDIM - 1];
    _FIXED_LUT_TYPE borderLutBZ[LUTDIM - 1][LUTDIM - 1];

    _FIXED_LUT_TYPE borderLutGX[LUTDIM][LUTDIM];
    // The Y plane is indexed with z up to lutdim - 1, so its first dimension needs
    // LUTDIM entries, matching borderLutRY / borderLutBY.
    _FIXED_LUT_TYPE borderLutGY[LUTDIM][LUTDIM - 1];
    _FIXED_LUT_TYPE borderLutGZ[LUTDIM - 1][LUTDIM - 1];

    cube cubeBufferR, cubeBufferG, cubeBufferB;
    pIndex pixelIndex;

    if (URAM) {
// clang-format off
#pragma HLS bind_storage variable=lutGrid_r type=ram_t2p impl=uram
#pragma HLS bind_storage variable=lutGrid_g type=ram_t2p impl=uram
#pragma HLS bind_storage variable=lutGrid_b type=ram_t2p impl=uram
#pragma HLS bind_storage variable=borderLutRX type=RAM_T2P impl=uram
#pragma HLS bind_storage variable=borderLutRY type=RAM_T2P impl=uram
#pragma HLS bind_storage variable=borderLutRZ type=RAM_T2P impl=uram
#pragma HLS bind_storage variable=borderLutBX type=RAM_T2P impl=uram
#pragma HLS bind_storage variable=borderLutBY type=RAM_T2P impl=uram
#pragma HLS bind_storage variable=borderLutBZ type=RAM_T2P impl=uram
#pragma HLS bind_storage variable=borderLutGX type=RAM_T2P impl=uram
#pragma HLS bind_storage variable=borderLutGY type=RAM_T2P impl=uram
#pragma HLS bind_storage variable=borderLutGZ type=RAM_T2P impl=uram
        // clang-format on
    } else {
// clang-format off
#pragma HLS bind_storage variable=lutGrid_r type=RAM_T2P impl=bram
#pragma HLS bind_storage variable=lutGrid_g type=RAM_T2P impl=bram
#pragma HLS bind_storage variable=lutGrid_b type=RAM_T2P impl=bram
#pragma HLS bind_storage variable=borderLutRX type=RAM_T2P impl=bram
#pragma HLS bind_storage variable=borderLutRY type=RAM_T2P impl=bram
#pragma HLS bind_storage variable=borderLutRZ type=RAM_T2P impl=bram
#pragma HLS bind_storage variable=borderLutBX type=RAM_T2P impl=bram
#pragma HLS bind_storage variable=borderLutBY type=RAM_T2P impl=bram
#pragma HLS bind_storage variable=borderLutBZ type=RAM_T2P impl=bram
#pragma HLS bind_storage variable=borderLutGX type=RAM_T2P impl=bram
#pragma HLS bind_storage variable=borderLutGY type=RAM_T2P impl=bram
#pragma HLS bind_storage variable=borderLutGZ type=RAM_T2P impl=bram
        // clang-format on
    }

// Cyclic factor 2 in every dimension: the entries at index i and i + 1 of any
// axis land in different partitions.
// clang-format off
#pragma HLS ARRAY_PARTITION variable=lutGrid_r dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=lutGrid_g dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=lutGrid_b dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=lutGrid_r dim=2 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=lutGrid_g dim=2 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=lutGrid_b dim=2 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=lutGrid_r dim=3 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=lutGrid_g dim=3 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=lutGrid_b dim=3 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutRX dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutRY dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutRZ dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutGX dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutGY dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutGZ dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutBX dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutBY dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutBZ dim=1 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutRX dim=2 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutRY dim=2 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutRZ dim=2 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutGX dim=2 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutGY dim=2 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutGZ dim=2 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutBX dim=2 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutBY dim=2 cyclic factor=2
#pragma HLS ARRAY_PARTITION variable=borderLutBZ dim=2 cyclic factor=2
    // clang-format on

    _FIXED_LUT_TYPE stmp_r, stmp_g, stmp_b;
    int loc_z = 0, loc_y = 0, loc_x = 0;
    int r_int = 0, g_int = 0, b_int = 0;

    // Bits per channel of the input pixel type.
    static constexpr int step = XF_DTPIXELDEPTH(INTYPE, NPPC);
    // Fixed-point type wide enough for a full-range channel value (step integer
    // bits plus one fractional bit); for 8-bit pixels this is ap_ufixed<9, 8>.
    typedef ap_ufixed<step + 1, step> OutFixed;
    const OutFixed maxPixel = (float)(__MAXVAL(step));

    // ---- Stage 1: copy the LUT into the on-chip grid and border arrays ----
z_loop:
    for (unsigned char k = 0; k < lutdim; k++) {
#pragma HLS LOOP_TRIPCOUNT min = LUTDIM max = LUTDIM
    y_loop:
        for (unsigned char l = 0; l < lutdim; l++) {
#pragma HLS LOOP_TRIPCOUNT min = LUTDIM max = LUTDIM
        x_loop:
            for (unsigned char m = 0; m < lutdim; m++) {
#pragma HLS LOOP_TRIPCOUNT min = LUTDIM max = LUTDIM
                ap_uint<96> inLutVal = lut.read(k * lutdim * lutdim + l * lutdim + m);

                // Each 32-bit lane carries the bit pattern of a float (the LUT Mat is
                // XF_32FC3); reinterpret it and quantise to the LUT fixed-point type.
                r_int = inLutVal.range(31, 0);
                g_int = inLutVal.range(63, 32);
                b_int = inLutVal.range(95, 64);

                stmp_r = *((float*)(&r_int));
                stmp_g = *((float*)(&g_int));
                stmp_b = *((float*)(&b_int));

                loc_x = m;
                loc_y = l;
                loc_z = k;

                /* All border entries in all three dimensions are stored in
                 * separate arrays instead of the main 3d array, so that cyclic
                 * partitioning can be applied to the main 3d array. */
                if (loc_x == lutdim - 1) { // border plane in x-dim
                    borderLutRX[loc_z][loc_y] = stmp_r;
                    borderLutGX[loc_z][loc_y] = stmp_g;
                    borderLutBX[loc_z][loc_y] = stmp_b;
                } else if (loc_y == lutdim - 1) { // y-dim
                    borderLutRY[loc_z][loc_x] = stmp_r;
                    borderLutGY[loc_z][loc_x] = stmp_g;
                    borderLutBY[loc_z][loc_x] = stmp_b;
                } else if (loc_z == lutdim - 1) { // z-dim
                    borderLutRZ[loc_y][loc_x] = stmp_r;
                    borderLutGZ[loc_y][loc_x] = stmp_g;
                    borderLutBZ[loc_y][loc_x] = stmp_b;
                } else { // interior entries
                    lutGrid_r[loc_z][loc_y][loc_x] = stmp_r;
                    lutGrid_g[loc_z][loc_y][loc_x] = stmp_g;
                    lutGrid_b[loc_z][loc_y][loc_x] = stmp_b;
                }
            }
        }
    }

    _FIXED_PIXEL_TYPE outG = 0;
    _FIXED_PIXEL_TYPE outB = 0;
    _FIXED_PIXEL_TYPE outR = 0;

    // ---- Stage 2: per-pixel lookup and interpolation ----
    // NOTE(review): the PIPELINE pragma sits in the row loop rather than in the
    // innermost column loop; left as in the original - confirm the intended placement.
ROW_LOOP:
    for (short i = 0; i < in_img.rows; ++i) {
#pragma HLS LOOP_TRIPCOUNT min = ROWS max = ROWS
#pragma HLS PIPELINE II = 1
    COL_LOOP:
        for (short j = 0; j < in_img.cols; ++j) {
#pragma HLS LOOP_TRIPCOUNT min = COLS max = COLS
            XF_TNAME(INTYPE, NPPC) inPix = in_img.read(i * in_img.cols + j);

            // The lowest `step` bits are treated as R, then G, then B.
            ap_uint<step> inPixR = inPix.range(step - 1, 0);
            ap_uint<step> inPixG = inPix.range(step * 2 - 1, step);
            ap_uint<step> inPixB = inPix.range(step * 3 - 1, step * 2);

            // Normalise each channel to [0, 1].
            _FIXED_LUT_TYPE scale_r = (int)inPixR / (float)(__MAXVAL(step));
            _FIXED_LUT_TYPE scale_g = (int)inPixG / (float)(__MAXVAL(step));
            _FIXED_LUT_TYPE scale_b = (int)inPixB / (float)(__MAXVAL(step));

            // Continuous grid coordinate in [0, lutdim - 1].
            _FIXED_PIXEL_TYPE index_r = scale_r * (lutdim - 1);
            _FIXED_PIXEL_TYPE index_g = scale_g * (lutdim - 1);
            _FIXED_PIXEL_TYPE index_b = scale_b * (lutdim - 1);

            // Integer part selects the cell, fractional part is the blend weight.
            pixelIndex.R = (int)(index_r.to_float());
            pixelIndex.G = (int)(index_g.to_float());
            pixelIndex.B = (int)(index_b.to_float());

            _FIXED_LUT_TYPE dist_r = index_r - pixelIndex.R;
            _FIXED_LUT_TYPE dist_g = index_g - pixelIndex.G;
            _FIXED_LUT_TYPE dist_b = index_b - pixelIndex.B;

            /* No interpolation for pixels that land on a border plane. */
            if (pixelIndex.R == lutdim - 1) {
                outR = borderLutRX[pixelIndex.B][pixelIndex.G];
                outG = borderLutGX[pixelIndex.B][pixelIndex.G];
                outB = borderLutBX[pixelIndex.B][pixelIndex.G];
            } else if (pixelIndex.G == lutdim - 1) {
                outR = borderLutRY[pixelIndex.B][pixelIndex.R];
                outG = borderLutGY[pixelIndex.B][pixelIndex.R];
                outB = borderLutBY[pixelIndex.B][pixelIndex.R];
            } else if (pixelIndex.B == lutdim - 1) {
                outR = borderLutRZ[pixelIndex.G][pixelIndex.R];
                outG = borderLutGZ[pixelIndex.G][pixelIndex.R];
                outB = borderLutBZ[pixelIndex.G][pixelIndex.R];
            } else { // Interpolate for non-border pixels
                /* Special handling for cells adjacent to a border plane:
                 * some corners of the cube reside in the main 3d array
                 * and the rest in the borderLut array(s).
                 */
                if (pixelIndex.R == lutdim - 2) { // x-dimension
                    // All four R + 1 corners come from the X border plane.
                    cubeBufferR.P001 = borderLutRX[pixelIndex.B][pixelIndex.G];
                    cubeBufferG.P001 = borderLutGX[pixelIndex.B][pixelIndex.G];
                    cubeBufferB.P001 = borderLutBX[pixelIndex.B][pixelIndex.G];

                    cubeBufferR.P011 = borderLutRX[pixelIndex.B][pixelIndex.G + 1];
                    cubeBufferG.P011 = borderLutGX[pixelIndex.B][pixelIndex.G + 1];
                    cubeBufferB.P011 = borderLutBX[pixelIndex.B][pixelIndex.G + 1];

                    cubeBufferR.P101 = borderLutRX[pixelIndex.B + 1][pixelIndex.G];
                    cubeBufferG.P101 = borderLutGX[pixelIndex.B + 1][pixelIndex.G];
                    cubeBufferB.P101 = borderLutBX[pixelIndex.B + 1][pixelIndex.G];

                    cubeBufferR.P111 = borderLutRX[pixelIndex.B + 1][pixelIndex.G + 1];
                    cubeBufferG.P111 = borderLutGX[pixelIndex.B + 1][pixelIndex.G + 1];
                    cubeBufferB.P111 = borderLutBX[pixelIndex.B + 1][pixelIndex.G + 1];

                    if (pixelIndex.R == lutdim - 2 && pixelIndex.G == lutdim - 2 &&
                        pixelIndex.B < lutdim - 2) { // x-y border
                        cubeBufferR.P000 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P000 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P000 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P010 = borderLutRY[pixelIndex.B][pixelIndex.R];
                        cubeBufferG.P010 = borderLutGY[pixelIndex.B][pixelIndex.R];
                        cubeBufferB.P010 = borderLutBY[pixelIndex.B][pixelIndex.R];

                        cubeBufferR.P100 = lutGrid_r[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P100 = lutGrid_g[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P100 = lutGrid_b[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P110 = borderLutRY[pixelIndex.B + 1][pixelIndex.R];
                        cubeBufferG.P110 = borderLutGY[pixelIndex.B + 1][pixelIndex.R];
                        cubeBufferB.P110 = borderLutBY[pixelIndex.B + 1][pixelIndex.R];
                    } else if (pixelIndex.R == lutdim - 2 && pixelIndex.B == lutdim - 2 &&
                               pixelIndex.G < lutdim - 2) { // x-z border
                        cubeBufferR.P000 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P000 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P000 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P010 = lutGrid_r[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];
                        cubeBufferG.P010 = lutGrid_g[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];
                        cubeBufferB.P010 = lutGrid_b[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];

                        cubeBufferR.P100 = borderLutRZ[pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P100 = borderLutGZ[pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P100 = borderLutBZ[pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P110 = borderLutRZ[pixelIndex.G + 1][pixelIndex.R];
                        cubeBufferG.P110 = borderLutGZ[pixelIndex.G + 1][pixelIndex.R];
                        cubeBufferB.P110 = borderLutBZ[pixelIndex.G + 1][pixelIndex.R];
                    } else if (pixelIndex.R == lutdim - 2 && pixelIndex.G == lutdim - 2 &&
                               pixelIndex.B == lutdim - 2) { // x-y-z border
                        cubeBufferR.P000 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P000 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P000 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P010 = borderLutRY[pixelIndex.B][pixelIndex.R];
                        cubeBufferG.P010 = borderLutGY[pixelIndex.B][pixelIndex.R];
                        cubeBufferB.P010 = borderLutBY[pixelIndex.B][pixelIndex.R];

                        cubeBufferR.P100 = borderLutRZ[pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P100 = borderLutGZ[pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P100 = borderLutBZ[pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P110 = borderLutRY[pixelIndex.B + 1][pixelIndex.R];
                        cubeBufferG.P110 = borderLutGY[pixelIndex.B + 1][pixelIndex.R];
                        cubeBufferB.P110 = borderLutBY[pixelIndex.B + 1][pixelIndex.R];
                    } else { // only x border
                        cubeBufferR.P000 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P000 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P000 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P010 = lutGrid_r[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];
                        cubeBufferG.P010 = lutGrid_g[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];
                        cubeBufferB.P010 = lutGrid_b[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];

                        cubeBufferR.P100 = lutGrid_r[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P100 = lutGrid_g[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P100 = lutGrid_b[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P110 = lutGrid_r[pixelIndex.B + 1][pixelIndex.G + 1][pixelIndex.R];
                        cubeBufferG.P110 = lutGrid_g[pixelIndex.B + 1][pixelIndex.G + 1][pixelIndex.R];
                        cubeBufferB.P110 = lutGrid_b[pixelIndex.B + 1][pixelIndex.G + 1][pixelIndex.R];
                    }
                } else if (pixelIndex.G == lutdim - 2) { // y-dimension
                    if (pixelIndex.B == lutdim - 2 && pixelIndex.G == lutdim - 2 &&
                        pixelIndex.R < lutdim - 2) { // y-z border
                        cubeBufferR.P000 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P000 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P000 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P010 = borderLutRY[pixelIndex.B][pixelIndex.R];
                        cubeBufferG.P010 = borderLutGY[pixelIndex.B][pixelIndex.R];
                        cubeBufferB.P010 = borderLutBY[pixelIndex.B][pixelIndex.R];

                        cubeBufferR.P100 = borderLutRZ[pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P100 = borderLutGZ[pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P100 = borderLutBZ[pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P110 = borderLutRY[pixelIndex.B + 1][pixelIndex.R];
                        cubeBufferG.P110 = borderLutGY[pixelIndex.B + 1][pixelIndex.R];
                        cubeBufferB.P110 = borderLutBY[pixelIndex.B + 1][pixelIndex.R];

                        cubeBufferR.P001 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];
                        cubeBufferG.P001 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];
                        cubeBufferB.P001 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];

                        cubeBufferR.P011 = borderLutRY[pixelIndex.B][pixelIndex.R + 1];
                        cubeBufferG.P011 = borderLutGY[pixelIndex.B][pixelIndex.R + 1];
                        cubeBufferB.P011 = borderLutBY[pixelIndex.B][pixelIndex.R + 1];

                        cubeBufferR.P101 = borderLutRZ[pixelIndex.G][pixelIndex.R + 1];
                        cubeBufferG.P101 = borderLutGZ[pixelIndex.G][pixelIndex.R + 1];
                        cubeBufferB.P101 = borderLutBZ[pixelIndex.G][pixelIndex.R + 1];

                        cubeBufferR.P111 = borderLutRY[pixelIndex.B + 1][pixelIndex.R + 1];
                        cubeBufferG.P111 = borderLutGY[pixelIndex.B + 1][pixelIndex.R + 1];
                        cubeBufferB.P111 = borderLutBY[pixelIndex.B + 1][pixelIndex.R + 1];
                    } else { // only y-border
                        cubeBufferR.P000 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P000 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P000 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P001 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];
                        cubeBufferG.P001 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];
                        cubeBufferB.P001 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];

                        cubeBufferR.P100 = lutGrid_r[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];
                        cubeBufferG.P100 = lutGrid_g[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];
                        cubeBufferB.P100 = lutGrid_b[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];

                        cubeBufferR.P101 = lutGrid_r[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R + 1];
                        cubeBufferG.P101 = lutGrid_g[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R + 1];
                        cubeBufferB.P101 = lutGrid_b[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R + 1];

                        cubeBufferR.P010 = borderLutRY[pixelIndex.B][pixelIndex.R];
                        cubeBufferG.P010 = borderLutGY[pixelIndex.B][pixelIndex.R];
                        cubeBufferB.P010 = borderLutBY[pixelIndex.B][pixelIndex.R];

                        cubeBufferR.P011 = borderLutRY[pixelIndex.B][pixelIndex.R + 1];
                        cubeBufferG.P011 = borderLutGY[pixelIndex.B][pixelIndex.R + 1];
                        cubeBufferB.P011 = borderLutBY[pixelIndex.B][pixelIndex.R + 1];

                        cubeBufferR.P110 = borderLutRY[pixelIndex.B + 1][pixelIndex.R];
                        cubeBufferG.P110 = borderLutGY[pixelIndex.B + 1][pixelIndex.R];
                        cubeBufferB.P110 = borderLutBY[pixelIndex.B + 1][pixelIndex.R];

                        cubeBufferR.P111 = borderLutRY[pixelIndex.B + 1][pixelIndex.R + 1];
                        cubeBufferG.P111 = borderLutGY[pixelIndex.B + 1][pixelIndex.R + 1];
                        cubeBufferB.P111 = borderLutBY[pixelIndex.B + 1][pixelIndex.R + 1];
                    }
                } else if (pixelIndex.B == lutdim - 2) { // z-dimension
                    cubeBufferR.P000 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                    cubeBufferG.P000 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                    cubeBufferB.P000 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R];

                    cubeBufferR.P001 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];
                    cubeBufferG.P001 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];
                    cubeBufferB.P001 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];

                    cubeBufferR.P010 = lutGrid_r[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];
                    cubeBufferG.P010 = lutGrid_g[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];
                    cubeBufferB.P010 = lutGrid_b[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];

                    cubeBufferR.P011 = lutGrid_r[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R + 1];
                    cubeBufferG.P011 = lutGrid_g[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R + 1];
                    cubeBufferB.P011 = lutGrid_b[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R + 1];

                    cubeBufferR.P100 = borderLutRZ[pixelIndex.G][pixelIndex.R];
                    cubeBufferG.P100 = borderLutGZ[pixelIndex.G][pixelIndex.R];
                    cubeBufferB.P100 = borderLutBZ[pixelIndex.G][pixelIndex.R];

                    cubeBufferR.P101 = borderLutRZ[pixelIndex.G][pixelIndex.R + 1];
                    cubeBufferG.P101 = borderLutGZ[pixelIndex.G][pixelIndex.R + 1];
                    cubeBufferB.P101 = borderLutBZ[pixelIndex.G][pixelIndex.R + 1];

                    cubeBufferR.P110 = borderLutRZ[pixelIndex.G + 1][pixelIndex.R];
                    cubeBufferG.P110 = borderLutGZ[pixelIndex.G + 1][pixelIndex.R];
                    cubeBufferB.P110 = borderLutBZ[pixelIndex.G + 1][pixelIndex.R];

                    cubeBufferR.P111 = borderLutRZ[pixelIndex.G + 1][pixelIndex.R + 1];
                    cubeBufferG.P111 = borderLutGZ[pixelIndex.G + 1][pixelIndex.R + 1];
                    cubeBufferB.P111 = borderLutBZ[pixelIndex.G + 1][pixelIndex.R + 1];
                } else { // For all other pixels use only the main 3d array
                    cubeBufferR.P000 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                    cubeBufferG.P000 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R];
                    cubeBufferB.P000 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R];

                    cubeBufferR.P001 = lutGrid_r[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];
                    cubeBufferG.P001 = lutGrid_g[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];
                    cubeBufferB.P001 = lutGrid_b[pixelIndex.B][pixelIndex.G][pixelIndex.R + 1];

                    cubeBufferR.P010 = lutGrid_r[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];
                    cubeBufferG.P010 = lutGrid_g[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];
                    cubeBufferB.P010 = lutGrid_b[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R];

                    cubeBufferR.P011 = lutGrid_r[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R + 1];
                    cubeBufferG.P011 = lutGrid_g[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R + 1];
                    cubeBufferB.P011 = lutGrid_b[pixelIndex.B][pixelIndex.G + 1][pixelIndex.R + 1];

                    cubeBufferR.P100 = lutGrid_r[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];
                    cubeBufferG.P100 = lutGrid_g[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];
                    cubeBufferB.P100 = lutGrid_b[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R];

                    cubeBufferR.P101 = lutGrid_r[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R + 1];
                    cubeBufferG.P101 = lutGrid_g[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R + 1];
                    cubeBufferB.P101 = lutGrid_b[pixelIndex.B + 1][pixelIndex.G][pixelIndex.R + 1];

                    cubeBufferR.P110 = lutGrid_r[pixelIndex.B + 1][pixelIndex.G + 1][pixelIndex.R];
                    cubeBufferG.P110 = lutGrid_g[pixelIndex.B + 1][pixelIndex.G + 1][pixelIndex.R];
                    cubeBufferB.P110 = lutGrid_b[pixelIndex.B + 1][pixelIndex.G + 1][pixelIndex.R];

                    cubeBufferR.P111 = lutGrid_r[pixelIndex.B + 1][pixelIndex.G + 1][pixelIndex.R + 1];
                    cubeBufferG.P111 = lutGrid_g[pixelIndex.B + 1][pixelIndex.G + 1][pixelIndex.R + 1];
                    cubeBufferB.P111 = lutGrid_b[pixelIndex.B + 1][pixelIndex.G + 1][pixelIndex.R + 1];
                }

                // The cube digits above are (B, G, R) offsets, while interp3 applies
                // its 2nd / 3rd / 4th argument to the 1st / 3rd / 2nd digit respectively.
                // Pass the fractions in that order so each one blends along its own axis.
                outR = interp3(cubeBufferR, dist_b, dist_r, dist_g);
                outG = interp3(cubeBufferG, dist_b, dist_r, dist_g);
                outB = interp3(cubeBufferB, dist_b, dist_r, dist_g);
            }

            // Rescale from [0, 1] to the pixel range. OutFixed has `step` integer
            // bits so 10/12/16-bit results are not wrapped to 8 bits; only the
            // integer part is packed into the output word.
            XF_TNAME(OUTTYPE, NPPC) outPix = 0;
            OutFixed _outR = outR * maxPixel;
            OutFixed _outG = outG * maxPixel;
            OutFixed _outB = outB * maxPixel;

            outPix.range(step - 1, 0) = _outR;
            outPix.range(step * 2 - 1, step) = _outG;
            outPix.range(step * 3 - 1, step * 2) = _outB;

            out_img.write(i * in_img.cols + j, outPix);
        }
    }
}
}
}