/*
 * idct_1D_u32: fixed-point 1-D 8-point transform step.
 *
 * Reads through `in` (with `instride`) and accumulates into eight output
 * slots out[0*outstride] .. out[7*outstride].
 *
 * NOTE(review): this view of the function is fragmentary. The lines that
 * assign `c` and that declare c49, c97 and c236 are not visible here.
 * The multipliers 181, 251, 236, 212, 142, 97, 49 appear to be
 * cos(k*pi/16) scaled by 256 (181 ~ 256/sqrt(2)) and truncated -- confirm
 * against the floating-point reference idct_1D_double.
 */
26 # define M_PI 3.141592653589793238462643383279502884196 39 static inline void idct_1D_u32(int32_t *in,
int instride, int32_t *out,
int outstride)
/* Loop over the 8 output slots; its body is not visible in this excerpt
 * (presumably clears the outputs before accumulation -- TODO confirm). */
41 for (
int x = 0; x < 8; x++)
/* Group 0: the same term c*181 is added to all eight outputs
 * (looks like the DC term, u = 0 -- confirm). */
49 int32_t c181 = c * 181;
50 out[0*outstride] += c181;
51 out[1*outstride] += c181;
52 out[2*outstride] += c181;
53 out[3*outstride] += c181;
54 out[4*outstride] += c181;
55 out[5*outstride] += c181;
56 out[6*outstride] += c181;
57 out[7*outstride] += c181;
/* Group 1: weights 251, 212, 142, 49 added to outputs 0..3 and subtracted
 * in mirrored order from outputs 4..7. c49 is declared on a line not shown. */
63 int32_t c251 = c * 251;
64 int32_t c212 = c * 212;
65 int32_t c142 = c * 142;
67 out[0*outstride] += c251;
68 out[1*outstride] += c212;
69 out[2*outstride] += c142;
70 out[3*outstride] += c49;
71 out[4*outstride] -= c49;
72 out[5*outstride] -= c142;
73 out[6*outstride] -= c212;
74 out[7*outstride] -= c251;
/* Group 2: weights 236 and 97 with sign pattern + + - - - - + +.
 * c236 and c97 are declared on lines not shown. */
82 out[0*outstride] += c236;
83 out[1*outstride] += c97;
84 out[2*outstride] -= c97;
85 out[3*outstride] -= c236;
86 out[4*outstride] -= c236;
87 out[5*outstride] -= c97;
88 out[6*outstride] += c97;
89 out[7*outstride] += c236;
/* Group 3: weights 212, 49, 251, 142; outputs 4..7 mirror outputs 3..0
 * with the opposite sign. Declarations for this group are not shown. */
99 out[0*outstride] += c212;
100 out[1*outstride] -= c49;
101 out[2*outstride] -= c251;
102 out[3*outstride] -= c142;
103 out[4*outstride] += c142;
104 out[5*outstride] += c251;
105 out[6*outstride] += c49;
106 out[7*outstride] -= c212;
/* Group 4: c*181 with sign pattern + - - + + - - +. */
112 int32_t c181 = c*181;
113 out[0*outstride] += c181;
114 out[1*outstride] -= c181;
115 out[2*outstride] -= c181;
116 out[3*outstride] += c181;
117 out[4*outstride] += c181;
118 out[5*outstride] -= c181;
119 out[6*outstride] -= c181;
120 out[7*outstride] += c181;
/* Group 5: weights 142, 251, 49, 212; outputs 4..7 mirror outputs 3..0
 * with the opposite sign. c49 is declared on a line not shown. */
126 int32_t c142 = c*142;
127 int32_t c251 = c*251;
129 int32_t c212 = c*212;
130 out[0*outstride] += c142;
131 out[1*outstride] -= c251;
132 out[2*outstride] += c49;
133 out[3*outstride] += c212;
134 out[4*outstride] -= c212;
135 out[5*outstride] -= c49;
136 out[6*outstride] += c251;
137 out[7*outstride] -= c142;
/* Group 6: weights 97 and 236 with sign pattern + - + - - + - +.
 * c97 is declared on a line not shown. */
144 int32_t c236 = c*236;
145 out[0*outstride] += c97;
146 out[1*outstride] -= c236;
147 out[2*outstride] += c236;
148 out[3*outstride] -= c97;
149 out[4*outstride] -= c97;
150 out[5*outstride] += c236;
151 out[6*outstride] -= c236;
152 out[7*outstride] += c97;
/* Group 7: weights 49, 142, 212, 251 with alternating signs on outputs
 * 0..3, mirrored with opposite sign on 4..7. c49 is declared on a line
 * not shown. */
159 int32_t c142 = c*142;
160 int32_t c212 = c*212;
161 int32_t c251 = c*251;
162 out[0*outstride] += c49;
163 out[1*outstride] -= c142;
164 out[2*outstride] += c212;
165 out[3*outstride] -= c251;
166 out[4*outstride] += c251;
167 out[5*outstride] -= c212;
168 out[6*outstride] += c142;
169 out[7*outstride] -= c49;
/*
 * NOTE(review): interior fragment of a 2-D driver (presumably
 * pjpeg_idct_2D_u32 -- its header is not visible here). The bodies of the
 * first two loops, and the declarations of `tmp2` and `i`, are on lines
 * not shown.
 */
178 for (
int y = 0; y < 8; y++)
184 for (
int x = 0; x < 8; x++)
/* Final pass over the 8x8 block, writing one value per (x, y). */
188 for (
int y = 0; y < 8; y++) {
189 for (
int x = 0; x < 8; x++) {
/* offset = 128 scaled by 2^18, plus 2^17 (half of 2^18) so that the
 * right shift by 18 below rounds instead of truncating. */
201 const int32_t offset = (128 << 18) + (1 << 17);
202 int32_t v = (tmp2[i] + offset) >> 18;
/* Lines between the shift and the store are not shown (presumably a clamp
 * to the 0..255 range -- TODO confirm). */
209 out[y*outstride + x] = v;
/*
 * idct_1D_double: floating-point 1-D 8-point inverse cosine transform.
 *
 * Clears the eight outputs out[x*outstride], then for each input index u
 * adds Cu * cos((2*x+1)*u*M_PI/16) * in[u*instride] to every output x.
 *
 * NOTE(review): braces and some interior lines are not visible in this
 * excerpt (e.g. between the read of `coeff` and the inner loop).
 */
216 static inline void idct_1D_double(
double *in,
int instride,
double *out,
int outstride)
/* Zero the output slots before accumulating. */
218 for (
int x = 0; x < 8; x++)
219 out[x*outstride] = 0;
/* Cu is 1/sqrt(2) only for u == 0; the loop increment resets it to 1 for
 * every later u. */
222 double Cu = 1/sqrt(2);
224 for (
int u = 0; u < 8; u++, Cu = 1) {
226 double coeff = in[u*instride];
/* Accumulate this coefficient's contribution into all eight outputs. */
230 for (
int x = 0; x < 8; x++)
231 out[x*outstride] += Cu*cos((2*x+1)*u*
M_PI/16) * coeff;
/*
 * NOTE(review): interior fragment of a 2-D driver (presumably
 * pjpeg_idct_2D_double -- its header is not visible here). The bodies of
 * the first three loops and the declaration of `i` are on lines not shown.
 */
/* Two 64-entry double work buffers. */
237 double din[64], dout[64];
238 for (
int i = 0; i < 64; i++)
244 for (
int y = 0; y < 8; y++)
248 for (
int x = 0; x < 8; x++)
/* Final pass over the 8x8 block, writing one value per (x, y). */
252 for (
int y = 0; y < 8; y++) {
253 for (
int x = 0; x < 8; x++) {
/* Scale by 1/4 and add 128 before the store. */
256 dout[i] = (dout[i] / 4) + 128;
/* Lines between the scaling and the store are not shown (presumably a
 * clamp to the 0..255 range -- TODO confirm). */
263 out[y*outstride + x] = dout[i];
/*
 * njClip: clamp an int to the range 0..255.
 *
 * Returns 0 when x is negative, 0xFF when x is greater than 0xFF, and
 * otherwise x converted to unsigned char.
 */
269 static inline unsigned char njClip(
const int x) {
270 return (x < 0) ? 0 : ((x > 0xFF) ? 0xFF : (
unsigned char) x);
/*
 * NOTE(review): interior fragment of an in-place 8-element row transform
 * (presumably njRowIDCT -- its header is not visible here). W1..W7 are
 * defined elsewhere, and several butterfly lines are not shown.
 *
 * NOTE(review): `blk[n] << 11` and `blk[0] << 3` left-shift signed ints;
 * that is undefined behaviour in C if the value is negative -- confirm
 * whether coefficients can be negative and consider multiplication.
 */
281 int x0, x1, x2, x3, x4, x5, x6, x7, x8;
/* Start of a compound test; the rest of the condition is not shown. */
282 if (!((x1 = blk[4] << 11)
/* Shortcut branch: every element of the row becomes blk[0] << 3. */
290 blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3;
/* General path: scale blk[0] by 2^11 and add 128 (presumably rounding for
 * the final >> 8 -- TODO confirm). */
293 x0 = (blk[0] << 11) + 128;
295 x4 = x8 + (
W1 -
W7) * x4;
296 x5 = x8 - (
W1 +
W7) * x5;
298 x6 = x8 - (
W3 -
W5) * x6;
299 x7 = x8 - (
W3 +
W5) * x7;
303 x2 = x1 - (
W2 +
W6) * x2;
304 x3 = x1 + (
W2 -
W6) * x3;
/* Multiply by 181, add 128, then >> 8 (181 is presumably 256/sqrt(2)
 * rounded -- TODO confirm). */
313 x2 = (181 * (x4 + x5) + 128) >> 8;
314 x4 = (181 * (x4 - x5) + 128) >> 8;
/* Write the eight results back into blk as mirrored sum/difference pairs,
 * each shifted right by 8. */
315 blk[0] = (x7 + x1) >> 8;
316 blk[1] = (x3 + x2) >> 8;
317 blk[2] = (x0 + x4) >> 8;
318 blk[3] = (x8 + x6) >> 8;
319 blk[4] = (x8 - x6) >> 8;
320 blk[5] = (x0 - x4) >> 8;
321 blk[6] = (x3 - x2) >> 8;
322 blk[7] = (x7 - x1) >> 8;
/*
 * njColIDCT: column transform that writes eight clipped bytes to `out`.
 *
 * Reads from `blk` at a spacing of 8 ints (blk[8*4] is visible below) and
 * stores results through `out`, advancing by `stride` between writes.
 * Each result is shifted right, offset by 128 and clamped with njClip().
 *
 * NOTE(review): this view is fragmentary; W1..W7 are defined elsewhere and
 * several butterfly lines are not shown.
 *
 * NOTE(review): `blk[n] << 8` left-shifts a signed int; that is undefined
 * behaviour in C if the value is negative -- confirm whether coefficients
 * can be negative and consider multiplication.
 */
325 static inline void njColIDCT(
const int* blk,
unsigned char *out,
int stride) {
326 int x0, x1, x2, x3, x4, x5, x6, x7, x8;
/* Start of a compound test; the rest of the condition is not shown. */
327 if (!((x1 = blk[8*4] << 8)
/* Shortcut branch: one clipped value derived from blk[0] is written on
 * each of 8 iterations (the pointer advance is on a line not shown). */
335 x1 =
njClip(((blk[0] + 32) >> 6) + 128);
336 for (x0 = 8; x0; --x0) {
337 *out = (
unsigned char) x1;
/* General path: scale blk[0] by 2^8 and add 8192 = 2^13 (half of 2^14,
 * matching the final >> 14). */
342 x0 = (blk[0] << 8) + 8192;
/* Intermediate products get +4 before >> 3, i.e. rounding. */
343 x8 =
W7 * (x4 + x5) + 4;
344 x4 = (x8 + (
W1 -
W7) * x4) >> 3;
345 x5 = (x8 - (
W1 +
W7) * x5) >> 3;
346 x8 =
W3 * (x6 + x7) + 4;
347 x6 = (x8 - (
W3 -
W5) * x6) >> 3;
348 x7 = (x8 - (
W3 +
W5) * x7) >> 3;
351 x1 =
W6 * (x3 + x2) + 4;
352 x2 = (x1 - (
W2 +
W6) * x2) >> 3;
353 x3 = (x1 + (
W2 -
W6) * x3) >> 3;
/* Multiply by 181, add 128, then >> 8 (181 is presumably 256/sqrt(2)
 * rounded -- TODO confirm). */
362 x2 = (181 * (x4 + x5) + 128) >> 8;
363 x4 = (181 * (x4 - x5) + 128) >> 8;
/* Emit eight output bytes as mirrored sum/difference pairs: shift right by
 * 14, add 128, clamp, then step `out` by `stride` (no step after the last). */
364 *out =
njClip(((x7 + x1) >> 14) + 128); out += stride;
365 *out =
njClip(((x3 + x2) >> 14) + 128); out += stride;
366 *out =
njClip(((x0 + x4) >> 14) + 128); out += stride;
367 *out =
njClip(((x8 + x6) >> 14) + 128); out += stride;
368 *out =
njClip(((x8 - x6) >> 14) + 128); out += stride;
369 *out =
njClip(((x0 - x4) >> 14) + 128); out += stride;
370 *out =
njClip(((x3 - x2) >> 14) + 128); out += stride;
371 *out =
njClip(((x7 - x1) >> 14) + 128);
/*
 * NOTE(review): interior fragment of a 2-D driver (presumably
 * pjpeg_idct_2D_nanojpeg -- its header and the declaration of `coef` are
 * not visible here).
 */
/* First pass: step through the 64 entries in strides of 8; the loop body
 * is on a line not shown (presumably the row transform -- TODO confirm). */
378 for (coef = 0; coef < 64; coef += 8)
/* Second pass: run njColIDCT once per column index 0..7, writing to
 * out[coef] with row pitch `outstride`. */
380 for (coef = 0; coef < 8; ++coef)
381 njColIDCT(&in[coef], &out[coef], outstride);
static void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride)
static void njRowIDCT(int *blk)
void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
static unsigned char njClip(const int x)
static void njColIDCT(const int *blk, unsigned char *out, int stride)
static void idct_1D_double(double *in, int instride, double *out, int outstride)
void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)