32 # define M_PI 3.141592653589793238462643383279502884196 45 static inline void idct_1D_u32(int32_t *in,
int instride, int32_t *out,
int outstride)
/*
 * idct_1D_u32: integer 8-point pass that accumulates weighted copies of a
 * value `c` into out[0*outstride] .. out[7*outstride].
 *
 * NOTE(review): this listing is an excerpt. The definitions of `c`, `c49`,
 * `c97` and `c236`, the body of the first loop and all braces are on lines
 * that are not present here. Presumably each group below handles one input
 * coefficient in[u*instride] for u = 0..7 -- confirm against the full file.
 * NOTE(review): the weights 181, 251, 236, 212, 142, 97, 49 look like
 * 256*cos(k*pi/16) truncated to integers (181 ~ 256/sqrt(2)) -- confirm.
 */
47 for (
int x = 0; x < 8; x++)
/* Group 1: the same term c*181 is added to all eight outputs. */
55 int32_t c181 = c * 181;
56 out[0*outstride] += c181;
57 out[1*outstride] += c181;
58 out[2*outstride] += c181;
59 out[3*outstride] += c181;
60 out[4*outstride] += c181;
61 out[5*outstride] += c181;
62 out[6*outstride] += c181;
63 out[7*outstride] += c181;
/* Group 2: weights 251, 212, 142, 49 on outputs 0..3, then the same
 * magnitudes negated in reverse order on outputs 4..7. */
69 int32_t c251 = c * 251;
70 int32_t c212 = c * 212;
71 int32_t c142 = c * 142;
73 out[0*outstride] += c251;
74 out[1*outstride] += c212;
75 out[2*outstride] += c142;
76 out[3*outstride] += c49;
77 out[4*outstride] -= c49;
78 out[5*outstride] -= c142;
79 out[6*outstride] -= c212;
80 out[7*outstride] -= c251;
/* Group 3: weights 236 and 97 with sign pattern + + - - - - + +. */
88 out[0*outstride] += c236;
89 out[1*outstride] += c97;
90 out[2*outstride] -= c97;
91 out[3*outstride] -= c236;
92 out[4*outstride] -= c236;
93 out[5*outstride] -= c97;
94 out[6*outstride] += c97;
95 out[7*outstride] += c236;
/* Group 4: weights 212, 49, 251, 142 with sign pattern + - - - + + + -. */
101 int32_t c212 = c*212;
103 int32_t c251 = c*251;
104 int32_t c142 = c*142;
105 out[0*outstride] += c212;
106 out[1*outstride] -= c49;
107 out[2*outstride] -= c251;
108 out[3*outstride] -= c142;
109 out[4*outstride] += c142;
110 out[5*outstride] += c251;
111 out[6*outstride] += c49;
112 out[7*outstride] -= c212;
/* Group 5: c*181 again, with sign pattern + - - + + - - +. */
118 int32_t c181 = c*181;
119 out[0*outstride] += c181;
120 out[1*outstride] -= c181;
121 out[2*outstride] -= c181;
122 out[3*outstride] += c181;
123 out[4*outstride] += c181;
124 out[5*outstride] -= c181;
125 out[6*outstride] -= c181;
126 out[7*outstride] += c181;
/* Group 6: weights 142, 251, 49, 212 with sign pattern + - + + - - + -. */
132 int32_t c142 = c*142;
133 int32_t c251 = c*251;
135 int32_t c212 = c*212;
136 out[0*outstride] += c142;
137 out[1*outstride] -= c251;
138 out[2*outstride] += c49;
139 out[3*outstride] += c212;
140 out[4*outstride] -= c212;
141 out[5*outstride] -= c49;
142 out[6*outstride] += c251;
143 out[7*outstride] -= c142;
/* Group 7: weights 97 and 236 with sign pattern + - + - - + - +. */
150 int32_t c236 = c*236;
151 out[0*outstride] += c97;
152 out[1*outstride] -= c236;
153 out[2*outstride] += c236;
154 out[3*outstride] -= c97;
155 out[4*outstride] -= c97;
156 out[5*outstride] += c236;
157 out[6*outstride] -= c236;
158 out[7*outstride] += c97;
/* Group 8: weights 49, 142, 212, 251 with alternating signs on outputs
 * 0..3, mirrored with opposite sign on outputs 4..7. */
165 int32_t c142 = c*142;
166 int32_t c212 = c*212;
167 int32_t c251 = c*251;
168 out[0*outstride] += c49;
169 out[1*outstride] -= c142;
170 out[2*outstride] += c212;
171 out[3*outstride] -= c251;
172 out[4*outstride] += c251;
173 out[5*outstride] -= c212;
174 out[6*outstride] += c142;
175 out[7*outstride] -= c49;
/*
 * Body excerpt of a 2D integer IDCT driver (presumably pjpeg_idct_2D_u32;
 * the function header is not present in this listing -- confirm).
 * NOTE(review): the bodies of the first two loops, and the declarations of
 * `tmp2` and `i`, are on lines that are not shown here.
 */
184 for (
int y = 0; y < 8; y++)
190 for (
int x = 0; x < 8; x++)
/* Final stage: visit every (x, y) position of the 8x8 block. */
194 for (
int y = 0; y < 8; y++) {
195 for (
int x = 0; x < 8; x++) {
/* (128 << 18) becomes +128 after the >> 18 below; (1 << 17) is half of
 * (1 << 18), so the shift rounds instead of truncating. */
207 const int32_t offset = (128 << 18) + (1 << 17);
208 int32_t v = (tmp2[i] + offset) >> 18;
/* NOTE(review): original lines 209-214 are missing from this excerpt;
 * presumably v is clamped to the 8-bit range there -- confirm. */
215 out[y*outstride + x] = v;
/*
 * idct_1D_double: floating-point 8-point inverse DCT pass.
 *
 * Reads eight coefficients at in[u*instride] and accumulates, for each
 * output x in 0..7,
 *     out[x*outstride] += Cu * cos((2*x+1)*u*M_PI/16) * in[u*instride]
 * where Cu is 1/sqrt(2) for u == 0 and 1 for every later u.
 *
 * NOTE(review): braces and some lines between the visible statements are
 * missing from this excerpt.
 */
222 static inline void idct_1D_double(
double *in,
int instride,
double *out,
int outstride)
/* Clear the eight output slots before accumulating into them. */
224 for (
int x = 0; x < 8; x++)
225 out[x*outstride] = 0;
/* Scale factor for the u == 0 term only; the loop increment below resets
 * it to 1 after the first iteration. */
228 double Cu = 1/sqrt(2);
230 for (
int u = 0; u < 8; u++, Cu = 1) {
232 double coeff = in[u*instride];
/* Add this coefficient's cosine basis contribution to every output. */
236 for (
int x = 0; x < 8; x++)
237 out[x*outstride] += Cu*cos((2*x+1)*u*
M_PI/16) * coeff;
/*
 * Body excerpt of a 2D floating-point IDCT driver (presumably
 * pjpeg_idct_2D_double; the function header is not present in this
 * listing -- confirm).
 * NOTE(review): the bodies of the first three loops and the declaration
 * of `i` are on lines that are not shown here.
 */
/* Working buffers for the 64 entries of one 8x8 block. */
243 double din[64], dout[64];
244 for (
int i = 0; i < 64; i++)
250 for (
int y = 0; y < 8; y++)
254 for (
int x = 0; x < 8; x++)
/* Final stage: scale, level-shift and store every (x, y) sample. */
258 for (
int y = 0; y < 8; y++) {
259 for (
int x = 0; x < 8; x++) {
/* Divide by 4 and add 128 before storing. */
262 dout[i] = (dout[i] / 4) + 128;
/* NOTE(review): original lines 263-268 are missing from this excerpt;
 * presumably the value is clamped to the 8-bit range there -- confirm. */
269 out[y*outstride + x] = dout[i];
/*
 * njClip: clamp x to the range [0, 0xFF] and return it as an unsigned char.
 * Values below 0 yield 0; values above 0xFF yield 0xFF.
 */
275 static inline unsigned char njClip(
const int x) {
276 return (x < 0) ? 0 : ((x > 0xFF) ? 0xFF : (
unsigned char) x);
/*
 * Body excerpt of an in-place integer IDCT pass over blk[0..7] (presumably
 * njRowIDCT; the function header is not present in this listing --
 * confirm).
 * NOTE(review): W1..W7 are defined outside this excerpt, and several
 * statements (the rest of the `if` condition, the initial x8 computations
 * and the butterfly stages on original lines 311-318) are missing.
 */
287 int x0, x1, x2, x3, x4, x5, x6, x7, x8;
/* Shortcut test; only its first term is visible here. */
288 if (!((x1 = blk[4] << 11)
/* Shortcut result: all eight entries are set to blk[0] << 3. */
296 blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3;
/* 128 is half of (1 << 8); presumably a rounding term for the final >> 8
 * below -- confirm. */
299 x0 = (blk[0] << 11) + 128;
301 x4 = x8 + (
W1 -
W7) * x4;
302 x5 = x8 - (
W1 +
W7) * x5;
304 x6 = x8 - (
W3 -
W5) * x6;
305 x7 = x8 - (
W3 +
W5) * x7;
309 x2 = x1 - (
W2 +
W6) * x2;
310 x3 = x1 + (
W2 -
W6) * x3;
/* Multiply by 181 and shift right by 8, with +128 for rounding
 * (181/256 is approximately 1/sqrt(2)). */
319 x2 = (181 * (x4 + x5) + 128) >> 8;
320 x4 = (181 * (x4 - x5) + 128) >> 8;
/* Write the eight results back into blk, dropping 8 fractional bits. */
321 blk[0] = (x7 + x1) >> 8;
322 blk[1] = (x3 + x2) >> 8;
323 blk[2] = (x0 + x4) >> 8;
324 blk[3] = (x8 + x6) >> 8;
325 blk[4] = (x8 - x6) >> 8;
326 blk[5] = (x0 - x4) >> 8;
327 blk[6] = (x3 - x2) >> 8;
328 blk[7] = (x7 - x1) >> 8;
/*
 * njColIDCT: integer IDCT pass that reads from blk and writes eight
 * clamped bytes to out, advancing out by `stride` between samples.
 *
 * NOTE(review): W1..W7 are defined outside this excerpt, and several
 * statements (the rest of the `if` condition, parts of the shortcut loop
 * and the butterfly stages on original lines 360-367) are missing.
 */
331 static inline void njColIDCT(
const int* blk,
unsigned char *out,
int stride) {
332 int x0, x1, x2, x3, x4, x5, x6, x7, x8;
/* Shortcut test; only its first term is visible here. The index 8*4
 * steps through blk with a spacing of 8 ints. */
333 if (!((x1 = blk[8*4] << 8)
/* Shortcut result: a single clamped value derived from blk[0]... */
341 x1 =
njClip(((blk[0] + 32) >> 6) + 128);
/* ...is written on each of 8 iterations. NOTE(review): the statement that
 * advances `out` inside this loop is not shown. */
342 for (x0 = 8; x0; --x0) {
343 *out = (
unsigned char) x1;
/* 8192 is half of (1 << 14); presumably a rounding term for the final
 * >> 14 below -- confirm. */
348 x0 = (blk[0] << 8) + 8192;
/* Each product below carries +4 for rounding ahead of the >> 3. */
349 x8 =
W7 * (x4 + x5) + 4;
350 x4 = (x8 + (
W1 -
W7) * x4) >> 3;
351 x5 = (x8 - (
W1 +
W7) * x5) >> 3;
352 x8 =
W3 * (x6 + x7) + 4;
353 x6 = (x8 - (
W3 -
W5) * x6) >> 3;
354 x7 = (x8 - (
W3 +
W5) * x7) >> 3;
357 x1 =
W6 * (x3 + x2) + 4;
358 x2 = (x1 - (
W2 +
W6) * x2) >> 3;
359 x3 = (x1 + (
W2 -
W6) * x3) >> 3;
/* Multiply by 181 and shift right by 8, with +128 for rounding
 * (181/256 is approximately 1/sqrt(2)). */
368 x2 = (181 * (x4 + x5) + 128) >> 8;
369 x4 = (181 * (x4 - x5) + 128) >> 8;
/* Emit the eight samples: drop 14 fractional bits, add 128, clamp to a
 * byte, then step to the next output position. */
370 *out =
njClip(((x7 + x1) >> 14) + 128); out += stride;
371 *out =
njClip(((x3 + x2) >> 14) + 128); out += stride;
372 *out =
njClip(((x0 + x4) >> 14) + 128); out += stride;
373 *out =
njClip(((x8 + x6) >> 14) + 128); out += stride;
374 *out =
njClip(((x8 - x6) >> 14) + 128); out += stride;
375 *out =
njClip(((x0 - x4) >> 14) + 128); out += stride;
376 *out =
njClip(((x3 - x2) >> 14) + 128); out += stride;
377 *out =
njClip(((x7 - x1) >> 14) + 128);
/*
 * Body excerpt of a 2D IDCT driver (presumably pjpeg_idct_2D_nanojpeg; the
 * function header is not present in this listing -- confirm).
 */
/* First pass steps through the 64 coefficients 8 at a time.
 * NOTE(review): the loop body (original line 385) is not shown. */
384 for (coef = 0; coef < 64; coef += 8)
/* Second pass: one njColIDCT call per start offset 0..7, writing into
 * `out` with the caller's outstride. */
386 for (coef = 0; coef < 8; ++coef)
387 njColIDCT(&in[coef], &out[coef], outstride);
static void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride)
static void njRowIDCT(int *blk)
void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
static unsigned char njClip(const int x)
static void njColIDCT(const int *blk, unsigned char *out, int stride)
static void idct_1D_double(double *in, int instride, double *out, int outstride)
void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)