28 #define _USE_MATH_DEFINES
/*
 * idct_1D_u32 - integer 1-D 8-point inverse-DCT pass (name-derived purpose).
 *
 * The visible statements accumulate integer multiples of a value `c` into the
 * eight outputs out[0*outstride] .. out[7*outstride].
 *
 * in, instride   - not referenced on any visible line; presumably `c` is read
 *                  from in[u*instride] on lines missing here - TODO confirm.
 * out, outstride - destination; element k is out[k*outstride]. Every visible
 *                  write is += or -=, so the initial contents matter.
 *
 * NOTE(review): this excerpt omits lines of the function (braces, the
 * declarations of c, c49, c97 and c236). The comments below describe only
 * what the visible lines do.
 *
 * NOTE(review): the multipliers 251, 236, 212, 181, 142, 97, 49 equal
 * floor(256*cos(k*pi/16)) for k = 1..7, i.e. an 8-bit fixed-point cosine
 * table - presumably intentional truncation rather than rounding; confirm.
 */
42 static inline void idct_1D_u32(int32_t *in,
int instride, int32_t *out,
int outstride)
44 for (
int x = 0; x < 8; x++)
/* Group 1: the same product c*181 is added to all eight outputs. */
52 int32_t c181 = c * 181;
53 out[0*outstride] += c181;
54 out[1*outstride] += c181;
55 out[2*outstride] += c181;
56 out[3*outstride] += c181;
57 out[4*outstride] += c181;
58 out[5*outstride] += c181;
59 out[6*outstride] += c181;
60 out[7*outstride] += c181;
/* Group 2: antisymmetric - outputs 4..7 mirror outputs 3..0 with the sign flipped. */
66 int32_t c251 = c * 251;
67 int32_t c212 = c * 212;
68 int32_t c142 = c * 142;
70 out[0*outstride] += c251;
71 out[1*outstride] += c212;
72 out[2*outstride] += c142;
73 out[3*outstride] += c49;
74 out[4*outstride] -= c49;
75 out[5*outstride] -= c142;
76 out[6*outstride] -= c212;
77 out[7*outstride] -= c251;
/* Group 3: symmetric - outputs 4..7 mirror outputs 3..0 with the same sign. */
85 out[0*outstride] += c236;
86 out[1*outstride] += c97;
87 out[2*outstride] -= c97;
88 out[3*outstride] -= c236;
89 out[4*outstride] -= c236;
90 out[5*outstride] -= c97;
91 out[6*outstride] += c97;
92 out[7*outstride] += c236;
/* Group 4: antisymmetric. c251/c142 are declared again, so each group must
 * live in its own scope in the full file. */
100 int32_t c251 = c*251;
101 int32_t c142 = c*142;
102 out[0*outstride] += c212;
103 out[1*outstride] -= c49;
104 out[2*outstride] -= c251;
105 out[3*outstride] -= c142;
106 out[4*outstride] += c142;
107 out[5*outstride] += c251;
108 out[6*outstride] += c49;
109 out[7*outstride] -= c212;
/* Group 5: symmetric, c*181 with the sign pattern + - - + + - - +. */
115 int32_t c181 = c*181;
116 out[0*outstride] += c181;
117 out[1*outstride] -= c181;
118 out[2*outstride] -= c181;
119 out[3*outstride] += c181;
120 out[4*outstride] += c181;
121 out[5*outstride] -= c181;
122 out[6*outstride] -= c181;
123 out[7*outstride] += c181;
/* Group 6: antisymmetric. */
129 int32_t c142 = c*142;
130 int32_t c251 = c*251;
132 int32_t c212 = c*212;
133 out[0*outstride] += c142;
134 out[1*outstride] -= c251;
135 out[2*outstride] += c49;
136 out[3*outstride] += c212;
137 out[4*outstride] -= c212;
138 out[5*outstride] -= c49;
139 out[6*outstride] += c251;
140 out[7*outstride] -= c142;
/* Group 7: symmetric. */
147 int32_t c236 = c*236;
148 out[0*outstride] += c97;
149 out[1*outstride] -= c236;
150 out[2*outstride] += c236;
151 out[3*outstride] -= c97;
152 out[4*outstride] -= c97;
153 out[5*outstride] += c236;
154 out[6*outstride] -= c236;
155 out[7*outstride] += c97;
/* Group 8: antisymmetric. */
162 int32_t c142 = c*142;
163 int32_t c212 = c*212;
164 int32_t c251 = c*251;
165 out[0*outstride] += c49;
166 out[1*outstride] -= c142;
167 out[2*outstride] += c212;
168 out[3*outstride] -= c251;
169 out[4*outstride] += c251;
170 out[5*outstride] -= c212;
171 out[6*outstride] += c142;
172 out[7*outstride] -= c49;
181 for (
int y = 0; y < 8; y++)
187 for (
int x = 0; x < 8; x++)
191 for (
int y = 0; y < 8; y++) {
192 for (
int x = 0; x < 8; x++) {
204 const int32_t offset = (128 << 18) + (1 << 17);
205 int32_t v = (tmp2[i] + offset) >> 18;
212 out[y*outstride + x] = v;
/*
 * idct_1D_double - floating-point 1-D 8-point inverse DCT.
 *
 * Clears the eight outputs out[x*outstride], then for each input index u adds
 *     Cu * cos((2*x+1)*u*pi/16) * in[u*instride]
 * to every out[x*outstride], x = 0..7.
 *
 * in, instride   - source; element u is in[u*instride].
 * out, outstride - destination; element x is out[x*outstride], overwritten.
 *
 * No overall normalisation factor is applied on the visible lines.
 * NOTE(review): some lines of this function are not visible in this excerpt.
 * NOTE(review): M_PI is not part of ISO C; presumably the _USE_MATH_DEFINES
 * define near the top of the file exists to expose it - confirm.
 */
219 static inline void idct_1D_double(
double *in,
int instride,
double *out,
int outstride)
221 for (
int x = 0; x < 8; x++)
222 out[x*outstride] = 0;
/* Cu starts at 1/sqrt(2) and therefore applies to u == 0 only. */
225 double Cu = 1/sqrt(2);
/* The loop increment resets Cu to 1 for every u >= 1. */
227 for (
int u = 0; u < 8; u++, Cu = 1) {
229 double coeff = in[u*instride];
233 for (
int x = 0; x < 8; x++)
234 out[x*outstride] += Cu*cos((2*x+1)*u*M_PI/16) * coeff;
240 double din[64], dout[64];
241 for (
int i = 0; i < 64; i++)
247 for (
int y = 0; y < 8; y++)
251 for (
int x = 0; x < 8; x++)
255 for (
int y = 0; y < 8; y++) {
256 for (
int x = 0; x < 8; x++) {
259 dout[i] = (dout[i] / 4) + 128;
266 out[y*outstride + x] = dout[i];
/*
 * njClip - clamp an int to the range 0..0xFF and return it as unsigned char.
 * Values below 0 yield 0, values above 0xFF yield 0xFF, anything else is
 * returned unchanged.
 */
272 static inline unsigned char njClip(
const int x) {
273 return (x < 0) ? 0 : ((x > 0xFF) ? 0xFF : (
unsigned char) x);
284 int x0, x1, x2, x3, x4, x5, x6, x7, x8;
285 if (!((x1 = blk[4] << 11)
293 blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3;
296 x0 = (blk[0] << 11) + 128;
298 x4 = x8 + (
W1 -
W7) * x4;
299 x5 = x8 - (
W1 +
W7) * x5;
301 x6 = x8 - (
W3 -
W5) * x6;
302 x7 = x8 - (
W3 +
W5) * x7;
306 x2 = x1 - (
W2 +
W6) * x2;
307 x3 = x1 + (
W2 -
W6) * x3;
316 x2 = (181 * (x4 + x5) + 128) >> 8;
317 x4 = (181 * (x4 - x5) + 128) >> 8;
318 blk[0] = (x7 + x1) >> 8;
319 blk[1] = (x3 + x2) >> 8;
320 blk[2] = (x0 + x4) >> 8;
321 blk[3] = (x8 + x6) >> 8;
322 blk[4] = (x8 - x6) >> 8;
323 blk[5] = (x0 - x4) >> 8;
324 blk[6] = (x3 - x2) >> 8;
325 blk[7] = (x7 - x1) >> 8;
/*
 * njColIDCT - column pass of an integer 8-point inverse DCT.
 *
 * blk    - first element of the column; the visible index blk[8*4] shows that
 *          successive rows are 8 ints apart.
 * out    - destination for eight bytes; consecutive results are `stride`
 *          bytes apart.
 * stride - byte distance between successive output rows.
 *
 * Every result is passed through njClip() after adding 128.
 *
 * NOTE(review): this excerpt omits lines of the function (the rest of the
 * initial condition, the loads of x2..x7, several butterfly steps) and the
 * definitions of W1..W7. Comments describe only the visible lines.
 * NOTE(review): `blk[...] << 8` left-shifts a signed int; for negative
 * coefficients that is undefined behaviour in ISO C - consider a multiply.
 */
328 static inline void njColIDCT(
const int* blk,
unsigned char *out,
int stride) {
329 int x0, x1, x2, x3, x4, x5, x6, x7, x8;
/* x1 is loaded from row 4 inside the condition; the remaining terms of the
 * test are not visible here. Presumably it detects a column whose inputs
 * other than blk[0] are all zero - TODO confirm. */
330 if (!((x1 = blk[8*4] << 8)
/* Shortcut: one clamped value derived from blk[0] alone (rounded shift by 6,
 * plus 128) is written in a loop that counts x0 from 8 down to 1. */
338 x1 =
njClip(((blk[0] + 32) >> 6) + 128);
339 for (x0 = 8; x0; --x0) {
340 *out = (
unsigned char) x1;
/* General path. 8192 is half of 1 << 14, so it presumably pre-loads the
 * rounding term for the final >> 14 - confirm. */
345 x0 = (blk[0] << 8) + 8192;
/* Each shared product gets +4, half of the 1 << 3 divisor applied by the
 * >> 3 on the following lines (round to nearest). */
346 x8 =
W7 * (x4 + x5) + 4;
347 x4 = (x8 + (
W1 -
W7) * x4) >> 3;
348 x5 = (x8 - (
W1 +
W7) * x5) >> 3;
349 x8 =
W3 * (x6 + x7) + 4;
350 x6 = (x8 - (
W3 -
W5) * x6) >> 3;
351 x7 = (x8 - (
W3 +
W5) * x7) >> 3;
354 x1 =
W6 * (x3 + x2) + 4;
355 x2 = (x1 - (
W2 +
W6) * x2) >> 3;
356 x3 = (x1 + (
W2 -
W6) * x3) >> 3;
/* Scale sum and difference by 181/256 (about 1/sqrt(2)); +128 rounds the >> 8. */
365 x2 = (181 * (x4 + x5) + 128) >> 8;
366 x4 = (181 * (x4 - x5) + 128) >> 8;
/* Emit the eight results top to bottom: shift right by 14, add 128, clamp.
 * `out` advances by `stride` after each store except the last. */
367 *out =
njClip(((x7 + x1) >> 14) + 128); out += stride;
368 *out =
njClip(((x3 + x2) >> 14) + 128); out += stride;
369 *out =
njClip(((x0 + x4) >> 14) + 128); out += stride;
370 *out =
njClip(((x8 + x6) >> 14) + 128); out += stride;
371 *out =
njClip(((x8 - x6) >> 14) + 128); out += stride;
372 *out =
njClip(((x0 - x4) >> 14) + 128); out += stride;
373 *out =
njClip(((x3 - x2) >> 14) + 128); out += stride;
374 *out =
njClip(((x7 - x1) >> 14) + 128);
381 for (coef = 0; coef < 64; coef += 8)
383 for (coef = 0; coef < 8; ++coef)
384 njColIDCT(&in[coef], &out[coef], outstride);