pjpeg-idct.c
Go to the documentation of this file.
1 /* Copyright (C) 2013-2016, The Regents of The University of Michigan.
2 All rights reserved.
3 
4 This software was developed in the APRIL Robotics Lab under the
5 direction of Edwin Olson, ebolson@umich.edu. This software may be
6 available under alternative licensing terms; contact the address above.
7 
8 This library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12 
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public
19 License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21 
22 #include <math.h>
23 #include <stdint.h>
24 
25 #ifndef M_PI
26 # define M_PI 3.141592653589793238462643383279502884196
27 #endif
28 
29 // 8 bits of fixed-point output
30 //
31 // This implementation has a worst-case complexity of 22 multiplies
32 // and 64 adds. This makes it significantly worse (about 2x) than the
33 // best-known fast inverse cosine transform methods. HOWEVER, zero
34 // coefficients can be skipped over, and since that's common (often
35 // more than half the coefficients are zero).
36 //
37 // The output is scaled by a factor of 256 (due to our fixed-point
38 // integer arithmetic)..
39 static inline void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride)
40 {
41  for (int x = 0; x < 8; x++)
42  out[x*outstride] = 0;
43 
44  int32_t c;
45 
46  c = in[0*instride];
47  if (c) {
48  // 181 181 181 181 181 181 181 181
49  int32_t c181 = c * 181;
50  out[0*outstride] += c181;
51  out[1*outstride] += c181;
52  out[2*outstride] += c181;
53  out[3*outstride] += c181;
54  out[4*outstride] += c181;
55  out[5*outstride] += c181;
56  out[6*outstride] += c181;
57  out[7*outstride] += c181;
58  }
59 
60  c = in[1*instride];
61  if (c) {
62  // 251 212 142 49 -49 -142 -212 -251
63  int32_t c251 = c * 251;
64  int32_t c212 = c * 212;
65  int32_t c142 = c * 142;
66  int32_t c49 = c * 49;
67  out[0*outstride] += c251;
68  out[1*outstride] += c212;
69  out[2*outstride] += c142;
70  out[3*outstride] += c49;
71  out[4*outstride] -= c49;
72  out[5*outstride] -= c142;
73  out[6*outstride] -= c212;
74  out[7*outstride] -= c251;
75  }
76 
77  c = in[2*instride];
78  if (c) {
79  // 236 97 -97 -236 -236 -97 97 236
80  int32_t c236 = c*236;
81  int32_t c97 = c*97;
82  out[0*outstride] += c236;
83  out[1*outstride] += c97;
84  out[2*outstride] -= c97;
85  out[3*outstride] -= c236;
86  out[4*outstride] -= c236;
87  out[5*outstride] -= c97;
88  out[6*outstride] += c97;
89  out[7*outstride] += c236;
90  }
91 
92  c = in[3*instride];
93  if (c) {
94  // 212 -49 -251 -142 142 251 49 -212
95  int32_t c212 = c*212;
96  int32_t c49 = c*49;
97  int32_t c251 = c*251;
98  int32_t c142 = c*142;
99  out[0*outstride] += c212;
100  out[1*outstride] -= c49;
101  out[2*outstride] -= c251;
102  out[3*outstride] -= c142;
103  out[4*outstride] += c142;
104  out[5*outstride] += c251;
105  out[6*outstride] += c49;
106  out[7*outstride] -= c212;
107  }
108 
109  c = in[4*instride];
110  if (c) {
111  // 181 -181 -181 181 181 -181 -181 181
112  int32_t c181 = c*181;
113  out[0*outstride] += c181;
114  out[1*outstride] -= c181;
115  out[2*outstride] -= c181;
116  out[3*outstride] += c181;
117  out[4*outstride] += c181;
118  out[5*outstride] -= c181;
119  out[6*outstride] -= c181;
120  out[7*outstride] += c181;
121  }
122 
123  c = in[5*instride];
124  if (c) {
125  // 142 -251 49 212 -212 -49 251 -142
126  int32_t c142 = c*142;
127  int32_t c251 = c*251;
128  int32_t c49 = c*49;
129  int32_t c212 = c*212;
130  out[0*outstride] += c142;
131  out[1*outstride] -= c251;
132  out[2*outstride] += c49;
133  out[3*outstride] += c212;
134  out[4*outstride] -= c212;
135  out[5*outstride] -= c49;
136  out[6*outstride] += c251;
137  out[7*outstride] -= c142;
138  }
139 
140  c = in[6*instride];
141  if (c) {
142  // 97 -236 236 -97 -97 236 -236 97
143  int32_t c97 = c*97;
144  int32_t c236 = c*236;
145  out[0*outstride] += c97;
146  out[1*outstride] -= c236;
147  out[2*outstride] += c236;
148  out[3*outstride] -= c97;
149  out[4*outstride] -= c97;
150  out[5*outstride] += c236;
151  out[6*outstride] -= c236;
152  out[7*outstride] += c97;
153  }
154 
155  c = in[7*instride];
156  if (c) {
157  // 49 -142 212 -251 251 -212 142 -49
158  int32_t c49 = c*49;
159  int32_t c142 = c*142;
160  int32_t c212 = c*212;
161  int32_t c251 = c*251;
162  out[0*outstride] += c49;
163  out[1*outstride] -= c142;
164  out[2*outstride] += c212;
165  out[3*outstride] -= c251;
166  out[4*outstride] += c251;
167  out[5*outstride] -= c212;
168  out[6*outstride] += c142;
169  out[7*outstride] -= c49;
170  }
171 }
172 
173 void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)
174 {
175  int32_t tmp[64];
176 
177  // idct on rows
178  for (int y = 0; y < 8; y++)
179  idct_1D_u32(&in[8*y], 1, &tmp[8*y], 1);
180 
181  int32_t tmp2[64];
182 
183  // idct on columns
184  for (int x = 0; x < 8; x++)
185  idct_1D_u32(&tmp[x], 8, &tmp2[x], 8);
186 
187  // scale, adjust bias, and clamp
188  for (int y = 0; y < 8; y++) {
189  for (int x = 0; x < 8; x++) {
190  int i = 8*y + x;
191 
192  // Shift of 18: the divide by 4 as part of the idct, and a shift by 16
193  // to undo the fixed-point arithmetic. (We accumulated 8 bits of
194  // fractional precision during each of the row and column IDCTs)
195  //
196  // Originally:
197  // int32_t v = (tmp2[i] >> 18) + 128;
198  //
199  // Move the add before the shift and we can do rounding at
200  // the same time.
201  const int32_t offset = (128 << 18) + (1 << 17);
202  int32_t v = (tmp2[i] + offset) >> 18;
203 
204  if (v < 0)
205  v = 0;
206  if (v > 255)
207  v = 255;
208 
209  out[y*outstride + x] = v;
210  }
211  }
212 }
213 
215 // Below: a "as straight-forward as I can make" implementation.
216 static inline void idct_1D_double(double *in, int instride, double *out, int outstride)
217 {
218  for (int x = 0; x < 8; x++)
219  out[x*outstride] = 0;
220 
221  // iterate over IDCT coefficients
222  double Cu = 1/sqrt(2);
223 
224  for (int u = 0; u < 8; u++, Cu = 1) {
225 
226  double coeff = in[u*instride];
227  if (coeff == 0)
228  continue;
229 
230  for (int x = 0; x < 8; x++)
231  out[x*outstride] += Cu*cos((2*x+1)*u*M_PI/16) * coeff;
232  }
233 }
234 
235 void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
236 {
237  double din[64], dout[64];
238  for (int i = 0; i < 64; i++)
239  din[i] = in[i];
240 
241  double tmp[64];
242 
243  // idct on rows
244  for (int y = 0; y < 8; y++)
245  idct_1D_double(&din[8*y], 1, &tmp[8*y], 1);
246 
247  // idct on columns
248  for (int x = 0; x < 8; x++)
249  idct_1D_double(&tmp[x], 8, &dout[x], 8);
250 
251  // scale, adjust bias, and clamp
252  for (int y = 0; y < 8; y++) {
253  for (int x = 0; x < 8; x++) {
254  int i = 8*y + x;
255 
256  dout[i] = (dout[i] / 4) + 128;
257  if (dout[i] < 0)
258  dout[i] = 0;
259  if (dout[i] > 255)
260  dout[i] = 255;
261 
262  // XXX round by adding +.5?
263  out[y*outstride + x] = dout[i];
264  }
265  }
266 }
267 
269 static inline unsigned char njClip(const int x) {
270  return (x < 0) ? 0 : ((x > 0xFF) ? 0xFF : (unsigned char) x);
271 }
272 
273 #define W1 2841
274 #define W2 2676
275 #define W3 2408
276 #define W5 1609
277 #define W6 1108
278 #define W7 565
279 
280 static inline void njRowIDCT(int* blk) {
281  int x0, x1, x2, x3, x4, x5, x6, x7, x8;
282  if (!((x1 = blk[4] << 11)
283  | (x2 = blk[6])
284  | (x3 = blk[2])
285  | (x4 = blk[1])
286  | (x5 = blk[7])
287  | (x6 = blk[5])
288  | (x7 = blk[3])))
289  {
290  blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3;
291  return;
292  }
293  x0 = (blk[0] << 11) + 128;
294  x8 = W7 * (x4 + x5);
295  x4 = x8 + (W1 - W7) * x4;
296  x5 = x8 - (W1 + W7) * x5;
297  x8 = W3 * (x6 + x7);
298  x6 = x8 - (W3 - W5) * x6;
299  x7 = x8 - (W3 + W5) * x7;
300  x8 = x0 + x1;
301  x0 -= x1;
302  x1 = W6 * (x3 + x2);
303  x2 = x1 - (W2 + W6) * x2;
304  x3 = x1 + (W2 - W6) * x3;
305  x1 = x4 + x6;
306  x4 -= x6;
307  x6 = x5 + x7;
308  x5 -= x7;
309  x7 = x8 + x3;
310  x8 -= x3;
311  x3 = x0 + x2;
312  x0 -= x2;
313  x2 = (181 * (x4 + x5) + 128) >> 8;
314  x4 = (181 * (x4 - x5) + 128) >> 8;
315  blk[0] = (x7 + x1) >> 8;
316  blk[1] = (x3 + x2) >> 8;
317  blk[2] = (x0 + x4) >> 8;
318  blk[3] = (x8 + x6) >> 8;
319  blk[4] = (x8 - x6) >> 8;
320  blk[5] = (x0 - x4) >> 8;
321  blk[6] = (x3 - x2) >> 8;
322  blk[7] = (x7 - x1) >> 8;
323 }
324 
325 static inline void njColIDCT(const int* blk, unsigned char *out, int stride) {
326  int x0, x1, x2, x3, x4, x5, x6, x7, x8;
327  if (!((x1 = blk[8*4] << 8)
328  | (x2 = blk[8*6])
329  | (x3 = blk[8*2])
330  | (x4 = blk[8*1])
331  | (x5 = blk[8*7])
332  | (x6 = blk[8*5])
333  | (x7 = blk[8*3])))
334  {
335  x1 = njClip(((blk[0] + 32) >> 6) + 128);
336  for (x0 = 8; x0; --x0) {
337  *out = (unsigned char) x1;
338  out += stride;
339  }
340  return;
341  }
342  x0 = (blk[0] << 8) + 8192;
343  x8 = W7 * (x4 + x5) + 4;
344  x4 = (x8 + (W1 - W7) * x4) >> 3;
345  x5 = (x8 - (W1 + W7) * x5) >> 3;
346  x8 = W3 * (x6 + x7) + 4;
347  x6 = (x8 - (W3 - W5) * x6) >> 3;
348  x7 = (x8 - (W3 + W5) * x7) >> 3;
349  x8 = x0 + x1;
350  x0 -= x1;
351  x1 = W6 * (x3 + x2) + 4;
352  x2 = (x1 - (W2 + W6) * x2) >> 3;
353  x3 = (x1 + (W2 - W6) * x3) >> 3;
354  x1 = x4 + x6;
355  x4 -= x6;
356  x6 = x5 + x7;
357  x5 -= x7;
358  x7 = x8 + x3;
359  x8 -= x3;
360  x3 = x0 + x2;
361  x0 -= x2;
362  x2 = (181 * (x4 + x5) + 128) >> 8;
363  x4 = (181 * (x4 - x5) + 128) >> 8;
364  *out = njClip(((x7 + x1) >> 14) + 128); out += stride;
365  *out = njClip(((x3 + x2) >> 14) + 128); out += stride;
366  *out = njClip(((x0 + x4) >> 14) + 128); out += stride;
367  *out = njClip(((x8 + x6) >> 14) + 128); out += stride;
368  *out = njClip(((x8 - x6) >> 14) + 128); out += stride;
369  *out = njClip(((x0 - x4) >> 14) + 128); out += stride;
370  *out = njClip(((x3 - x2) >> 14) + 128); out += stride;
371  *out = njClip(((x7 - x1) >> 14) + 128);
372 }
373 
374 void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
375 {
376  int coef;
377 
378  for (coef = 0; coef < 64; coef += 8)
379  njRowIDCT(&in[coef]);
380  for (coef = 0; coef < 8; ++coef)
381  njColIDCT(&in[coef], &out[coef], outstride);
382 }
#define W6
Definition: pjpeg-idct.c:277
static void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride)
Definition: pjpeg-idct.c:39
static void njRowIDCT(int *blk)
Definition: pjpeg-idct.c:280
#define W5
Definition: pjpeg-idct.c:276
void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
Definition: pjpeg-idct.c:235
#define W7
Definition: pjpeg-idct.c:278
static unsigned char njClip(const int x)
Definition: pjpeg-idct.c:269
static void njColIDCT(const int *blk, unsigned char *out, int stride)
Definition: pjpeg-idct.c:325
static void idct_1D_double(double *in, int instride, double *out, int outstride)
Definition: pjpeg-idct.c:216
#define M_PI
Definition: pjpeg-idct.c:26
#define W3
Definition: pjpeg-idct.c:275
#define W2
Definition: pjpeg-idct.c:274
void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
Definition: pjpeg-idct.c:374
void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)
Definition: pjpeg-idct.c:173
#define W1
Definition: pjpeg-idct.c:273


apriltags2
Author(s): Danylo Malyuta
autogenerated on Fri Oct 19 2018 04:02:32