pjpeg-idct.c
Go to the documentation of this file.
1 /* Copyright (C) 2013-2016, The Regents of The University of Michigan.
2 All rights reserved.
3 This software was developed in the APRIL Robotics Lab under the
4 direction of Edwin Olson, ebolson@umich.edu. This software may be
5 available under alternative licensing terms; contact the address above.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 1. Redistributions of source code must retain the above copyright notice, this
9  list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright notice,
11  this list of conditions and the following disclaimer in the documentation
12  and/or other materials provided with the distribution.
13 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 The views and conclusions contained in the software and documentation are those
24 of the authors and should not be interpreted as representing official policies,
25 either expressed or implied, of the Regents of The University of Michigan.
26 */
27 
28 #include <math.h>
29 #include <stdint.h>
30 
31 #ifndef M_PI
32 # define M_PI 3.141592653589793238462643383279502884196
33 #endif
34 
35 // 8 bits of fixed-point output
36 //
37 // This implementation has a worst-case complexity of 22 multiplies
38 // and 64 adds. This makes it significantly worse (about 2x) than the
39 // best-known fast inverse cosine transform methods. HOWEVER, zero
40 // coefficients can be skipped over, and since that's common (often
41 // more than half the coefficients are zero).
42 //
43 // The output is scaled by a factor of 256 (due to our fixed-point
44 // integer arithmetic)..
45 static inline void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride)
46 {
47  for (int x = 0; x < 8; x++)
48  out[x*outstride] = 0;
49 
50  int32_t c;
51 
52  c = in[0*instride];
53  if (c) {
54  // 181 181 181 181 181 181 181 181
55  int32_t c181 = c * 181;
56  out[0*outstride] += c181;
57  out[1*outstride] += c181;
58  out[2*outstride] += c181;
59  out[3*outstride] += c181;
60  out[4*outstride] += c181;
61  out[5*outstride] += c181;
62  out[6*outstride] += c181;
63  out[7*outstride] += c181;
64  }
65 
66  c = in[1*instride];
67  if (c) {
68  // 251 212 142 49 -49 -142 -212 -251
69  int32_t c251 = c * 251;
70  int32_t c212 = c * 212;
71  int32_t c142 = c * 142;
72  int32_t c49 = c * 49;
73  out[0*outstride] += c251;
74  out[1*outstride] += c212;
75  out[2*outstride] += c142;
76  out[3*outstride] += c49;
77  out[4*outstride] -= c49;
78  out[5*outstride] -= c142;
79  out[6*outstride] -= c212;
80  out[7*outstride] -= c251;
81  }
82 
83  c = in[2*instride];
84  if (c) {
85  // 236 97 -97 -236 -236 -97 97 236
86  int32_t c236 = c*236;
87  int32_t c97 = c*97;
88  out[0*outstride] += c236;
89  out[1*outstride] += c97;
90  out[2*outstride] -= c97;
91  out[3*outstride] -= c236;
92  out[4*outstride] -= c236;
93  out[5*outstride] -= c97;
94  out[6*outstride] += c97;
95  out[7*outstride] += c236;
96  }
97 
98  c = in[3*instride];
99  if (c) {
100  // 212 -49 -251 -142 142 251 49 -212
101  int32_t c212 = c*212;
102  int32_t c49 = c*49;
103  int32_t c251 = c*251;
104  int32_t c142 = c*142;
105  out[0*outstride] += c212;
106  out[1*outstride] -= c49;
107  out[2*outstride] -= c251;
108  out[3*outstride] -= c142;
109  out[4*outstride] += c142;
110  out[5*outstride] += c251;
111  out[6*outstride] += c49;
112  out[7*outstride] -= c212;
113  }
114 
115  c = in[4*instride];
116  if (c) {
117  // 181 -181 -181 181 181 -181 -181 181
118  int32_t c181 = c*181;
119  out[0*outstride] += c181;
120  out[1*outstride] -= c181;
121  out[2*outstride] -= c181;
122  out[3*outstride] += c181;
123  out[4*outstride] += c181;
124  out[5*outstride] -= c181;
125  out[6*outstride] -= c181;
126  out[7*outstride] += c181;
127  }
128 
129  c = in[5*instride];
130  if (c) {
131  // 142 -251 49 212 -212 -49 251 -142
132  int32_t c142 = c*142;
133  int32_t c251 = c*251;
134  int32_t c49 = c*49;
135  int32_t c212 = c*212;
136  out[0*outstride] += c142;
137  out[1*outstride] -= c251;
138  out[2*outstride] += c49;
139  out[3*outstride] += c212;
140  out[4*outstride] -= c212;
141  out[5*outstride] -= c49;
142  out[6*outstride] += c251;
143  out[7*outstride] -= c142;
144  }
145 
146  c = in[6*instride];
147  if (c) {
148  // 97 -236 236 -97 -97 236 -236 97
149  int32_t c97 = c*97;
150  int32_t c236 = c*236;
151  out[0*outstride] += c97;
152  out[1*outstride] -= c236;
153  out[2*outstride] += c236;
154  out[3*outstride] -= c97;
155  out[4*outstride] -= c97;
156  out[5*outstride] += c236;
157  out[6*outstride] -= c236;
158  out[7*outstride] += c97;
159  }
160 
161  c = in[7*instride];
162  if (c) {
163  // 49 -142 212 -251 251 -212 142 -49
164  int32_t c49 = c*49;
165  int32_t c142 = c*142;
166  int32_t c212 = c*212;
167  int32_t c251 = c*251;
168  out[0*outstride] += c49;
169  out[1*outstride] -= c142;
170  out[2*outstride] += c212;
171  out[3*outstride] -= c251;
172  out[4*outstride] += c251;
173  out[5*outstride] -= c212;
174  out[6*outstride] += c142;
175  out[7*outstride] -= c49;
176  }
177 }
178 
179 void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)
180 {
181  int32_t tmp[64];
182 
183  // idct on rows
184  for (int y = 0; y < 8; y++)
185  idct_1D_u32(&in[8*y], 1, &tmp[8*y], 1);
186 
187  int32_t tmp2[64];
188 
189  // idct on columns
190  for (int x = 0; x < 8; x++)
191  idct_1D_u32(&tmp[x], 8, &tmp2[x], 8);
192 
193  // scale, adjust bias, and clamp
194  for (int y = 0; y < 8; y++) {
195  for (int x = 0; x < 8; x++) {
196  int i = 8*y + x;
197 
198  // Shift of 18: the divide by 4 as part of the idct, and a shift by 16
199  // to undo the fixed-point arithmetic. (We accumulated 8 bits of
200  // fractional precision during each of the row and column IDCTs)
201  //
202  // Originally:
203  // int32_t v = (tmp2[i] >> 18) + 128;
204  //
205  // Move the add before the shift and we can do rounding at
206  // the same time.
207  const int32_t offset = (128 << 18) + (1 << 17);
208  int32_t v = (tmp2[i] + offset) >> 18;
209 
210  if (v < 0)
211  v = 0;
212  if (v > 255)
213  v = 255;
214 
215  out[y*outstride + x] = v;
216  }
217  }
218 }
219 
221 // Below: a "as straight-forward as I can make" implementation.
222 static inline void idct_1D_double(double *in, int instride, double *out, int outstride)
223 {
224  for (int x = 0; x < 8; x++)
225  out[x*outstride] = 0;
226 
227  // iterate over IDCT coefficients
228  double Cu = 1/sqrt(2);
229 
230  for (int u = 0; u < 8; u++, Cu = 1) {
231 
232  double coeff = in[u*instride];
233  if (coeff == 0)
234  continue;
235 
236  for (int x = 0; x < 8; x++)
237  out[x*outstride] += Cu*cos((2*x+1)*u*M_PI/16) * coeff;
238  }
239 }
240 
241 void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
242 {
243  double din[64], dout[64];
244  for (int i = 0; i < 64; i++)
245  din[i] = in[i];
246 
247  double tmp[64];
248 
249  // idct on rows
250  for (int y = 0; y < 8; y++)
251  idct_1D_double(&din[8*y], 1, &tmp[8*y], 1);
252 
253  // idct on columns
254  for (int x = 0; x < 8; x++)
255  idct_1D_double(&tmp[x], 8, &dout[x], 8);
256 
257  // scale, adjust bias, and clamp
258  for (int y = 0; y < 8; y++) {
259  for (int x = 0; x < 8; x++) {
260  int i = 8*y + x;
261 
262  dout[i] = (dout[i] / 4) + 128;
263  if (dout[i] < 0)
264  dout[i] = 0;
265  if (dout[i] > 255)
266  dout[i] = 255;
267 
268  // XXX round by adding +.5?
269  out[y*outstride + x] = dout[i];
270  }
271  }
272 }
273 
275 static inline unsigned char njClip(const int x) {
276  return (x < 0) ? 0 : ((x > 0xFF) ? 0xFF : (unsigned char) x);
277 }
278 
279 #define W1 2841
280 #define W2 2676
281 #define W3 2408
282 #define W5 1609
283 #define W6 1108
284 #define W7 565
285 
286 static inline void njRowIDCT(int* blk) {
287  int x0, x1, x2, x3, x4, x5, x6, x7, x8;
288  if (!((x1 = blk[4] << 11)
289  | (x2 = blk[6])
290  | (x3 = blk[2])
291  | (x4 = blk[1])
292  | (x5 = blk[7])
293  | (x6 = blk[5])
294  | (x7 = blk[3])))
295  {
296  blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3;
297  return;
298  }
299  x0 = (blk[0] << 11) + 128;
300  x8 = W7 * (x4 + x5);
301  x4 = x8 + (W1 - W7) * x4;
302  x5 = x8 - (W1 + W7) * x5;
303  x8 = W3 * (x6 + x7);
304  x6 = x8 - (W3 - W5) * x6;
305  x7 = x8 - (W3 + W5) * x7;
306  x8 = x0 + x1;
307  x0 -= x1;
308  x1 = W6 * (x3 + x2);
309  x2 = x1 - (W2 + W6) * x2;
310  x3 = x1 + (W2 - W6) * x3;
311  x1 = x4 + x6;
312  x4 -= x6;
313  x6 = x5 + x7;
314  x5 -= x7;
315  x7 = x8 + x3;
316  x8 -= x3;
317  x3 = x0 + x2;
318  x0 -= x2;
319  x2 = (181 * (x4 + x5) + 128) >> 8;
320  x4 = (181 * (x4 - x5) + 128) >> 8;
321  blk[0] = (x7 + x1) >> 8;
322  blk[1] = (x3 + x2) >> 8;
323  blk[2] = (x0 + x4) >> 8;
324  blk[3] = (x8 + x6) >> 8;
325  blk[4] = (x8 - x6) >> 8;
326  blk[5] = (x0 - x4) >> 8;
327  blk[6] = (x3 - x2) >> 8;
328  blk[7] = (x7 - x1) >> 8;
329 }
330 
331 static inline void njColIDCT(const int* blk, unsigned char *out, int stride) {
332  int x0, x1, x2, x3, x4, x5, x6, x7, x8;
333  if (!((x1 = blk[8*4] << 8)
334  | (x2 = blk[8*6])
335  | (x3 = blk[8*2])
336  | (x4 = blk[8*1])
337  | (x5 = blk[8*7])
338  | (x6 = blk[8*5])
339  | (x7 = blk[8*3])))
340  {
341  x1 = njClip(((blk[0] + 32) >> 6) + 128);
342  for (x0 = 8; x0; --x0) {
343  *out = (unsigned char) x1;
344  out += stride;
345  }
346  return;
347  }
348  x0 = (blk[0] << 8) + 8192;
349  x8 = W7 * (x4 + x5) + 4;
350  x4 = (x8 + (W1 - W7) * x4) >> 3;
351  x5 = (x8 - (W1 + W7) * x5) >> 3;
352  x8 = W3 * (x6 + x7) + 4;
353  x6 = (x8 - (W3 - W5) * x6) >> 3;
354  x7 = (x8 - (W3 + W5) * x7) >> 3;
355  x8 = x0 + x1;
356  x0 -= x1;
357  x1 = W6 * (x3 + x2) + 4;
358  x2 = (x1 - (W2 + W6) * x2) >> 3;
359  x3 = (x1 + (W2 - W6) * x3) >> 3;
360  x1 = x4 + x6;
361  x4 -= x6;
362  x6 = x5 + x7;
363  x5 -= x7;
364  x7 = x8 + x3;
365  x8 -= x3;
366  x3 = x0 + x2;
367  x0 -= x2;
368  x2 = (181 * (x4 + x5) + 128) >> 8;
369  x4 = (181 * (x4 - x5) + 128) >> 8;
370  *out = njClip(((x7 + x1) >> 14) + 128); out += stride;
371  *out = njClip(((x3 + x2) >> 14) + 128); out += stride;
372  *out = njClip(((x0 + x4) >> 14) + 128); out += stride;
373  *out = njClip(((x8 + x6) >> 14) + 128); out += stride;
374  *out = njClip(((x8 - x6) >> 14) + 128); out += stride;
375  *out = njClip(((x0 - x4) >> 14) + 128); out += stride;
376  *out = njClip(((x3 - x2) >> 14) + 128); out += stride;
377  *out = njClip(((x7 - x1) >> 14) + 128);
378 }
379 
380 void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
381 {
382  int coef;
383 
384  for (coef = 0; coef < 64; coef += 8)
385  njRowIDCT(&in[coef]);
386  for (coef = 0; coef < 8; ++coef)
387  njColIDCT(&in[coef], &out[coef], outstride);
388 }
#define W6
Definition: pjpeg-idct.c:283
static void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride)
Definition: pjpeg-idct.c:45
static void njRowIDCT(int *blk)
Definition: pjpeg-idct.c:286
#define W5
Definition: pjpeg-idct.c:282
void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
Definition: pjpeg-idct.c:241
#define W7
Definition: pjpeg-idct.c:284
static unsigned char njClip(const int x)
Definition: pjpeg-idct.c:275
static void njColIDCT(const int *blk, unsigned char *out, int stride)
Definition: pjpeg-idct.c:331
static void idct_1D_double(double *in, int instride, double *out, int outstride)
Definition: pjpeg-idct.c:222
#define M_PI
Definition: pjpeg-idct.c:32
#define W3
Definition: pjpeg-idct.c:281
#define W2
Definition: pjpeg-idct.c:280
void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
Definition: pjpeg-idct.c:380
void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)
Definition: pjpeg-idct.c:179
#define W1
Definition: pjpeg-idct.c:279


apriltag
Author(s): Edwin Olson , Max Krogius
autogenerated on Mon Jun 26 2023 02:26:12