pjpeg-idct.c
Go to the documentation of this file.
1 /* Copyright (C) 2013-2016, The Regents of The University of Michigan.
2 All rights reserved.
3 This software was developed in the APRIL Robotics Lab under the
4 direction of Edwin Olson, ebolson@umich.edu. This software may be
5 available under alternative licensing terms; contact the address above.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 1. Redistributions of source code must retain the above copyright notice, this
9  list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright notice,
11  this list of conditions and the following disclaimer in the documentation
12  and/or other materials provided with the distribution.
13 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 The views and conclusions contained in the software and documentation are those
24 of the authors and should not be interpreted as representing official policies,
25 either expressed or implied, of the Regents of The University of Michigan.
26 */
27 
28 #define _USE_MATH_DEFINES
29 #include <math.h>
30 #include <stdint.h>
31 
32 // 8 bits of fixed-point output
33 //
34 // This implementation has a worst-case complexity of 22 multiplies
35 // and 64 adds. This makes it significantly worse (about 2x) than the
36 // best-known fast inverse cosine transform methods. HOWEVER, zero
37 // coefficients can be skipped over, and since that's common (often
38 // more than half the coefficients are zero).
39 //
40 // The output is scaled by a factor of 256 (due to our fixed-point
41 // integer arithmetic)..
42 static inline void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride)
43 {
44  for (int x = 0; x < 8; x++)
45  out[x*outstride] = 0;
46 
47  int32_t c;
48 
49  c = in[0*instride];
50  if (c) {
51  // 181 181 181 181 181 181 181 181
52  int32_t c181 = c * 181;
53  out[0*outstride] += c181;
54  out[1*outstride] += c181;
55  out[2*outstride] += c181;
56  out[3*outstride] += c181;
57  out[4*outstride] += c181;
58  out[5*outstride] += c181;
59  out[6*outstride] += c181;
60  out[7*outstride] += c181;
61  }
62 
63  c = in[1*instride];
64  if (c) {
65  // 251 212 142 49 -49 -142 -212 -251
66  int32_t c251 = c * 251;
67  int32_t c212 = c * 212;
68  int32_t c142 = c * 142;
69  int32_t c49 = c * 49;
70  out[0*outstride] += c251;
71  out[1*outstride] += c212;
72  out[2*outstride] += c142;
73  out[3*outstride] += c49;
74  out[4*outstride] -= c49;
75  out[5*outstride] -= c142;
76  out[6*outstride] -= c212;
77  out[7*outstride] -= c251;
78  }
79 
80  c = in[2*instride];
81  if (c) {
82  // 236 97 -97 -236 -236 -97 97 236
83  int32_t c236 = c*236;
84  int32_t c97 = c*97;
85  out[0*outstride] += c236;
86  out[1*outstride] += c97;
87  out[2*outstride] -= c97;
88  out[3*outstride] -= c236;
89  out[4*outstride] -= c236;
90  out[5*outstride] -= c97;
91  out[6*outstride] += c97;
92  out[7*outstride] += c236;
93  }
94 
95  c = in[3*instride];
96  if (c) {
97  // 212 -49 -251 -142 142 251 49 -212
98  int32_t c212 = c*212;
99  int32_t c49 = c*49;
100  int32_t c251 = c*251;
101  int32_t c142 = c*142;
102  out[0*outstride] += c212;
103  out[1*outstride] -= c49;
104  out[2*outstride] -= c251;
105  out[3*outstride] -= c142;
106  out[4*outstride] += c142;
107  out[5*outstride] += c251;
108  out[6*outstride] += c49;
109  out[7*outstride] -= c212;
110  }
111 
112  c = in[4*instride];
113  if (c) {
114  // 181 -181 -181 181 181 -181 -181 181
115  int32_t c181 = c*181;
116  out[0*outstride] += c181;
117  out[1*outstride] -= c181;
118  out[2*outstride] -= c181;
119  out[3*outstride] += c181;
120  out[4*outstride] += c181;
121  out[5*outstride] -= c181;
122  out[6*outstride] -= c181;
123  out[7*outstride] += c181;
124  }
125 
126  c = in[5*instride];
127  if (c) {
128  // 142 -251 49 212 -212 -49 251 -142
129  int32_t c142 = c*142;
130  int32_t c251 = c*251;
131  int32_t c49 = c*49;
132  int32_t c212 = c*212;
133  out[0*outstride] += c142;
134  out[1*outstride] -= c251;
135  out[2*outstride] += c49;
136  out[3*outstride] += c212;
137  out[4*outstride] -= c212;
138  out[5*outstride] -= c49;
139  out[6*outstride] += c251;
140  out[7*outstride] -= c142;
141  }
142 
143  c = in[6*instride];
144  if (c) {
145  // 97 -236 236 -97 -97 236 -236 97
146  int32_t c97 = c*97;
147  int32_t c236 = c*236;
148  out[0*outstride] += c97;
149  out[1*outstride] -= c236;
150  out[2*outstride] += c236;
151  out[3*outstride] -= c97;
152  out[4*outstride] -= c97;
153  out[5*outstride] += c236;
154  out[6*outstride] -= c236;
155  out[7*outstride] += c97;
156  }
157 
158  c = in[7*instride];
159  if (c) {
160  // 49 -142 212 -251 251 -212 142 -49
161  int32_t c49 = c*49;
162  int32_t c142 = c*142;
163  int32_t c212 = c*212;
164  int32_t c251 = c*251;
165  out[0*outstride] += c49;
166  out[1*outstride] -= c142;
167  out[2*outstride] += c212;
168  out[3*outstride] -= c251;
169  out[4*outstride] += c251;
170  out[5*outstride] -= c212;
171  out[6*outstride] += c142;
172  out[7*outstride] -= c49;
173  }
174 }
175 
176 void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)
177 {
178  int32_t tmp[64];
179 
180  // idct on rows
181  for (int y = 0; y < 8; y++)
182  idct_1D_u32(&in[8*y], 1, &tmp[8*y], 1);
183 
184  int32_t tmp2[64];
185 
186  // idct on columns
187  for (int x = 0; x < 8; x++)
188  idct_1D_u32(&tmp[x], 8, &tmp2[x], 8);
189 
190  // scale, adjust bias, and clamp
191  for (int y = 0; y < 8; y++) {
192  for (int x = 0; x < 8; x++) {
193  int i = 8*y + x;
194 
195  // Shift of 18: the divide by 4 as part of the idct, and a shift by 16
196  // to undo the fixed-point arithmetic. (We accumulated 8 bits of
197  // fractional precision during each of the row and column IDCTs)
198  //
199  // Originally:
200  // int32_t v = (tmp2[i] >> 18) + 128;
201  //
202  // Move the add before the shift and we can do rounding at
203  // the same time.
204  const int32_t offset = (128 << 18) + (1 << 17);
205  int32_t v = (tmp2[i] + offset) >> 18;
206 
207  if (v < 0)
208  v = 0;
209  if (v > 255)
210  v = 255;
211 
212  out[y*outstride + x] = v;
213  }
214  }
215 }
216 
218 // Below: a "as straight-forward as I can make" implementation.
219 static inline void idct_1D_double(double *in, int instride, double *out, int outstride)
220 {
221  for (int x = 0; x < 8; x++)
222  out[x*outstride] = 0;
223 
224  // iterate over IDCT coefficients
225  double Cu = 1/sqrt(2);
226 
227  for (int u = 0; u < 8; u++, Cu = 1) {
228 
229  double coeff = in[u*instride];
230  if (coeff == 0)
231  continue;
232 
233  for (int x = 0; x < 8; x++)
234  out[x*outstride] += Cu*cos((2*x+1)*u*M_PI/16) * coeff;
235  }
236 }
237 
238 void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
239 {
240  double din[64], dout[64];
241  for (int i = 0; i < 64; i++)
242  din[i] = in[i];
243 
244  double tmp[64];
245 
246  // idct on rows
247  for (int y = 0; y < 8; y++)
248  idct_1D_double(&din[8*y], 1, &tmp[8*y], 1);
249 
250  // idct on columns
251  for (int x = 0; x < 8; x++)
252  idct_1D_double(&tmp[x], 8, &dout[x], 8);
253 
254  // scale, adjust bias, and clamp
255  for (int y = 0; y < 8; y++) {
256  for (int x = 0; x < 8; x++) {
257  int i = 8*y + x;
258 
259  dout[i] = (dout[i] / 4) + 128;
260  if (dout[i] < 0)
261  dout[i] = 0;
262  if (dout[i] > 255)
263  dout[i] = 255;
264 
265  // XXX round by adding +.5?
266  out[y*outstride + x] = dout[i];
267  }
268  }
269 }
270 
272 static inline unsigned char njClip(const int x) {
273  return (x < 0) ? 0 : ((x > 0xFF) ? 0xFF : (unsigned char) x);
274 }
275 
276 #define W1 2841
277 #define W2 2676
278 #define W3 2408
279 #define W5 1609
280 #define W6 1108
281 #define W7 565
282 
283 static inline void njRowIDCT(int* blk) {
284  int x0, x1, x2, x3, x4, x5, x6, x7, x8;
285  if (!((x1 = blk[4] << 11)
286  | (x2 = blk[6])
287  | (x3 = blk[2])
288  | (x4 = blk[1])
289  | (x5 = blk[7])
290  | (x6 = blk[5])
291  | (x7 = blk[3])))
292  {
293  blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3;
294  return;
295  }
296  x0 = (blk[0] << 11) + 128;
297  x8 = W7 * (x4 + x5);
298  x4 = x8 + (W1 - W7) * x4;
299  x5 = x8 - (W1 + W7) * x5;
300  x8 = W3 * (x6 + x7);
301  x6 = x8 - (W3 - W5) * x6;
302  x7 = x8 - (W3 + W5) * x7;
303  x8 = x0 + x1;
304  x0 -= x1;
305  x1 = W6 * (x3 + x2);
306  x2 = x1 - (W2 + W6) * x2;
307  x3 = x1 + (W2 - W6) * x3;
308  x1 = x4 + x6;
309  x4 -= x6;
310  x6 = x5 + x7;
311  x5 -= x7;
312  x7 = x8 + x3;
313  x8 -= x3;
314  x3 = x0 + x2;
315  x0 -= x2;
316  x2 = (181 * (x4 + x5) + 128) >> 8;
317  x4 = (181 * (x4 - x5) + 128) >> 8;
318  blk[0] = (x7 + x1) >> 8;
319  blk[1] = (x3 + x2) >> 8;
320  blk[2] = (x0 + x4) >> 8;
321  blk[3] = (x8 + x6) >> 8;
322  blk[4] = (x8 - x6) >> 8;
323  blk[5] = (x0 - x4) >> 8;
324  blk[6] = (x3 - x2) >> 8;
325  blk[7] = (x7 - x1) >> 8;
326 }
327 
328 static inline void njColIDCT(const int* blk, unsigned char *out, int stride) {
329  int x0, x1, x2, x3, x4, x5, x6, x7, x8;
330  if (!((x1 = blk[8*4] << 8)
331  | (x2 = blk[8*6])
332  | (x3 = blk[8*2])
333  | (x4 = blk[8*1])
334  | (x5 = blk[8*7])
335  | (x6 = blk[8*5])
336  | (x7 = blk[8*3])))
337  {
338  x1 = njClip(((blk[0] + 32) >> 6) + 128);
339  for (x0 = 8; x0; --x0) {
340  *out = (unsigned char) x1;
341  out += stride;
342  }
343  return;
344  }
345  x0 = (blk[0] << 8) + 8192;
346  x8 = W7 * (x4 + x5) + 4;
347  x4 = (x8 + (W1 - W7) * x4) >> 3;
348  x5 = (x8 - (W1 + W7) * x5) >> 3;
349  x8 = W3 * (x6 + x7) + 4;
350  x6 = (x8 - (W3 - W5) * x6) >> 3;
351  x7 = (x8 - (W3 + W5) * x7) >> 3;
352  x8 = x0 + x1;
353  x0 -= x1;
354  x1 = W6 * (x3 + x2) + 4;
355  x2 = (x1 - (W2 + W6) * x2) >> 3;
356  x3 = (x1 + (W2 - W6) * x3) >> 3;
357  x1 = x4 + x6;
358  x4 -= x6;
359  x6 = x5 + x7;
360  x5 -= x7;
361  x7 = x8 + x3;
362  x8 -= x3;
363  x3 = x0 + x2;
364  x0 -= x2;
365  x2 = (181 * (x4 + x5) + 128) >> 8;
366  x4 = (181 * (x4 - x5) + 128) >> 8;
367  *out = njClip(((x7 + x1) >> 14) + 128); out += stride;
368  *out = njClip(((x3 + x2) >> 14) + 128); out += stride;
369  *out = njClip(((x0 + x4) >> 14) + 128); out += stride;
370  *out = njClip(((x8 + x6) >> 14) + 128); out += stride;
371  *out = njClip(((x8 - x6) >> 14) + 128); out += stride;
372  *out = njClip(((x0 - x4) >> 14) + 128); out += stride;
373  *out = njClip(((x3 - x2) >> 14) + 128); out += stride;
374  *out = njClip(((x7 - x1) >> 14) + 128);
375 }
376 
377 void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
378 {
379  int coef;
380 
381  for (coef = 0; coef < 64; coef += 8)
382  njRowIDCT(&in[coef]);
383  for (coef = 0; coef < 8; ++coef)
384  njColIDCT(&in[coef], &out[coef], outstride);
385 }
pjpeg_idct_2D_nanojpeg
void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
Definition: pjpeg-idct.c:377
njRowIDCT
static void njRowIDCT(int *blk)
Definition: pjpeg-idct.c:283
njColIDCT
static void njColIDCT(const int *blk, unsigned char *out, int stride)
Definition: pjpeg-idct.c:328
W3
#define W3
Definition: pjpeg-idct.c:278
W2
#define W2
Definition: pjpeg-idct.c:277
idct_1D_u32
static void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride)
Definition: pjpeg-idct.c:42
pjpeg_idct_2D_double
void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
Definition: pjpeg-idct.c:238
W5
#define W5
Definition: pjpeg-idct.c:279
W6
#define W6
Definition: pjpeg-idct.c:280
pjpeg_idct_2D_u32
void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)
Definition: pjpeg-idct.c:176
idct_1D_double
static void idct_1D_double(double *in, int instride, double *out, int outstride)
Definition: pjpeg-idct.c:219
njClip
static unsigned char njClip(const int x)
Definition: pjpeg-idct.c:272
W7
#define W7
Definition: pjpeg-idct.c:281
W1
#define W1
Definition: pjpeg-idct.c:276


apriltag
Author(s): Edwin Olson , Max Krogius
autogenerated on Sun Apr 20 2025 02:08:19