roswrap/src/toojpeg/toojpeg.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////
2 // toojpeg.cpp
3 // written by Stephan Brumme, 2018-2019
4 // see https://create.stephan-brumme.com/toojpeg/
5 //
6 
7 #include "toojpeg.h"
8 
9 // - the "official" specifications: https://www.w3.org/Graphics/JPEG/itu-t81.pdf and https://www.w3.org/Graphics/JPEG/jfif3.pdf
10 // - Wikipedia has a short description of the JFIF/JPEG file format: https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format
11 // - the popular STB Image library includes Jon's JPEG encoder as well: https://github.com/nothings/stb/blob/master/stb_image_write.h
12 // - the most readable JPEG book (from a developer's perspective) is Miano's "Compressed Image File Formats" (1999, ISBN 0-201-60443-4),
13 // used copies are really cheap nowadays and include a CD with C++ sources as well (plus great format descriptions of GIF & PNG)
14 // - much more detailed is Mitchell/Pennebaker's "JPEG: Still Image Data Compression Standard" (1993, ISBN 0-442-01272-1)
15 // which contains the official JPEG standard, too - fun fact: I bought a signed copy in a second-hand store without noticing
16 
17 namespace // anonymous namespace to hide local functions / constants / etc.
18 {
19 // ////////////////////////////////////////
// data types
// local integer aliases used throughout this file

// unsigned
using uint8_t  = unsigned char;
using uint16_t = unsigned short;
// signed
using int16_t  = short;
using int32_t  = int;            // at least four bytes
25 
26 // ////////////////////////////////////////
27 // constants
28 
// Default quantization tables (JPEG Standard, Annex K), stored row by row as 8x8 matrices.
// writeJpeg() scales them according to the requested quality.
// Notes: there are a few experts proposing slightly more efficient values,
//        e.g. https://www.imagemagick.org/discourse-server/viewtopic.php?t=20333
//        btw: Google's Guetzli project optimizes the quantization tables per image

// table for the Y channel
const uint8_t DefaultQuantLuminance[64] =
{
  16,  11,  10,  16,  24,  40,  51,  61,
  12,  12,  14,  19,  26,  58,  60,  55,
  14,  13,  16,  24,  40,  57,  69,  56,
  14,  17,  22,  29,  51,  87,  80,  62,
  18,  22,  37,  56,  68, 109, 103,  77,
  24,  35,  55,  64,  81, 104, 113,  92,
  49,  64,  78,  87, 103, 121, 120, 101,
  72,  92,  95,  98, 112, 100, 103,  99
};

// table for the Cb and Cr channels
const uint8_t DefaultQuantChrominance[64] =
{
  17,  18,  24,  47,  99,  99,  99,  99,
  18,  21,  26,  66,  99,  99,  99,  99,
  24,  26,  56,  99,  99,  99,  99,  99,
  47,  66,  99,  99,  99,  99,  99,  99,
  99,  99,  99,  99,  99,  99,  99,  99,
  99,  99,  99,  99,  99,  99,  99,  99,
  99,  99,  99,  99,  99,  99,  99,  99,
  99,  99,  99,  99,  99,  99,  99,  99
};
48 
// 8x8 blocks are processed in zig-zag order.
// Most encoders use a zig-zag "forward" table, this file stores its inverse for performance reasons:
// ZigZagInv[i] is the row-major position (0..63) of the i-th coefficient in zig-zag order,
// therefore ZigZagInv[ZigZag[i]] = i
//
// for reference, the forward table would be:
//   ZigZag[] =  0, 1, 5, 6,14,15,27,28,
//               2, 4, 7,13,16,26,29,42,
//               3, 8,12,17,25,30,41,43,
//               9,11,18,24,31,40,44,53,
//              10,19,23,32,39,45,52,54,
//              20,22,33,38,46,51,55,60,
//              21,34,37,47,50,56,59,61,
//              35,36,48,49,57,58,62,63
const uint8_t ZigZagInv[64] =
{
   0,  1,  8, 16,  9,  2,  3, 10,
  17, 24, 32, 25, 18, 11,  4,  5,
  12, 19, 26, 33, 40, 48, 41, 34,
  27, 20, 13,  6,  7, 14, 21, 28,
  35, 42, 49, 56, 57, 50, 43, 36,
  29, 22, 15, 23, 30, 37, 44, 51,
  58, 59, 52, 45, 38, 31, 39, 46,
  53, 60, 61, 54, 47, 55, 62, 63
};
61 
// static Huffman code tables from JPEG standard Annex K
// - CodesPerBitsize tables define how many Huffman codes have a certain bitsize:
//   entry [k] counts the codes that are k+1 bits long (because there is nothing with zero bits),
//   e.g. DcLuminanceCodesPerBitsize[2] = 5 because there are 5 Huffman codes being 2+1=3 bits long
// - Values tables list the symbols ordered by their Huffman code bitsize,
//   e.g. AcLuminanceValues => 0x01 and 0x02 get 2 bits, 0x03 gets 3 bits, 0x00, 0x04 and 0x11 get 4 bits, ...

// Huffman definitions for first DC/AC tables (luminance / Y channel)
const uint8_t DcLuminanceCodesPerBitsize[16] = { 0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 };   // sum = 12
const uint8_t DcLuminanceValues         [12] = { 0,1,2,3,4,5,6,7,8,9,10,11 };         // => 12 codes
const uint8_t AcLuminanceCodesPerBitsize[16] = { 0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,125 }; // sum = 162

// 162 codes = 16*10+2 symbols: upper 4 bits can be 0..F while lower 4 bits can be 1..A,
// plus the two special codes 0x00 and 0xF0
// (the order of these symbols was determined empirically by the JPEG committee)
const uint8_t AcLuminanceValues[162] =
{
  0x01,0x02,                                          //  2 bits
  0x03,                                               //  3 bits
  0x00,0x04,0x11,                                     //  4 bits
  0x05,0x12,0x21,                                     //  5 bits
  0x31,0x41,                                          //  6 bits
  0x06,0x13,0x51,0x61,                                //  7 bits
  0x07,0x22,0x71,                                     //  8 bits
  0x14,0x32,0x81,0x91,0xA1,                           //  9 bits
  0x08,0x23,0x42,0xB1,0xC1,                           // 10 bits
  0x15,0x52,0xD1,0xF0,                                // 11 bits
  0x24,0x33,0x62,0x72,                                // 12 bits
  0x82,                                               // 15 bits
  // all remaining 125 symbols are 16 bits long
  0x09,0x0A,
  0x16,0x17,0x18,0x19,0x1A,
  0x25,0x26,0x27,0x28,0x29,0x2A,
  0x34,0x35,0x36,0x37,0x38,0x39,0x3A,
  0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,
  0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,
  0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,
  0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,
  0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,
  0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,
  0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,
  0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,
  0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,
  0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,
  0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,
  0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA
};
// Huffman definitions for second DC/AC tables (chrominance / Cb and Cr channels)
const uint8_t DcChrominanceCodesPerBitsize[16] = { 0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };   // sum = 12
const uint8_t DcChrominanceValues         [12] = { 0,1,2,3,4,5,6,7,8,9,10,11 };         // => 12 codes (identical to DcLuminanceValues)
const uint8_t AcChrominanceCodesPerBitsize[16] = { 0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,119 }; // sum = 162

// 162 codes: same set of symbols as the luminance AC table, just in a different order
// (which is more efficient for AC coding)
const uint8_t AcChrominanceValues[162] =
{
  0x00,0x01,                                          //  2 bits
  0x02,                                               //  3 bits
  0x03,0x11,                                          //  4 bits
  0x04,0x05,0x21,0x31,                                //  5 bits
  0x06,0x12,0x41,0x51,                                //  6 bits
  0x07,0x61,0x71,                                     //  7 bits
  0x13,0x22,0x32,0x81,                                //  8 bits
  0x08,0x14,0x42,0x91,0xA1,0xB1,0xC1,                 //  9 bits
  0x09,0x23,0x33,0x52,0xF0,                           // 10 bits
  0x15,0x62,0x72,0xD1,                                // 11 bits
  0x0A,0x16,0x24,0x34,                                // 12 bits
  0xE1,                                               // 14 bits
  0x25,0xF1,                                          // 15 bits
  // all remaining 119 symbols are 16 bits long
  0x17,0x18,0x19,0x1A,
  0x26,0x27,0x28,0x29,0x2A,
  0x35,0x36,0x37,0x38,0x39,0x3A,
  0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,
  0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,
  0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,
  0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,
  0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,
  0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,
  0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,
  0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,
  0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,
  0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,
  0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,
  0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA
};
// +/-2^11, maximum value after DCT
const int16_t CodeWordLimit = 1 << 11;
93 
94 // ////////////////////////////////////////
95 // structs
96 
// One Huffman code word: a bit pattern together with the number of bits that belong to it.
struct BitCode
{
  uint16_t code;    // the bit pattern (JPEG's Huffman codes are limited to 16 bits)
  uint8_t  numBits; // how many bits of "code" are valid

  // leaves both members uninitialized, they must be assigned at a later time
  BitCode() = default;

  // fully initialized code word
  BitCode(uint16_t code_, uint8_t numBits_)
  : code   (code_),
    numBits(numBits_)
  {
  }
};
106 
107 // wrapper for bit output operations
108 struct BitWriter
109 {
110  // user-supplied callback that writes/stores one byte
112  // initialize writer
113  explicit BitWriter(TooJpeg::WRITE_ONE_BYTE output_) : output(output_) {}
114 
115  // store the most recently encoded bits that are not written yet
116  struct BitBuffer
117  {
118  int32_t data = 0; // actually only at most 24 bits are used
119  uint8_t numBits = 0; // number of valid bits (the right-most bits)
120  } buffer;
121 
122  // write Huffman bits stored in BitCode, keep excess bits in BitBuffer
123  BitWriter& operator<<(const BitCode& data)
124  {
125  // append the new bits to those bits leftover from previous call(s)
126  buffer.numBits += data.numBits;
127  buffer.data <<= data.numBits;
128  buffer.data |= data.code;
129 
130  // write all "full" bytes
131  while (buffer.numBits >= 8)
132  {
133  // extract highest 8 bits
134  buffer.numBits -= 8;
135  auto oneByte = uint8_t(buffer.data >> buffer.numBits);
136  output(oneByte);
137 
138  if (oneByte == 0xFF) // 0xFF has a special meaning for JPEGs (it's a block marker)
139  output(0); // therefore pad a zero to indicate "nope, this one ain't a marker, it's just a coincidence"
140 
141  // note: I don't clear those written bits, therefore buffer.bits may contain garbage in the high bits
142  // if you really want to "clean up" (e.g. for debugging purposes) then uncomment the following line
143  //buffer.bits &= (1 << buffer.numBits) - 1;
144  }
145  return *this;
146  }
147 
148  // write all non-yet-written bits, fill gaps with 1s (that's a strange JPEG thing)
149  void flush()
150  {
151  // at most seven set bits needed to "fill" the last byte: 0x7F = binary 0111 1111
152  *this << BitCode(0x7F, 7); // I should set buffer.numBits = 0 but since there are no single bits written after flush() I can safely ignore it
153  }
154 
155  // NOTE: all the following BitWriter functions IGNORE the BitBuffer and write straight to output !
156  // write a single byte
157  BitWriter& operator<<(uint8_t oneByte)
158  {
159  output(oneByte);
160  return *this;
161  }
162 
163  // write an array of bytes
164  template <typename T, int Size>
165  BitWriter& operator<<(T (&manyBytes)[Size])
166  {
167  for (auto c : manyBytes)
168  output(c);
169  return *this;
170  }
171 
172  // start a new JFIF block
173  void addMarker(uint8_t id, uint16_t length)
174  {
175  output(0xFF); output(id); // ID, always preceded by 0xFF
176  output(uint8_t(length >> 8)); // length of the block (big-endian, includes the 2 length bytes as well)
177  output(uint8_t(length & 0xFF));
178  }
179 };
180 
181 // ////////////////////////////////////////
182 // functions / templates
183 
// returns the smaller of both arguments (same as std::min())
template <typename Number>
Number minimum(Number value, Number maximum)
{
  if (value <= maximum)
    return value;

  return maximum;
}
190 
// restrict a value to the interval [minValue, maxValue]
template <typename Number, typename Limit>
Number clamp(Number value, Limit minValue, Limit maxValue)
{
  Number result = value;       // inside the interval: keep the value as it is

  if (value <= minValue)
    result = minValue;         // never smaller than the minimum
  else if (value >= maxValue)
    result = maxValue;         // never bigger than the maximum

  return result;
}
199 
// convert from RGB to YCbCr, constants are similar to ITU-R, see https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion
// (none of these functions adds the offset of 128, they return the plain weighted sums)

// luminance
float rgb2y(float r, float g, float b)
{
  return +0.299f * r + 0.587f * g + 0.114f * b;
}

// blue-difference chrominance
float rgb2cb(float r, float g, float b)
{
  return -0.16874f * r - 0.33126f * g + 0.5f * b;
}

// red-difference chrominance
float rgb2cr(float r, float g, float b)
{
  return +0.5f * r - 0.41869f * g - 0.08131f * b;
}

// integer overloads, they just forward to the float versions
float rgb2y(int r, int g, int b)
{
  return rgb2y(static_cast<float>(r), static_cast<float>(g), static_cast<float>(b));
}

float rgb2cb(int r, int g, int b)
{
  return rgb2cb(static_cast<float>(r), static_cast<float>(g), static_cast<float>(b));
}

float rgb2cr(int r, int g, int b)
{
  return rgb2cr(static_cast<float>(r), static_cast<float>(g), static_cast<float>(b));
}
207 
// forward DCT computation "in one dimension" (fast AAN algorithm by Arai, Agui and Nakajima: "A fast DCT-SQ scheme for images")
// Transforms the eight values block[0], block[stride], ..., block[7*stride] in-place.
// The results are not normalized yet: the caller multiplies them with precomputed scale factors afterwards.
// Based on https://dev.w3.org/Amaya/libjpeg/jfdctflt.c , its variable names are mentioned in the comments.
void DCT(float block[8*8], uint8_t stride) // stride must be 1 (=horizontal) or 8 (=vertical)
{
  // rotation constants
  const float Sqrt2Cos1 = 1.306562965f; // cos(pi * 1 / 8) * sqrt(2) = sqrt((2 + sqrt(2)) / 2)
  const float Cos2      = 0.707106781f; // cos(pi * 2 / 8)           = 1 / sqrt(2)
  const float Cos3      = 0.382683432f; // cos(pi * 3 / 8)           = sqrt(2 - sqrt(2)) / 2
  const float Sqrt2Cos3 = 0.541196100f; // cos(pi * 3 / 8) * sqrt(2)

  // addresses of the eight samples
  float* const p0 = block;
  float* const p1 = block + 1 * stride;
  float* const p2 = block + 2 * stride;
  float* const p3 = block + 3 * stride;
  float* const p4 = block + 4 * stride;
  float* const p5 = block + 5 * stride;
  float* const p6 = block + 6 * stride;
  float* const p7 = block + 7 * stride;

  // fetch all inputs before anything is overwritten
  const float in0 = *p0, in1 = *p1, in2 = *p2, in3 = *p3;
  const float in4 = *p4, in5 = *p5, in6 = *p6, in7 = *p7;

  // first butterfly stage: sums and differences of mirrored samples
  const float sum07 = in0 + in7;  const float diff07 = in0 - in7; // tmp0, tmp7
  const float sum16 = in1 + in6;  const float diff16 = in1 - in6; // tmp1, tmp6
  const float sum25 = in2 + in5;  const float diff25 = in2 - in5; // tmp2, tmp5
  const float sum34 = in3 + in4;  const float diff34 = in3 - in4; // tmp3, tmp4

  // even part ("phase 2")
  const float evenOuterSum  = sum07 + sum34; // tmp10
  const float evenOuterDiff = sum07 - sum34; // tmp13
  const float evenInnerSum  = sum16 + sum25; // tmp11
  const float evenInnerDiff = sum16 - sum25; // tmp12

  // "phase 3"
  *p0 = evenOuterSum + evenInnerSum;
  *p4 = evenOuterSum - evenInnerSum;

  // "phase 5"
  const float z1 = (evenInnerDiff + evenOuterDiff) * Cos2;
  *p2 = evenOuterDiff + z1;
  *p6 = evenOuterDiff - z1;

  // odd part ("phase 2")
  const float odd10 = diff25 + diff34; // tmp10
  const float odd11 = diff16 + diff25; // tmp11
  const float odd12 = diff16 + diff07; // tmp12

  const float z5  = (odd10 - odd12) * Cos3;
  const float z2  = odd10 * Sqrt2Cos3 + z5;
  const float z3  = odd11 * Cos2;
  const float z4  = odd12 * Sqrt2Cos1 + z5;
  const float z11 = diff07 + z3; // "phase 5"
  const float z13 = diff07 - z3;

  // "phase 6"
  *p1 = z11 + z4;
  *p7 = z11 - z4;
  *p5 = z13 + z2;
  *p3 = z13 - z2;
}
253 
254 // run DCT, quantize and write Huffman bit codes
255 int16_t encodeBlock(BitWriter& writer, float block[8][8], const float scaled[8*8], int16_t lastDC,
256  const BitCode huffmanDC[256], const BitCode huffmanAC[256], const BitCode* codewords)
257 {
258  // "linearize" the 8x8 block, treat it as a flat array of 64 floats
259  auto block64 = (float*) block;
260 
261  // DCT: rows
262  for (auto offset = 0; offset < 8; offset++)
263  DCT(block64 + offset*8, 1);
264  // DCT: columns
265  for (auto offset = 0; offset < 8; offset++)
266  DCT(block64 + offset*1, 8);
267 
268  // scale
269  for (auto i = 0; i < 8*8; i++)
270  block64[i] *= scaled[i];
271 
272  // encode DC (the first coefficient is the "average color" of the 8x8 block)
273  auto DC = int(block64[0] + (block64[0] >= 0 ? +0.5f : -0.5f)); // C++11's nearbyint() achieves a similar effect
274 
275  // quantize and zigzag the other 63 coefficients
276  auto posNonZero = 0; // find last coefficient which is not zero (because trailing zeros are encoded differently)
277  int16_t quantized[8*8];
278  for (auto i = 1; i < 8*8; i++) // start at 1 because block64[0]=DC was already processed
279  {
280  auto value = block64[ZigZagInv[i]];
281  // round to nearest integer
282  quantized[i] = int(value + (value >= 0 ? +0.5f : -0.5f)); // C++11's nearbyint() achieves a similar effect
283  // remember offset of last non-zero coefficient
284  if (quantized[i] != 0)
285  posNonZero = i;
286  }
287 
288  // same "average color" as previous block ?
289  auto diff = DC - lastDC;
290  if (diff == 0)
291  writer << huffmanDC[0x00]; // yes, write a special short symbol
292  else
293  {
294  auto bits = codewords[diff]; // nope, encode the difference to previous block's average color
295  writer << huffmanDC[bits.numBits] << bits;
296  }
297 
298  // encode ACs (quantized[1..63])
299  auto offset = 0; // upper 4 bits count the number of consecutive zeros
300  for (auto i = 1; i <= posNonZero; i++) // quantized[0] was already written, skip all trailing zeros, too
301  {
302  // zeros are encoded in a special way
303  while (quantized[i] == 0) // found another zero ?
304  {
305  offset += 0x10; // add 1 to the upper 4 bits
306  // split into blocks of at most 16 consecutive zeros
307  if (offset > 0xF0) // remember, the counter is in the upper 4 bits, 0xF = 15
308  {
309  writer << huffmanAC[0xF0]; // 0xF0 is a special code for "16 zeros"
310  offset = 0;
311  }
312  i++;
313  }
314 
315  auto encoded = codewords[quantized[i]];
316  // combine number of zeros with the number of bits of the next non-zero value
317  writer << huffmanAC[offset + encoded.numBits] << encoded; // and the value itself
318  offset = 0;
319  }
320 
321  // send end-of-block code (0x00), only needed if there are trailing zeros
322  if (posNonZero < 8*8 - 1) // = 63
323  writer << huffmanAC[0x00];
324 
325  return DC;
326 }
327 
328 // Jon's code includes the pre-generated Huffman codes
329 // I don't like these "magic constants" and compute them on my own :-)
330 void generateHuffmanTable(const uint8_t numCodes[16], const uint8_t* values, BitCode result[256])
331 {
332  // process all bitsizes 1 thru 16, no JPEG Huffman code is allowed to exceed 16 bits
333  auto huffmanCode = 0;
334  for (auto numBits = 1; numBits <= 16; numBits++)
335  {
336  // ... and each code of these bitsizes
337  for (auto i = 0; i < numCodes[numBits - 1]; i++) // note: numCodes array starts at zero, but smallest bitsize is 1
338  result[*values++] = BitCode(huffmanCode++, numBits);
339 
340  // next Huffman code needs to be one bit wider
341  huffmanCode <<= 1;
342  }
343 }
344 
345 } // end of anonymous namespace
346 
347 // -------------------- externally visible code --------------------
348 
349 namespace TooJpeg
350 {
351 // the only exported function ...
352 bool writeJpeg(WRITE_ONE_BYTE output, const void* pixels_, unsigned short width, unsigned short height,
353  bool isRGB, unsigned char quality_, bool downsample, const char* comment)
354 {
355  // reject invalid pointers
356  if (output == nullptr || pixels_ == nullptr)
357  return false;
358  // check image format
359  if (width == 0 || height == 0)
360  return false;
361 
362  // number of components
363  const auto numComponents = isRGB ? 3 : 1;
364  // note: if there is just one component (=grayscale), then only luminance needs to be stored in the file
365  // thus everything related to chrominance need not to be written to the JPEG
366  // I still compute a few things, like quantization tables to avoid a complete code mess
367 
368  // grayscale images can't be downsampled (because there are no Cb + Cr channels)
369  if (!isRGB)
370  downsample = false;
371 
372  // wrapper for all output operations
373  BitWriter bitWriter(output);
374 
375  // ////////////////////////////////////////
376  // JFIF headers
377  const uint8_t HeaderJfif[2+2+16] =
378  { 0xFF,0xD8, // SOI marker (start of image)
379  0xFF,0xE0, // JFIF APP0 tag
380  0,16, // length: 16 bytes (14 bytes payload + 2 bytes for this length field)
381  'J','F','I','F',0, // JFIF identifier, zero-terminated
382  1,1, // JFIF version 1.1
383  0, // no density units specified
384  0,1,0,1, // density: 1 pixel "per pixel" horizontally and vertically
385  0,0 }; // no thumbnail (size 0 x 0)
386  bitWriter << HeaderJfif;
387 
388  // ////////////////////////////////////////
389  // comment (optional)
390  if (comment != nullptr)
391  {
392  // look for zero terminator
393  auto length = 0; // = strlen(comment);
394  while (comment[length] != 0)
395  length++;
396 
397  // write COM marker
398  bitWriter.addMarker(0xFE, 2+length); // block size is number of bytes (without zero terminator) + 2 bytes for this length field
399  // ... and write the comment itself
400  for (auto i = 0; i < length; i++)
401  bitWriter << comment[i];
402  }
403 
404  // ////////////////////////////////////////
405  // adjust quantization tables to desired quality
406 
407  // quality level must be in 1 ... 100
408  auto quality = clamp<uint16_t>(quality_, 1, 100);
409  // convert to an internal JPEG quality factor, formula taken from libjpeg
410  quality = quality < 50 ? 5000 / quality : 200 - quality * 2;
411 
412  uint8_t quantLuminance [8*8];
413  uint8_t quantChrominance[8*8];
414  for (auto i = 0; i < 8*8; i++)
415  {
416  int luminance = (DefaultQuantLuminance [ZigZagInv[i]] * quality + 50) / 100;
417  int chrominance = (DefaultQuantChrominance[ZigZagInv[i]] * quality + 50) / 100;
418 
419  // clamp to 1..255
420  quantLuminance [i] = clamp(luminance, 1, 255);
421  quantChrominance[i] = clamp(chrominance, 1, 255);
422  }
423 
424  // write quantization tables
425  bitWriter.addMarker(0xDB, 2 + (isRGB ? 2 : 1) * (1 + 8*8)); // length: 65 bytes per table + 2 bytes for this length field
426  // each table has 64 entries and is preceded by an ID byte
427 
428  bitWriter << 0x00 << quantLuminance; // first quantization table
429  if (isRGB)
430  bitWriter << 0x01 << quantChrominance; // second quantization table, only relevant for color images
431 
432  // ////////////////////////////////////////
433  // write image infos (SOF0 - start of frame)
434  bitWriter.addMarker(0xC0, 2+6+3*numComponents); // length: 6 bytes general info + 3 per channel + 2 bytes for this length field
435 
436  // 8 bits per channel
437  bitWriter << 0x08
438  // image dimensions (big-endian)
439  << (height >> 8) << (height & 0xFF)
440  << (width >> 8) << (width & 0xFF);
441 
442  // sampling and quantization tables for each component
443  bitWriter << numComponents; // 1 component (grayscale, Y only) or 3 components (Y,Cb,Cr)
444  for (auto id = 1; id <= numComponents; id++)
445  bitWriter << id // component ID (Y=1, Cb=2, Cr=3)
446  // bitmasks for sampling: highest 4 bits: horizontal, lowest 4 bits: vertical
447  << (id == 1 && downsample ? 0x22 : 0x11) // 0x11 is default YCbCr 4:4:4 and 0x22 stands for YCbCr 4:2:0
448  << (id == 1 ? 0 : 1); // use quantization table 0 for Y, table 1 for Cb and Cr
449 
450  // ////////////////////////////////////////
451  // Huffman tables
452  // DHT marker - define Huffman tables
453  bitWriter.addMarker(0xC4, isRGB ? (2+208+208) : (2+208));
454  // 2 bytes for the length field, store chrominance only if needed
455  // 1+16+12 for the DC luminance
456  // 1+16+162 for the AC luminance (208 = 1+16+12 + 1+16+162)
457  // 1+16+12 for the DC chrominance
458  // 1+16+162 for the AC chrominance (208 = 1+16+12 + 1+16+162, same as above)
459 
460  // store luminance's DC+AC Huffman table definitions
461  bitWriter << 0x00 // highest 4 bits: 0 => DC, lowest 4 bits: 0 => Y (baseline)
462  << DcLuminanceCodesPerBitsize
463  << DcLuminanceValues;
464  bitWriter << 0x10 // highest 4 bits: 1 => AC, lowest 4 bits: 0 => Y (baseline)
465  << AcLuminanceCodesPerBitsize
466  << AcLuminanceValues;
467 
468  // compute actual Huffman code tables (see Jon's code for precalculated tables)
469  BitCode huffmanLuminanceDC[256];
470  BitCode huffmanLuminanceAC[256];
471  generateHuffmanTable(DcLuminanceCodesPerBitsize, DcLuminanceValues, huffmanLuminanceDC);
472  generateHuffmanTable(AcLuminanceCodesPerBitsize, AcLuminanceValues, huffmanLuminanceAC);
473 
474  // chrominance is only relevant for color images
475  BitCode huffmanChrominanceDC[256];
476  BitCode huffmanChrominanceAC[256];
477  if (isRGB)
478  {
479  // store luminance's DC+AC Huffman table definitions
480  bitWriter << 0x01 // highest 4 bits: 0 => DC, lowest 4 bits: 1 => Cr,Cb (baseline)
481  << DcChrominanceCodesPerBitsize
482  << DcChrominanceValues;
483  bitWriter << 0x11 // highest 4 bits: 1 => AC, lowest 4 bits: 1 => Cr,Cb (baseline)
484  << AcChrominanceCodesPerBitsize
485  << AcChrominanceValues;
486 
487  // compute actual Huffman code tables (see Jon's code for precalculated tables)
488  generateHuffmanTable(DcChrominanceCodesPerBitsize, DcChrominanceValues, huffmanChrominanceDC);
489  generateHuffmanTable(AcChrominanceCodesPerBitsize, AcChrominanceValues, huffmanChrominanceAC);
490  }
491 
492  // ////////////////////////////////////////
493  // start of scan (there is only a single scan for baseline JPEGs)
494  bitWriter.addMarker(0xDA, 2+1+2*numComponents+3); // 2 bytes for the length field, 1 byte for number of components,
495  // then 2 bytes for each component and 3 bytes for spectral selection
496 
497  // assign Huffman tables to each component
498  bitWriter << numComponents;
499  for (auto id = 1; id <= numComponents; id++)
500  // highest 4 bits: DC Huffman table, lowest 4 bits: AC Huffman table
501  bitWriter << id << (id == 1 ? 0x00 : 0x11); // Y: tables 0 for DC and AC; Cb + Cr: tables 1 for DC and AC
502 
503  // constant values for our baseline JPEGs (which have a single sequential scan)
504  static const uint8_t Spectral[3] = { 0, 63, 0 }; // spectral selection: must be from 0 to 63; successive approximation must be 0
505  bitWriter << Spectral;
506 
507  // ////////////////////////////////////////
508  // adjust quantization tables with AAN scaling factors to simplify DCT
509  float scaledLuminance [8*8];
510  float scaledChrominance[8*8];
511  for (auto i = 0; i < 8*8; i++)
512  {
513  auto row = ZigZagInv[i] / 8; // same as ZigZagInv[i] >> 3
514  auto column = ZigZagInv[i] % 8; // same as ZigZagInv[i] & 7
515 
516  // scaling constants for AAN DCT algorithm: AanScaleFactors[0] = 1, AanScaleFactors[k=1..7] = cos(k*PI/16) * sqrt(2)
517  static const float AanScaleFactors[8] = { 1, 1.387039845f, 1.306562965f, 1.175875602f, 1, 0.785694958f, 0.541196100f, 0.275899379f };
518  auto factor = 1 / (AanScaleFactors[row] * AanScaleFactors[column] * 8);
519  scaledLuminance [ZigZagInv[i]] = factor / quantLuminance [i];
520  scaledChrominance[ZigZagInv[i]] = factor / quantChrominance[i];
521  // if you really want JPEGs that are bitwise identical to Jon Olick's code then you need slightly different formulas (note: sqrt(8) = 2.828427125f)
522  //static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; // line 240 of jo_jpeg.cpp
523  //scaledLuminance [ZigZagInv[i]] = 1 / (quantLuminance [i] * aasf[row] * aasf[column]); // lines 266-267 of jo_jpeg.cpp
524  //scaledChrominance[ZigZagInv[i]] = 1 / (quantChrominance[i] * aasf[row] * aasf[column]);
525  }
526 
527  // ////////////////////////////////////////
528  // precompute JPEG codewords for quantized DCT
529  BitCode codewordsArray[2 * CodeWordLimit]; // note: quantized[i] is found at codewordsArray[quantized[i] + CodeWordLimit]
530  BitCode* codewords = &codewordsArray[CodeWordLimit]; // allow negative indices, so quantized[i] is at codewords[quantized[i]]
531  uint8_t numBits = 1; // each codeword has at least one bit (value == 0 is undefined)
532  int32_t mask = 1; // mask is always 2^numBits - 1, initial value 2^1-1 = 2-1 = 1
533  for (int16_t value = 1; value < CodeWordLimit; value++)
534  {
535  // numBits = position of highest set bit (ignoring the sign)
536  // mask = (2^numBits) - 1
537  if (value > mask) // one more bit ?
538  {
539  numBits++;
540  mask = (mask << 1) | 1; // append a set bit
541  }
542  codewords[-value] = BitCode(mask - value, numBits); // note that I use a negative index => codewords[-value] = codewordsArray[CodeWordLimit value]
543  codewords[+value] = BitCode( value, numBits);
544  }
545 
546  // just convert image data from void*
547  auto pixels = (const uint8_t*)pixels_;
548 
549  // the next two variables are frequently used when checking for image borders
550  const auto maxWidth = width - 1; // "last row"
551  const auto maxHeight = height - 1; // "bottom line"
552 
553  // process MCUs (minimum codes units) => image is subdivided into a grid of 8x8 or 16x16 tiles
554  const auto sampling = downsample ? 2 : 1; // 1x1 or 2x2 sampling
555  const auto mcuSize = 8 * sampling;
556 
557  // average color of the previous MCU
558  int16_t lastYDC = 0, lastCbDC = 0, lastCrDC = 0;
559  // convert from RGB to YCbCr
560  float Y[8][8], Cb[8][8], Cr[8][8];
561 
562  for (auto mcuY = 0; mcuY < height; mcuY += mcuSize) // each step is either 8 or 16 (=mcuSize)
563  for (auto mcuX = 0; mcuX < width; mcuX += mcuSize)
564  {
565  // YCbCr 4:4:4 format: each MCU is a 8x8 block - the same applies to grayscale images, too
566  // YCbCr 4:2:0 format: each MCU represents a 16x16 block, stored as 4x 8x8 Y-blocks plus 1x 8x8 Cb and 1x 8x8 Cr block)
567  for (auto blockY = 0; blockY < mcuSize; blockY += 8) // iterate once (YCbCr444 and grayscale) or twice (YCbCr420)
568  for (auto blockX = 0; blockX < mcuSize; blockX += 8)
569  {
570  // now we finally have an 8x8 block ...
571  for (auto deltaY = 0; deltaY < 8; deltaY++)
572  {
573  auto column = minimum(mcuX + blockX , maxWidth); // must not exceed image borders, replicate last row/column if needed
574  auto row = minimum(mcuY + blockY + deltaY, maxHeight);
575  for (auto deltaX = 0; deltaX < 8; deltaX++)
576  {
577  // find actual pixel position within the current image
578  auto pixelPos = row * int(width) + column; // the cast ensures that we don't run into multiplication overflows
579  if (column < maxWidth)
580  column++;
581 
582  // grayscale images have solely a Y channel which can be easily derived from the input pixel by shifting it by 128
583  if (!isRGB)
584  {
585  Y[deltaY][deltaX] = pixels[pixelPos] - 128.f;
586  continue;
587  }
588 
589  // RGB: 3 bytes per pixel (whereas grayscale images have only 1 byte per pixel)
590  auto r = pixels[3 * pixelPos ];
591  auto g = pixels[3 * pixelPos + 1];
592  auto b = pixels[3 * pixelPos + 2];
593 
594  Y [deltaY][deltaX] = rgb2y (r, g, b) - 128; // again, the JPEG standard requires Y to be shifted by 128
595  // YCbCr444 is easy - the more complex YCbCr420 has to be computed about 20 lines below in a second pass
596  if (!downsample)
597  {
598  Cb[deltaY][deltaX] = rgb2cb(r, g, b); // standard RGB-to-YCbCr conversion
599  Cr[deltaY][deltaX] = rgb2cr(r, g, b);
600  }
601  }
602  }
603 
604  // encode Y channel
605  lastYDC = encodeBlock(bitWriter, Y, scaledLuminance, lastYDC, huffmanLuminanceDC, huffmanLuminanceAC, codewords);
606  // Cb and Cr are encoded about 50 lines below
607  }
608 
609  // grayscale images don't need any Cb and Cr information
610  if (!isRGB)
611  continue;
612 
613  // ////////////////////////////////////////
614  // the following lines are only relevant for YCbCr420:
615  // average/downsample chrominance of four pixels while respecting the image borders
616  if (downsample)
617  for (short deltaY = 7; downsample && deltaY >= 0; deltaY--) // iterating loop in reverse increases cache read efficiency
618  {
619  auto row = minimum(mcuY + 2*deltaY, maxHeight); // each deltaX/Y step covers a 2x2 area
620  auto column = mcuX; // column is updated inside next loop
621  auto pixelPos = (row * int(width) + column) * 3; // numComponents = 3
622 
623  // deltas (in bytes) to next row / column, must not exceed image borders
624  auto rowStep = (row < maxHeight) ? 3 * int(width) : 0; // always numComponents*width except for bottom line
625  auto columnStep = (column < maxWidth ) ? 3 : 0; // always numComponents except for rightmost pixel
626 
627  for (short deltaX = 0; deltaX < 8; deltaX++)
628  {
629  // let's add all four samples (2x2 area)
630  auto right = pixelPos + columnStep;
631  auto down = pixelPos + rowStep;
632  auto downRight = pixelPos + columnStep + rowStep;
633 
634  // note: cast from 8 bits to >8 bits to avoid overflows when adding
635  auto r = short(pixels[pixelPos ]) + pixels[right ] + pixels[down ] + pixels[downRight ];
636  auto g = short(pixels[pixelPos + 1]) + pixels[right + 1] + pixels[down + 1] + pixels[downRight + 1];
637  auto b = short(pixels[pixelPos + 2]) + pixels[right + 2] + pixels[down + 2] + pixels[downRight + 2];
638 
639  // convert to Cb and Cr
640  Cb[deltaY][deltaX] = rgb2cb(r, g, b) / 4; // I still have to divide r,g,b by 4 to get their average values
641  Cr[deltaY][deltaX] = rgb2cr(r, g, b) / 4; // it's a bit faster if done AFTER CbCr conversion
642 
643  // step forward to next 2x2 area
644  pixelPos += 2*3; // 2 pixels => 6 bytes (2*numComponents)
645  column += 2;
646 
647  // reached right border ?
648  if (column >= maxWidth)
649  {
650  columnStep = 0;
651  pixelPos = ((row + 1) * int(width) - 1) * 3; // same as (row * width + maxWidth) * numComponents => current's row last pixel
652  }
653  }
654  } // end of YCbCr420 code for Cb and Cr
655 
656  // encode Cb and Cr
657  lastCbDC = encodeBlock(bitWriter, Cb, scaledChrominance, lastCbDC, huffmanChrominanceDC, huffmanChrominanceAC, codewords);
658  lastCrDC = encodeBlock(bitWriter, Cr, scaledChrominance, lastCrDC, huffmanChrominanceDC, huffmanChrominanceAC, codewords);
659  }
660 
661  bitWriter.flush(); // now image is completely encoded, write any bits still left in the buffer
662 
663  // ///////////////////////////
664  // EOI marker
665  bitWriter << 0xFF << 0xD9; // this marker has no length, therefore I can't use addMarker()
666  return true;
667 } // writeJpeg()
668 } // namespace TooJpeg
toojpeg.h
TiXmlNode::value
TIXML_STRING value
Definition: tinyxml.h:866
msgpack11::operator<<
std::ostream & operator<<(std::ostream &os, const MsgPack &msgpack)
Definition: msgpack11.cpp:371
data
data
TooJpeg
Definition: roswrap/src/toojpeg/toojpeg.cpp:349
TooJpeg::writeJpeg
bool writeJpeg(WRITE_ONE_BYTE output, const void *pixels_, unsigned short width, unsigned short height, bool isRGB, unsigned char quality_, bool downsample, const char *comment)
Definition: roswrap/src/toojpeg/toojpeg.cpp:352
TooJpeg::WRITE_ONE_BYTE
void(* WRITE_ONE_BYTE)(unsigned char)
Definition: roswrap/src/toojpeg/toojpeg.h:24
sick_scan_xd_api_test.c
c
Definition: sick_scan_xd_api_test.py:445
length
TFSIMD_FORCE_INLINE tfScalar length(const Quaternion &q)


sick_scan_xd
Author(s): Michael Lehning , Jochen Sprickerhof , Martin Günther
autogenerated on Fri Oct 25 2024 02:47:12