binary_to_compressed_c.cpp
Go to the documentation of this file.
1 // dear imgui
2 // (binary_to_compressed_c.cpp)
3 // Helper tool to turn a file into a C array, if you want to embed font data in your source code.
4 
5 // The data is first compressed with stb_compress() to reduce source code size,
6 // then encoded in Base85 to fit in a string so we can fit roughly 4 bytes of compressed data into 5 bytes of source code (suggested by @mmalex)
7 // (If we used 32-bit constants it would take 11 bytes of source code to encode 4 bytes, and be endianness dependent)
8 // Note that even with compression, the output array is likely to be bigger than the binary file..
9 // Load compressed TTF fonts with ImGui::GetIO().Fonts->AddFontFromMemoryCompressedTTF()
10 
11 // Build with, e.g:
12 // # cl.exe binary_to_compressed_c.cpp
13 // # gcc binary_to_compressed_c.cpp
14 // You can also find a precompiled Windows binary in the binary/demo package available from https://github.com/ocornut/imgui
15 
16 // Usage:
17 // binary_to_compressed_c.exe [-base85] [-nocompress] <inputfile> <symbolname>
18 // Usage example:
19 // # binary_to_compressed_c.exe myfont.ttf MyFont > myfont.cpp
20 // # binary_to_compressed_c.exe -base85 myfont.ttf MyFont > myfont.cpp
21 
22 #define _CRT_SECURE_NO_WARNINGS
23 #include <stdio.h>
24 #include <string.h>
25 #include <stdlib.h>
26 #include <assert.h>
27 
// stb_compress* from stb.h - declaration
typedef unsigned int stb_uint;
typedef unsigned char stb_uchar;
// Forward declaration: binary_to_compressed_c() calls the compressor before its definition further down.
stb_uint stb_compress(stb_uchar* out, stb_uchar* in, stb_uint len);
32 
33 static bool binary_to_compressed_c(const char* filename, const char* symbol, bool use_base85_encoding, bool use_compression);
34 
35 int main(int argc, char** argv)
36 {
37  if (argc < 3)
38  {
39  printf("Syntax: %s [-base85] [-nocompress] <inputfile> <symbolname>\n", argv[0]);
40  return 0;
41  }
42 
43  int argn = 1;
44  bool use_base85_encoding = false;
45  bool use_compression = true;
46  if (argv[argn][0] == '-')
47  {
48  if (strcmp(argv[argn], "-base85") == 0) { use_base85_encoding = true; argn++; }
49  else if (strcmp(argv[argn], "-nocompress") == 0) { use_compression = false; argn++; }
50  else
51  {
52  fprintf(stderr, "Unknown argument: '%s'\n", argv[argn]);
53  return 1;
54  }
55  }
56 
57  bool ret = binary_to_compressed_c(argv[argn], argv[argn+1], use_base85_encoding, use_compression);
58  if (!ret)
59  fprintf(stderr, "Error opening or reading file: '%s'\n", argv[argn]);
60  return ret ? 0 : 1;
61 }
62 
// Turn the low Base85 digit of x (x modulo 85) into its output character.
// Digits start at character code 35; codes at or above the backslash are shifted up by one
// so that a backslash is never produced.
char Encode85Byte(unsigned int x)
{
    unsigned int code = 35 + (x % 85);
    if (code >= '\\')
        code += 1;
    return (char)code;
}
68 
69 bool binary_to_compressed_c(const char* filename, const char* symbol, bool use_base85_encoding, bool use_compression)
70 {
71  // Read file
72  FILE* f = fopen(filename, "rb");
73  if (!f) return false;
74  int data_sz;
75  if (fseek(f, 0, SEEK_END) || (data_sz = (int)ftell(f)) == -1 || fseek(f, 0, SEEK_SET)) { fclose(f); return false; }
76  char* data = new char[data_sz+4];
77  if (fread(data, 1, data_sz, f) != (size_t)data_sz) { fclose(f); delete[] data; return false; }
78  memset((void*)(((char*)data) + data_sz), 0, 4);
79  fclose(f);
80 
81  // Compress
82  int maxlen = data_sz + 512 + (data_sz >> 2) + sizeof(int); // total guess
83  char* compressed = use_compression ? new char[maxlen] : data;
84  int compressed_sz = use_compression ? stb_compress((stb_uchar*)compressed, (stb_uchar*)data, data_sz) : data_sz;
85  if (use_compression)
86  memset(compressed + compressed_sz, 0, maxlen - compressed_sz);
87 
88  // Output as Base85 encoded
89  FILE* out = stdout;
90  fprintf(out, "// File: '%s' (%d bytes)\n", filename, (int)data_sz);
91  fprintf(out, "// Exported using binary_to_compressed_c.cpp\n");
92  const char* compressed_str = use_compression ? "compressed_" : "";
93  if (use_base85_encoding)
94  {
95  fprintf(out, "static const char %s_%sdata_base85[%d+1] =\n \"", symbol, compressed_str, (int)((compressed_sz+3)/4)*5);
96  char prev_c = 0;
97  for (int src_i = 0; src_i < compressed_sz; src_i += 4)
98  {
99  // This is made a little more complicated by the fact that ??X sequences are interpreted as trigraphs by old C/C++ compilers. So we need to escape pairs of ??.
100  unsigned int d = *(unsigned int*)(compressed + src_i);
101  for (unsigned int n5 = 0; n5 < 5; n5++, d /= 85)
102  {
103  char c = Encode85Byte(d);
104  fprintf(out, (c == '?' && prev_c == '?') ? "\\%c" : "%c", c);
105  prev_c = c;
106  }
107  if ((src_i % 112) == 112-4)
108  fprintf(out, "\"\n \"");
109  }
110  fprintf(out, "\";\n\n");
111  }
112  else
113  {
114  fprintf(out, "static const unsigned int %s_%ssize = %d;\n", symbol, compressed_str, (int)compressed_sz);
115  fprintf(out, "static const unsigned int %s_%sdata[%d/4] =\n{", symbol, compressed_str, (int)((compressed_sz+3)/4)*4);
116  int column = 0;
117  for (int i = 0; i < compressed_sz; i += 4)
118  {
119  unsigned int d = *(unsigned int*)(compressed + i);
120  if ((column++ % 12) == 0)
121  fprintf(out, "\n 0x%08x, ", d);
122  else
123  fprintf(out, "0x%08x, ", d);
124  }
125  fprintf(out, "\n};\n\n");
126  }
127 
128  // Cleanup
129  delete[] data;
130  if (use_compression)
131  delete[] compressed;
132  return true;
133 }
134 
135 // stb_compress* from stb.h - definition
136 
138 
140 {
141  const unsigned long ADLER_MOD = 65521;
142  unsigned long s1 = adler32 & 0xffff, s2 = adler32 >> 16;
143  unsigned long blocklen, i;
144 
145  blocklen = buflen % 5552;
146  while (buflen) {
147  for (i=0; i + 7 < blocklen; i += 8) {
148  s1 += buffer[0], s2 += s1;
149  s1 += buffer[1], s2 += s1;
150  s1 += buffer[2], s2 += s1;
151  s1 += buffer[3], s2 += s1;
152  s1 += buffer[4], s2 += s1;
153  s1 += buffer[5], s2 += s1;
154  s1 += buffer[6], s2 += s1;
155  s1 += buffer[7], s2 += s1;
156 
157  buffer += 8;
158  }
159 
160  for (; i < blocklen; ++i)
161  s1 += *buffer++, s2 += s1;
162 
163  s1 %= ADLER_MOD, s2 %= ADLER_MOD;
164  buflen -= blocklen;
165  blocklen = 5552;
166  }
167  return (s2 << 16) + s1;
168 }
169 
170 static unsigned int stb_matchlen(stb_uchar *m1, stb_uchar *m2, stb_uint maxlen)
171 {
172  stb_uint i;
173  for (i=0; i < maxlen; ++i)
174  if (m1[i] != m2[i]) return i;
175  return i;
176 }
177 
178 // simple implementation that just takes the source data in a big block
179 
181 static FILE *stb__outfile;
183 
184 static void stb__write(unsigned char v)
185 {
186  fputc(v, stb__outfile);
187  ++stb__outbytes;
188 }
189 
190 //#define stb_out(v) (stb__out ? *stb__out++ = (stb_uchar) (v) : stb__write((stb_uchar) (v)))
191 #define stb_out(v) do { if (stb__out) *stb__out++ = (stb_uchar) (v); else stb__write((stb_uchar) (v)); } while (0)
192 
193 static void stb_out2(stb_uint v) { stb_out(v >> 8); stb_out(v); }
194 static void stb_out3(stb_uint v) { stb_out(v >> 16); stb_out(v >> 8); stb_out(v); }
195 static void stb_out4(stb_uint v) { stb_out(v >> 24); stb_out(v >> 16); stb_out(v >> 8 ); stb_out(v); }
196 
197 static void outliterals(stb_uchar *in, int numlit)
198 {
199  while (numlit > 65536) {
200  outliterals(in,65536);
201  in += 65536;
202  numlit -= 65536;
203  }
204 
205  if (numlit == 0) ;
206  else if (numlit <= 32) stb_out (0x000020 + numlit-1);
207  else if (numlit <= 2048) stb_out2(0x000800 + numlit-1);
208  else /* numlit <= 65536) */ stb_out3(0x070000 + numlit-1);
209 
210  if (stb__out) {
211  memcpy(stb__out,in,numlit);
212  stb__out += numlit;
213  } else
214  fwrite(in, 1, numlit, stb__outfile);
215 }
216 
217 static int stb__window = 0x40000; // 256K
218 
// Decide whether a match of length `best` at distance `dist` is worth taking.
// Larger distances need longer matches; returns 1 when the pair is acceptable, 0 otherwise.
static int stb_not_crap(int best, int dist)
{
    if (best > 2 && dist <= 0x00100)
        return 1;
    if (best > 5 && dist <= 0x04000)
        return 1;
    if (best > 7 && dist <= 0x80000)
        return 1;
    return 0;
}
225 
226 static stb_uint stb__hashsize = 32768;
227 
228 // note that you can play with the hashing functions all you
229 // want without needing to change the decompressor
230 #define stb__hc(q,h,c) (((h) << 7) + ((h) >> 25) + q[c])
231 #define stb__hc2(q,h,c,d) (((h) << 14) + ((h) >> 18) + (q[c] << 7) + q[d])
232 #define stb__hc3(q,c,d,e) ((q[c] << 14) + (q[d] << 7) + q[e])
233 
234 static unsigned int stb__running_adler;
235 
236 static int stb_compress_chunk(stb_uchar *history,
237  stb_uchar *start,
238  stb_uchar *end,
239  int length,
240  int *pending_literals,
241  stb_uchar **chash,
242  stb_uint mask)
243 {
244  (void)history;
245  int window = stb__window;
246  stb_uint match_max;
247  stb_uchar *lit_start = start - *pending_literals;
248  stb_uchar *q = start;
249 
250 #define STB__SCRAMBLE(h) (((h) + ((h) >> 16)) & mask)
251 
252  // stop short of the end so we don't scan off the end doing
253  // the hashing; this means we won't compress the last few bytes
254  // unless they were part of something longer
255  while (q < start+length && q+12 < end) {
256  int m;
257  stb_uint h1,h2,h3,h4, h;
258  stb_uchar *t;
259  int best = 2, dist=0;
260 
261  if (q+65536 > end)
262  match_max = end-q;
263  else
264  match_max = 65536;
265 
266 #define stb__nc(b,d) ((d) <= window && ((b) > 9 || stb_not_crap(b,d)))
267 
268 #define STB__TRY(t,p) /* avoid retrying a match we already tried */ \
269  if (p ? dist != q-t : 1) \
270  if ((m = stb_matchlen(t, q, match_max)) > best) \
271  if (stb__nc(m,q-(t))) \
272  best = m, dist = q - (t)
273 
274  // rather than search for all matches, only try 4 candidate locations,
275  // chosen based on 4 different hash functions of different lengths.
276  // this strategy is inspired by LZO; hashing is unrolled here using the
277  // 'hc' macro
278  h = stb__hc3(q,0, 1, 2); h1 = STB__SCRAMBLE(h);
279  t = chash[h1]; if (t) STB__TRY(t,0);
280  h = stb__hc2(q,h, 3, 4); h2 = STB__SCRAMBLE(h);
281  h = stb__hc2(q,h, 5, 6); t = chash[h2]; if (t) STB__TRY(t,1);
282  h = stb__hc2(q,h, 7, 8); h3 = STB__SCRAMBLE(h);
283  h = stb__hc2(q,h, 9,10); t = chash[h3]; if (t) STB__TRY(t,1);
284  h = stb__hc2(q,h,11,12); h4 = STB__SCRAMBLE(h);
285  t = chash[h4]; if (t) STB__TRY(t,1);
286 
287  // because we use a shared hash table, can only update it
288  // _after_ we've probed all of them
289  chash[h1] = chash[h2] = chash[h3] = chash[h4] = q;
290 
291  if (best > 2)
292  assert(dist > 0);
293 
294  // see if our best match qualifies
295  if (best < 3) { // fast path literals
296  ++q;
297  } else if (best > 2 && best <= 0x80 && dist <= 0x100) {
298  outliterals(lit_start, q-lit_start); lit_start = (q += best);
299  stb_out(0x80 + best-1);
300  stb_out(dist-1);
301  } else if (best > 5 && best <= 0x100 && dist <= 0x4000) {
302  outliterals(lit_start, q-lit_start); lit_start = (q += best);
303  stb_out2(0x4000 + dist-1);
304  stb_out(best-1);
305  } else if (best > 7 && best <= 0x100 && dist <= 0x80000) {
306  outliterals(lit_start, q-lit_start); lit_start = (q += best);
307  stb_out3(0x180000 + dist-1);
308  stb_out(best-1);
309  } else if (best > 8 && best <= 0x10000 && dist <= 0x80000) {
310  outliterals(lit_start, q-lit_start); lit_start = (q += best);
311  stb_out3(0x100000 + dist-1);
312  stb_out2(best-1);
313  } else if (best > 9 && dist <= 0x1000000) {
314  if (best > 65536) best = 65536;
315  outliterals(lit_start, q-lit_start); lit_start = (q += best);
316  if (best <= 0x100) {
317  stb_out(0x06);
318  stb_out3(dist-1);
319  stb_out(best-1);
320  } else {
321  stb_out(0x04);
322  stb_out3(dist-1);
323  stb_out2(best-1);
324  }
325  } else { // fallback literals if no match was a balanced tradeoff
326  ++q;
327  }
328  }
329 
330  // if we didn't get all the way, add the rest to literals
331  if (q-start < length)
332  q = start+length;
333 
334  // the literals are everything from lit_start to q
335  *pending_literals = (q - lit_start);
336 
338  return q - start;
339 }
340 
342 {
343  int literals = 0;
344  stb_uint len,i;
345 
346  stb_uchar **chash;
347  chash = (stb_uchar**) malloc(stb__hashsize * sizeof(stb_uchar*));
348  if (chash == NULL) return 0; // failure
349  for (i=0; i < stb__hashsize; ++i)
350  chash[i] = NULL;
351 
352  // stream signature
353  stb_out(0x57); stb_out(0xbc);
354  stb_out2(0);
355 
356  stb_out4(0); // 64-bit length requires 32-bit leading 0
357  stb_out4(length);
359 
360  stb__running_adler = 1;
361 
362  len = stb_compress_chunk(input, input, input+length, length, &literals, chash, stb__hashsize-1);
363  assert(len == length);
364 
365  outliterals(input+length - literals, literals);
366 
367  free(chash);
368 
369  stb_out2(0x05fa); // end opcode
370 
372 
373  return 1; // success
374 }
375 
377 {
378  stb__out = out;
379  stb__outfile = NULL;
380 
382 
383  return stb__out - out;
384 }
stb_matchlen
static unsigned int stb_matchlen(stb_uchar *m1, stb_uchar *m2, stb_uint maxlen)
Definition: binary_to_compressed_c.cpp:170
stb_out
#define stb_out(v)
Definition: binary_to_compressed_c.cpp:191
stb_not_crap
static int stb_not_crap(int best, int dist)
Definition: binary_to_compressed_c.cpp:219
binary_to_compressed_c
static bool binary_to_compressed_c(const char *filename, const char *symbol, bool use_base85_encoding, bool use_compression)
Definition: binary_to_compressed_c.cpp:69
end
GLuint GLuint end
Definition: glcorearb.h:2858
stb__out
static stb_uchar * stb__out
Definition: binary_to_compressed_c.cpp:180
stb__hashsize
static stb_uint stb__hashsize
Definition: binary_to_compressed_c.cpp:226
NULL
NULL
Definition: test_security_zap.cpp:405
Encode85Byte
char Encode85Byte(unsigned int x)
Definition: binary_to_compressed_c.cpp:63
length
GLenum GLuint GLenum GLsizei length
Definition: glcorearb.h:2695
input
std::string input
Definition: tokenizer_unittest.cc:197
stb__outbytes
static stb_uint stb__outbytes
Definition: binary_to_compressed_c.cpp:182
conformance_python.stdout
stdout
Definition: conformance_python.py:50
x
GLint GLenum GLint x
Definition: glcorearb.h:2834
stb_compress_inner
static int stb_compress_inner(stb_uchar *input, stb_uint length)
Definition: binary_to_compressed_c.cpp:341
stb_out3
static void stb_out3(stb_uint v)
Definition: binary_to_compressed_c.cpp:194
STB__TRY
#define STB__TRY(t, p)
stb_uint
unsigned int stb_uint
Definition: binary_to_compressed_c.cpp:29
mask
GLint GLuint mask
Definition: glcorearb.h:2789
start
GLuint start
Definition: glcorearb.h:2858
stb_compress
stb_uint stb_compress(stb_uchar *out, stb_uchar *in, stb_uint len)
Definition: binary_to_compressed_c.cpp:376
outliterals
static void outliterals(stb_uchar *in, int numlit)
Definition: binary_to_compressed_c.cpp:197
stb__write
static void stb__write(unsigned char v)
Definition: binary_to_compressed_c.cpp:184
stb__outfile
static FILE * stb__outfile
Definition: binary_to_compressed_c.cpp:181
buffer
Definition: buffer_processor.h:43
d
d
void
typedef void(APIENTRY *GLDEBUGPROCARB)(GLenum source
i
int i
Definition: gmock-matchers_test.cc:764
stb_uchar
unsigned char stb_uchar
Definition: binary_to_compressed_c.cpp:30
stb__hc2
#define stb__hc2(q, h, c, d)
Definition: binary_to_compressed_c.cpp:231
stb_out4
static void stb_out4(stb_uint v)
Definition: binary_to_compressed_c.cpp:195
len
int len
Definition: php/ext/google/protobuf/map.c:206
v
const GLdouble * v
Definition: glcorearb.h:3106
stb__running_adler
static unsigned int stb__running_adler
Definition: binary_to_compressed_c.cpp:234
stb_adler32
static stb_uint stb_adler32(stb_uint adler32, stb_uchar *buffer, stb_uint buflen)
Definition: binary_to_compressed_c.cpp:139
stb_compress_chunk
static int stb_compress_chunk(stb_uchar *history, stb_uchar *start, stb_uchar *end, int length, int *pending_literals, stb_uchar **chash, stb_uint mask)
Definition: binary_to_compressed_c.cpp:236
stb_out2
static void stb_out2(stb_uint v)
Definition: binary_to_compressed_c.cpp:193
m
const upb_json_parsermethod * m
Definition: ruby/ext/google/protobuf_c/upb.h:10501
data
GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * data
Definition: glcorearb.h:2879
stb__window
static int stb__window
Definition: binary_to_compressed_c.cpp:217
f
GLfloat f
Definition: glcorearb.h:3964
assert.h
main
int main(int argc, char **argv)
Definition: binary_to_compressed_c.cpp:35
stb__hc3
#define stb__hc3(q, c, d, e)
Definition: binary_to_compressed_c.cpp:232
h
GLfloat GLfloat GLfloat GLfloat h
Definition: glcorearb.h:4147
STB__SCRAMBLE
#define STB__SCRAMBLE(h)


libaditof
Author(s):
autogenerated on Wed May 21 2025 02:06:48