zlib/crc32.c
Go to the documentation of this file.
1 /* crc32.c -- compute the CRC-32 of a data stream
2  * Copyright (C) 1995-2022 Mark Adler
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  *
5  * This interleaved implementation of a CRC makes use of pipelined multiple
6  * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7  * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8  */
9 
10 /* @(#) $Id$ */
11 
12 /*
13  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14  protection on the static variables used to control the first-use generation
15  of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16  first call get_crc_table() to initialize the tables before allowing more than
17  one thread to use crc32().
18 
19  MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20  produced, so that this one source file can be compiled to an executable.
21  */
22 
23 #ifdef MAKECRCH
24 # include <stdio.h>
25 # ifndef DYNAMIC_CRC_TABLE
26 # define DYNAMIC_CRC_TABLE
27 # endif /* !DYNAMIC_CRC_TABLE */
28 #endif /* MAKECRCH */
29 
30 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31 
32  /*
33  A CRC of a message is computed on N braids of words in the message, where
34  each word consists of W bytes (4 or 8). If N is 3, for example, then three
35  running sparse CRCs are calculated respectively on each braid, at these
36  indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37  This is done starting at a word boundary, and continues until as many blocks
38  of N * W bytes as are available have been processed. The results are combined
39  into a single CRC at the end. For this code, N must be in the range 1..6 and
40  W must be 4 or 8. The upper limit on N can be increased if desired by adding
41  more #if blocks, extending the patterns apparent in the code. In addition,
42  crc32.h would need to be regenerated, if the maximum N value is increased.
43 
44  N and W are chosen empirically by benchmarking the execution time on a given
45  processor. The choices for N and W below were based on testing on Intel Kaby
46  Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47  Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48  with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49  They were all tested with either gcc or clang, all using the -O3 optimization
50  level. Your mileage may vary.
51  */
52 
53 /* Define N */
54 #ifdef Z_TESTN
55 # define N Z_TESTN
56 #else
57 # define N 5
58 #endif
59 #if N < 1 || N > 6
60 # error N must be in 1..6
61 #endif
62 
63 /*
64  z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65  z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66  that bytes are eight bits.
67  */
68 
69 /*
70  Define W and the associated z_word_t type. If W is not defined, then a
71  braided calculation is not used, and the associated tables and code are not
72  compiled.
73  */
74 #ifdef Z_TESTW
75 # if Z_TESTW-1 != -1
76 # define W Z_TESTW
77 # endif
78 #else
79 # ifdef MAKECRCH
80 # define W 8 /* required for MAKECRCH */
81 # else
82 # if defined(__x86_64__) || defined(__aarch64__)
83 # define W 8
84 # else
85 # define W 4
86 # endif
87 # endif
88 #endif
89 #ifdef W
90 # if W == 8 && defined(Z_U8)
91  typedef Z_U8 z_word_t;
92 # elif defined(Z_U4)
93 # undef W
94 # define W 4
95  typedef Z_U4 z_word_t;
96 # else
97 # undef W
98 # endif
99 #endif
100 
101 /* If available, use the ARM processor CRC32 instruction. */
102 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
103 # define ARMCRC32
104 #endif
105 
106 /* Local functions. */
108 local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
109 
110 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111  local z_word_t byte_swap OF((z_word_t word));
112 #endif
113 
114 #if defined(W) && !defined(ARMCRC32)
115  local z_crc_t crc_word OF((z_word_t data));
116  local z_word_t crc_word_big OF((z_word_t data));
117 #endif
118 
119 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
120 /*
121  Swap the bytes in a z_word_t to convert between little and big endian. Any
122  self-respecting compiler will optimize this to a single machine byte-swap
123  instruction, if one is available. This assumes that word_t is either 32 bits
124  or 64 bits.
125  */
126 local z_word_t byte_swap(word)
127  z_word_t word;
128 {
129 # if W == 8
130  return
131  (word & 0xff00000000000000) >> 56 |
132  (word & 0xff000000000000) >> 40 |
133  (word & 0xff0000000000) >> 24 |
134  (word & 0xff00000000) >> 8 |
135  (word & 0xff000000) << 8 |
136  (word & 0xff0000) << 24 |
137  (word & 0xff00) << 40 |
138  (word & 0xff) << 56;
139 # else /* W == 4 */
140  return
141  (word & 0xff000000) >> 24 |
142  (word & 0xff0000) >> 8 |
143  (word & 0xff00) << 8 |
144  (word & 0xff) << 24;
145 # endif
146 }
147 #endif
148 
149 /* CRC polynomial. */
150 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
151 
152 #ifdef DYNAMIC_CRC_TABLE
153 
156 local void make_crc_table OF((void));
157 #ifdef W
158  local z_word_t FAR crc_big_table[256];
159  local z_crc_t FAR crc_braid_table[W][256];
160  local z_word_t FAR crc_braid_big_table[W][256];
161  local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
162 #endif
163 #ifdef MAKECRCH
164  local void write_table OF((FILE *, const z_crc_t FAR *, int));
165  local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
166  local void write_table64 OF((FILE *, const z_word_t FAR *, int));
167 #endif /* MAKECRCH */
168 
169 /*
170  Define a once() function depending on the availability of atomics. If this is
171  compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
172  multiple threads, and if atomics are not available, then get_crc_table() must
173  be called to initialize the tables and must return before any threads are
174  allowed to compute or combine CRCs.
175  */
176 
177 /* Definition of once functionality. */
178 typedef struct once_s once_t;
179 local void once OF((once_t *, void (*)(void)));
180 
181 /* Check for the availability of atomics. */
182 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
183  !defined(__STDC_NO_ATOMICS__)
184 
185 #include <stdatomic.h>
186 
187 /* Structure for once(), which must be initialized with ONCE_INIT. */
188 struct once_s {
189  atomic_flag begun;
190  atomic_int done;
191 };
192 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
193 
194 /*
195  Run the provided init() function exactly once, even if multiple threads
196  invoke once() at the same time. The state must be a once_t initialized with
197  ONCE_INIT.
198  */
199 local void once(state, init)
200  once_t *state;
201  void (*init)(void);
202 {
203  if (!atomic_load(&state->done)) {
204  if (atomic_flag_test_and_set(&state->begun))
205  while (!atomic_load(&state->done))
206  ;
207  else {
208  init();
209  atomic_store(&state->done, 1);
210  }
211  }
212 }
213 
214 #else /* no atomics */
215 
216 /* Structure for once(), which must be initialized with ONCE_INIT. */
217 struct once_s {
218  volatile int begun;
219  volatile int done;
220 };
221 #define ONCE_INIT {0, 0}
222 
223 /* Test and set. Alas, not atomic, but tries to minimize the period of
224  vulnerability. */
225 local int test_and_set OF((int volatile *));
226 local int test_and_set(flag)
227  int volatile *flag;
228 {
229  int was;
230 
231  was = *flag;
232  *flag = 1;
233  return was;
234 }
235 
236 /* Run the provided init() function once. This is not thread-safe. */
237 local void once(state, init)
238  once_t *state;
239  void (*init)(void);
240 {
241  if (!state->done) {
242  if (test_and_set(&state->begun))
243  while (!state->done)
244  ;
245  else {
246  init();
247  state->done = 1;
248  }
249  }
250 }
251 
252 #endif
253 
254 /* State for once(). */
255 local once_t made = ONCE_INIT;
256 
257 /*
258  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
259  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
260 
261  Polynomials over GF(2) are represented in binary, one bit per coefficient,
262  with the lowest powers in the most significant bit. Then adding polynomials
263  is just exclusive-or, and multiplying a polynomial by x is a right shift by
264  one. If we call the above polynomial p, and represent a byte as the
265  polynomial q, also with the lowest power in the most significant bit (so the
266  byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
267  where a mod b means the remainder after dividing a by b.
268 
269  This calculation is done using the shift-register method of multiplying and
270  taking the remainder. The register is initialized to zero, and for each
271  incoming bit, x^32 is added mod p to the register if the bit is a one (where
272  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
273  (which is shifting right by one and adding x^32 mod p if the bit shifted out
274  is a one). We start with the highest power (least significant bit) of q and
275  repeat for all eight bits of q.
276 
277  The table is simply the CRC of all possible eight bit values. This is all the
278  information needed to generate CRCs on data a byte at a time for all
279  combinations of CRC register values and incoming bytes.
280  */
281 
282 local void make_crc_table()
283 {
284  unsigned i, j, n;
285  z_crc_t p;
286 
287  /* initialize the CRC of bytes tables */
288  for (i = 0; i < 256; i++) {
289  p = i;
290  for (j = 0; j < 8; j++)
291  p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
292  crc_table[i] = p;
293 #ifdef W
294  crc_big_table[i] = byte_swap(p);
295 #endif
296  }
297 
298  /* initialize the x^2^n mod p(x) table */
299  p = (z_crc_t)1 << 30; /* x^1 */
300  x2n_table[0] = p;
301  for (n = 1; n < 32; n++)
302  x2n_table[n] = p = multmodp(p, p);
303 
304 #ifdef W
305  /* initialize the braiding tables -- needs x2n_table[] */
306  braid(crc_braid_table, crc_braid_big_table, N, W);
307 #endif
308 
309 #ifdef MAKECRCH
310  {
311  /*
312  The crc32.h header file contains tables for both 32-bit and 64-bit
313  z_word_t's, and so requires a 64-bit type be available. In that case,
314  z_word_t must be defined to be 64-bits. This code then also generates
315  and writes out the tables for the case that z_word_t is 32 bits.
316  */
317 #if !defined(W) || W != 8
318 # error Need a 64-bit integer type in order to generate crc32.h.
319 #endif
320  FILE *out;
321  int k, n;
322  z_crc_t ltl[8][256];
323  z_word_t big[8][256];
324 
325  out = fopen("crc32.h", "w");
326  if (out == NULL) return;
327 
328  /* write out little-endian CRC table to crc32.h */
329  fprintf(out,
330  "/* crc32.h -- tables for rapid CRC calculation\n"
331  " * Generated automatically by crc32.c\n */\n"
332  "\n"
333  "local const z_crc_t FAR crc_table[] = {\n"
334  " ");
335  write_table(out, crc_table, 256);
336  fprintf(out,
337  "};\n");
338 
339  /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
340  fprintf(out,
341  "\n"
342  "#ifdef W\n"
343  "\n"
344  "#if W == 8\n"
345  "\n"
346  "local const z_word_t FAR crc_big_table[] = {\n"
347  " ");
348  write_table64(out, crc_big_table, 256);
349  fprintf(out,
350  "};\n");
351 
352  /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
353  fprintf(out,
354  "\n"
355  "#else /* W == 4 */\n"
356  "\n"
357  "local const z_word_t FAR crc_big_table[] = {\n"
358  " ");
359  write_table32hi(out, crc_big_table, 256);
360  fprintf(out,
361  "};\n"
362  "\n"
363  "#endif\n");
364 
365  /* write out braid tables for each value of N */
366  for (n = 1; n <= 6; n++) {
367  fprintf(out,
368  "\n"
369  "#if N == %d\n", n);
370 
371  /* compute braid tables for this N and 64-bit word_t */
372  braid(ltl, big, n, 8);
373 
374  /* write out braid tables for 64-bit z_word_t to crc32.h */
375  fprintf(out,
376  "\n"
377  "#if W == 8\n"
378  "\n"
379  "local const z_crc_t FAR crc_braid_table[][256] = {\n");
380  for (k = 0; k < 8; k++) {
381  fprintf(out, " {");
382  write_table(out, ltl[k], 256);
383  fprintf(out, "}%s", k < 7 ? ",\n" : "");
384  }
385  fprintf(out,
386  "};\n"
387  "\n"
388  "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
389  for (k = 0; k < 8; k++) {
390  fprintf(out, " {");
391  write_table64(out, big[k], 256);
392  fprintf(out, "}%s", k < 7 ? ",\n" : "");
393  }
394  fprintf(out,
395  "};\n");
396 
397  /* compute braid tables for this N and 32-bit word_t */
398  braid(ltl, big, n, 4);
399 
400  /* write out braid tables for 32-bit z_word_t to crc32.h */
401  fprintf(out,
402  "\n"
403  "#else /* W == 4 */\n"
404  "\n"
405  "local const z_crc_t FAR crc_braid_table[][256] = {\n");
406  for (k = 0; k < 4; k++) {
407  fprintf(out, " {");
408  write_table(out, ltl[k], 256);
409  fprintf(out, "}%s", k < 3 ? ",\n" : "");
410  }
411  fprintf(out,
412  "};\n"
413  "\n"
414  "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
415  for (k = 0; k < 4; k++) {
416  fprintf(out, " {");
417  write_table32hi(out, big[k], 256);
418  fprintf(out, "}%s", k < 3 ? ",\n" : "");
419  }
420  fprintf(out,
421  "};\n"
422  "\n"
423  "#endif\n"
424  "\n"
425  "#endif\n");
426  }
427  fprintf(out,
428  "\n"
429  "#endif\n");
430 
431  /* write out zeros operator table to crc32.h */
432  fprintf(out,
433  "\n"
434  "local const z_crc_t FAR x2n_table[] = {\n"
435  " ");
436  write_table(out, x2n_table, 32);
437  fprintf(out,
438  "};\n");
439  fclose(out);
440  }
441 #endif /* MAKECRCH */
442 }
443 
444 #ifdef MAKECRCH
445 
446 /*
447  Write the 32-bit values in table[0..k-1] to out, five per line in
448  hexadecimal separated by commas.
449  */
450 local void write_table(out, table, k)
451  FILE *out;
452  const z_crc_t FAR *table;
453  int k;
454 {
455  int n;
456 
457  for (n = 0; n < k; n++)
458  fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
459  (unsigned long)(table[n]),
460  n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
461 }
462 
463 /*
464  Write the high 32-bits of each value in table[0..k-1] to out, five per line
465  in hexadecimal separated by commas.
466  */
467 local void write_table32hi(out, table, k)
468 FILE *out;
469 const z_word_t FAR *table;
470 int k;
471 {
472  int n;
473 
474  for (n = 0; n < k; n++)
475  fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
476  (unsigned long)(table[n] >> 32),
477  n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
478 }
479 
480 /*
481  Write the 64-bit values in table[0..k-1] to out, three per line in
482  hexadecimal separated by commas. This assumes that if there is a 64-bit
483  type, then there is also a long long integer type, and it is at least 64
484  bits. If not, then the type cast and format string can be adjusted
485  accordingly.
486  */
487 local void write_table64(out, table, k)
488  FILE *out;
489  const z_word_t FAR *table;
490  int k;
491 {
492  int n;
493 
494  for (n = 0; n < k; n++)
495  fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",
496  (unsigned long long)(table[n]),
497  n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
498 }
499 
500 /* Actually do the deed. */
501 int main()
502 {
503  make_crc_table();
504  return 0;
505 }
506 
507 #endif /* MAKECRCH */
508 
509 #ifdef W
510 /*
511  Generate the little and big-endian braid tables for the given n and z_word_t
512  size w. Each array must have room for w blocks of 256 elements.
513  */
514 local void braid(ltl, big, n, w)
515  z_crc_t ltl[][256];
516  z_word_t big[][256];
517  int n;
518  int w;
519 {
520  int k;
521  z_crc_t i, p, q;
522  for (k = 0; k < w; k++) {
523  p = x2nmodp((n * w + 3 - k) << 3, 0);
524  ltl[k][0] = 0;
525  big[w - 1 - k][0] = 0;
526  for (i = 1; i < 256; i++) {
527  ltl[k][i] = q = multmodp(i << 24, p);
528  big[w - 1 - k][i] = byte_swap(q);
529  }
530  }
531 }
532 #endif
533 
534 #else /* !DYNAMIC_CRC_TABLE */
535 /* ========================================================================
536  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
537  * of x for combining CRC-32s, all made by make_crc_table().
538  */
539 #include "crc32.h"
540 #endif /* DYNAMIC_CRC_TABLE */
541 
542 /* ========================================================================
543  * Routines used for CRC calculation. Some are also required for the table
544  * generation above.
545  */
546 
547 /*
548  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
549  reflected. For speed, this requires that a not be zero.
550  */
552  z_crc_t a;
553  z_crc_t b;
554 {
555  z_crc_t m, p;
556 
557  m = (z_crc_t)1 << 31;
558  p = 0;
559  for (;;) {
560  if (a & m) {
561  p ^= b;
562  if ((a & (m - 1)) == 0)
563  break;
564  }
565  m >>= 1;
566  b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
567  }
568  return p;
569 }
570 
571 /*
572  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
573  initialized.
574  */
576  z_off64_t n;
577  unsigned k;
578 {
579  z_crc_t p;
580 
581  p = (z_crc_t)1 << 31; /* x^0 == 1 */
582  while (n) {
583  if (n & 1)
584  p = multmodp(x2n_table[k & 31], p);
585  n >>= 1;
586  k++;
587  }
588  return p;
589 }
590 
591 /* =========================================================================
592  * This function can be used by asm versions of crc32(), and to force the
593  * generation of the CRC tables in a threaded application.
594  */
596 {
597 #ifdef DYNAMIC_CRC_TABLE
598  once(&made, make_crc_table);
599 #endif /* DYNAMIC_CRC_TABLE */
600  return (const z_crc_t FAR *)crc_table;
601 }
602 
603 /* =========================================================================
604  * Use ARM machine instructions if available. This will compute the CRC about
605  * ten times faster than the braided calculation. This code does not check for
606  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
607  * only be defined if the compilation specifies an ARM processor architecture
608  * that has the instructions. For example, compiling with -march=armv8.1-a or
609  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
610  * instructions.
611  */
612 #ifdef ARMCRC32
613 
614 /*
615  Constants empirically determined to maximize speed. These values are from
616  measurements on a Cortex-A57. Your mileage may vary.
617  */
618 #define Z_BATCH 3990 /* number of words in a batch */
619 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */
620 #define Z_BATCH_MIN 800 /* fewest words in a final batch */
621 
622 unsigned long ZEXPORT crc32_z(crc, buf, len)
623  unsigned long crc;
624  const unsigned char FAR *buf;
625  z_size_t len;
626 {
627  z_crc_t val;
628  z_word_t crc1, crc2;
629  const z_word_t *word;
630  z_word_t val0, val1, val2;
631  z_size_t last, last2, i;
632  z_size_t num;
633 
634  /* Return initial CRC, if requested. */
635  if (buf == Z_NULL) return 0;
636 
637 #ifdef DYNAMIC_CRC_TABLE
638  once(&made, make_crc_table);
639 #endif /* DYNAMIC_CRC_TABLE */
640 
641  /* Pre-condition the CRC */
642  crc = (~crc) & 0xffffffff;
643 
644  /* Compute the CRC up to a word boundary. */
645  while (len && ((z_size_t)buf & 7) != 0) {
646  len--;
647  val = *buf++;
648  __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
649  }
650 
651  /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
652  word = (z_word_t const *)buf;
653  num = len >> 3;
654  len &= 7;
655 
656  /* Do three interleaved CRCs to realize the throughput of one crc32x
657  instruction per cycle. Each CRC is calculated on Z_BATCH words. The
658  three CRCs are combined into a single CRC after each set of batches. */
659  while (num >= 3 * Z_BATCH) {
660  crc1 = 0;
661  crc2 = 0;
662  for (i = 0; i < Z_BATCH; i++) {
663  val0 = word[i];
664  val1 = word[i + Z_BATCH];
665  val2 = word[i + 2 * Z_BATCH];
666  __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
667  __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
668  __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
669  }
670  word += 3 * Z_BATCH;
671  num -= 3 * Z_BATCH;
672  crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
673  crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
674  }
675 
676  /* Do one last smaller batch with the remaining words, if there are enough
677  to pay for the combination of CRCs. */
678  last = num / 3;
679  if (last >= Z_BATCH_MIN) {
680  last2 = last << 1;
681  crc1 = 0;
682  crc2 = 0;
683  for (i = 0; i < last; i++) {
684  val0 = word[i];
685  val1 = word[i + last];
686  val2 = word[i + last2];
687  __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
688  __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
689  __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
690  }
691  word += 3 * last;
692  num -= 3 * last;
693  val = x2nmodp(last, 6);
694  crc = multmodp(val, crc) ^ crc1;
695  crc = multmodp(val, crc) ^ crc2;
696  }
697 
698  /* Compute the CRC on any remaining words. */
699  for (i = 0; i < num; i++) {
700  val0 = word[i];
701  __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
702  }
703  word += num;
704 
705  /* Complete the CRC on any remaining bytes. */
706  buf = (const unsigned char FAR *)word;
707  while (len) {
708  len--;
709  val = *buf++;
710  __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
711  }
712 
713  /* Return the CRC, post-conditioned. */
714  return crc ^ 0xffffffff;
715 }
716 
717 #else
718 
719 #ifdef W
720 
721 /*
722  Return the CRC of the W bytes in the word_t data, taking the
723  least-significant byte of the word as the first byte of data, without any pre
724  or post conditioning. This is used to combine the CRCs of each braid.
725  */
726 local z_crc_t crc_word(data)
727  z_word_t data;
728 {
729  int k;
730  for (k = 0; k < W; k++)
731  data = (data >> 8) ^ crc_table[data & 0xff];
732  return (z_crc_t)data;
733 }
734 
735 local z_word_t crc_word_big(data)
736  z_word_t data;
737 {
738  int k;
739  for (k = 0; k < W; k++)
740  data = (data << 8) ^
741  crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
742  return data;
743 }
744 
745 #endif
746 
747 /* ========================================================================= */
748 unsigned long ZEXPORT crc32_z(crc, buf, len)
749  unsigned long crc;
750  const unsigned char FAR *buf;
751  z_size_t len;
752 {
753  /* Return initial CRC, if requested. */
754  if (buf == Z_NULL) return 0;
755 
756 #ifdef DYNAMIC_CRC_TABLE
757  once(&made, make_crc_table);
758 #endif /* DYNAMIC_CRC_TABLE */
759 
760  /* Pre-condition the CRC */
761  crc = (~crc) & 0xffffffff;
762 
763 #ifdef W
764 
765  /* If provided enough bytes, do a braided CRC calculation. */
766  if (len >= N * W + W - 1) {
767  z_size_t blks;
768  z_word_t const *words;
769  unsigned endian;
770  int k;
771 
772  /* Compute the CRC up to a z_word_t boundary. */
773  while (len && ((z_size_t)buf & (W - 1)) != 0) {
774  len--;
775  crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
776  }
777 
778  /* Compute the CRC on as many N z_word_t blocks as are available. */
779  blks = len / (N * W);
780  len -= blks * N * W;
781  words = (z_word_t const *)buf;
782 
783  /* Do endian check at execution time instead of compile time, since ARM
784  processors can change the endianess at execution time. If the
785  compiler knows what the endianess will be, it can optimize out the
786  check and the unused branch. */
787  endian = 1;
788  if (*(unsigned char *)&endian) {
789  /* Little endian. */
790 
791  z_crc_t crc0;
792  z_word_t word0;
793 #if N > 1
794  z_crc_t crc1;
795  z_word_t word1;
796 #if N > 2
797  z_crc_t crc2;
798  z_word_t word2;
799 #if N > 3
800  z_crc_t crc3;
801  z_word_t word3;
802 #if N > 4
803  z_crc_t crc4;
804  z_word_t word4;
805 #if N > 5
806  z_crc_t crc5;
807  z_word_t word5;
808 #endif
809 #endif
810 #endif
811 #endif
812 #endif
813 
814  /* Initialize the CRC for each braid. */
815  crc0 = crc;
816 #if N > 1
817  crc1 = 0;
818 #if N > 2
819  crc2 = 0;
820 #if N > 3
821  crc3 = 0;
822 #if N > 4
823  crc4 = 0;
824 #if N > 5
825  crc5 = 0;
826 #endif
827 #endif
828 #endif
829 #endif
830 #endif
831 
832  /*
833  Process the first blks-1 blocks, computing the CRCs on each braid
834  independently.
835  */
836  while (--blks) {
837  /* Load the word for each braid into registers. */
838  word0 = crc0 ^ words[0];
839 #if N > 1
840  word1 = crc1 ^ words[1];
841 #if N > 2
842  word2 = crc2 ^ words[2];
843 #if N > 3
844  word3 = crc3 ^ words[3];
845 #if N > 4
846  word4 = crc4 ^ words[4];
847 #if N > 5
848  word5 = crc5 ^ words[5];
849 #endif
850 #endif
851 #endif
852 #endif
853 #endif
854  words += N;
855 
856  /* Compute and update the CRC for each word. The loop should
857  get unrolled. */
858  crc0 = crc_braid_table[0][word0 & 0xff];
859 #if N > 1
860  crc1 = crc_braid_table[0][word1 & 0xff];
861 #if N > 2
862  crc2 = crc_braid_table[0][word2 & 0xff];
863 #if N > 3
864  crc3 = crc_braid_table[0][word3 & 0xff];
865 #if N > 4
866  crc4 = crc_braid_table[0][word4 & 0xff];
867 #if N > 5
868  crc5 = crc_braid_table[0][word5 & 0xff];
869 #endif
870 #endif
871 #endif
872 #endif
873 #endif
874  for (k = 1; k < W; k++) {
875  crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
876 #if N > 1
877  crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
878 #if N > 2
879  crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
880 #if N > 3
881  crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
882 #if N > 4
883  crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
884 #if N > 5
885  crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
886 #endif
887 #endif
888 #endif
889 #endif
890 #endif
891  }
892  }
893 
894  /*
895  Process the last block, combining the CRCs of the N braids at the
896  same time.
897  */
898  crc = crc_word(crc0 ^ words[0]);
899 #if N > 1
900  crc = crc_word(crc1 ^ words[1] ^ crc);
901 #if N > 2
902  crc = crc_word(crc2 ^ words[2] ^ crc);
903 #if N > 3
904  crc = crc_word(crc3 ^ words[3] ^ crc);
905 #if N > 4
906  crc = crc_word(crc4 ^ words[4] ^ crc);
907 #if N > 5
908  crc = crc_word(crc5 ^ words[5] ^ crc);
909 #endif
910 #endif
911 #endif
912 #endif
913 #endif
914  words += N;
915  }
916  else {
917  /* Big endian. */
918 
919  z_word_t crc0, word0, comb;
920 #if N > 1
921  z_word_t crc1, word1;
922 #if N > 2
923  z_word_t crc2, word2;
924 #if N > 3
925  z_word_t crc3, word3;
926 #if N > 4
927  z_word_t crc4, word4;
928 #if N > 5
929  z_word_t crc5, word5;
930 #endif
931 #endif
932 #endif
933 #endif
934 #endif
935 
936  /* Initialize the CRC for each braid. */
937  crc0 = byte_swap(crc);
938 #if N > 1
939  crc1 = 0;
940 #if N > 2
941  crc2 = 0;
942 #if N > 3
943  crc3 = 0;
944 #if N > 4
945  crc4 = 0;
946 #if N > 5
947  crc5 = 0;
948 #endif
949 #endif
950 #endif
951 #endif
952 #endif
953 
954  /*
955  Process the first blks-1 blocks, computing the CRCs on each braid
956  independently.
957  */
958  while (--blks) {
959  /* Load the word for each braid into registers. */
960  word0 = crc0 ^ words[0];
961 #if N > 1
962  word1 = crc1 ^ words[1];
963 #if N > 2
964  word2 = crc2 ^ words[2];
965 #if N > 3
966  word3 = crc3 ^ words[3];
967 #if N > 4
968  word4 = crc4 ^ words[4];
969 #if N > 5
970  word5 = crc5 ^ words[5];
971 #endif
972 #endif
973 #endif
974 #endif
975 #endif
976  words += N;
977 
978  /* Compute and update the CRC for each word. The loop should
979  get unrolled. */
980  crc0 = crc_braid_big_table[0][word0 & 0xff];
981 #if N > 1
982  crc1 = crc_braid_big_table[0][word1 & 0xff];
983 #if N > 2
984  crc2 = crc_braid_big_table[0][word2 & 0xff];
985 #if N > 3
986  crc3 = crc_braid_big_table[0][word3 & 0xff];
987 #if N > 4
988  crc4 = crc_braid_big_table[0][word4 & 0xff];
989 #if N > 5
990  crc5 = crc_braid_big_table[0][word5 & 0xff];
991 #endif
992 #endif
993 #endif
994 #endif
995 #endif
996  for (k = 1; k < W; k++) {
997  crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
998 #if N > 1
999  crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
1000 #if N > 2
1001  crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
1002 #if N > 3
1003  crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
1004 #if N > 4
1005  crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
1006 #if N > 5
1007  crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
1008 #endif
1009 #endif
1010 #endif
1011 #endif
1012 #endif
1013  }
1014  }
1015 
1016  /*
1017  Process the last block, combining the CRCs of the N braids at the
1018  same time.
1019  */
1020  comb = crc_word_big(crc0 ^ words[0]);
1021 #if N > 1
1022  comb = crc_word_big(crc1 ^ words[1] ^ comb);
1023 #if N > 2
1024  comb = crc_word_big(crc2 ^ words[2] ^ comb);
1025 #if N > 3
1026  comb = crc_word_big(crc3 ^ words[3] ^ comb);
1027 #if N > 4
1028  comb = crc_word_big(crc4 ^ words[4] ^ comb);
1029 #if N > 5
1030  comb = crc_word_big(crc5 ^ words[5] ^ comb);
1031 #endif
1032 #endif
1033 #endif
1034 #endif
1035 #endif
1036  words += N;
1037  crc = byte_swap(comb);
1038  }
1039 
1040  /*
1041  Update the pointer to the remaining bytes to process.
1042  */
1043  buf = (unsigned char const *)words;
1044  }
1045 
1046 #endif /* W */
1047 
1048  /* Complete the computation of the CRC on any remaining bytes. */
1049  while (len >= 8) {
1050  len -= 8;
1051  crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1052  crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1053  crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1054  crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1055  crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1056  crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1057  crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1058  crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1059  }
1060  while (len) {
1061  len--;
1062  crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1063  }
1064 
1065  /* Return the CRC, post-conditioned. */
1066  return crc ^ 0xffffffff;
1067 }
1068 
1069 #endif
1070 
1071 /* ========================================================================= */
1072 unsigned long ZEXPORT crc32(crc, buf, len)
1073  unsigned long crc;
1074  const unsigned char FAR *buf;
1075  uInt len;
1076 {
1077  return crc32_z(crc, buf, len);
1078 }
1079 
1080 /* ========================================================================= */
1081 uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1082  uLong crc1;
1083  uLong crc2;
1084  z_off64_t len2;
1085 {
1086 #ifdef DYNAMIC_CRC_TABLE
1087  once(&made, make_crc_table);
1088 #endif /* DYNAMIC_CRC_TABLE */
1089  return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1090 }
1091 
1092 /* ========================================================================= */
1093 uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1094  uLong crc1;
1095  uLong crc2;
1096  z_off_t len2;
1097 {
1098  return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1099 }
1100 
1101 /* ========================================================================= */
1103  z_off64_t len2;
1104 {
1105 #ifdef DYNAMIC_CRC_TABLE
1106  once(&made, make_crc_table);
1107 #endif /* DYNAMIC_CRC_TABLE */
1108  return x2nmodp(len2, 3);
1109 }
1110 
1111 /* ========================================================================= */
1113  z_off_t len2;
1114 {
1115  return crc32_combine_gen64((z_off64_t)len2);
1116 }
1117 
1118 /* ========================================================================= */
1120  uLong crc1;
1121  uLong crc2;
1122  uLong op;
1123 {
1124  return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1125 }
zutil.h
flag
uint32_t flag
Definition: ssl_versions.cc:162
grpc::testing::val1
const char val1[]
Definition: client_context_test_peer_test.cc:34
gen_build_yaml.out
dictionary out
Definition: src/benchmark/gen_build_yaml.py:24
init
const char * init
Definition: upb/upb/bindings/lua/main.c:49
absl::str_format_internal::LengthMod::j
@ j
ZEXPORT
#define ZEXPORT
Definition: bloaty/third_party/zlib/zconf.h:380
crc32_z
unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, z_size_t len)
Definition: zlib/crc32.c:748
N
#define N
Definition: zlib/crc32.c:57
buf
voidpf void * buf
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
OF
z_crc_t multmodp OF((z_crc_t a, z_crc_t b))
crc32_combine_gen
uLong ZEXPORT crc32_combine_gen(z_off_t len2)
Definition: zlib/crc32.c:1112
z_size_t
size_t z_size_t
Definition: bloaty.cc:20
x2nmodp
z_crc_t x2nmodp(z_off64_t n, unsigned k)
Definition: zlib/crc32.c:575
words
std::vector< std::string > words
Definition: bloaty/third_party/protobuf/src/google/protobuf/repeated_field_unittest.cc:1787
crc_table
const z_crc_t FAR crc_table[TBLS][256]
Definition: bloaty/third_party/zlib/crc32.h:5
a
int a
Definition: abseil-cpp/absl/container/internal/hash_policy_traits_test.cc:88
xds_manager.p
p
Definition: xds_manager.py:60
setup.k
k
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
crc32
unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, uInt len)
Definition: zlib/crc32.c:1072
crc32_combine_gen64
uLong ZEXPORT crc32_combine_gen64(z_off64_t len2)
Definition: zlib/crc32.c:1102
multmodp
z_crc_t multmodp(z_crc_t a, z_crc_t b)
Definition: zlib/crc32.c:551
W
#define W
Definition: zlib/crc32.c:85
crc32.h
crc32_combine_op
uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op)
Definition: zlib/crc32.c:1119
FAR
#define FAR
Definition: bloaty/third_party/zlib/zconf.h:387
main
int main(int argc, char **argv)
Definition: examples/cpp/compression/greeter_client.cc:78
crc32_combine64
uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2)
Definition: zlib/crc32.c:1081
grpc::testing::val2
const char val2[]
Definition: client_context_test_peer_test.cc:35
done
struct tab * done
Definition: bloaty/third_party/zlib/examples/enough.c:176
uLong
unsigned long uLong
Definition: bloaty/third_party/zlib/zconf.h:394
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
b
uint64_t b
Definition: abseil-cpp/absl/container/internal/layout_test.cc:53
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
z_crc_t
unsigned long z_crc_t
Definition: bloaty/third_party/zlib/zconf.h:431
Z_NULL
#define Z_NULL
Definition: bloaty/third_party/zlib/zlib.h:212
x2n_table
const z_crc_t FAR x2n_table[]
Definition: zlib/crc32.h:9439
once
absl::once_flag once
Definition: bm_opencensus_plugin.cc:38
benchmark.FILE
FILE
Definition: benchmark.py:21
uInt
unsigned int uInt
Definition: bloaty/third_party/zlib/zconf.h:393
grpc::fclose
fclose(creds_file)
xds_manager.num
num
Definition: xds_manager.py:56
state
Definition: bloaty/third_party/zlib/contrib/blast/blast.c:41
z_off64_t
#define z_off64_t
Definition: bloaty/third_party/zlib/zconf.h:513
local
#define local
Definition: bloaty/third_party/zlib/contrib/blast/blast.c:36
absl::str_format_internal::LengthMod::q
@ q
table
uint8_t table[256]
Definition: hpack_parser.cc:456
POLY
#define POLY
Definition: zlib/crc32.c:150
get_crc_table
const z_crc_t FAR *ZEXPORT get_crc_table()
Definition: zlib/crc32.c:595
len
int len
Definition: abseil-cpp/absl/base/internal/low_level_alloc_test.cc:46
regress.m
m
Definition: regress/regress.py:25
z_off_t
#define z_off_t
Definition: bloaty/third_party/zlib/zconf.h:504
op
static grpc_op * op
Definition: test/core/fling/client.cc:47
crc32_combine
uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2)
Definition: zlib/crc32.c:1093
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
state
static struct rpc_state state
Definition: bad_server_response_test.cc:87


grpc
Author(s):
autogenerated on Fri May 16 2025 02:58:06