1 /* crc32.c -- compute the CRC-32 of a data stream
2 * Copyright (C) 1995-2022 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 *
5 * This interleaved implementation of a CRC makes use of pipelined multiple
6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8 */
9
10 /* @(#) $Id$ */
11
12 /*
13 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14 protection on the static variables used to control the first-use generation
15 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16 first call get_crc_table() to initialize the tables before allowing more than
17 one thread to use crc32().
18
19 MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20 produced, so that this one source file can be compiled to an executable.
21 */
22
23 #ifdef MAKECRCH
24 # include <stdio.h>
25 # ifndef DYNAMIC_CRC_TABLE
26 # define DYNAMIC_CRC_TABLE
27 # endif /* !DYNAMIC_CRC_TABLE */
28 #endif /* MAKECRCH */
29
30 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31
32 /*
33 A CRC of a message is computed on N braids of words in the message, where
34 each word consists of W bytes (4 or 8). If N is 3, for example, then three
35 running sparse CRCs are calculated respectively on each braid, at these
36 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37 This is done starting at a word boundary, and continues until as many blocks
38 of N * W bytes as are available have been processed. The results are combined
39 into a single CRC at the end. For this code, N must be in the range 1..6 and
40 W must be 4 or 8. The upper limit on N can be increased if desired by adding
41 more #if blocks, extending the patterns apparent in the code. In addition,
42 crc32.h would need to be regenerated, if the maximum N value is increased.
43
44 N and W are chosen empirically by benchmarking the execution time on a given
45 processor. The choices for N and W below were based on testing on Intel Kaby
46 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47 Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49 They were all tested with either gcc or clang, all using the -O3 optimization
50 level. Your mileage may vary.
51 */
52
53 /* Define N */
54 #ifdef Z_TESTN
55 # define N Z_TESTN
56 #else
57 # define N 5
58 #endif
59 #if N < 1 || N > 6
60 # error N must be in 1..6
61 #endif
62
63 /*
64 z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66 that bytes are eight bits.
67 */
68
69 /*
70 Define W and the associated z_word_t type. If W is not defined, then a
71 braided calculation is not used, and the associated tables and code are not
72 compiled.
73 */
74 #ifdef Z_TESTW
75 # if Z_TESTW-1 != -1
76 # define W Z_TESTW
77 # endif
78 #else
79 # ifdef MAKECRCH
80 # define W 8 /* required for MAKECRCH */
81 # else
82 # if defined(__x86_64__) || defined(__aarch64__)
83 # define W 8
84 # else
85 # define W 4
86 # endif
87 # endif
88 #endif
89 #ifdef W
90 # if W == 8 && defined(Z_U8)
91 typedef Z_U8 z_word_t;
92 # elif defined(Z_U4)
93 # undef W
94 # define W 4
95 typedef Z_U4 z_word_t;
96 # else
97 # undef W
98 # endif
99 #endif
100
101 /* Local functions. */
102 local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
103 local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
104
105 /* If available, use the ARM processor CRC32 instruction. */
106 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
107 # define ARMCRC32
108 #endif
109
110 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111 /*
112 Swap the bytes in a z_word_t to convert between little and big endian. Any
113 self-respecting compiler will optimize this to a single machine byte-swap
114 instruction, if one is available. This assumes that word_t is either 32 bits
115 or 64 bits.
116 */
117 local z_word_t byte_swap OF((z_word_t word));
118
119 local z_word_t byte_swap(word)
120 z_word_t word;
121 {
122 # if W == 8
123 return
124 (word & 0xff00000000000000) >> 56 |
125 (word & 0xff000000000000) >> 40 |
126 (word & 0xff0000000000) >> 24 |
127 (word & 0xff00000000) >> 8 |
128 (word & 0xff000000) << 8 |
129 (word & 0xff0000) << 24 |
130 (word & 0xff00) << 40 |
131 (word & 0xff) << 56;
132 # else /* W == 4 */
133 return
134 (word & 0xff000000) >> 24 |
135 (word & 0xff0000) >> 8 |
136 (word & 0xff00) << 8 |
137 (word & 0xff) << 24;
138 # endif
139 }
140 #endif
141
142 /* CRC polynomial. */
143 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
144
145 #ifdef DYNAMIC_CRC_TABLE
146
147 local z_crc_t FAR crc_table[256];
148 local z_crc_t FAR x2n_table[32];
149 local void make_crc_table OF((void));
150 #ifdef W
151 local z_word_t FAR crc_big_table[256];
152 local z_crc_t FAR crc_braid_table[W][256];
153 local z_word_t FAR crc_braid_big_table[W][256];
154 local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
155 #endif
156 #ifdef MAKECRCH
157 local void write_table OF((FILE *, const z_crc_t FAR *, int));
158 local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
159 local void write_table64 OF((FILE *, const z_word_t FAR *, int));
160 #endif /* MAKECRCH */
161
162 /*
163 Define a once() function depending on the availability of atomics. If this is
164 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
165 multiple threads, and if atomics are not available, then get_crc_table() must
166 be called to initialize the tables and must return before any threads are
167 allowed to compute or combine CRCs.
168 */
169
170 /* Definition of once functionality. */
171 typedef struct once_s once_t;
172 local void once OF((once_t *, void (*)(void)));
173
174 /* Check for the availability of atomics. */
175 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
176 !defined(__STDC_NO_ATOMICS__)
177
178 #include <stdatomic.h>
179
180 /* Structure for once(), which must be initialized with ONCE_INIT. */
181 struct once_s {
182 atomic_flag begun;
183 atomic_int done;
184 };
185 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
186
187 /*
188 Run the provided init() function exactly once, even if multiple threads
189 invoke once() at the same time. The state must be a once_t initialized with
190 ONCE_INIT.
191 */
192 local void once(state, init)
193 once_t *state;
194 void (*init)(void);
195 {
196 if (!atomic_load(&state->done)) {
197 if (atomic_flag_test_and_set(&state->begun))
198 while (!atomic_load(&state->done))
199 ;
200 else {
201 init();
202 atomic_store(&state->done, 1);
203 }
204 }
205 }
206
207 #else /* no atomics */
208
209 /* Structure for once(), which must be initialized with ONCE_INIT. */
210 struct once_s {
211 volatile int begun;
212 volatile int done;
213 };
214 #define ONCE_INIT {0, 0}
215
216 /* Test and set. Alas, not atomic, but tries to minimize the period of
217 vulnerability. */
218 local int test_and_set OF((int volatile *));
219 local int test_and_set(flag)
220 int volatile *flag;
221 {
222 int was;
223
224 was = *flag;
225 *flag = 1;
226 return was;
227 }
228
229 /* Run the provided init() function once. This is not thread-safe. */
230 local void once(state, init)
231 once_t *state;
232 void (*init)(void);
233 {
234 if (!state->done) {
235 if (test_and_set(&state->begun))
236 while (!state->done)
237 ;
238 else {
239 init();
240 state->done = 1;
241 }
242 }
243 }
244
245 #endif
246
247 /* State for once(). */
248 local once_t made = ONCE_INIT;
249
250 /*
251 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
252 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
253
254 Polynomials over GF(2) are represented in binary, one bit per coefficient,
255 with the lowest powers in the most significant bit. Then adding polynomials
256 is just exclusive-or, and multiplying a polynomial by x is a right shift by
257 one. If we call the above polynomial p, and represent a byte as the
258 polynomial q, also with the lowest power in the most significant bit (so the
259 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
260 where a mod b means the remainder after dividing a by b.
261
262 This calculation is done using the shift-register method of multiplying and
263 taking the remainder. The register is initialized to zero, and for each
264 incoming bit, x^32 is added mod p to the register if the bit is a one (where
265 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
266 (which is shifting right by one and adding x^32 mod p if the bit shifted out
267 is a one). We start with the highest power (least significant bit) of q and
268 repeat for all eight bits of q.
269
270 The table is simply the CRC of all possible eight bit values. This is all the
271 information needed to generate CRCs on data a byte at a time for all
272 combinations of CRC register values and incoming bytes.
273 */
274
275 local void make_crc_table()
276 {
277 unsigned i, j, n;
278 z_crc_t p;
279
280 /* initialize the CRC of bytes tables */
281 for (i = 0; i < 256; i++) {
282 p = i;
283 for (j = 0; j < 8; j++)
284 p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
285 crc_table[i] = p;
286 #ifdef W
287 crc_big_table[i] = byte_swap(p);
288 #endif
289 }
290
291 /* initialize the x^2^n mod p(x) table */
292 p = (z_crc_t)1 << 30; /* x^1 */
293 x2n_table[0] = p;
294 for (n = 1; n < 32; n++)
295 x2n_table[n] = p = multmodp(p, p);
296
297 #ifdef W
298 /* initialize the braiding tables -- needs x2n_table[] */
299 braid(crc_braid_table, crc_braid_big_table, N, W);
300 #endif
301
302 #ifdef MAKECRCH
303 {
304 /*
305 The crc32.h header file contains tables for both 32-bit and 64-bit
306 z_word_t's, and so requires a 64-bit type be available. In that case,
307 z_word_t must be defined to be 64-bits. This code then also generates
308 and writes out the tables for the case that z_word_t is 32 bits.
309 */
310 #if !defined(W) || W != 8
311 # error Need a 64-bit integer type in order to generate crc32.h.
312 #endif
313 FILE *out;
314 int k, n;
315 z_crc_t ltl[8][256];
316 z_word_t big[8][256];
317
318 out = fopen("crc32.h", "w");
319 if (out == NULL) return;
320
321 /* write out little-endian CRC table to crc32.h */
322 fprintf(out,
323 "/* crc32.h -- tables for rapid CRC calculation\n"
324 " * Generated automatically by crc32.c\n */\n"
325 "\n"
326 "local const z_crc_t FAR crc_table[] = {\n"
327 " ");
328 write_table(out, crc_table, 256);
329 fprintf(out,
330 "};\n");
331
332 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
333 fprintf(out,
334 "\n"
335 "#ifdef W\n"
336 "\n"
337 "#if W == 8\n"
338 "\n"
339 "local const z_word_t FAR crc_big_table[] = {\n"
340 " ");
341 write_table64(out, crc_big_table, 256);
342 fprintf(out,
343 "};\n");
344
345 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
346 fprintf(out,
347 "\n"
348 "#else /* W == 4 */\n"
349 "\n"
350 "local const z_word_t FAR crc_big_table[] = {\n"
351 " ");
352 write_table32hi(out, crc_big_table, 256);
353 fprintf(out,
354 "};\n"
355 "\n"
356 "#endif\n");
357
358 /* write out braid tables for each value of N */
359 for (n = 1; n <= 6; n++) {
360 fprintf(out,
361 "\n"
362 "#if N == %d\n", n);
363
364 /* compute braid tables for this N and 64-bit word_t */
365 braid(ltl, big, n, 8);
366
367 /* write out braid tables for 64-bit z_word_t to crc32.h */
368 fprintf(out,
369 "\n"
370 "#if W == 8\n"
371 "\n"
372 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
373 for (k = 0; k < 8; k++) {
374 fprintf(out, " {");
375 write_table(out, ltl[k], 256);
376 fprintf(out, "}%s", k < 7 ? ",\n" : "");
377 }
378 fprintf(out,
379 "};\n"
380 "\n"
381 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
382 for (k = 0; k < 8; k++) {
383 fprintf(out, " {");
384 write_table64(out, big[k], 256);
385 fprintf(out, "}%s", k < 7 ? ",\n" : "");
386 }
387 fprintf(out,
388 "};\n");
389
390 /* compute braid tables for this N and 32-bit word_t */
391 braid(ltl, big, n, 4);
392
393 /* write out braid tables for 32-bit z_word_t to crc32.h */
394 fprintf(out,
395 "\n"
396 "#else /* W == 4 */\n"
397 "\n"
398 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
399 for (k = 0; k < 4; k++) {
400 fprintf(out, " {");
401 write_table(out, ltl[k], 256);
402 fprintf(out, "}%s", k < 3 ? ",\n" : "");
403 }
404 fprintf(out,
405 "};\n"
406 "\n"
407 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
408 for (k = 0; k < 4; k++) {
409 fprintf(out, " {");
410 write_table32hi(out, big[k], 256);
411 fprintf(out, "}%s", k < 3 ? ",\n" : "");
412 }
413 fprintf(out,
414 "};\n"
415 "\n"
416 "#endif\n"
417 "\n"
418 "#endif\n");
419 }
420 fprintf(out,
421 "\n"
422 "#endif\n");
423
424 /* write out zeros operator table to crc32.h */
425 fprintf(out,
426 "\n"
427 "local const z_crc_t FAR x2n_table[] = {\n"
428 " ");
429 write_table(out, x2n_table, 32);
430 fprintf(out,
431 "};\n");
432 fclose(out);
433 }
434 #endif /* MAKECRCH */
435 }
436
437 #ifdef MAKECRCH
438
439 /*
440 Write the 32-bit values in table[0..k-1] to out, five per line in
441 hexadecimal separated by commas.
442 */
443 local void write_table(out, table, k)
444 FILE *out;
445 const z_crc_t FAR *table;
446 int k;
447 {
448 int n;
449
450 for (n = 0; n < k; n++)
451 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
452 (unsigned long)(table[n]),
453 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
454 }
455
456 /*
457 Write the high 32-bits of each value in table[0..k-1] to out, five per line
458 in hexadecimal separated by commas.
459 */
460 local void write_table32hi(out, table, k)
461 FILE *out;
462 const z_word_t FAR *table;
463 int k;
464 {
465 int n;
466
467 for (n = 0; n < k; n++)
468 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
469 (unsigned long)(table[n] >> 32),
470 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
471 }
472
473 /*
474 Write the 64-bit values in table[0..k-1] to out, three per line in
475 hexadecimal separated by commas. This assumes that if there is a 64-bit
476 type, then there is also a long long integer type, and it is at least 64
477 bits. If not, then the type cast and format string can be adjusted
478 accordingly.
479 */
480 local void write_table64(out, table, k)
481 FILE *out;
482 const z_word_t FAR *table;
483 int k;
484 {
485 int n;
486
487 for (n = 0; n < k; n++)
488 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",
489 (unsigned long long)(table[n]),
490 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
491 }
492
493 /* Actually do the deed. */
494 int main()
495 {
496 make_crc_table();
497 return 0;
498 }
499
500 #endif /* MAKECRCH */
501
502 #ifdef W
503 /*
504 Generate the little and big-endian braid tables for the given n and z_word_t
505 size w. Each array must have room for w blocks of 256 elements.
506 */
507 local void braid(ltl, big, n, w)
508 z_crc_t ltl[][256];
509 z_word_t big[][256];
510 int n;
511 int w;
512 {
513 int k;
514 z_crc_t i, p, q;
515 for (k = 0; k < w; k++) {
516 p = x2nmodp((n * w + 3 - k) << 3, 0);
517 ltl[k][0] = 0;
518 big[w - 1 - k][0] = 0;
519 for (i = 1; i < 256; i++) {
520 ltl[k][i] = q = multmodp(i << 24, p);
521 big[w - 1 - k][i] = byte_swap(q);
522 }
523 }
524 }
525 #endif
526
527 #else /* !DYNAMIC_CRC_TABLE */
528 /* ========================================================================
529 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
530 * of x for combining CRC-32s, all made by make_crc_table().
531 */
532 #include "crc32.h"
533 #endif /* DYNAMIC_CRC_TABLE */
534
535 /* ========================================================================
536 * Routines used for CRC calculation. Some are also required for the table
537 * generation above.
538 */
539
540 /*
541 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
542 reflected. For speed, this requires that a not be zero.
543 */
544 local z_crc_t multmodp(a, b)
545 z_crc_t a;
546 z_crc_t b;
547 {
548 z_crc_t m, p;
549
550 m = (z_crc_t)1 << 31;
551 p = 0;
552 for (;;) {
553 if (a & m) {
554 p ^= b;
555 if ((a & (m - 1)) == 0)
556 break;
557 }
558 m >>= 1;
559 b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
560 }
561 return p;
562 }
563
564 /*
565 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
566 initialized.
567 */
568 local z_crc_t x2nmodp(n, k)
569 z_off64_t n;
570 unsigned k;
571 {
572 z_crc_t p;
573
574 p = (z_crc_t)1 << 31; /* x^0 == 1 */
575 while (n) {
576 if (n & 1)
577 p = multmodp(x2n_table[k & 31], p);
578 n >>= 1;
579 k++;
580 }
581 return p;
582 }
583
584 /* =========================================================================
585 * This function can be used by asm versions of crc32(), and to force the
586 * generation of the CRC tables in a threaded application.
587 */
588 const z_crc_t FAR * ZEXPORT get_crc_table()
589 {
590 #ifdef DYNAMIC_CRC_TABLE
591 once(&made, make_crc_table);
592 #endif /* DYNAMIC_CRC_TABLE */
593 return (const z_crc_t FAR *)crc_table;
594 }
595
596 /* =========================================================================
597 * Use ARM machine instructions if available. This will compute the CRC about
598 * ten times faster than the braided calculation. This code does not check for
599 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
600 * only be defined if the compilation specifies an ARM processor architecture
601 * that has the instructions. For example, compiling with -march=armv8.1-a or
602 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
603 * instructions.
604 */
605 #ifdef ARMCRC32
606
607 /*
608 Constants empirically determined to maximize speed. These values are from
609 measurements on a Cortex-A57. Your mileage may vary.
610 */
611 #define Z_BATCH 3990 /* number of words in a batch */
612 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */
613 #define Z_BATCH_MIN 800 /* fewest words in a final batch */
614
615 unsigned long ZEXPORT crc32_z(crc, buf, len)
616 unsigned long crc;
617 const unsigned char FAR *buf;
618 z_size_t len;
619 {
620 z_crc_t val;
621 z_word_t crc1, crc2;
622 const z_word_t *word;
623 z_word_t val0, val1, val2;
624 z_size_t last, last2, i;
625 z_size_t num;
626
627 /* Return initial CRC, if requested. */
628 if (buf == Z_NULL) return 0;
629
630 #ifdef DYNAMIC_CRC_TABLE
631 once(&made, make_crc_table);
632 #endif /* DYNAMIC_CRC_TABLE */
633
634 /* Pre-condition the CRC */
635 crc = (~crc) & 0xffffffff;
636
637 /* Compute the CRC up to a word boundary. */
638 while (len && ((z_size_t)buf & 7) != 0) {
639 len--;
640 val = *buf++;
641 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
642 }
643
644 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
645 word = (z_word_t const *)buf;
646 num = len >> 3;
647 len &= 7;
648
649 /* Do three interleaved CRCs to realize the throughput of one crc32x
650 instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three
651 CRCs are combined into a single CRC after each set of batches. */
652 while (num >= 3 * Z_BATCH) {
653 crc1 = 0;
654 crc2 = 0;
655 for (i = 0; i < Z_BATCH; i++) {
656 val0 = word[i];
657 val1 = word[i + Z_BATCH];
658 val2 = word[i + 2 * Z_BATCH];
659 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
660 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
661 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
662 }
663 word += 3 * Z_BATCH;
664 num -= 3 * Z_BATCH;
665 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
666 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
667 }
668
669 /* Do one last smaller batch with the remaining words, if there are enough
670 to pay for the combination of CRCs. */
671 last = num / 3;
672 if (last >= Z_BATCH_MIN) {
673 last2 = last << 1;
674 crc1 = 0;
675 crc2 = 0;
676 for (i = 0; i < last; i++) {
677 val0 = word[i];
678 val1 = word[i + last];
679 val2 = word[i + last2];
680 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
681 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
682 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
683 }
684 word += 3 * last;
685 num -= 3 * last;
686 val = x2nmodp(last, 6);
687 crc = multmodp(val, crc) ^ crc1;
688 crc = multmodp(val, crc) ^ crc2;
689 }
690
691 /* Compute the CRC on any remaining words. */
692 for (i = 0; i < num; i++) {
693 val0 = word[i];
694 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
695 }
696 word += num;
697
698 /* Complete the CRC on any remaining bytes. */
699 buf = (const unsigned char FAR *)word;
700 while (len) {
701 len--;
702 val = *buf++;
703 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
704 }
705
706 /* Return the CRC, post-conditioned. */
707 return crc ^ 0xffffffff;
708 }
709
710 #else
711
712 #ifdef W
713
714 /*
715 Return the CRC of the W bytes in the word_t data, taking the
716 least-significant byte of the word as the first byte of data, without any pre
717 or post conditioning. This is used to combine the CRCs of each braid.
718 */
719 local z_crc_t crc_word OF((z_word_t data));
720
721 local z_crc_t crc_word(data)
722 z_word_t data;
723 {
724 int k;
725 for (k = 0; k < W; k++)
726 data = (data >> 8) ^ crc_table[data & 0xff];
727 return (z_crc_t)data;
728 }
729
730 local z_word_t crc_word_big OF((z_word_t data));
731
732 local z_word_t crc_word_big(data)
733 z_word_t data;
734 {
735 int k;
736 for (k = 0; k < W; k++)
737 data = (data << 8) ^
738 crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
739 return data;
740 }
741
742 #endif
743
744 /* ========================================================================= */
745 unsigned long ZEXPORT crc32_z(crc, buf, len)
746 unsigned long crc;
747 const unsigned char FAR *buf;
748 z_size_t len;
749 {
750 /* Return initial CRC, if requested. */
751 if (buf == Z_NULL) return 0;
752
753 #ifdef DYNAMIC_CRC_TABLE
754 once(&made, make_crc_table);
755 #endif /* DYNAMIC_CRC_TABLE */
756
757 /* Pre-condition the CRC */
758 crc = (~crc) & 0xffffffff;
759
760 #ifdef W
761
762 /* If provided enough bytes, do a braided CRC calculation. */
763 if (len >= N * W + W - 1) {
764 z_size_t blks;
765 z_word_t const *words;
766 unsigned endian;
767 int k;
768
769 /* Compute the CRC up to a z_word_t boundary. */
770 while (len && ((z_size_t)buf & (W - 1)) != 0) {
771 len--;
772 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
773 }
774
775 /* Compute the CRC on as many N z_word_t blocks as are available. */
776 blks = len / (N * W);
777 len -= blks * N * W;
778 words = (z_word_t const *)buf;
779
780 /* Do endian check at execution time instead of compile time, since ARM
781 processors can change the endianess at execution time. If the
782 compiler knows what the endianess will be, it can optimize out the
783 check and the unused branch. */
784 endian = 1;
785 if (*(unsigned char *)&endian) {
786 /* Little endian. */
787
788 z_crc_t crc0;
789 z_word_t word0;
790 #if N > 1
791 z_crc_t crc1;
792 z_word_t word1;
793 #if N > 2
794 z_crc_t crc2;
795 z_word_t word2;
796 #if N > 3
797 z_crc_t crc3;
798 z_word_t word3;
799 #if N > 4
800 z_crc_t crc4;
801 z_word_t word4;
802 #if N > 5
803 z_crc_t crc5;
804 z_word_t word5;
805 #endif
806 #endif
807 #endif
808 #endif
809 #endif
810
811 /* Initialize the CRC for each braid. */
812 crc0 = crc;
813 #if N > 1
814 crc1 = 0;
815 #if N > 2
816 crc2 = 0;
817 #if N > 3
818 crc3 = 0;
819 #if N > 4
820 crc4 = 0;
821 #if N > 5
822 crc5 = 0;
823 #endif
824 #endif
825 #endif
826 #endif
827 #endif
828
829 /*
830 Process the first blks-1 blocks, computing the CRCs on each braid
831 independently.
832 */
833 while (--blks) {
834 /* Load the word for each braid into registers. */
835 word0 = crc0 ^ words[0];
836 #if N > 1
837 word1 = crc1 ^ words[1];
838 #if N > 2
839 word2 = crc2 ^ words[2];
840 #if N > 3
841 word3 = crc3 ^ words[3];
842 #if N > 4
843 word4 = crc4 ^ words[4];
844 #if N > 5
845 word5 = crc5 ^ words[5];
846 #endif
847 #endif
848 #endif
849 #endif
850 #endif
851 words += N;
852
853 /* Compute and update the CRC for each word. The loop should
854 get unrolled. */
855 crc0 = crc_braid_table[0][word0 & 0xff];
856 #if N > 1
857 crc1 = crc_braid_table[0][word1 & 0xff];
858 #if N > 2
859 crc2 = crc_braid_table[0][word2 & 0xff];
860 #if N > 3
861 crc3 = crc_braid_table[0][word3 & 0xff];
862 #if N > 4
863 crc4 = crc_braid_table[0][word4 & 0xff];
864 #if N > 5
865 crc5 = crc_braid_table[0][word5 & 0xff];
866 #endif
867 #endif
868 #endif
869 #endif
870 #endif
871 for (k = 1; k < W; k++) {
872 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
873 #if N > 1
874 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
875 #if N > 2
876 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
877 #if N > 3
878 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
879 #if N > 4
880 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
881 #if N > 5
882 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
883 #endif
884 #endif
885 #endif
886 #endif
887 #endif
888 }
889 }
890
891 /*
892 Process the last block, combining the CRCs of the N braids at the
893 same time.
894 */
895 crc = crc_word(crc0 ^ words[0]);
896 #if N > 1
897 crc = crc_word(crc1 ^ words[1] ^ crc);
898 #if N > 2
899 crc = crc_word(crc2 ^ words[2] ^ crc);
900 #if N > 3
901 crc = crc_word(crc3 ^ words[3] ^ crc);
902 #if N > 4
903 crc = crc_word(crc4 ^ words[4] ^ crc);
904 #if N > 5
905 crc = crc_word(crc5 ^ words[5] ^ crc);
906 #endif
907 #endif
908 #endif
909 #endif
910 #endif
911 words += N;
912 }
913 else {
914 /* Big endian. */
915
916 z_word_t crc0, word0, comb;
917 #if N > 1
918 z_word_t crc1, word1;
919 #if N > 2
920 z_word_t crc2, word2;
921 #if N > 3
922 z_word_t crc3, word3;
923 #if N > 4
924 z_word_t crc4, word4;
925 #if N > 5
926 z_word_t crc5, word5;
927 #endif
928 #endif
929 #endif
930 #endif
931 #endif
932
933 /* Initialize the CRC for each braid. */
934 crc0 = byte_swap(crc);
935 #if N > 1
936 crc1 = 0;
937 #if N > 2
938 crc2 = 0;
939 #if N > 3
940 crc3 = 0;
941 #if N > 4
942 crc4 = 0;
943 #if N > 5
944 crc5 = 0;
945 #endif
946 #endif
947 #endif
948 #endif
949 #endif
950
951 /*
952 Process the first blks-1 blocks, computing the CRCs on each braid
953 independently.
954 */
955 while (--blks) {
956 /* Load the word for each braid into registers. */
957 word0 = crc0 ^ words[0];
958 #if N > 1
959 word1 = crc1 ^ words[1];
960 #if N > 2
961 word2 = crc2 ^ words[2];
962 #if N > 3
963 word3 = crc3 ^ words[3];
964 #if N > 4
965 word4 = crc4 ^ words[4];
966 #if N > 5
967 word5 = crc5 ^ words[5];
968 #endif
969 #endif
970 #endif
971 #endif
972 #endif
973 words += N;
974
975 /* Compute and update the CRC for each word. The loop should
976 get unrolled. */
977 crc0 = crc_braid_big_table[0][word0 & 0xff];
978 #if N > 1
979 crc1 = crc_braid_big_table[0][word1 & 0xff];
980 #if N > 2
981 crc2 = crc_braid_big_table[0][word2 & 0xff];
982 #if N > 3
983 crc3 = crc_braid_big_table[0][word3 & 0xff];
984 #if N > 4
985 crc4 = crc_braid_big_table[0][word4 & 0xff];
986 #if N > 5
987 crc5 = crc_braid_big_table[0][word5 & 0xff];
988 #endif
989 #endif
990 #endif
991 #endif
992 #endif
993 for (k = 1; k < W; k++) {
994 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
995 #if N > 1
996 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
997 #if N > 2
998 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
999 #if N > 3
1000 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
1001 #if N > 4
1002 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
1003 #if N > 5
1004 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
1005 #endif
1006 #endif
1007 #endif
1008 #endif
1009 #endif
1010 }
1011 }
1012
1013 /*
1014 Process the last block, combining the CRCs of the N braids at the
1015 same time.
1016 */
1017 comb = crc_word_big(crc0 ^ words[0]);
1018 #if N > 1
1019 comb = crc_word_big(crc1 ^ words[1] ^ comb);
1020 #if N > 2
1021 comb = crc_word_big(crc2 ^ words[2] ^ comb);
1022 #if N > 3
1023 comb = crc_word_big(crc3 ^ words[3] ^ comb);
1024 #if N > 4
1025 comb = crc_word_big(crc4 ^ words[4] ^ comb);
1026 #if N > 5
1027 comb = crc_word_big(crc5 ^ words[5] ^ comb);
1028 #endif
1029 #endif
1030 #endif
1031 #endif
1032 #endif
1033 words += N;
1034 crc = byte_swap(comb);
1035 }
1036
1037 /*
1038 Update the pointer to the remaining bytes to process.
1039 */
1040 buf = (unsigned char const *)words;
1041 }
1042
1043 #endif /* W */
1044
1045 /* Complete the computation of the CRC on any remaining bytes. */
1046 while (len >= 8) {
1047 len -= 8;
1048 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1049 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1050 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1051 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1052 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1053 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1054 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1055 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1056 }
1057 while (len) {
1058 len--;
1059 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1060 }
1061
1062 /* Return the CRC, post-conditioned. */
1063 return crc ^ 0xffffffff;
1064 }
1065
1066 #endif
1067
1068 /* ========================================================================= */
1069 unsigned long ZEXPORT crc32(crc, buf, len)
1070 unsigned long crc;
1071 const unsigned char FAR *buf;
1072 uInt len;
1073 {
1074 return crc32_z(crc, buf, len);
1075 }
1076
1077 /* ========================================================================= */
1078 uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1079 uLong crc1;
1080 uLong crc2;
1081 z_off64_t len2;
1082 {
1083 #ifdef DYNAMIC_CRC_TABLE
1084 once(&made, make_crc_table);
1085 #endif /* DYNAMIC_CRC_TABLE */
1086 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1087 }
1088
1089 /* ========================================================================= */
1090 uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1091 uLong crc1;
1092 uLong crc2;
1093 z_off_t len2;
1094 {
1095 return crc32_combine64(crc1, crc2, len2);
1096 }
1097
1098 /* ========================================================================= */
1099 uLong ZEXPORT crc32_combine_gen64(len2)
1100 z_off64_t len2;
1101 {
1102 #ifdef DYNAMIC_CRC_TABLE
1103 once(&made, make_crc_table);
1104 #endif /* DYNAMIC_CRC_TABLE */
1105 return x2nmodp(len2, 3);
1106 }
1107
1108 /* ========================================================================= */
1109 uLong ZEXPORT crc32_combine_gen(len2)
1110 z_off_t len2;
1111 {
1112 return crc32_combine_gen64(len2);
1113 }
1114
1115 /* ========================================================================= */
1116 uLong crc32_combine_op(crc1, crc2, op)
1117 uLong crc1;
1118 uLong crc2;
1119 uLong op;
1120 {
1121 return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1122 }
Cache object: ad1f189f53ce127fed80bcce7a130014
|