The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/zlib/crc32.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* crc32.c -- compute the CRC-32 of a data stream
    2  * Copyright (C) 1995-2022 Mark Adler
    3  * For conditions of distribution and use, see copyright notice in zlib.h
    4  *
    5  * This interleaved implementation of a CRC makes use of pipelined multiple
    6  * arithmetic-logic units, commonly found in modern CPU cores. It is due to
    7  * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
    8  */
    9 
   10 /* @(#) $Id$ */
   11 
   12 /*
   13   Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
   14   protection on the static variables used to control the first-use generation
   15   of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
   16   first call get_crc_table() to initialize the tables before allowing more than
   17   one thread to use crc32().
   18 
   19   MAKECRCH can be #defined to write out crc32.h. A main() routine is also
   20   produced, so that this one source file can be compiled to an executable.
   21  */
   22 
   23 #ifdef MAKECRCH
   24 #  include <stdio.h>
   25 #  ifndef DYNAMIC_CRC_TABLE
   26 #    define DYNAMIC_CRC_TABLE
   27 #  endif /* !DYNAMIC_CRC_TABLE */
   28 #endif /* MAKECRCH */
   29 
   30 #include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
   31 
   32  /*
   33   A CRC of a message is computed on N braids of words in the message, where
   34   each word consists of W bytes (4 or 8). If N is 3, for example, then three
   35   running sparse CRCs are calculated respectively on each braid, at these
   36   indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
   37   This is done starting at a word boundary, and continues until as many blocks
   38   of N * W bytes as are available have been processed. The results are combined
   39   into a single CRC at the end. For this code, N must be in the range 1..6 and
   40   W must be 4 or 8. The upper limit on N can be increased if desired by adding
   41   more #if blocks, extending the patterns apparent in the code. In addition,
   42   crc32.h would need to be regenerated, if the maximum N value is increased.
   43 
   44   N and W are chosen empirically by benchmarking the execution time on a given
   45   processor. The choices for N and W below were based on testing on Intel Kaby
   46   Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
   47   Octeon II processors. The Intel, AMD, and ARM processors were all fastest
   48   with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
   49   They were all tested with either gcc or clang, all using the -O3 optimization
   50   level. Your mileage may vary.
   51  */
   52 
   53 /* Define N */
   54 #ifdef Z_TESTN
   55 #  define N Z_TESTN
   56 #else
   57 #  define N 5
   58 #endif
   59 #if N < 1 || N > 6
   60 #  error N must be in 1..6
   61 #endif
   62 
   63 /*
   64   z_crc_t must be at least 32 bits. z_word_t must be at least as long as
   65   z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
   66   that bytes are eight bits.
   67  */
   68 
   69 /*
   70   Define W and the associated z_word_t type. If W is not defined, then a
   71   braided calculation is not used, and the associated tables and code are not
   72   compiled.
   73  */
   74 #ifdef Z_TESTW
   75 #  if Z_TESTW-1 != -1
   76 #    define W Z_TESTW
   77 #  endif
   78 #else
   79 #  ifdef MAKECRCH
   80 #    define W 8         /* required for MAKECRCH */
   81 #  else
   82 #    if defined(__x86_64__) || defined(__aarch64__)
   83 #      define W 8
   84 #    else
   85 #      define W 4
   86 #    endif
   87 #  endif
   88 #endif
   89 #ifdef W
   90 #  if W == 8 && defined(Z_U8)
   91      typedef Z_U8 z_word_t;
   92 #  elif defined(Z_U4)
   93 #    undef W
   94 #    define W 4
   95      typedef Z_U4 z_word_t;
   96 #  else
   97 #    undef W
   98 #  endif
   99 #endif
  100 
  101 /* Local functions. */
  102 local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
  103 local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
  104 
  105 /* If available, use the ARM processor CRC32 instruction. */
  106 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
  107 #  define ARMCRC32
  108 #endif
  109 
  110 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
  111 /*
  112   Swap the bytes in a z_word_t to convert between little and big endian. Any
  113   self-respecting compiler will optimize this to a single machine byte-swap
  114   instruction, if one is available. This assumes that word_t is either 32 bits
  115   or 64 bits.
  116  */
  117 local z_word_t byte_swap OF((z_word_t word));
  118 
  119 local z_word_t byte_swap(word)
  120     z_word_t word;
  121 {
  122 #  if W == 8
  123     return
  124         (word & 0xff00000000000000) >> 56 |
  125         (word & 0xff000000000000) >> 40 |
  126         (word & 0xff0000000000) >> 24 |
  127         (word & 0xff00000000) >> 8 |
  128         (word & 0xff000000) << 8 |
  129         (word & 0xff0000) << 24 |
  130         (word & 0xff00) << 40 |
  131         (word & 0xff) << 56;
  132 #  else   /* W == 4 */
  133     return
  134         (word & 0xff000000) >> 24 |
  135         (word & 0xff0000) >> 8 |
  136         (word & 0xff00) << 8 |
  137         (word & 0xff) << 24;
  138 #  endif
  139 }
  140 #endif
  141 
  142 /* CRC polynomial. */
  143 #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
  144 
  145 #ifdef DYNAMIC_CRC_TABLE
  146 
  147 local z_crc_t FAR crc_table[256];
  148 local z_crc_t FAR x2n_table[32];
  149 local void make_crc_table OF((void));
  150 #ifdef W
  151    local z_word_t FAR crc_big_table[256];
  152    local z_crc_t FAR crc_braid_table[W][256];
  153    local z_word_t FAR crc_braid_big_table[W][256];
  154    local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
  155 #endif
  156 #ifdef MAKECRCH
  157    local void write_table OF((FILE *, const z_crc_t FAR *, int));
  158    local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
  159    local void write_table64 OF((FILE *, const z_word_t FAR *, int));
  160 #endif /* MAKECRCH */
  161 
  162 /*
  163   Define a once() function depending on the availability of atomics. If this is
  164   compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
  165   multiple threads, and if atomics are not available, then get_crc_table() must
  166   be called to initialize the tables and must return before any threads are
  167   allowed to compute or combine CRCs.
  168  */
  169 
  170 /* Definition of once functionality. */
  171 typedef struct once_s once_t;
  172 local void once OF((once_t *, void (*)(void)));
  173 
  174 /* Check for the availability of atomics. */
  175 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
  176     !defined(__STDC_NO_ATOMICS__)
  177 
  178 #include <stdatomic.h>
  179 
  180 /* Structure for once(), which must be initialized with ONCE_INIT. */
  181 struct once_s {
  182     atomic_flag begun;
  183     atomic_int done;
  184 };
  185 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
  186 
  187 /*
  188   Run the provided init() function exactly once, even if multiple threads
  189   invoke once() at the same time. The state must be a once_t initialized with
  190   ONCE_INIT.
  191  */
  192 local void once(state, init)
  193     once_t *state;
  194     void (*init)(void);
  195 {
  196     if (!atomic_load(&state->done)) {
  197         if (atomic_flag_test_and_set(&state->begun))
  198             while (!atomic_load(&state->done))
  199                 ;
  200         else {
  201             init();
  202             atomic_store(&state->done, 1);
  203         }
  204     }
  205 }
  206 
  207 #else   /* no atomics */
  208 
  209 /* Structure for once(), which must be initialized with ONCE_INIT. */
  210 struct once_s {
  211     volatile int begun;
  212     volatile int done;
  213 };
  214 #define ONCE_INIT {0, 0}
  215 
  216 /* Test and set. Alas, not atomic, but tries to minimize the period of
  217    vulnerability. */
  218 local int test_and_set OF((int volatile *));
  219 local int test_and_set(flag)
  220     int volatile *flag;
  221 {
  222     int was;
  223 
  224     was = *flag;
  225     *flag = 1;
  226     return was;
  227 }
  228 
  229 /* Run the provided init() function once. This is not thread-safe. */
  230 local void once(state, init)
  231     once_t *state;
  232     void (*init)(void);
  233 {
  234     if (!state->done) {
  235         if (test_and_set(&state->begun))
  236             while (!state->done)
  237                 ;
  238         else {
  239             init();
  240             state->done = 1;
  241         }
  242     }
  243 }
  244 
  245 #endif
  246 
  247 /* State for once(). */
  248 local once_t made = ONCE_INIT;
  249 
  250 /*
  251   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
  252   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
  253 
  254   Polynomials over GF(2) are represented in binary, one bit per coefficient,
  255   with the lowest powers in the most significant bit. Then adding polynomials
  256   is just exclusive-or, and multiplying a polynomial by x is a right shift by
  257   one. If we call the above polynomial p, and represent a byte as the
  258   polynomial q, also with the lowest power in the most significant bit (so the
  259   byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
  260   where a mod b means the remainder after dividing a by b.
  261 
  262   This calculation is done using the shift-register method of multiplying and
  263   taking the remainder. The register is initialized to zero, and for each
  264   incoming bit, x^32 is added mod p to the register if the bit is a one (where
  265   x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
  266   (which is shifting right by one and adding x^32 mod p if the bit shifted out
  267   is a one). We start with the highest power (least significant bit) of q and
  268   repeat for all eight bits of q.
  269 
  270   The table is simply the CRC of all possible eight bit values. This is all the
  271   information needed to generate CRCs on data a byte at a time for all
  272   combinations of CRC register values and incoming bytes.
  273  */
  274 
  275 local void make_crc_table()
  276 {
  277     unsigned i, j, n;
  278     z_crc_t p;
  279 
  280     /* initialize the CRC of bytes tables */
  281     for (i = 0; i < 256; i++) {
  282         p = i;
  283         for (j = 0; j < 8; j++)
  284             p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
  285         crc_table[i] = p;
  286 #ifdef W
  287         crc_big_table[i] = byte_swap(p);
  288 #endif
  289     }
  290 
  291     /* initialize the x^2^n mod p(x) table */
  292     p = (z_crc_t)1 << 30;         /* x^1 */
  293     x2n_table[0] = p;
  294     for (n = 1; n < 32; n++)
  295         x2n_table[n] = p = multmodp(p, p);
  296 
  297 #ifdef W
  298     /* initialize the braiding tables -- needs x2n_table[] */
  299     braid(crc_braid_table, crc_braid_big_table, N, W);
  300 #endif
  301 
  302 #ifdef MAKECRCH
  303     {
  304         /*
  305           The crc32.h header file contains tables for both 32-bit and 64-bit
  306           z_word_t's, and so requires a 64-bit type be available. In that case,
  307           z_word_t must be defined to be 64-bits. This code then also generates
  308           and writes out the tables for the case that z_word_t is 32 bits.
  309          */
  310 #if !defined(W) || W != 8
  311 #  error Need a 64-bit integer type in order to generate crc32.h.
  312 #endif
  313         FILE *out;
  314         int k, n;
  315         z_crc_t ltl[8][256];
  316         z_word_t big[8][256];
  317 
  318         out = fopen("crc32.h", "w");
  319         if (out == NULL) return;
  320 
  321         /* write out little-endian CRC table to crc32.h */
  322         fprintf(out,
  323             "/* crc32.h -- tables for rapid CRC calculation\n"
  324             " * Generated automatically by crc32.c\n */\n"
  325             "\n"
  326             "local const z_crc_t FAR crc_table[] = {\n"
  327             "    ");
  328         write_table(out, crc_table, 256);
  329         fprintf(out,
  330             "};\n");
  331 
  332         /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
  333         fprintf(out,
  334             "\n"
  335             "#ifdef W\n"
  336             "\n"
  337             "#if W == 8\n"
  338             "\n"
  339             "local const z_word_t FAR crc_big_table[] = {\n"
  340             "    ");
  341         write_table64(out, crc_big_table, 256);
  342         fprintf(out,
  343             "};\n");
  344 
  345         /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
  346         fprintf(out,
  347             "\n"
  348             "#else /* W == 4 */\n"
  349             "\n"
  350             "local const z_word_t FAR crc_big_table[] = {\n"
  351             "    ");
  352         write_table32hi(out, crc_big_table, 256);
  353         fprintf(out,
  354             "};\n"
  355             "\n"
  356             "#endif\n");
  357 
  358         /* write out braid tables for each value of N */
  359         for (n = 1; n <= 6; n++) {
  360             fprintf(out,
  361             "\n"
  362             "#if N == %d\n", n);
  363 
  364             /* compute braid tables for this N and 64-bit word_t */
  365             braid(ltl, big, n, 8);
  366 
  367             /* write out braid tables for 64-bit z_word_t to crc32.h */
  368             fprintf(out,
  369             "\n"
  370             "#if W == 8\n"
  371             "\n"
  372             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
  373             for (k = 0; k < 8; k++) {
  374                 fprintf(out, "   {");
  375                 write_table(out, ltl[k], 256);
  376                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
  377             }
  378             fprintf(out,
  379             "};\n"
  380             "\n"
  381             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
  382             for (k = 0; k < 8; k++) {
  383                 fprintf(out, "   {");
  384                 write_table64(out, big[k], 256);
  385                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
  386             }
  387             fprintf(out,
  388             "};\n");
  389 
  390             /* compute braid tables for this N and 32-bit word_t */
  391             braid(ltl, big, n, 4);
  392 
  393             /* write out braid tables for 32-bit z_word_t to crc32.h */
  394             fprintf(out,
  395             "\n"
  396             "#else /* W == 4 */\n"
  397             "\n"
  398             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
  399             for (k = 0; k < 4; k++) {
  400                 fprintf(out, "   {");
  401                 write_table(out, ltl[k], 256);
  402                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
  403             }
  404             fprintf(out,
  405             "};\n"
  406             "\n"
  407             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
  408             for (k = 0; k < 4; k++) {
  409                 fprintf(out, "   {");
  410                 write_table32hi(out, big[k], 256);
  411                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
  412             }
  413             fprintf(out,
  414             "};\n"
  415             "\n"
  416             "#endif\n"
  417             "\n"
  418             "#endif\n");
  419         }
  420         fprintf(out,
  421             "\n"
  422             "#endif\n");
  423 
  424         /* write out zeros operator table to crc32.h */
  425         fprintf(out,
  426             "\n"
  427             "local const z_crc_t FAR x2n_table[] = {\n"
  428             "    ");
  429         write_table(out, x2n_table, 32);
  430         fprintf(out,
  431             "};\n");
  432         fclose(out);
  433     }
  434 #endif /* MAKECRCH */
  435 }
  436 
  437 #ifdef MAKECRCH
  438 
  439 /*
  440    Write the 32-bit values in table[0..k-1] to out, five per line in
  441    hexadecimal separated by commas.
  442  */
  443 local void write_table(out, table, k)
  444     FILE *out;
  445     const z_crc_t FAR *table;
  446     int k;
  447 {
  448     int n;
  449 
  450     for (n = 0; n < k; n++)
  451         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
  452                 (unsigned long)(table[n]),
  453                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
  454 }
  455 
  456 /*
  457    Write the high 32-bits of each value in table[0..k-1] to out, five per line
  458    in hexadecimal separated by commas.
  459  */
  460 local void write_table32hi(out, table, k)
  461 FILE *out;
  462 const z_word_t FAR *table;
  463 int k;
  464 {
  465     int n;
  466 
  467     for (n = 0; n < k; n++)
  468         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
  469                 (unsigned long)(table[n] >> 32),
  470                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
  471 }
  472 
  473 /*
  474   Write the 64-bit values in table[0..k-1] to out, three per line in
  475   hexadecimal separated by commas. This assumes that if there is a 64-bit
  476   type, then there is also a long long integer type, and it is at least 64
  477   bits. If not, then the type cast and format string can be adjusted
  478   accordingly.
  479  */
  480 local void write_table64(out, table, k)
  481     FILE *out;
  482     const z_word_t FAR *table;
  483     int k;
  484 {
  485     int n;
  486 
  487     for (n = 0; n < k; n++)
  488         fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
  489                 (unsigned long long)(table[n]),
  490                 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
  491 }
  492 
  493 /* Actually do the deed. */
  494 int main()
  495 {
  496     make_crc_table();
  497     return 0;
  498 }
  499 
  500 #endif /* MAKECRCH */
  501 
  502 #ifdef W
  503 /*
  504   Generate the little and big-endian braid tables for the given n and z_word_t
  505   size w. Each array must have room for w blocks of 256 elements.
  506  */
  507 local void braid(ltl, big, n, w)
  508     z_crc_t ltl[][256];
  509     z_word_t big[][256];
  510     int n;
  511     int w;
  512 {
  513     int k;
  514     z_crc_t i, p, q;
  515     for (k = 0; k < w; k++) {
  516         p = x2nmodp((n * w + 3 - k) << 3, 0);
  517         ltl[k][0] = 0;
  518         big[w - 1 - k][0] = 0;
  519         for (i = 1; i < 256; i++) {
  520             ltl[k][i] = q = multmodp(i << 24, p);
  521             big[w - 1 - k][i] = byte_swap(q);
  522         }
  523     }
  524 }
  525 #endif
  526 
  527 #else /* !DYNAMIC_CRC_TABLE */
  528 /* ========================================================================
  529  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
  530  * of x for combining CRC-32s, all made by make_crc_table().
  531  */
  532 #include "crc32.h"
  533 #endif /* DYNAMIC_CRC_TABLE */
  534 
  535 /* ========================================================================
  536  * Routines used for CRC calculation. Some are also required for the table
  537  * generation above.
  538  */
  539 
  540 /*
  541   Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
  542   reflected. For speed, this requires that a not be zero.
  543  */
  544 local z_crc_t multmodp(a, b)
  545     z_crc_t a;
  546     z_crc_t b;
  547 {
  548     z_crc_t m, p;
  549 
  550     m = (z_crc_t)1 << 31;
  551     p = 0;
  552     for (;;) {
  553         if (a & m) {
  554             p ^= b;
  555             if ((a & (m - 1)) == 0)
  556                 break;
  557         }
  558         m >>= 1;
  559         b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
  560     }
  561     return p;
  562 }
  563 
  564 /*
  565   Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
  566   initialized.
  567  */
  568 local z_crc_t x2nmodp(n, k)
  569     z_off64_t n;
  570     unsigned k;
  571 {
  572     z_crc_t p;
  573 
  574     p = (z_crc_t)1 << 31;           /* x^0 == 1 */
  575     while (n) {
  576         if (n & 1)
  577             p = multmodp(x2n_table[k & 31], p);
  578         n >>= 1;
  579         k++;
  580     }
  581     return p;
  582 }
  583 
  584 /* =========================================================================
  585  * This function can be used by asm versions of crc32(), and to force the
  586  * generation of the CRC tables in a threaded application.
  587  */
  588 const z_crc_t FAR * ZEXPORT get_crc_table()
  589 {
  590 #ifdef DYNAMIC_CRC_TABLE
  591     once(&made, make_crc_table);
  592 #endif /* DYNAMIC_CRC_TABLE */
  593     return (const z_crc_t FAR *)crc_table;
  594 }
  595 
  596 /* =========================================================================
  597  * Use ARM machine instructions if available. This will compute the CRC about
  598  * ten times faster than the braided calculation. This code does not check for
  599  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
  600  * only be defined if the compilation specifies an ARM processor architecture
  601  * that has the instructions. For example, compiling with -march=armv8.1-a or
  602  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
  603  * instructions.
  604  */
  605 #ifdef ARMCRC32
  606 
  607 /*
  608    Constants empirically determined to maximize speed. These values are from
  609    measurements on a Cortex-A57. Your mileage may vary.
  610  */
  611 #define Z_BATCH 3990                /* number of words in a batch */
  612 #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
  613 #define Z_BATCH_MIN 800             /* fewest words in a final batch */
  614 
  615 unsigned long ZEXPORT crc32_z(crc, buf, len)
  616     unsigned long crc;
  617     const unsigned char FAR *buf;
  618     z_size_t len;
  619 {
  620     z_crc_t val;
  621     z_word_t crc1, crc2;
  622     const z_word_t *word;
  623     z_word_t val0, val1, val2;
  624     z_size_t last, last2, i;
  625     z_size_t num;
  626 
  627     /* Return initial CRC, if requested. */
  628     if (buf == Z_NULL) return 0;
  629 
  630 #ifdef DYNAMIC_CRC_TABLE
  631     once(&made, make_crc_table);
  632 #endif /* DYNAMIC_CRC_TABLE */
  633 
  634     /* Pre-condition the CRC */
  635     crc = (~crc) & 0xffffffff;
  636 
  637     /* Compute the CRC up to a word boundary. */
  638     while (len && ((z_size_t)buf & 7) != 0) {
  639         len--;
  640         val = *buf++;
  641         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
  642     }
  643 
  644     /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
  645     word = (z_word_t const *)buf;
  646     num = len >> 3;
  647     len &= 7;
  648 
  649     /* Do three interleaved CRCs to realize the throughput of one crc32x
  650        instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three
  651        CRCs are combined into a single CRC after each set of batches. */
  652     while (num >= 3 * Z_BATCH) {
  653         crc1 = 0;
  654         crc2 = 0;
  655         for (i = 0; i < Z_BATCH; i++) {
  656             val0 = word[i];
  657             val1 = word[i + Z_BATCH];
  658             val2 = word[i + 2 * Z_BATCH];
  659             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
  660             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
  661             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
  662         }
  663         word += 3 * Z_BATCH;
  664         num -= 3 * Z_BATCH;
  665         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
  666         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
  667     }
  668 
  669     /* Do one last smaller batch with the remaining words, if there are enough
  670        to pay for the combination of CRCs. */
  671     last = num / 3;
  672     if (last >= Z_BATCH_MIN) {
  673         last2 = last << 1;
  674         crc1 = 0;
  675         crc2 = 0;
  676         for (i = 0; i < last; i++) {
  677             val0 = word[i];
  678             val1 = word[i + last];
  679             val2 = word[i + last2];
  680             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
  681             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
  682             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
  683         }
  684         word += 3 * last;
  685         num -= 3 * last;
  686         val = x2nmodp(last, 6);
  687         crc = multmodp(val, crc) ^ crc1;
  688         crc = multmodp(val, crc) ^ crc2;
  689     }
  690 
  691     /* Compute the CRC on any remaining words. */
  692     for (i = 0; i < num; i++) {
  693         val0 = word[i];
  694         __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
  695     }
  696     word += num;
  697 
  698     /* Complete the CRC on any remaining bytes. */
  699     buf = (const unsigned char FAR *)word;
  700     while (len) {
  701         len--;
  702         val = *buf++;
  703         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
  704     }
  705 
  706     /* Return the CRC, post-conditioned. */
  707     return crc ^ 0xffffffff;
  708 }
  709 
  710 #else
  711 
  712 #ifdef W
  713 
  714 /*
  715   Return the CRC of the W bytes in the word_t data, taking the
  716   least-significant byte of the word as the first byte of data, without any pre
  717   or post conditioning. This is used to combine the CRCs of each braid.
  718  */
  719 local z_crc_t crc_word OF((z_word_t data));
  720 
  721 local z_crc_t crc_word(data)
  722     z_word_t data;
  723 {
  724     int k;
  725     for (k = 0; k < W; k++)
  726         data = (data >> 8) ^ crc_table[data & 0xff];
  727     return (z_crc_t)data;
  728 }
  729 
  730 local z_word_t crc_word_big OF((z_word_t data));
  731 
  732 local z_word_t crc_word_big(data)
  733     z_word_t data;
  734 {
  735     int k;
  736     for (k = 0; k < W; k++)
  737         data = (data << 8) ^
  738             crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
  739     return data;
  740 }
  741 
  742 #endif
  743 
  744 /* ========================================================================= */
  745 unsigned long ZEXPORT crc32_z(crc, buf, len)
  746     unsigned long crc;
  747     const unsigned char FAR *buf;
  748     z_size_t len;
  749 {
  750     /* Return initial CRC, if requested. */
  751     if (buf == Z_NULL) return 0;
  752 
  753 #ifdef DYNAMIC_CRC_TABLE
  754     once(&made, make_crc_table);
  755 #endif /* DYNAMIC_CRC_TABLE */
  756 
  757     /* Pre-condition the CRC */
  758     crc = (~crc) & 0xffffffff;
  759 
  760 #ifdef W
  761 
  762     /* If provided enough bytes, do a braided CRC calculation. */
  763     if (len >= N * W + W - 1) {
  764         z_size_t blks;
  765         z_word_t const *words;
  766         unsigned endian;
  767         int k;
  768 
  769         /* Compute the CRC up to a z_word_t boundary. */
  770         while (len && ((z_size_t)buf & (W - 1)) != 0) {
  771             len--;
  772             crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
  773         }
  774 
  775         /* Compute the CRC on as many N z_word_t blocks as are available. */
  776         blks = len / (N * W);
  777         len -= blks * N * W;
  778         words = (z_word_t const *)buf;
  779 
  780         /* Do endian check at execution time instead of compile time, since ARM
  781            processors can change the endianess at execution time. If the
  782            compiler knows what the endianess will be, it can optimize out the
  783            check and the unused branch. */
  784         endian = 1;
  785         if (*(unsigned char *)&endian) {
  786             /* Little endian. */
  787 
  788             z_crc_t crc0;
  789             z_word_t word0;
  790 #if N > 1
  791             z_crc_t crc1;
  792             z_word_t word1;
  793 #if N > 2
  794             z_crc_t crc2;
  795             z_word_t word2;
  796 #if N > 3
  797             z_crc_t crc3;
  798             z_word_t word3;
  799 #if N > 4
  800             z_crc_t crc4;
  801             z_word_t word4;
  802 #if N > 5
  803             z_crc_t crc5;
  804             z_word_t word5;
  805 #endif
  806 #endif
  807 #endif
  808 #endif
  809 #endif
  810 
  811             /* Initialize the CRC for each braid. */
  812             crc0 = crc;
  813 #if N > 1
  814             crc1 = 0;
  815 #if N > 2
  816             crc2 = 0;
  817 #if N > 3
  818             crc3 = 0;
  819 #if N > 4
  820             crc4 = 0;
  821 #if N > 5
  822             crc5 = 0;
  823 #endif
  824 #endif
  825 #endif
  826 #endif
  827 #endif
  828 
  829             /*
  830               Process the first blks-1 blocks, computing the CRCs on each braid
  831               independently.
  832              */
  833             while (--blks) {
  834                 /* Load the word for each braid into registers. */
  835                 word0 = crc0 ^ words[0];
  836 #if N > 1
  837                 word1 = crc1 ^ words[1];
  838 #if N > 2
  839                 word2 = crc2 ^ words[2];
  840 #if N > 3
  841                 word3 = crc3 ^ words[3];
  842 #if N > 4
  843                 word4 = crc4 ^ words[4];
  844 #if N > 5
  845                 word5 = crc5 ^ words[5];
  846 #endif
  847 #endif
  848 #endif
  849 #endif
  850 #endif
  851                 words += N;
  852 
  853                 /* Compute and update the CRC for each word. The loop should
  854                    get unrolled. */
  855                 crc0 = crc_braid_table[0][word0 & 0xff];
  856 #if N > 1
  857                 crc1 = crc_braid_table[0][word1 & 0xff];
  858 #if N > 2
  859                 crc2 = crc_braid_table[0][word2 & 0xff];
  860 #if N > 3
  861                 crc3 = crc_braid_table[0][word3 & 0xff];
  862 #if N > 4
  863                 crc4 = crc_braid_table[0][word4 & 0xff];
  864 #if N > 5
  865                 crc5 = crc_braid_table[0][word5 & 0xff];
  866 #endif
  867 #endif
  868 #endif
  869 #endif
  870 #endif
  871                 for (k = 1; k < W; k++) {
  872                     crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
  873 #if N > 1
  874                     crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
  875 #if N > 2
  876                     crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
  877 #if N > 3
  878                     crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
  879 #if N > 4
  880                     crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
  881 #if N > 5
  882                     crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
  883 #endif
  884 #endif
  885 #endif
  886 #endif
  887 #endif
  888                 }
  889             }
  890 
  891             /*
  892               Process the last block, combining the CRCs of the N braids at the
  893               same time.
  894              */
  895             crc = crc_word(crc0 ^ words[0]);
  896 #if N > 1
  897             crc = crc_word(crc1 ^ words[1] ^ crc);
  898 #if N > 2
  899             crc = crc_word(crc2 ^ words[2] ^ crc);
  900 #if N > 3
  901             crc = crc_word(crc3 ^ words[3] ^ crc);
  902 #if N > 4
  903             crc = crc_word(crc4 ^ words[4] ^ crc);
  904 #if N > 5
  905             crc = crc_word(crc5 ^ words[5] ^ crc);
  906 #endif
  907 #endif
  908 #endif
  909 #endif
  910 #endif
  911             words += N;
  912         }
  913         else {
  914             /* Big endian. */
  915 
  916             z_word_t crc0, word0, comb;
  917 #if N > 1
  918             z_word_t crc1, word1;
  919 #if N > 2
  920             z_word_t crc2, word2;
  921 #if N > 3
  922             z_word_t crc3, word3;
  923 #if N > 4
  924             z_word_t crc4, word4;
  925 #if N > 5
  926             z_word_t crc5, word5;
  927 #endif
  928 #endif
  929 #endif
  930 #endif
  931 #endif
  932 
  933             /* Initialize the CRC for each braid. */
  934             crc0 = byte_swap(crc);
  935 #if N > 1
  936             crc1 = 0;
  937 #if N > 2
  938             crc2 = 0;
  939 #if N > 3
  940             crc3 = 0;
  941 #if N > 4
  942             crc4 = 0;
  943 #if N > 5
  944             crc5 = 0;
  945 #endif
  946 #endif
  947 #endif
  948 #endif
  949 #endif
  950 
  951             /*
  952               Process the first blks-1 blocks, computing the CRCs on each braid
  953               independently.
  954              */
  955             while (--blks) {
  956                 /* Load the word for each braid into registers. */
  957                 word0 = crc0 ^ words[0];
  958 #if N > 1
  959                 word1 = crc1 ^ words[1];
  960 #if N > 2
  961                 word2 = crc2 ^ words[2];
  962 #if N > 3
  963                 word3 = crc3 ^ words[3];
  964 #if N > 4
  965                 word4 = crc4 ^ words[4];
  966 #if N > 5
  967                 word5 = crc5 ^ words[5];
  968 #endif
  969 #endif
  970 #endif
  971 #endif
  972 #endif
  973                 words += N;
  974 
  975                 /* Compute and update the CRC for each word. The loop should
  976                    get unrolled. */
  977                 crc0 = crc_braid_big_table[0][word0 & 0xff];
  978 #if N > 1
  979                 crc1 = crc_braid_big_table[0][word1 & 0xff];
  980 #if N > 2
  981                 crc2 = crc_braid_big_table[0][word2 & 0xff];
  982 #if N > 3
  983                 crc3 = crc_braid_big_table[0][word3 & 0xff];
  984 #if N > 4
  985                 crc4 = crc_braid_big_table[0][word4 & 0xff];
  986 #if N > 5
  987                 crc5 = crc_braid_big_table[0][word5 & 0xff];
  988 #endif
  989 #endif
  990 #endif
  991 #endif
  992 #endif
  993                 for (k = 1; k < W; k++) {
  994                     crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
  995 #if N > 1
  996                     crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
  997 #if N > 2
  998                     crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
  999 #if N > 3
 1000                     crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
 1001 #if N > 4
 1002                     crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
 1003 #if N > 5
 1004                     crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
 1005 #endif
 1006 #endif
 1007 #endif
 1008 #endif
 1009 #endif
 1010                 }
 1011             }
 1012 
 1013             /*
 1014               Process the last block, combining the CRCs of the N braids at the
 1015               same time.
 1016              */
 1017             comb = crc_word_big(crc0 ^ words[0]);
 1018 #if N > 1
 1019             comb = crc_word_big(crc1 ^ words[1] ^ comb);
 1020 #if N > 2
 1021             comb = crc_word_big(crc2 ^ words[2] ^ comb);
 1022 #if N > 3
 1023             comb = crc_word_big(crc3 ^ words[3] ^ comb);
 1024 #if N > 4
 1025             comb = crc_word_big(crc4 ^ words[4] ^ comb);
 1026 #if N > 5
 1027             comb = crc_word_big(crc5 ^ words[5] ^ comb);
 1028 #endif
 1029 #endif
 1030 #endif
 1031 #endif
 1032 #endif
 1033             words += N;
 1034             crc = byte_swap(comb);
 1035         }
 1036 
 1037         /*
 1038           Update the pointer to the remaining bytes to process.
 1039          */
 1040         buf = (unsigned char const *)words;
 1041     }
 1042 
 1043 #endif /* W */
 1044 
 1045     /* Complete the computation of the CRC on any remaining bytes. */
 1046     while (len >= 8) {
 1047         len -= 8;
 1048         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 1049         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 1050         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 1051         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 1052         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 1053         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 1054         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 1055         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 1056     }
 1057     while (len) {
 1058         len--;
 1059         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 1060     }
 1061 
 1062     /* Return the CRC, post-conditioned. */
 1063     return crc ^ 0xffffffff;
 1064 }
 1065 
 1066 #endif
 1067 
 1068 /* ========================================================================= */
 1069 unsigned long ZEXPORT crc32(crc, buf, len)
 1070     unsigned long crc;
 1071     const unsigned char FAR *buf;
 1072     uInt len;
 1073 {
 1074     return crc32_z(crc, buf, len);
 1075 }
 1076 
 1077 /* ========================================================================= */
 1078 uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
 1079     uLong crc1;
 1080     uLong crc2;
 1081     z_off64_t len2;
 1082 {
 1083 #ifdef DYNAMIC_CRC_TABLE
 1084     once(&made, make_crc_table);
 1085 #endif /* DYNAMIC_CRC_TABLE */
 1086     return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
 1087 }
 1088 
 1089 /* ========================================================================= */
 1090 uLong ZEXPORT crc32_combine(crc1, crc2, len2)
 1091     uLong crc1;
 1092     uLong crc2;
 1093     z_off_t len2;
 1094 {
 1095     return crc32_combine64(crc1, crc2, len2);
 1096 }
 1097 
 1098 /* ========================================================================= */
 1099 uLong ZEXPORT crc32_combine_gen64(len2)
 1100     z_off64_t len2;
 1101 {
 1102 #ifdef DYNAMIC_CRC_TABLE
 1103     once(&made, make_crc_table);
 1104 #endif /* DYNAMIC_CRC_TABLE */
 1105     return x2nmodp(len2, 3);
 1106 }
 1107 
 1108 /* ========================================================================= */
 1109 uLong ZEXPORT crc32_combine_gen(len2)
 1110     z_off_t len2;
 1111 {
 1112     return crc32_combine_gen64(len2);
 1113 }
 1114 
 1115 /* ========================================================================= */
 1116 uLong crc32_combine_op(crc1, crc2, op)
 1117     uLong crc1;
 1118     uLong crc2;
 1119     uLong op;
 1120 {
 1121     return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
 1122 }

Cache object: ad1f189f53ce127fed80bcce7a130014


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.