The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/zstd/lib/compress/zstd_double_fast.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) Yann Collet, Facebook, Inc.
    3  * All rights reserved.
    4  *
    5  * This source code is licensed under both the BSD-style license (found in the
    6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
    7  * in the COPYING file in the root directory of this source tree).
    8  * You may select, at your option, one of the above-listed licenses.
    9  */
   10 
   11 #include "zstd_compress_internal.h"
   12 #include "zstd_double_fast.h"
   13 
   14 
   15 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
   16                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
   17 {
   18     const ZSTD_compressionParameters* const cParams = &ms->cParams;
   19     U32* const hashLarge = ms->hashTable;
   20     U32  const hBitsL = cParams->hashLog;
   21     U32  const mls = cParams->minMatch;
   22     U32* const hashSmall = ms->chainTable;
   23     U32  const hBitsS = cParams->chainLog;
   24     const BYTE* const base = ms->window.base;
   25     const BYTE* ip = base + ms->nextToUpdate;
   26     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
   27     const U32 fastHashFillStep = 3;
   28 
   29     /* Always insert every fastHashFillStep position into the hash tables.
   30      * Insert the other positions into the large hash table if their entry
   31      * is empty.
   32      */
   33     for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
   34         U32 const curr = (U32)(ip - base);
   35         U32 i;
   36         for (i = 0; i < fastHashFillStep; ++i) {
   37             size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
   38             size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
   39             if (i == 0)
   40                 hashSmall[smHash] = curr + i;
   41             if (i == 0 || hashLarge[lgHash] == 0)
   42                 hashLarge[lgHash] = curr + i;
   43             /* Only load extra positions for ZSTD_dtlm_full */
   44             if (dtlm == ZSTD_dtlm_fast)
   45                 break;
   46     }   }
   47 }
   48 
   49 
   50 FORCE_INLINE_TEMPLATE
   51 size_t ZSTD_compressBlock_doubleFast_noDict_generic(
   52         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   53         void const* src, size_t srcSize, U32 const mls /* template */)
   54 {
   55     ZSTD_compressionParameters const* cParams = &ms->cParams;
   56     U32* const hashLong = ms->hashTable;
   57     const U32 hBitsL = cParams->hashLog;
   58     U32* const hashSmall = ms->chainTable;
   59     const U32 hBitsS = cParams->chainLog;
   60     const BYTE* const base = ms->window.base;
   61     const BYTE* const istart = (const BYTE*)src;
   62     const BYTE* anchor = istart;
   63     const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
   64     /* presumes that, if there is a dictionary, it must be using Attach mode */
   65     const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
   66     const BYTE* const prefixLowest = base + prefixLowestIndex;
   67     const BYTE* const iend = istart + srcSize;
   68     const BYTE* const ilimit = iend - HASH_READ_SIZE;
   69     U32 offset_1=rep[0], offset_2=rep[1];
   70     U32 offsetSaved = 0;
   71 
   72     size_t mLength;
   73     U32 offset;
   74     U32 curr;
   75 
   76     /* how many positions to search before increasing step size */
   77     const size_t kStepIncr = 1 << kSearchStrength;
   78     /* the position at which to increment the step size if no match is found */
   79     const BYTE* nextStep;
   80     size_t step; /* the current step size */
   81 
   82     size_t hl0; /* the long hash at ip */
   83     size_t hl1; /* the long hash at ip1 */
   84 
   85     U32 idxl0; /* the long match index for ip */
   86     U32 idxl1; /* the long match index for ip1 */
   87 
   88     const BYTE* matchl0; /* the long match for ip */
   89     const BYTE* matchs0; /* the short match for ip */
   90     const BYTE* matchl1; /* the long match for ip1 */
   91 
   92     const BYTE* ip = istart; /* the current position */
   93     const BYTE* ip1; /* the next position */
   94 
   95     DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
   96 
   97     /* init */
   98     ip += ((ip - prefixLowest) == 0);
   99     {
  100         U32 const current = (U32)(ip - base);
  101         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
  102         U32 const maxRep = current - windowLow;
  103         if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
  104         if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
  105     }
  106 
  107     /* Outer Loop: one iteration per match found and stored */
  108     while (1) {
  109         step = 1;
  110         nextStep = ip + kStepIncr;
  111         ip1 = ip + step;
  112 
  113         if (ip1 > ilimit) {
  114             goto _cleanup;
  115         }
  116 
  117         hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
  118         idxl0 = hashLong[hl0];
  119         matchl0 = base + idxl0;
  120 
  121         /* Inner Loop: one iteration per search / position */
  122         do {
  123             const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
  124             const U32 idxs0 = hashSmall[hs0];
  125             curr = (U32)(ip-base);
  126             matchs0 = base + idxs0;
  127 
  128             hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */
  129 
  130             /* check noDict repcode */
  131             if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
  132                 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
  133                 ip++;
  134                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
  135                 goto _match_stored;
  136             }
  137 
  138             hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
  139 
  140             if (idxl0 > prefixLowestIndex) {
  141                 /* check prefix long match */
  142                 if (MEM_read64(matchl0) == MEM_read64(ip)) {
  143                     mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
  144                     offset = (U32)(ip-matchl0);
  145                     while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
  146                     goto _match_found;
  147                 }
  148             }
  149 
  150             idxl1 = hashLong[hl1];
  151             matchl1 = base + idxl1;
  152 
  153             if (idxs0 > prefixLowestIndex) {
  154                 /* check prefix short match */
  155                 if (MEM_read32(matchs0) == MEM_read32(ip)) {
  156                     goto _search_next_long;
  157                 }
  158             }
  159 
  160             if (ip1 >= nextStep) {
  161                 PREFETCH_L1(ip1 + 64);
  162                 PREFETCH_L1(ip1 + 128);
  163                 step++;
  164                 nextStep += kStepIncr;
  165             }
  166             ip = ip1;
  167             ip1 += step;
  168 
  169             hl0 = hl1;
  170             idxl0 = idxl1;
  171             matchl0 = matchl1;
  172     #if defined(__aarch64__)
  173             PREFETCH_L1(ip+256);
  174     #endif
  175         } while (ip1 <= ilimit);
  176 
  177 _cleanup:
  178         /* save reps for next block */
  179         rep[0] = offset_1 ? offset_1 : offsetSaved;
  180         rep[1] = offset_2 ? offset_2 : offsetSaved;
  181 
  182         /* Return the last literals size */
  183         return (size_t)(iend - anchor);
  184 
  185 _search_next_long:
  186 
  187         /* check prefix long +1 match */
  188         if (idxl1 > prefixLowestIndex) {
  189             if (MEM_read64(matchl1) == MEM_read64(ip1)) {
  190                 ip = ip1;
  191                 mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
  192                 offset = (U32)(ip-matchl1);
  193                 while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
  194                 goto _match_found;
  195             }
  196         }
  197 
  198         /* if no long +1 match, explore the short match we found */
  199         mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
  200         offset = (U32)(ip - matchs0);
  201         while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
  202 
  203         /* fall-through */
  204 
  205 _match_found: /* requires ip, offset, mLength */
  206         offset_2 = offset_1;
  207         offset_1 = offset;
  208 
  209         if (step < 4) {
  210             /* It is unsafe to write this value back to the hashtable when ip1 is
  211              * greater than or equal to the new ip we will have after we're done
  212              * processing this match. Rather than perform that test directly
  213              * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
  214              * more predictable test. The minmatch even if we take a short match is
  215              * 4 bytes, so as long as step, the distance between ip and ip1
  216              * (initially) is less than 4, we know ip1 < new ip. */
  217             hashLong[hl1] = (U32)(ip1 - base);
  218         }
  219 
  220         ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
  221 
  222 _match_stored:
  223         /* match found */
  224         ip += mLength;
  225         anchor = ip;
  226 
  227         if (ip <= ilimit) {
  228             /* Complementary insertion */
  229             /* done after iLimit test, as candidates could be > iend-8 */
  230             {   U32 const indexToInsert = curr+2;
  231                 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
  232                 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
  233                 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
  234                 hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
  235             }
  236 
  237             /* check immediate repcode */
  238             while ( (ip <= ilimit)
  239                  && ( (offset_2>0)
  240                     & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
  241                 /* store sequence */
  242                 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
  243                 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
  244                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
  245                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
  246                 ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
  247                 ip += rLength;
  248                 anchor = ip;
  249                 continue;   /* faster when present ... (?) */
  250             }
  251         }
  252     }
  253 }
  254 
  255 
  256 FORCE_INLINE_TEMPLATE
  257 size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
  258         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  259         void const* src, size_t srcSize,
  260         U32 const mls /* template */)
  261 {
  262     ZSTD_compressionParameters const* cParams = &ms->cParams;
  263     U32* const hashLong = ms->hashTable;
  264     const U32 hBitsL = cParams->hashLog;
  265     U32* const hashSmall = ms->chainTable;
  266     const U32 hBitsS = cParams->chainLog;
  267     const BYTE* const base = ms->window.base;
  268     const BYTE* const istart = (const BYTE*)src;
  269     const BYTE* ip = istart;
  270     const BYTE* anchor = istart;
  271     const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
  272     /* presumes that, if there is a dictionary, it must be using Attach mode */
  273     const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
  274     const BYTE* const prefixLowest = base + prefixLowestIndex;
  275     const BYTE* const iend = istart + srcSize;
  276     const BYTE* const ilimit = iend - HASH_READ_SIZE;
  277     U32 offset_1=rep[0], offset_2=rep[1];
  278     U32 offsetSaved = 0;
  279 
  280     const ZSTD_matchState_t* const dms = ms->dictMatchState;
  281     const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
  282     const U32* const dictHashLong  = dms->hashTable;
  283     const U32* const dictHashSmall = dms->chainTable;
  284     const U32 dictStartIndex       = dms->window.dictLimit;
  285     const BYTE* const dictBase     = dms->window.base;
  286     const BYTE* const dictStart    = dictBase + dictStartIndex;
  287     const BYTE* const dictEnd      = dms->window.nextSrc;
  288     const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
  289     const U32 dictHBitsL           = dictCParams->hashLog;
  290     const U32 dictHBitsS           = dictCParams->chainLog;
  291     const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
  292 
  293     DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
  294 
  295     /* if a dictionary is attached, it must be within window range */
  296     assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
  297 
  298     /* init */
  299     ip += (dictAndPrefixLength == 0);
  300 
  301     /* dictMatchState repCode checks don't currently handle repCode == 0
  302      * disabling. */
  303     assert(offset_1 <= dictAndPrefixLength);
  304     assert(offset_2 <= dictAndPrefixLength);
  305 
  306     /* Main Search Loop */
  307     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
  308         size_t mLength;
  309         U32 offset;
  310         size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
  311         size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
  312         size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
  313         size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
  314         U32 const curr = (U32)(ip-base);
  315         U32 const matchIndexL = hashLong[h2];
  316         U32 matchIndexS = hashSmall[h];
  317         const BYTE* matchLong = base + matchIndexL;
  318         const BYTE* match = base + matchIndexS;
  319         const U32 repIndex = curr + 1 - offset_1;
  320         const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
  321                                dictBase + (repIndex - dictIndexDelta) :
  322                                base + repIndex;
  323         hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */
  324 
  325         /* check repcode */
  326         if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
  327             && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
  328             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
  329             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
  330             ip++;
  331             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
  332             goto _match_stored;
  333         }
  334 
  335         if (matchIndexL > prefixLowestIndex) {
  336             /* check prefix long match */
  337             if (MEM_read64(matchLong) == MEM_read64(ip)) {
  338                 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
  339                 offset = (U32)(ip-matchLong);
  340                 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
  341                 goto _match_found;
  342             }
  343         } else {
  344             /* check dictMatchState long match */
  345             U32 const dictMatchIndexL = dictHashLong[dictHL];
  346             const BYTE* dictMatchL = dictBase + dictMatchIndexL;
  347             assert(dictMatchL < dictEnd);
  348 
  349             if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
  350                 mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
  351                 offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
  352                 while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
  353                 goto _match_found;
  354         }   }
  355 
  356         if (matchIndexS > prefixLowestIndex) {
  357             /* check prefix short match */
  358             if (MEM_read32(match) == MEM_read32(ip)) {
  359                 goto _search_next_long;
  360             }
  361         } else {
  362             /* check dictMatchState short match */
  363             U32 const dictMatchIndexS = dictHashSmall[dictHS];
  364             match = dictBase + dictMatchIndexS;
  365             matchIndexS = dictMatchIndexS + dictIndexDelta;
  366 
  367             if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
  368                 goto _search_next_long;
  369         }   }
  370 
  371         ip += ((ip-anchor) >> kSearchStrength) + 1;
  372 #if defined(__aarch64__)
  373         PREFETCH_L1(ip+256);
  374 #endif
  375         continue;
  376 
  377 _search_next_long:
  378 
  379         {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
  380             size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
  381             U32 const matchIndexL3 = hashLong[hl3];
  382             const BYTE* matchL3 = base + matchIndexL3;
  383             hashLong[hl3] = curr + 1;
  384 
  385             /* check prefix long +1 match */
  386             if (matchIndexL3 > prefixLowestIndex) {
  387                 if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
  388                     mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
  389                     ip++;
  390                     offset = (U32)(ip-matchL3);
  391                     while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
  392                     goto _match_found;
  393                 }
  394             } else {
  395                 /* check dict long +1 match */
  396                 U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
  397                 const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
  398                 assert(dictMatchL3 < dictEnd);
  399                 if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
  400                     mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
  401                     ip++;
  402                     offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
  403                     while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
  404                     goto _match_found;
  405         }   }   }
  406 
  407         /* if no long +1 match, explore the short match we found */
  408         if (matchIndexS < prefixLowestIndex) {
  409             mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
  410             offset = (U32)(curr - matchIndexS);
  411             while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
  412         } else {
  413             mLength = ZSTD_count(ip+4, match+4, iend) + 4;
  414             offset = (U32)(ip - match);
  415             while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
  416         }
  417 
  418 _match_found:
  419         offset_2 = offset_1;
  420         offset_1 = offset;
  421 
  422         ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
  423 
  424 _match_stored:
  425         /* match found */
  426         ip += mLength;
  427         anchor = ip;
  428 
  429         if (ip <= ilimit) {
  430             /* Complementary insertion */
  431             /* done after iLimit test, as candidates could be > iend-8 */
  432             {   U32 const indexToInsert = curr+2;
  433                 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
  434                 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
  435                 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
  436                 hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
  437             }
  438 
  439             /* check immediate repcode */
  440             while (ip <= ilimit) {
  441                 U32 const current2 = (U32)(ip-base);
  442                 U32 const repIndex2 = current2 - offset_2;
  443                 const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
  444                         dictBase + repIndex2 - dictIndexDelta :
  445                         base + repIndex2;
  446                 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
  447                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
  448                     const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
  449                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
  450                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
  451                     ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
  452                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
  453                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
  454                     ip += repLength2;
  455                     anchor = ip;
  456                     continue;
  457                 }
  458                 break;
  459             }
  460         }
  461     }   /* while (ip < ilimit) */
  462 
  463     /* save reps for next block */
  464     rep[0] = offset_1 ? offset_1 : offsetSaved;
  465     rep[1] = offset_2 ? offset_2 : offsetSaved;
  466 
  467     /* Return the last literals size */
  468     return (size_t)(iend - anchor);
  469 }
  470 
  471 #define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
  472     static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
  473             ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
  474             void const* src, size_t srcSize)                                                             \
  475     {                                                                                                    \
  476         return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
  477     }
  478 
  479 ZSTD_GEN_DFAST_FN(noDict, 4)
  480 ZSTD_GEN_DFAST_FN(noDict, 5)
  481 ZSTD_GEN_DFAST_FN(noDict, 6)
  482 ZSTD_GEN_DFAST_FN(noDict, 7)
  483 
  484 ZSTD_GEN_DFAST_FN(dictMatchState, 4)
  485 ZSTD_GEN_DFAST_FN(dictMatchState, 5)
  486 ZSTD_GEN_DFAST_FN(dictMatchState, 6)
  487 ZSTD_GEN_DFAST_FN(dictMatchState, 7)
  488 
  489 
  490 size_t ZSTD_compressBlock_doubleFast(
  491         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  492         void const* src, size_t srcSize)
  493 {
  494     const U32 mls = ms->cParams.minMatch;
  495     switch(mls)
  496     {
  497     default: /* includes case 3 */
  498     case 4 :
  499         return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
  500     case 5 :
  501         return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
  502     case 6 :
  503         return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
  504     case 7 :
  505         return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
  506     }
  507 }
  508 
  509 
  510 size_t ZSTD_compressBlock_doubleFast_dictMatchState(
  511         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  512         void const* src, size_t srcSize)
  513 {
  514     const U32 mls = ms->cParams.minMatch;
  515     switch(mls)
  516     {
  517     default: /* includes case 3 */
  518     case 4 :
  519         return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
  520     case 5 :
  521         return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
  522     case 6 :
  523         return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
  524     case 7 :
  525         return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
  526     }
  527 }
  528 
  529 
  530 static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
  531         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  532         void const* src, size_t srcSize,
  533         U32 const mls /* template */)
  534 {
  535     ZSTD_compressionParameters const* cParams = &ms->cParams;
  536     U32* const hashLong = ms->hashTable;
  537     U32  const hBitsL = cParams->hashLog;
  538     U32* const hashSmall = ms->chainTable;
  539     U32  const hBitsS = cParams->chainLog;
  540     const BYTE* const istart = (const BYTE*)src;
  541     const BYTE* ip = istart;
  542     const BYTE* anchor = istart;
  543     const BYTE* const iend = istart + srcSize;
  544     const BYTE* const ilimit = iend - 8;
  545     const BYTE* const base = ms->window.base;
  546     const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
  547     const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
  548     const U32   dictStartIndex = lowLimit;
  549     const U32   dictLimit = ms->window.dictLimit;
  550     const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
  551     const BYTE* const prefixStart = base + prefixStartIndex;
  552     const BYTE* const dictBase = ms->window.dictBase;
  553     const BYTE* const dictStart = dictBase + dictStartIndex;
  554     const BYTE* const dictEnd = dictBase + prefixStartIndex;
  555     U32 offset_1=rep[0], offset_2=rep[1];
  556 
  557     DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
  558 
  559     /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
  560     if (prefixStartIndex == dictStartIndex)
  561         return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
  562 
  563     /* Search Loop */
  564     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
  565         const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
  566         const U32 matchIndex = hashSmall[hSmall];
  567         const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
  568         const BYTE* match = matchBase + matchIndex;
  569 
  570         const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
  571         const U32 matchLongIndex = hashLong[hLong];
  572         const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
  573         const BYTE* matchLong = matchLongBase + matchLongIndex;
  574 
  575         const U32 curr = (U32)(ip-base);
  576         const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
  577         const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
  578         const BYTE* const repMatch = repBase + repIndex;
  579         size_t mLength;
  580         hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
  581 
  582         if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
  583             & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
  584           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
  585             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
  586             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
  587             ip++;
  588             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
  589         } else {
  590             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
  591                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
  592                 const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
  593                 U32 offset;
  594                 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
  595                 offset = curr - matchLongIndex;
  596                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
  597                 offset_2 = offset_1;
  598                 offset_1 = offset;
  599                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
  600 
  601             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
  602                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
  603                 U32 const matchIndex3 = hashLong[h3];
  604                 const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
  605                 const BYTE* match3 = match3Base + matchIndex3;
  606                 U32 offset;
  607                 hashLong[h3] = curr + 1;
  608                 if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
  609                     const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
  610                     const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
  611                     mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
  612                     ip++;
  613                     offset = curr+1 - matchIndex3;
  614                     while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
  615                 } else {
  616                     const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
  617                     const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
  618                     mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
  619                     offset = curr - matchIndex;
  620                     while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
  621                 }
  622                 offset_2 = offset_1;
  623                 offset_1 = offset;
  624                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
  625 
  626             } else {
  627                 ip += ((ip-anchor) >> kSearchStrength) + 1;
  628                 continue;
  629         }   }
  630 
  631         /* move to next sequence start */
  632         ip += mLength;
  633         anchor = ip;
  634 
  635         if (ip <= ilimit) {
  636             /* Complementary insertion */
  637             /* done after iLimit test, as candidates could be > iend-8 */
  638             {   U32 const indexToInsert = curr+2;
  639                 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
  640                 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
  641                 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
  642                 hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
  643             }
  644 
  645             /* check immediate repcode */
  646             while (ip <= ilimit) {
  647                 U32 const current2 = (U32)(ip-base);
  648                 U32 const repIndex2 = current2 - offset_2;
  649                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
  650                 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
  651                     & (offset_2 <= current2 - dictStartIndex))
  652                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
  653                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
  654                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
  655                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
  656                     ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
  657                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
  658                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
  659                     ip += repLength2;
  660                     anchor = ip;
  661                     continue;
  662                 }
  663                 break;
  664     }   }   }
  665 
  666     /* save reps for next block */
  667     rep[0] = offset_1;
  668     rep[1] = offset_2;
  669 
  670     /* Return the last literals size */
  671     return (size_t)(iend - anchor);
  672 }
  673 
  674 ZSTD_GEN_DFAST_FN(extDict, 4)
  675 ZSTD_GEN_DFAST_FN(extDict, 5)
  676 ZSTD_GEN_DFAST_FN(extDict, 6)
  677 ZSTD_GEN_DFAST_FN(extDict, 7)
  678 
  679 size_t ZSTD_compressBlock_doubleFast_extDict(
  680         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  681         void const* src, size_t srcSize)
  682 {
  683     U32 const mls = ms->cParams.minMatch;
  684     switch(mls)
  685     {
  686     default: /* includes case 3 */
  687     case 4 :
  688         return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
  689     case 5 :
  690         return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
  691     case 6 :
  692         return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
  693     case 7 :
  694         return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
  695     }
  696 }

Cache object: 17287bfb4dcdb156b14ea42c6fcf7ef2


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.