The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
    3  * All rights reserved.
    4  *
    5  * This source code is licensed under both the BSD-style license (found in the
    6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
    7  * in the COPYING file in the root directory of this source tree).
    8  * You may select, at your option, one of the above-listed licenses.
    9  */
   10 
   11 #include "zstd_compress_internal.h"
   12 #include "zstd_lazy.h"
   13 
   14 
   15 /*-*************************************
   16 *  Binary Tree search
   17 ***************************************/
   18 
   19 static void
   20 ZSTD_updateDUBT(ZSTD_matchState_t* ms,
   21                 const BYTE* ip, const BYTE* iend,
   22                 U32 mls)
   23 {
   24     const ZSTD_compressionParameters* const cParams = &ms->cParams;
   25     U32* const hashTable = ms->hashTable;
   26     U32  const hashLog = cParams->hashLog;
   27 
   28     U32* const bt = ms->chainTable;
   29     U32  const btLog  = cParams->chainLog - 1;
   30     U32  const btMask = (1 << btLog) - 1;
   31 
   32     const BYTE* const base = ms->window.base;
   33     U32 const target = (U32)(ip - base);
   34     U32 idx = ms->nextToUpdate;
   35 
   36     if (idx != target)
   37         DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
   38                     idx, target, ms->window.dictLimit);
   39     assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
   40     (void)iend;
   41 
   42     assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
   43     for ( ; idx < target ; idx++) {
   44         size_t const h  = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
   45         U32    const matchIndex = hashTable[h];
   46 
   47         U32*   const nextCandidatePtr = bt + 2*(idx&btMask);
   48         U32*   const sortMarkPtr  = nextCandidatePtr + 1;
   49 
   50         DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
   51         hashTable[h] = idx;   /* Update Hash Table */
   52         *nextCandidatePtr = matchIndex;   /* update BT like a chain */
   53         *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
   54     }
   55     ms->nextToUpdate = target;
   56 }
   57 
   58 
   59 /** ZSTD_insertDUBT1() :
   60  *  sort one already inserted but unsorted position
   61  *  assumption : current >= btlow == (current - btmask)
   62  *  doesn't fail */
   63 static void
   64 ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
   65                  U32 current, const BYTE* inputEnd,
   66                  U32 nbCompares, U32 btLow,
   67                  const ZSTD_dictMode_e dictMode)
   68 {
   69     const ZSTD_compressionParameters* const cParams = &ms->cParams;
   70     U32* const bt = ms->chainTable;
   71     U32  const btLog  = cParams->chainLog - 1;
   72     U32  const btMask = (1 << btLog) - 1;
   73     size_t commonLengthSmaller=0, commonLengthLarger=0;
   74     const BYTE* const base = ms->window.base;
   75     const BYTE* const dictBase = ms->window.dictBase;
   76     const U32 dictLimit = ms->window.dictLimit;
   77     const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
   78     const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
   79     const BYTE* const dictEnd = dictBase + dictLimit;
   80     const BYTE* const prefixStart = base + dictLimit;
   81     const BYTE* match;
   82     U32* smallerPtr = bt + 2*(current&btMask);
   83     U32* largerPtr  = smallerPtr + 1;
   84     U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
   85     U32 dummy32;   /* to be nullified at the end */
   86     U32 const windowValid = ms->window.lowLimit;
   87     U32 const maxDistance = 1U << cParams->windowLog;
   88     U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
   89 
   90 
   91     DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
   92                 current, dictLimit, windowLow);
   93     assert(current >= btLow);
   94     assert(ip < iend);   /* condition for ZSTD_count */
   95 
   96     while (nbCompares-- && (matchIndex > windowLow)) {
   97         U32* const nextPtr = bt + 2*(matchIndex & btMask);
   98         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
   99         assert(matchIndex < current);
  100         /* note : all candidates are now supposed sorted,
  101          * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
  102          * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
  103 
  104         if ( (dictMode != ZSTD_extDict)
  105           || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
  106           || (current < dictLimit) /* both in extDict */) {
  107             const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
  108                                      || (matchIndex+matchLength >= dictLimit)) ?
  109                                         base : dictBase;
  110             assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
  111                  || (current < dictLimit) );
  112             match = mBase + matchIndex;
  113             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
  114         } else {
  115             match = dictBase + matchIndex;
  116             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  117             if (matchIndex+matchLength >= dictLimit)
  118                 match = base + matchIndex;   /* preparation for next read of match[matchLength] */
  119         }
  120 
  121         DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
  122                     current, matchIndex, (U32)matchLength);
  123 
  124         if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
  125             break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
  126         }
  127 
  128         if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
  129             /* match is smaller than current */
  130             *smallerPtr = matchIndex;             /* update smaller idx */
  131             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
  132             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
  133             DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
  134                         matchIndex, btLow, nextPtr[1]);
  135             smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
  136             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
  137         } else {
  138             /* match is larger than current */
  139             *largerPtr = matchIndex;
  140             commonLengthLarger = matchLength;
  141             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
  142             DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
  143                         matchIndex, btLow, nextPtr[0]);
  144             largerPtr = nextPtr;
  145             matchIndex = nextPtr[0];
  146     }   }
  147 
  148     *smallerPtr = *largerPtr = 0;
  149 }
  150 
  151 
  152 static size_t
  153 ZSTD_DUBT_findBetterDictMatch (
  154         ZSTD_matchState_t* ms,
  155         const BYTE* const ip, const BYTE* const iend,
  156         size_t* offsetPtr,
  157         size_t bestLength,
  158         U32 nbCompares,
  159         U32 const mls,
  160         const ZSTD_dictMode_e dictMode)
  161 {
  162     const ZSTD_matchState_t * const dms = ms->dictMatchState;
  163     const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
  164     const U32 * const dictHashTable = dms->hashTable;
  165     U32         const hashLog = dmsCParams->hashLog;
  166     size_t      const h  = ZSTD_hashPtr(ip, hashLog, mls);
  167     U32               dictMatchIndex = dictHashTable[h];
  168 
  169     const BYTE* const base = ms->window.base;
  170     const BYTE* const prefixStart = base + ms->window.dictLimit;
  171     U32         const current = (U32)(ip-base);
  172     const BYTE* const dictBase = dms->window.base;
  173     const BYTE* const dictEnd = dms->window.nextSrc;
  174     U32         const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
  175     U32         const dictLowLimit = dms->window.lowLimit;
  176     U32         const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
  177 
  178     U32*        const dictBt = dms->chainTable;
  179     U32         const btLog  = dmsCParams->chainLog - 1;
  180     U32         const btMask = (1 << btLog) - 1;
  181     U32         const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
  182 
  183     size_t commonLengthSmaller=0, commonLengthLarger=0;
  184 
  185     (void)dictMode;
  186     assert(dictMode == ZSTD_dictMatchState);
  187 
  188     while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
  189         U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
  190         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
  191         const BYTE* match = dictBase + dictMatchIndex;
  192         matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  193         if (dictMatchIndex+matchLength >= dictHighLimit)
  194             match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */
  195 
  196         if (matchLength > bestLength) {
  197             U32 matchIndex = dictMatchIndex + dictIndexDelta;
  198             if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
  199                 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
  200                     current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
  201                 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
  202             }
  203             if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
  204                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
  205             }
  206         }
  207 
  208         if (match[matchLength] < ip[matchLength]) {
  209             if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
  210             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
  211             dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
  212         } else {
  213             /* match is larger than current */
  214             if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
  215             commonLengthLarger = matchLength;
  216             dictMatchIndex = nextPtr[0];
  217         }
  218     }
  219 
  220     if (bestLength >= MINMATCH) {
  221         U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
  222         DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
  223                     current, (U32)bestLength, (U32)*offsetPtr, mIndex);
  224     }
  225     return bestLength;
  226 
  227 }
  228 
  229 
  230 static size_t
  231 ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
  232                         const BYTE* const ip, const BYTE* const iend,
  233                         size_t* offsetPtr,
  234                         U32 const mls,
  235                         const ZSTD_dictMode_e dictMode)
  236 {
  237     const ZSTD_compressionParameters* const cParams = &ms->cParams;
  238     U32*   const hashTable = ms->hashTable;
  239     U32    const hashLog = cParams->hashLog;
  240     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
  241     U32          matchIndex  = hashTable[h];
  242 
  243     const BYTE* const base = ms->window.base;
  244     U32    const current = (U32)(ip-base);
  245     U32    const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
  246 
  247     U32*   const bt = ms->chainTable;
  248     U32    const btLog  = cParams->chainLog - 1;
  249     U32    const btMask = (1 << btLog) - 1;
  250     U32    const btLow = (btMask >= current) ? 0 : current - btMask;
  251     U32    const unsortLimit = MAX(btLow, windowLow);
  252 
  253     U32*         nextCandidate = bt + 2*(matchIndex&btMask);
  254     U32*         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
  255     U32          nbCompares = 1U << cParams->searchLog;
  256     U32          nbCandidates = nbCompares;
  257     U32          previousCandidate = 0;
  258 
  259     DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
  260     assert(ip <= iend-8);   /* required for h calculation */
  261 
  262     /* reach end of unsorted candidates list */
  263     while ( (matchIndex > unsortLimit)
  264          && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
  265          && (nbCandidates > 1) ) {
  266         DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
  267                     matchIndex);
  268         *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, to move up back to original position */
  269         previousCandidate = matchIndex;
  270         matchIndex = *nextCandidate;
  271         nextCandidate = bt + 2*(matchIndex&btMask);
  272         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
  273         nbCandidates --;
  274     }
  275 
  276     /* nullify last candidate if it's still unsorted
  277      * simplification, detrimental to compression ratio, beneficial for speed */
  278     if ( (matchIndex > unsortLimit)
  279       && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
  280         DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
  281                     matchIndex);
  282         *nextCandidate = *unsortedMark = 0;
  283     }
  284 
  285     /* batch sort stacked candidates */
  286     matchIndex = previousCandidate;
  287     while (matchIndex) {  /* will end on matchIndex == 0 */
  288         U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
  289         U32 const nextCandidateIdx = *nextCandidateIdxPtr;
  290         ZSTD_insertDUBT1(ms, matchIndex, iend,
  291                          nbCandidates, unsortLimit, dictMode);
  292         matchIndex = nextCandidateIdx;
  293         nbCandidates++;
  294     }
  295 
  296     /* find longest match */
  297     {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
  298         const BYTE* const dictBase = ms->window.dictBase;
  299         const U32 dictLimit = ms->window.dictLimit;
  300         const BYTE* const dictEnd = dictBase + dictLimit;
  301         const BYTE* const prefixStart = base + dictLimit;
  302         U32* smallerPtr = bt + 2*(current&btMask);
  303         U32* largerPtr  = bt + 2*(current&btMask) + 1;
  304         U32 matchEndIdx = current + 8 + 1;
  305         U32 dummy32;   /* to be nullified at the end */
  306         size_t bestLength = 0;
  307 
  308         matchIndex  = hashTable[h];
  309         hashTable[h] = current;   /* Update Hash Table */
  310 
  311         while (nbCompares-- && (matchIndex > windowLow)) {
  312             U32* const nextPtr = bt + 2*(matchIndex & btMask);
  313             size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
  314             const BYTE* match;
  315 
  316             if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
  317                 match = base + matchIndex;
  318                 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
  319             } else {
  320                 match = dictBase + matchIndex;
  321                 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  322                 if (matchIndex+matchLength >= dictLimit)
  323                     match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
  324             }
  325 
  326             if (matchLength > bestLength) {
  327                 if (matchLength > matchEndIdx - matchIndex)
  328                     matchEndIdx = matchIndex + (U32)matchLength;
  329                 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
  330                     bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
  331                 if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
  332                     if (dictMode == ZSTD_dictMatchState) {
  333                         nbCompares = 0; /* in addition to avoiding checking any
  334                                          * further in this loop, make sure we
  335                                          * skip checking in the dictionary. */
  336                     }
  337                     break;   /* drop, to guarantee consistency (miss a little bit of compression) */
  338                 }
  339             }
  340 
  341             if (match[matchLength] < ip[matchLength]) {
  342                 /* match is smaller than current */
  343                 *smallerPtr = matchIndex;             /* update smaller idx */
  344                 commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
  345                 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
  346                 smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
  347                 matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
  348             } else {
  349                 /* match is larger than current */
  350                 *largerPtr = matchIndex;
  351                 commonLengthLarger = matchLength;
  352                 if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
  353                 largerPtr = nextPtr;
  354                 matchIndex = nextPtr[0];
  355         }   }
  356 
  357         *smallerPtr = *largerPtr = 0;
  358 
  359         if (dictMode == ZSTD_dictMatchState && nbCompares) {
  360             bestLength = ZSTD_DUBT_findBetterDictMatch(
  361                     ms, ip, iend,
  362                     offsetPtr, bestLength, nbCompares,
  363                     mls, dictMode);
  364         }
  365 
  366         assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
  367         ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
  368         if (bestLength >= MINMATCH) {
  369             U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
  370             DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
  371                         current, (U32)bestLength, (U32)*offsetPtr, mIndex);
  372         }
  373         return bestLength;
  374     }
  375 }
  376 
  377 
  378 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
  379 FORCE_INLINE_TEMPLATE size_t
  380 ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
  381                 const BYTE* const ip, const BYTE* const iLimit,
  382                       size_t* offsetPtr,
  383                 const U32 mls /* template */,
  384                 const ZSTD_dictMode_e dictMode)
  385 {
  386     DEBUGLOG(7, "ZSTD_BtFindBestMatch");
  387     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
  388     ZSTD_updateDUBT(ms, ip, iLimit, mls);
  389     return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
  390 }
  391 
  392 
  393 static size_t
  394 ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
  395                             const BYTE* ip, const BYTE* const iLimit,
  396                                   size_t* offsetPtr)
  397 {
  398     switch(ms->cParams.minMatch)
  399     {
  400     default : /* includes case 3 */
  401     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
  402     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
  403     case 7 :
  404     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
  405     }
  406 }
  407 
  408 
  409 static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
  410                         ZSTD_matchState_t* ms,
  411                         const BYTE* ip, const BYTE* const iLimit,
  412                         size_t* offsetPtr)
  413 {
  414     switch(ms->cParams.minMatch)
  415     {
  416     default : /* includes case 3 */
  417     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
  418     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
  419     case 7 :
  420     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
  421     }
  422 }
  423 
  424 
  425 static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
  426                         ZSTD_matchState_t* ms,
  427                         const BYTE* ip, const BYTE* const iLimit,
  428                         size_t* offsetPtr)
  429 {
  430     switch(ms->cParams.minMatch)
  431     {
  432     default : /* includes case 3 */
  433     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
  434     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
  435     case 7 :
  436     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
  437     }
  438 }
  439 
  440 
  441 
  442 /* *********************************
  443 *  Hash Chain
  444 ***********************************/
  445 #define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
  446 
  447 /* Update chains up to ip (excluded)
  448    Assumption : always within prefix (i.e. not within extDict) */
  449 static U32 ZSTD_insertAndFindFirstIndex_internal(
  450                         ZSTD_matchState_t* ms,
  451                         const ZSTD_compressionParameters* const cParams,
  452                         const BYTE* ip, U32 const mls)
  453 {
  454     U32* const hashTable  = ms->hashTable;
  455     const U32 hashLog = cParams->hashLog;
  456     U32* const chainTable = ms->chainTable;
  457     const U32 chainMask = (1 << cParams->chainLog) - 1;
  458     const BYTE* const base = ms->window.base;
  459     const U32 target = (U32)(ip - base);
  460     U32 idx = ms->nextToUpdate;
  461 
  462     while(idx < target) { /* catch up */
  463         size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
  464         NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
  465         hashTable[h] = idx;
  466         idx++;
  467     }
  468 
  469     ms->nextToUpdate = target;
  470     return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
  471 }
  472 
  473 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
  474     const ZSTD_compressionParameters* const cParams = &ms->cParams;
  475     return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
  476 }
  477 
  478 
  479 /* inlining is important to hardwire a hot branch (template emulation) */
  480 FORCE_INLINE_TEMPLATE
  481 size_t ZSTD_HcFindBestMatch_generic (
  482                         ZSTD_matchState_t* ms,
  483                         const BYTE* const ip, const BYTE* const iLimit,
  484                         size_t* offsetPtr,
  485                         const U32 mls, const ZSTD_dictMode_e dictMode)
  486 {
  487     const ZSTD_compressionParameters* const cParams = &ms->cParams;
  488     U32* const chainTable = ms->chainTable;
  489     const U32 chainSize = (1 << cParams->chainLog);
  490     const U32 chainMask = chainSize-1;
  491     const BYTE* const base = ms->window.base;
  492     const BYTE* const dictBase = ms->window.dictBase;
  493     const U32 dictLimit = ms->window.dictLimit;
  494     const BYTE* const prefixStart = base + dictLimit;
  495     const BYTE* const dictEnd = dictBase + dictLimit;
  496     const U32 current = (U32)(ip-base);
  497     const U32 maxDistance = 1U << cParams->windowLog;
  498     const U32 lowestValid = ms->window.lowLimit;
  499     const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
  500     const U32 isDictionary = (ms->loadedDictEnd != 0);
  501     const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
  502     const U32 minChain = current > chainSize ? current - chainSize : 0;
  503     U32 nbAttempts = 1U << cParams->searchLog;
  504     size_t ml=4-1;
  505 
  506     /* HC4 match finder */
  507     U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
  508 
  509     for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
  510         size_t currentMl=0;
  511         if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
  512             const BYTE* const match = base + matchIndex;
  513             assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
  514             if (match[ml] == ip[ml])   /* potentially better */
  515                 currentMl = ZSTD_count(ip, match, iLimit);
  516         } else {
  517             const BYTE* const match = dictBase + matchIndex;
  518             assert(match+4 <= dictEnd);
  519             if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
  520                 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
  521         }
  522 
  523         /* save best solution */
  524         if (currentMl > ml) {
  525             ml = currentMl;
  526             *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
  527             if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
  528         }
  529 
  530         if (matchIndex <= minChain) break;
  531         matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
  532     }
  533 
  534     if (dictMode == ZSTD_dictMatchState) {
  535         const ZSTD_matchState_t* const dms = ms->dictMatchState;
  536         const U32* const dmsChainTable = dms->chainTable;
  537         const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
  538         const U32 dmsChainMask         = dmsChainSize - 1;
  539         const U32 dmsLowestIndex       = dms->window.dictLimit;
  540         const BYTE* const dmsBase      = dms->window.base;
  541         const BYTE* const dmsEnd       = dms->window.nextSrc;
  542         const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
  543         const U32 dmsIndexDelta        = dictLimit - dmsSize;
  544         const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
  545 
  546         matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
  547 
  548         for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
  549             size_t currentMl=0;
  550             const BYTE* const match = dmsBase + matchIndex;
  551             assert(match+4 <= dmsEnd);
  552             if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
  553                 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
  554 
  555             /* save best solution */
  556             if (currentMl > ml) {
  557                 ml = currentMl;
  558                 *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
  559                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
  560             }
  561 
  562             if (matchIndex <= dmsMinChain) break;
  563             matchIndex = dmsChainTable[matchIndex & dmsChainMask];
  564         }
  565     }
  566 
  567     return ml;
  568 }
  569 
  570 
  571 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
  572                         ZSTD_matchState_t* ms,
  573                         const BYTE* ip, const BYTE* const iLimit,
  574                         size_t* offsetPtr)
  575 {
  576     switch(ms->cParams.minMatch)
  577     {
  578     default : /* includes case 3 */
  579     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
  580     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
  581     case 7 :
  582     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
  583     }
  584 }
  585 
  586 
  587 static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
  588                         ZSTD_matchState_t* ms,
  589                         const BYTE* ip, const BYTE* const iLimit,
  590                         size_t* offsetPtr)
  591 {
  592     switch(ms->cParams.minMatch)
  593     {
  594     default : /* includes case 3 */
  595     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
  596     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
  597     case 7 :
  598     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
  599     }
  600 }
  601 
  602 
  603 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
  604                         ZSTD_matchState_t* ms,
  605                         const BYTE* ip, const BYTE* const iLimit,
  606                         size_t* offsetPtr)
  607 {
  608     switch(ms->cParams.minMatch)
  609     {
  610     default : /* includes case 3 */
  611     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
  612     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
  613     case 7 :
  614     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
  615     }
  616 }
  617 
  618 
  619 /* *******************************
  620 *  Common parser - lazy strategy
  621 *********************************/
  622 typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
  623 
  624 FORCE_INLINE_TEMPLATE size_t
  625 ZSTD_compressBlock_lazy_generic(
  626                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
  627                         U32 rep[ZSTD_REP_NUM],
  628                         const void* src, size_t srcSize,
  629                         const searchMethod_e searchMethod, const U32 depth,
  630                         ZSTD_dictMode_e const dictMode)
  631 {
  632     const BYTE* const istart = (const BYTE*)src;
  633     const BYTE* ip = istart;
  634     const BYTE* anchor = istart;
  635     const BYTE* const iend = istart + srcSize;
  636     const BYTE* const ilimit = iend - 8;
  637     const BYTE* const base = ms->window.base;
  638     const U32 prefixLowestIndex = ms->window.dictLimit;
  639     const BYTE* const prefixLowest = base + prefixLowestIndex;
  640 
  641     typedef size_t (*searchMax_f)(
  642                         ZSTD_matchState_t* ms,
  643                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
  644     searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
  645         (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
  646                                          : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
  647         (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
  648                                          : ZSTD_HcFindBestMatch_selectMLS);
  649     U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
  650 
  651     const ZSTD_matchState_t* const dms = ms->dictMatchState;
  652     const U32 dictLowestIndex      = dictMode == ZSTD_dictMatchState ?
  653                                      dms->window.dictLimit : 0;
  654     const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
  655                                      dms->window.base : NULL;
  656     const BYTE* const dictLowest   = dictMode == ZSTD_dictMatchState ?
  657                                      dictBase + dictLowestIndex : NULL;
  658     const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
  659                                      dms->window.nextSrc : NULL;
  660     const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
  661                                      prefixLowestIndex - (U32)(dictEnd - dictBase) :
  662                                      0;
  663     const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
  664 
  665     DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
  666 
  667     /* init */
  668     ip += (dictAndPrefixLength == 0);
  669     if (dictMode == ZSTD_noDict) {
  670         U32 const current = (U32)(ip - base);
  671         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
  672         U32 const maxRep = current - windowLow;
  673         if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
  674         if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
  675     }
  676     if (dictMode == ZSTD_dictMatchState) {
  677         /* dictMatchState repCode checks don't currently handle repCode == 0
  678          * disabling. */
  679         assert(offset_1 <= dictAndPrefixLength);
  680         assert(offset_2 <= dictAndPrefixLength);
  681     }
  682 
  683     /* Match Loop */
  684 #if defined(__GNUC__) && defined(__x86_64__)
  685     /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
  686      * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
  687      */
  688     __asm__(".p2align 5");
  689 #endif
  690     while (ip < ilimit) {
  691         size_t matchLength=0;
  692         size_t offset=0;
  693         const BYTE* start=ip+1;
  694 
  695         /* check repCode */
  696         if (dictMode == ZSTD_dictMatchState) {
  697             const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
  698             const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
  699                                 && repIndex < prefixLowestIndex) ?
  700                                    dictBase + (repIndex - dictIndexDelta) :
  701                                    base + repIndex;
  702             if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
  703                 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
  704                 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
  705                 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
  706                 if (depth==0) goto _storeSequence;
  707             }
  708         }
  709         if ( dictMode == ZSTD_noDict
  710           && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
  711             matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
  712             if (depth==0) goto _storeSequence;
  713         }
  714 
  715         /* first search (depth 0) */
  716         {   size_t offsetFound = 999999999;
  717             size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
  718             if (ml2 > matchLength)
  719                 matchLength = ml2, start = ip, offset=offsetFound;
  720         }
  721 
  722         if (matchLength < 4) {
  723             ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
  724             continue;
  725         }
  726 
  727         /* let's try to find a better solution */
  728         if (depth>=1)
  729         while (ip<ilimit) {
  730             ip ++;
  731             if ( (dictMode == ZSTD_noDict)
  732               && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
  733                 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
  734                 int const gain2 = (int)(mlRep * 3);
  735                 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  736                 if ((mlRep >= 4) && (gain2 > gain1))
  737                     matchLength = mlRep, offset = 0, start = ip;
  738             }
  739             if (dictMode == ZSTD_dictMatchState) {
  740                 const U32 repIndex = (U32)(ip - base) - offset_1;
  741                 const BYTE* repMatch = repIndex < prefixLowestIndex ?
  742                                dictBase + (repIndex - dictIndexDelta) :
  743                                base + repIndex;
  744                 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
  745                     && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
  746                     const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
  747                     size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
  748                     int const gain2 = (int)(mlRep * 3);
  749                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  750                     if ((mlRep >= 4) && (gain2 > gain1))
  751                         matchLength = mlRep, offset = 0, start = ip;
  752                 }
  753             }
  754             {   size_t offset2=999999999;
  755                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
  756                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
  757                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
  758                 if ((ml2 >= 4) && (gain2 > gain1)) {
  759                     matchLength = ml2, offset = offset2, start = ip;
  760                     continue;   /* search a better one */
  761             }   }
  762 
  763             /* let's find an even better one */
  764             if ((depth==2) && (ip<ilimit)) {
  765                 ip ++;
  766                 if ( (dictMode == ZSTD_noDict)
  767                   && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
  768                     size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
  769                     int const gain2 = (int)(mlRep * 4);
  770                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  771                     if ((mlRep >= 4) && (gain2 > gain1))
  772                         matchLength = mlRep, offset = 0, start = ip;
  773                 }
  774                 if (dictMode == ZSTD_dictMatchState) {
  775                     const U32 repIndex = (U32)(ip - base) - offset_1;
  776                     const BYTE* repMatch = repIndex < prefixLowestIndex ?
  777                                    dictBase + (repIndex - dictIndexDelta) :
  778                                    base + repIndex;
  779                     if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
  780                         && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
  781                         const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
  782                         size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
  783                         int const gain2 = (int)(mlRep * 4);
  784                         int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  785                         if ((mlRep >= 4) && (gain2 > gain1))
  786                             matchLength = mlRep, offset = 0, start = ip;
  787                     }
  788                 }
  789                 {   size_t offset2=999999999;
  790                     size_t const ml2 = searchMax(ms, ip, iend, &offset2);
  791                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
  792                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
  793                     if ((ml2 >= 4) && (gain2 > gain1)) {
  794                         matchLength = ml2, offset = offset2, start = ip;
  795                         continue;
  796             }   }   }
  797             break;  /* nothing found : store previous solution */
  798         }
  799 
  800         /* NOTE:
  801          * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
  802          * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
  803          * overflows the pointer, which is undefined behavior.
  804          */
  805         /* catch up */
  806         if (offset) {
  807             if (dictMode == ZSTD_noDict) {
  808                 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
  809                      && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
  810                     { start--; matchLength++; }
  811             }
  812             if (dictMode == ZSTD_dictMatchState) {
  813                 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
  814                 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
  815                 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
  816                 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
  817             }
  818             offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
  819         }
  820         /* store sequence */
  821 _storeSequence:
  822         {   size_t const litLength = start - anchor;
  823             ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
  824             anchor = ip = start + matchLength;
  825         }
  826 
  827         /* check immediate repcode */
  828         if (dictMode == ZSTD_dictMatchState) {
  829             while (ip <= ilimit) {
  830                 U32 const current2 = (U32)(ip-base);
  831                 U32 const repIndex = current2 - offset_2;
  832                 const BYTE* repMatch = dictMode == ZSTD_dictMatchState
  833                     && repIndex < prefixLowestIndex ?
  834                         dictBase - dictIndexDelta + repIndex :
  835                         base + repIndex;
  836                 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
  837                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
  838                     const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
  839                     matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
  840                     offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
  841                     ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
  842                     ip += matchLength;
  843                     anchor = ip;
  844                     continue;
  845                 }
  846                 break;
  847             }
  848         }
  849 
  850         if (dictMode == ZSTD_noDict) {
  851             while ( ((ip <= ilimit) & (offset_2>0))
  852                  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
  853                 /* store sequence */
  854                 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
  855                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
  856                 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
  857                 ip += matchLength;
  858                 anchor = ip;
  859                 continue;   /* faster when present ... (?) */
  860     }   }   }
  861 
  862     /* Save reps for next block */
  863     rep[0] = offset_1 ? offset_1 : savedOffset;
  864     rep[1] = offset_2 ? offset_2 : savedOffset;
  865 
  866     /* Return the last literals size */
  867     return (size_t)(iend - anchor);
  868 }
  869 
  870 
  871 size_t ZSTD_compressBlock_btlazy2(
  872         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  873         void const* src, size_t srcSize)
  874 {
  875     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
  876 }
  877 
  878 size_t ZSTD_compressBlock_lazy2(
  879         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  880         void const* src, size_t srcSize)
  881 {
  882     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
  883 }
  884 
  885 size_t ZSTD_compressBlock_lazy(
  886         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  887         void const* src, size_t srcSize)
  888 {
  889     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
  890 }
  891 
  892 size_t ZSTD_compressBlock_greedy(
  893         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  894         void const* src, size_t srcSize)
  895 {
  896     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
  897 }
  898 
  899 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
  900         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  901         void const* src, size_t srcSize)
  902 {
  903     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
  904 }
  905 
  906 size_t ZSTD_compressBlock_lazy2_dictMatchState(
  907         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  908         void const* src, size_t srcSize)
  909 {
  910     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
  911 }
  912 
  913 size_t ZSTD_compressBlock_lazy_dictMatchState(
  914         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  915         void const* src, size_t srcSize)
  916 {
  917     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
  918 }
  919 
  920 size_t ZSTD_compressBlock_greedy_dictMatchState(
  921         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  922         void const* src, size_t srcSize)
  923 {
  924     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
  925 }
  926 
  927 
  928 FORCE_INLINE_TEMPLATE
  929 size_t ZSTD_compressBlock_lazy_extDict_generic(
  930                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
  931                         U32 rep[ZSTD_REP_NUM],
  932                         const void* src, size_t srcSize,
  933                         const searchMethod_e searchMethod, const U32 depth)
  934 {
  935     const BYTE* const istart = (const BYTE*)src;
  936     const BYTE* ip = istart;
  937     const BYTE* anchor = istart;
  938     const BYTE* const iend = istart + srcSize;
  939     const BYTE* const ilimit = iend - 8;
  940     const BYTE* const base = ms->window.base;
  941     const U32 dictLimit = ms->window.dictLimit;
  942     const BYTE* const prefixStart = base + dictLimit;
  943     const BYTE* const dictBase = ms->window.dictBase;
  944     const BYTE* const dictEnd  = dictBase + dictLimit;
  945     const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
  946     const U32 windowLog = ms->cParams.windowLog;
  947 
  948     typedef size_t (*searchMax_f)(
  949                         ZSTD_matchState_t* ms,
  950                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
  951     searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
  952 
  953     U32 offset_1 = rep[0], offset_2 = rep[1];
  954 
  955     DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
  956 
  957     /* init */
  958     ip += (ip == prefixStart);
  959 
  960     /* Match Loop */
  961 #if defined(__GNUC__) && defined(__x86_64__)
  962     /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
  963      * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
  964      */
  965     __asm__(".p2align 5");
  966 #endif
  967     while (ip < ilimit) {
  968         size_t matchLength=0;
  969         size_t offset=0;
  970         const BYTE* start=ip+1;
  971         U32 current = (U32)(ip-base);
  972 
  973         /* check repCode */
  974         {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
  975             const U32 repIndex = (U32)(current+1 - offset_1);
  976             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  977             const BYTE* const repMatch = repBase + repIndex;
  978             if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
  979             if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
  980                 /* repcode detected we should take it */
  981                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  982                 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  983                 if (depth==0) goto _storeSequence;
  984         }   }
  985 
  986         /* first search (depth 0) */
  987         {   size_t offsetFound = 999999999;
  988             size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
  989             if (ml2 > matchLength)
  990                 matchLength = ml2, start = ip, offset=offsetFound;
  991         }
  992 
  993          if (matchLength < 4) {
  994             ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
  995             continue;
  996         }
  997 
  998         /* let's try to find a better solution */
  999         if (depth>=1)
 1000         while (ip<ilimit) {
 1001             ip ++;
 1002             current++;
 1003             /* check repCode */
 1004             if (offset) {
 1005                 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
 1006                 const U32 repIndex = (U32)(current - offset_1);
 1007                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
 1008                 const BYTE* const repMatch = repBase + repIndex;
 1009                 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
 1010                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
 1011                     /* repcode detected */
 1012                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
 1013                     size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
 1014                     int const gain2 = (int)(repLength * 3);
 1015                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
 1016                     if ((repLength >= 4) && (gain2 > gain1))
 1017                         matchLength = repLength, offset = 0, start = ip;
 1018             }   }
 1019 
 1020             /* search match, depth 1 */
 1021             {   size_t offset2=999999999;
 1022                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
 1023                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
 1024                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
 1025                 if ((ml2 >= 4) && (gain2 > gain1)) {
 1026                     matchLength = ml2, offset = offset2, start = ip;
 1027                     continue;   /* search a better one */
 1028             }   }
 1029 
 1030             /* let's find an even better one */
 1031             if ((depth==2) && (ip<ilimit)) {
 1032                 ip ++;
 1033                 current++;
 1034                 /* check repCode */
 1035                 if (offset) {
 1036                     const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
 1037                     const U32 repIndex = (U32)(current - offset_1);
 1038                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
 1039                     const BYTE* const repMatch = repBase + repIndex;
 1040                     if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
 1041                     if (MEM_read32(ip) == MEM_read32(repMatch)) {
 1042                         /* repcode detected */
 1043                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
 1044                         size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
 1045                         int const gain2 = (int)(repLength * 4);
 1046                         int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
 1047                         if ((repLength >= 4) && (gain2 > gain1))
 1048                             matchLength = repLength, offset = 0, start = ip;
 1049                 }   }
 1050 
 1051                 /* search match, depth 2 */
 1052                 {   size_t offset2=999999999;
 1053                     size_t const ml2 = searchMax(ms, ip, iend, &offset2);
 1054                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
 1055                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
 1056                     if ((ml2 >= 4) && (gain2 > gain1)) {
 1057                         matchLength = ml2, offset = offset2, start = ip;
 1058                         continue;
 1059             }   }   }
 1060             break;  /* nothing found : store previous solution */
 1061         }
 1062 
 1063         /* catch up */
 1064         if (offset) {
 1065             U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
 1066             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
 1067             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
 1068             while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
 1069             offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
 1070         }
 1071 
 1072         /* store sequence */
 1073 _storeSequence:
 1074         {   size_t const litLength = start - anchor;
 1075             ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
 1076             anchor = ip = start + matchLength;
 1077         }
 1078 
 1079         /* check immediate repcode */
 1080         while (ip <= ilimit) {
 1081             const U32 repCurrent = (U32)(ip-base);
 1082             const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
 1083             const U32 repIndex = repCurrent - offset_2;
 1084             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
 1085             const BYTE* const repMatch = repBase + repIndex;
 1086             if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
 1087             if (MEM_read32(ip) == MEM_read32(repMatch)) {
 1088                 /* repcode detected we should take it */
 1089                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
 1090                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
 1091                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
 1092                 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
 1093                 ip += matchLength;
 1094                 anchor = ip;
 1095                 continue;   /* faster when present ... (?) */
 1096             }
 1097             break;
 1098     }   }
 1099 
 1100     /* Save reps for next block */
 1101     rep[0] = offset_1;
 1102     rep[1] = offset_2;
 1103 
 1104     /* Return the last literals size */
 1105     return (size_t)(iend - anchor);
 1106 }
 1107 
 1108 
 1109 size_t ZSTD_compressBlock_greedy_extDict(
 1110         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
 1111         void const* src, size_t srcSize)
 1112 {
 1113     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
 1114 }
 1115 
 1116 size_t ZSTD_compressBlock_lazy_extDict(
 1117         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
 1118         void const* src, size_t srcSize)
 1119 
 1120 {
 1121     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
 1122 }
 1123 
 1124 size_t ZSTD_compressBlock_lazy2_extDict(
 1125         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
 1126         void const* src, size_t srcSize)
 1127 
 1128 {
 1129     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
 1130 }
 1131 
 1132 size_t ZSTD_compressBlock_btlazy2_extDict(
 1133         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
 1134         void const* src, size_t srcSize)
 1135 
 1136 {
 1137     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
 1138 }

Cache object: 66120458032066da000488bddf1961a5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.