The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zstd/lib/compress/zstd_ldm.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
    3  * All rights reserved.
    4  *
    5  * This source code is licensed under both the BSD-style license (found in the
    6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
    7  * in the COPYING file in the root directory of this source tree).
    8  * You may select, at your option, one of the above-listed licenses.
    9  */
   10 
   11 #include "zstd_ldm.h"
   12 
   13 #include "../common/debug.h"
   14 #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
   15 #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
   16 
   17 #define LDM_BUCKET_SIZE_LOG 3
   18 #define LDM_MIN_MATCH_LENGTH 64
   19 #define LDM_HASH_RLOG 7
   20 #define LDM_HASH_CHAR_OFFSET 10
   21 
/** ZSTD_ldm_adjustParameters() :
 *  Fill any zero-valued ("unset") LDM parameters with defaults derived from
 *  the compression parameters, then clamp them into legal ranges.
 *  Mutates *params in place. Order matters: minMatchLength must be settled
 *  before the optimal-parser adjustment, and hashLog before hashRateLog. */
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                               ZSTD_compressionParameters const* cParams)
{
    params->windowLog = cParams->windowLog;
    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
    /* 0 means "unset": substitute the compiled-in defaults. */
    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
    if (cParams->strategy >= ZSTD_btopt) {
      /* Get out of the way of the optimal parser */
      U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
      assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
      assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
      params->minMatchLength = minMatch;
    }
    if (params->hashLog == 0) {
        /* Default: hash table LDM_HASH_RLOG smaller than the window,
         * but never below the global minimum hash log. */
        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
    }
    if (params->hashRateLog == 0) {
        /* Default insertion rate: roughly one candidate per
         * (window size / table size) positions. */
        params->hashRateLog = params->windowLog < params->hashLog
                                   ? 0
                                   : params->windowLog - params->hashLog;
    }
    /* A bucket cannot be wider than the whole table. */
    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}
   48 
   49 size_t ZSTD_ldm_getTableSize(ldmParams_t params)
   50 {
   51     size_t const ldmHSize = ((size_t)1) << params.hashLog;
   52     size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
   53     size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
   54     size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
   55                            + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
   56     return params.enableLdm ? totalSize : 0;
   57 }
   58 
   59 size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
   60 {
   61     return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
   62 }
   63 
   64 /** ZSTD_ldm_getSmallHash() :
   65  *  numBits should be <= 32
   66  *  If numBits==0, returns 0.
   67  *  @return : the most significant numBits of value. */
   68 static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
   69 {
   70     assert(numBits <= 32);
   71     return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
   72 }
   73 
   74 /** ZSTD_ldm_getChecksum() :
   75  *  numBitsToDiscard should be <= 32
   76  *  @return : the next most significant 32 bits after numBitsToDiscard */
   77 static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
   78 {
   79     assert(numBitsToDiscard <= 32);
   80     return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
   81 }
   82 
   83 /** ZSTD_ldm_getTag() ;
   84  *  Given the hash, returns the most significant numTagBits bits
   85  *  after (32 + hbits) bits.
   86  *
   87  *  If there are not enough bits remaining, return the last
   88  *  numTagBits bits. */
   89 static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
   90 {
   91     assert(numTagBits < 32 && hbits <= 32);
   92     if (32 - hbits < numTagBits) {
   93         return hash & (((U32)1 << numTagBits) - 1);
   94     } else {
   95         return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
   96     }
   97 }
   98 
   99 /** ZSTD_ldm_getBucket() :
  100  *  Returns a pointer to the start of the bucket associated with hash. */
  101 static ldmEntry_t* ZSTD_ldm_getBucket(
  102         ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
  103 {
  104     return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
  105 }
  106 
  107 /** ZSTD_ldm_insertEntry() :
  108  *  Insert the entry with corresponding hash into the hash table */
  109 static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
  110                                  size_t const hash, const ldmEntry_t entry,
  111                                  ldmParams_t const ldmParams)
  112 {
  113     BYTE* const bucketOffsets = ldmState->bucketOffsets;
  114     *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
  115     bucketOffsets[hash]++;
  116     bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
  117 }
  118 
  119 /** ZSTD_ldm_makeEntryAndInsertByTag() :
  120  *
  121  *  Gets the small hash, checksum, and tag from the rollingHash.
  122  *
  123  *  If the tag matches (1 << ldmParams.hashRateLog)-1, then
  124  *  creates an ldmEntry from the offset, and inserts it into the hash table.
  125  *
  126  *  hBits is the length of the small hash, which is the most significant hBits
  127  *  of rollingHash. The checksum is the next 32 most significant bits, followed
  128  *  by ldmParams.hashRateLog bits that make up the tag. */
  129 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
  130                                              U64 const rollingHash,
  131                                              U32 const hBits,
  132                                              U32 const offset,
  133                                              ldmParams_t const ldmParams)
  134 {
  135     U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
  136     U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
  137     if (tag == tagMask) {
  138         U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
  139         U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
  140         ldmEntry_t entry;
  141         entry.offset = offset;
  142         entry.checksum = checksum;
  143         ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
  144     }
  145 }
  146 
  147 /** ZSTD_ldm_countBackwardsMatch() :
  148  *  Returns the number of bytes that match backwards before pIn and pMatch.
  149  *
  150  *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
  151 static size_t ZSTD_ldm_countBackwardsMatch(
  152             const BYTE* pIn, const BYTE* pAnchor,
  153             const BYTE* pMatch, const BYTE* pBase)
  154 {
  155     size_t matchLength = 0;
  156     while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
  157         pIn--;
  158         pMatch--;
  159         matchLength++;
  160     }
  161     return matchLength;
  162 }
  163 
  164 /** ZSTD_ldm_fillFastTables() :
  165  *
  166  *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
  167  *  This is similar to ZSTD_loadDictionaryContent.
  168  *
  169  *  The tables for the other strategies are filled within their
  170  *  block compressors. */
  171 static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
  172                                       void const* end)
  173 {
  174     const BYTE* const iend = (const BYTE*)end;
  175 
  176     switch(ms->cParams.strategy)
  177     {
  178     case ZSTD_fast:
  179         ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
  180         break;
  181 
  182     case ZSTD_dfast:
  183         ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
  184         break;
  185 
  186     case ZSTD_greedy:
  187     case ZSTD_lazy:
  188     case ZSTD_lazy2:
  189     case ZSTD_btlazy2:
  190     case ZSTD_btopt:
  191     case ZSTD_btultra:
  192     case ZSTD_btultra2:
  193         break;
  194     default:
  195         assert(0);  /* not possible : not a valid strategy id */
  196     }
  197 
  198     return 0;
  199 }
  200 
  201 /** ZSTD_ldm_fillLdmHashTable() :
  202  *
  203  *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
  204  *  lastHash is the rolling hash that corresponds to lastHashed.
  205  *
  206  *  Returns the rolling hash corresponding to position iend-1. */
  207 static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
  208                                      U64 lastHash, const BYTE* lastHashed,
  209                                      const BYTE* iend, const BYTE* base,
  210                                      U32 hBits, ldmParams_t const ldmParams)
  211 {
  212     U64 rollingHash = lastHash;
  213     const BYTE* cur = lastHashed + 1;
  214 
  215     while (cur < iend) {
  216         rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
  217                                               cur[ldmParams.minMatchLength-1],
  218                                               state->hashPower);
  219         ZSTD_ldm_makeEntryAndInsertByTag(state,
  220                                          rollingHash, hBits,
  221                                          (U32)(cur - base), ldmParams);
  222         ++cur;
  223     }
  224     return rollingHash;
  225 }
  226 
  227 void ZSTD_ldm_fillHashTable(
  228             ldmState_t* state, const BYTE* ip,
  229             const BYTE* iend, ldmParams_t const* params)
  230 {
  231     DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
  232     if ((size_t)(iend - ip) >= params->minMatchLength) {
  233         U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
  234         ZSTD_ldm_fillLdmHashTable(
  235             state, startingHash, ip, iend - params->minMatchLength, state->window.base,
  236             params->hashLog - params->bucketSizeLog,
  237             *params);
  238     }
  239 }
  240 
  241 
  242 /** ZSTD_ldm_limitTableUpdate() :
  243  *
  244  *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
  245  *  if it is far way
  246  *  (after a long match, only update tables a limited amount). */
  247 static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
  248 {
  249     U32 const current = (U32)(anchor - ms->window.base);
  250     if (current > ms->nextToUpdate + 1024) {
  251         ms->nextToUpdate =
  252             current - MIN(512, current - ms->nextToUpdate - 1024);
  253     }
  254 }
  255 
/** ZSTD_ldm_generateSequences_internal() :
 *  Scan [src, src+srcSize) with a rolling hash, look up candidate matches in
 *  the LDM hash table, and append the (litLength, matchLength, offset)
 *  sequences found to rawSeqStore. Handles both the single-segment (prefix)
 *  and extDict window layouts.
 *  @return : number of trailing bytes not covered by any sequence
 *            (leftover literals), or an error code
 *            (dstSize_tooSmall when rawSeqStore is full). */
static size_t ZSTD_ldm_generateSequences_internal(
        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    /* LDM parameters */
    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
    U32 const minMatchLength = params->minMatchLength;
    U64 const hashPower = ldmState->hashPower;
    U32 const hBits = params->hashLog - params->bucketSizeLog;
    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
    U32 const hashRateLog = params->hashRateLog;
    U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
    /* Prefix and extDict parameters */
    U32 const dictLimit = ldmState->window.dictLimit;
    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
    BYTE const* const base = ldmState->window.base;
    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
    BYTE const* const lowPrefixPtr = base + dictLimit;
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
    /* Input positions */
    BYTE const* anchor = istart;
    BYTE const* ip = istart;
    /* Rolling hash */
    BYTE const* lastHashed = NULL;
    U64 rollingHash = 0;

    while (ip <= ilimit) {
        size_t mLength;
        U32 const current = (U32)(ip - base);
        size_t forwardMatchLength = 0, backwardMatchLength = 0;
        ldmEntry_t* bestEntry = NULL;
        /* Advance the rolling hash by one byte; a full recompute is only
         * needed at the very first position. */
        if (ip != istart) {
            rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
                                                  lastHashed[minMatchLength],
                                                  hashPower);
        } else {
            rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
        }
        lastHashed = ip;

        /* Do not insert and do not look for a match */
        if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
           ip++;
           continue;
        }

        /* Get the best entry and compute the match lengths */
        {
            ldmEntry_t* const bucket =
                ZSTD_ldm_getBucket(ldmState,
                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
                                   *params);
            ldmEntry_t* cur;
            size_t bestMatchLength = 0;
            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);

            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;
                /* Cheap checksum filter before any byte comparison; also
                 * reject candidates that fell out of the valid window. */
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }
                if (extDict) {
                    /* Candidate may live in the dictionary segment or the
                     * current prefix; pick the right base/bounds for each. */
                    BYTE const* const curMatchBase =
                        cur->offset < dictLimit ? dictBase : base;
                    BYTE const* const pMatch = curMatchBase + cur->offset;
                    BYTE const* const matchEnd =
                        cur->offset < dictLimit ? dictEnd : iend;
                    BYTE const* const lowMatchPtr =
                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;

                    curForwardMatchLength = ZSTD_count_2segments(
                                                ip, pMatch, iend,
                                                matchEnd, lowPrefixPtr);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
                                                     lowMatchPtr);
                    curTotalMatchLength = curForwardMatchLength +
                                          curBackwardMatchLength;
                } else { /* !extDict */
                    BYTE const* const pMatch = base + cur->offset;
                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
                                                     lowPrefixPtr);
                    curTotalMatchLength = curForwardMatchLength +
                                          curBackwardMatchLength;
                }

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
        }

        /* No match found -- continue searching */
        if (bestEntry == NULL) {
            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
                                             hBits, current,
                                             *params);
            ip++;
            continue;
        }

        /* Match found */
        mLength = forwardMatchLength + backwardMatchLength;
        ip -= backwardMatchLength;

        {
            /* Store the sequence:
             * ip = current - backwardMatchLength
             * The match is at (bestEntry->offset - backwardMatchLength)
             */
            U32 const matchIndex = bestEntry->offset;
            U32 const offset = current - matchIndex;
            rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;

            /* Out of sequence storage */
            if (rawSeqStore->size == rawSeqStore->capacity)
                return ERROR(dstSize_tooSmall);
            seq->litLength = (U32)(ip - anchor);
            seq->matchLength = (U32)mLength;
            seq->offset = offset;
            rawSeqStore->size++;
        }

        /* Insert the current entry into the hash table */
        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                         (U32)(lastHashed - base),
                                         *params);

        assert(ip + backwardMatchLength == lastHashed);

        /* Fill the hash table from lastHashed+1 to ip+mLength*/
        /* Heuristic: don't need to fill the entire table at end of block */
        if (ip + mLength <= ilimit) {
            rollingHash = ZSTD_ldm_fillLdmHashTable(
                              ldmState, rollingHash, lastHashed,
                              ip + mLength, base, hBits, *params);
            lastHashed = ip + mLength - 1;
        }
        ip += mLength;
        anchor = ip;
    }
    /* Bytes after the last stored sequence become leftover literals. */
    return iend - anchor;
}
  416 
  417 /*! ZSTD_ldm_reduceTable() :
  418  *  reduce table indexes by `reducerValue` */
  419 static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
  420                                  U32 const reducerValue)
  421 {
  422     U32 u;
  423     for (u = 0; u < size; u++) {
  424         if (table[u].offset < reducerValue) table[u].offset = 0;
  425         else table[u].offset -= reducerValue;
  426     }
  427 }
  428 
/** ZSTD_ldm_generateSequences() :
 *  Public driver: break src into <=1MB chunks, run overflow correction and
 *  window-distance enforcement per chunk, then generate LDM sequences into
 *  `sequences` via ZSTD_ldm_generateSequences_internal().
 *  Leftover literals from one chunk are folded into the first sequence of the
 *  next. @return : 0 on success, or an error code from sequence generation. */
size_t ZSTD_ldm_generateSequences(
        ldmState_t* ldmState, rawSeqStore_t* sequences,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    U32 const maxDist = 1U << params->windowLog;
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    size_t const kMaxChunkSize = 1 << 20;
    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
    size_t chunk;
    size_t leftoverSize = 0;

    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
    /* Check that ZSTD_window_update() has been called for this chunk prior
     * to passing it to this function.
     */
    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
    /* The input could be very large (in zstdmt), so it must be broken up into
     * chunks to enforce the maximum distance and handle overflow correction.
     */
    assert(sequences->pos <= sequences->size);
    assert(sequences->size <= sequences->capacity);
    /* Stop early once the sequence store is full: remaining input is left
     * uncovered rather than returning an error. */
    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
        size_t const remaining = (size_t)(iend - chunkStart);
        BYTE const *const chunkEnd =
            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
        size_t const chunkSize = chunkEnd - chunkStart;
        size_t newLeftoverSize;
        size_t const prevSize = sequences->size;

        assert(chunkStart < iend);
        /* 1. Perform overflow correction if necessary. */
        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
            U32 const ldmHSize = 1U << params->hashLog;
            U32 const correction = ZSTD_window_correctOverflow(
                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
            /* Keep hash-table offsets consistent with the rebased window. */
            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
            /* invalidate dictionaries on overflow correction */
            ldmState->loadedDictEnd = 0;
        }
        /* 2. We enforce the maximum offset allowed.
         *
         * kMaxChunkSize should be small enough that we don't lose too much of
         * the window through early invalidation.
         * TODO: * Test the chunk size.
         *       * Try invalidation after the sequence generation and test the
         *         the offset against maxDist directly.
         *
         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
         * that any offset used is valid at the END of the sequence, since it may
         * be split into two sequences. This condition holds when using
         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
         * against maxDist directly, we'll have to carefully handle that case.
         */
        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
            ldmState, sequences, params, chunkStart, chunkSize);
        if (ZSTD_isError(newLeftoverSize))
            return newLeftoverSize;
        /* 4. We add the leftover literals from previous iterations to the first
         *    newly generated sequence, or add the `newLeftoverSize` if none are
         *    generated.
         */
        /* Prepend the leftover literals from the last call */
        if (prevSize < sequences->size) {
            sequences->seq[prevSize].litLength += (U32)leftoverSize;
            leftoverSize = newLeftoverSize;
        } else {
            /* No sequence found in this chunk: the whole chunk is literals. */
            assert(newLeftoverSize == chunkSize);
            leftoverSize += chunkSize;
        }
    }
    return 0;
}
  505 
/** ZSTD_ldm_skipSequences() :
 *  Consume srcSize bytes from the front of rawSeqStore, trimming literal and
 *  match lengths of the pending sequences in place. A match that is cut below
 *  minMatch is dropped and its remainder is re-credited to the next
 *  sequence's literals. Used after a block boundary splits a sequence. */
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
        if (srcSize <= seq->litLength) {
            /* Skip past srcSize literals */
            seq->litLength -= (U32)srcSize;
            return;
        }
        srcSize -= seq->litLength;
        seq->litLength = 0;
        if (srcSize < seq->matchLength) {
            /* Skip past the first srcSize of the match */
            seq->matchLength -= (U32)srcSize;
            if (seq->matchLength < minMatch) {
                /* The match is too short, omit it */
                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
                    /* Fold the leftover match bytes into the next
                     * sequence's literals so no input byte is lost. */
                    seq[1].litLength += seq[0].matchLength;
                }
                rawSeqStore->pos++;
            }
            return;
        }
        /* Entire sequence consumed; move on to the next one. */
        srcSize -= seq->matchLength;
        seq->matchLength = 0;
        rawSeqStore->pos++;
    }
}
  533 
/**
 * If the sequence length is longer than remaining then the sequence is split
 * between this block and the next.
 *
 * Returns the current sequence to handle, or if the rest of the block should
 * be literals, it returns a sequence with offset == 0.
 */
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
                                 U32 const remaining, U32 const minMatch)
{
    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
    assert(sequence.offset > 0);
    /* Likely: No partial sequence */
    if (remaining >= sequence.litLength + sequence.matchLength) {
        rawSeqStore->pos++;
        return sequence;
    }
    /* Cut the sequence short (offset == 0 ==> rest is literals). */
    if (remaining <= sequence.litLength) {
        /* The split lands inside the literals: no match fits this block. */
        sequence.offset = 0;
    } else if (remaining < sequence.litLength + sequence.matchLength) {
        /* The split lands inside the match: truncate it, and drop it
         * entirely if the truncated match is below minMatch. */
        sequence.matchLength = remaining - sequence.litLength;
        if (sequence.matchLength < minMatch) {
            sequence.offset = 0;
        }
    }
    /* Skip past `remaining` bytes for the future sequences.
     * (This updates rawSeqStore->pos and trims the stored copy;
     * the local `sequence` returned above is unaffected.) */
    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
    return sequence;
}
  564 
/** ZSTD_ldm_blockCompress() :
 *  Compress one block using the pre-generated LDM sequences: for each LDM
 *  sequence, run the regular block compressor over the literal gap, then emit
 *  the long-distance match itself (updating the repcodes), and finally
 *  compress any trailing literals.
 *  @return : the result of the final blockCompressor call
 *            (number of last literals, per the block-compressor contract). */
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
    void const* src, size_t srcSize)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    unsigned const minMatch = cParams->minMatch;
    ZSTD_blockCompressor const blockCompressor =
        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    /* Input positions */
    BYTE const* ip = istart;

    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
    assert(rawSeqStore->pos <= rawSeqStore->size);
    assert(rawSeqStore->size <= rawSeqStore->capacity);
    /* Loop through each sequence and apply the block compressor to the lits */
    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
        /* maybeSplitSequence updates rawSeqStore->pos */
        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
                                                   (U32)(iend - ip), minMatch);
        int i;
        /* End signal */
        if (sequence.offset == 0)
            break;

        assert(ip + sequence.litLength + sequence.matchLength <= iend);

        /* Fill tables for block compressor */
        ZSTD_ldm_limitTableUpdate(ms, ip);
        ZSTD_ldm_fillFastTables(ms, ip);
        /* Run the block compressor */
        DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
        {
            size_t const newLitLength =
                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
            ip += sequence.litLength;
            /* Update the repcodes: shift history down and install the LDM
             * offset as the most recent repcode. */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                rep[i] = rep[i-1];
            rep[0] = sequence.offset;
            /* Store the sequence */
            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                          sequence.offset + ZSTD_REP_MOVE,
                          sequence.matchLength - MINMATCH);
            ip += sequence.matchLength;
        }
    }
    /* Fill the tables for the block compressor */
    ZSTD_ldm_limitTableUpdate(ms, ip);
    ZSTD_ldm_fillFastTables(ms, ip);
    /* Compress the last literals */
    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
}

Cache object: 57df91ff97b9ea69634d133d6b10cbe3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.