The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/zstd/lib/compress/fse_compress.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* ******************************************************************
    2  * FSE : Finite State Entropy encoder
    3  * Copyright (c) Yann Collet, Facebook, Inc.
    4  *
    5  *  You can contact the author at :
    6  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
    7  *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
    8  *
    9  * This source code is licensed under both the BSD-style license (found in the
   10  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
   11  * in the COPYING file in the root directory of this source tree).
   12  * You may select, at your option, one of the above-listed licenses.
   13 ****************************************************************** */
   14 
   15 /* **************************************************************
   16 *  Includes
   17 ****************************************************************/
   18 #include "../common/compiler.h"
   19 #include "../common/mem.h"        /* U32, U16, etc. */
   20 #include "../common/debug.h"      /* assert, DEBUGLOG */
   21 #include "hist.h"       /* HIST_count_wksp */
   22 #include "../common/bitstream.h"
   23 #define FSE_STATIC_LINKING_ONLY
   24 #include "../common/fse.h"
   25 #include "../common/error_private.h"
   26 #define ZSTD_DEPS_NEED_MALLOC
   27 #define ZSTD_DEPS_NEED_MATH64
   28 #include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
   29 
   30 
   31 /* **************************************************************
   32 *  Error Management
   33 ****************************************************************/
   34 #define FSE_isError ERR_isError
   35 
   36 
   37 /* **************************************************************
   38 *  Templates
   39 ****************************************************************/
   40 /*
   41   designed to be included
   42   for type-specific functions (template emulation in C)
   43   Objective is to write these functions only once, for improved maintenance
   44 */
   45 
   46 /* safety checks */
   47 #ifndef FSE_FUNCTION_EXTENSION
   48 #  error "FSE_FUNCTION_EXTENSION must be defined"
   49 #endif
   50 #ifndef FSE_FUNCTION_TYPE
   51 #  error "FSE_FUNCTION_TYPE must be defined"
   52 #endif
   53 
   54 /* Function names */
   55 #define FSE_CAT(X,Y) X##Y
   56 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
   57 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
   58 
   59 
   60 /* Function templates */
   61 
   62 /* FSE_buildCTable_wksp() :
   63  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
   64  * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
   65  * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
   66  */
   67 size_t FSE_buildCTable_wksp(FSE_CTable* ct,
   68                       const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
   69                             void* workSpace, size_t wkspSize)
   70 {
   71     U32 const tableSize = 1 << tableLog;
   72     U32 const tableMask = tableSize - 1;
   73     void* const ptr = ct;
   74     U16* const tableU16 = ( (U16*) ptr) + 2;
   75     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
   76     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
   77     U32 const step = FSE_TABLESTEP(tableSize);
   78     U32 const maxSV1 = maxSymbolValue+1;
   79 
   80     U16* cumul = (U16*)workSpace;   /* size = maxSV1 */
   81     FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1));  /* size = tableSize */
   82 
   83     U32 highThreshold = tableSize-1;
   84 
   85     assert(((size_t)workSpace & 1) == 0);  /* Must be 2 bytes-aligned */
   86     if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
   87     /* CTable header */
   88     tableU16[-2] = (U16) tableLog;
   89     tableU16[-1] = (U16) maxSymbolValue;
   90     assert(tableLog < 16);   /* required for threshold strategy to work */
   91 
   92     /* For explanations on how to distribute symbol values over the table :
   93      * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
   94 
   95      #ifdef __clang_analyzer__
   96      ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
   97      #endif
   98 
   99     /* symbol start positions */
  100     {   U32 u;
  101         cumul[0] = 0;
  102         for (u=1; u <= maxSV1; u++) {
  103             if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
  104                 cumul[u] = cumul[u-1] + 1;
  105                 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
  106             } else {
  107                 assert(normalizedCounter[u-1] >= 0);
  108                 cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
  109                 assert(cumul[u] >= cumul[u-1]);  /* no overflow */
  110         }   }
  111         cumul[maxSV1] = (U16)(tableSize+1);
  112     }
  113 
  114     /* Spread symbols */
  115     if (highThreshold == tableSize - 1) {
  116         /* Case for no low prob count symbols. Lay down 8 bytes at a time
  117          * to reduce branch misses since we are operating on a small block
  118          */
  119         BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
  120         {   U64 const add = 0x0101010101010101ull;
  121             size_t pos = 0;
  122             U64 sv = 0;
  123             U32 s;
  124             for (s=0; s<maxSV1; ++s, sv += add) {
  125                 int i;
  126                 int const n = normalizedCounter[s];
  127                 MEM_write64(spread + pos, sv);
  128                 for (i = 8; i < n; i += 8) {
  129                     MEM_write64(spread + pos + i, sv);
  130                 }
  131                 assert(n>=0);
  132                 pos += (size_t)n;
  133             }
  134         }
  135         /* Spread symbols across the table. Lack of lowprob symbols means that
  136          * we don't need variable sized inner loop, so we can unroll the loop and
  137          * reduce branch misses.
  138          */
  139         {   size_t position = 0;
  140             size_t s;
  141             size_t const unroll = 2; /* Experimentally determined optimal unroll */
  142             assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
  143             for (s = 0; s < (size_t)tableSize; s += unroll) {
  144                 size_t u;
  145                 for (u = 0; u < unroll; ++u) {
  146                     size_t const uPosition = (position + (u * step)) & tableMask;
  147                     tableSymbol[uPosition] = spread[s + u];
  148                 }
  149                 position = (position + (unroll * step)) & tableMask;
  150             }
  151             assert(position == 0);   /* Must have initialized all positions */
  152         }
  153     } else {
  154         U32 position = 0;
  155         U32 symbol;
  156         for (symbol=0; symbol<maxSV1; symbol++) {
  157             int nbOccurrences;
  158             int const freq = normalizedCounter[symbol];
  159             for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
  160                 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
  161                 position = (position + step) & tableMask;
  162                 while (position > highThreshold)
  163                     position = (position + step) & tableMask;   /* Low proba area */
  164         }   }
  165         assert(position==0);  /* Must have initialized all positions */
  166     }
  167 
  168     /* Build table */
  169     {   U32 u; for (u=0; u<tableSize; u++) {
  170         FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
  171         tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
  172     }   }
  173 
  174     /* Build Symbol Transformation Table */
  175     {   unsigned total = 0;
  176         unsigned s;
  177         for (s=0; s<=maxSymbolValue; s++) {
  178             switch (normalizedCounter[s])
  179             {
  180             case  0:
  181                 /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
  182                 symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
  183                 break;
  184 
  185             case -1:
  186             case  1:
  187                 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
  188                 assert(total <= INT_MAX);
  189                 symbolTT[s].deltaFindState = (int)(total - 1);
  190                 total ++;
  191                 break;
  192             default :
  193                 assert(normalizedCounter[s] > 1);
  194                 {   U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
  195                     U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
  196                     symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
  197                     symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
  198                     total +=  (unsigned)normalizedCounter[s];
  199     }   }   }   }
  200 
  201 #if 0  /* debug : symbol costs */
  202     DEBUGLOG(5, "\n --- table statistics : ");
  203     {   U32 symbol;
  204         for (symbol=0; symbol<=maxSymbolValue; symbol++) {
  205             DEBUGLOG(5, "%3u: w=%3i,   maxBits=%u, fracBits=%.2f",
  206                 symbol, normalizedCounter[symbol],
  207                 FSE_getMaxNbBits(symbolTT, symbol),
  208                 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
  209     }   }
  210 #endif
  211 
  212     return 0;
  213 }
  214 
  215 
  216 
  217 #ifndef FSE_COMMONDEFS_ONLY
  218 
  219 /*-**************************************************************
  220 *  FSE NCount encoding
  221 ****************************************************************/
  222 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
  223 {
  224     size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
  225                                    + 4 /* bitCount initialized at 4 */
  226                                    + 2 /* first two symbols may use one additional bit each */) / 8)
  227                                     + 1 /* round up to whole nb bytes */
  228                                     + 2 /* additional two bytes for bitstream flush */;
  229     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
  230 }
  231 
  232 static size_t
  233 FSE_writeNCount_generic (void* header, size_t headerBufferSize,
  234                    const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
  235                          unsigned writeIsSafe)
  236 {
  237     BYTE* const ostart = (BYTE*) header;
  238     BYTE* out = ostart;
  239     BYTE* const oend = ostart + headerBufferSize;
  240     int nbBits;
  241     const int tableSize = 1 << tableLog;
  242     int remaining;
  243     int threshold;
  244     U32 bitStream = 0;
  245     int bitCount = 0;
  246     unsigned symbol = 0;
  247     unsigned const alphabetSize = maxSymbolValue + 1;
  248     int previousIs0 = 0;
  249 
  250     /* Table Size */
  251     bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
  252     bitCount  += 4;
  253 
  254     /* Init */
  255     remaining = tableSize+1;   /* +1 for extra accuracy */
  256     threshold = tableSize;
  257     nbBits = tableLog+1;
  258 
  259     while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
  260         if (previousIs0) {
  261             unsigned start = symbol;
  262             while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
  263             if (symbol == alphabetSize) break;   /* incorrect distribution */
  264             while (symbol >= start+24) {
  265                 start+=24;
  266                 bitStream += 0xFFFFU << bitCount;
  267                 if ((!writeIsSafe) && (out > oend-2))
  268                     return ERROR(dstSize_tooSmall);   /* Buffer overflow */
  269                 out[0] = (BYTE) bitStream;
  270                 out[1] = (BYTE)(bitStream>>8);
  271                 out+=2;
  272                 bitStream>>=16;
  273             }
  274             while (symbol >= start+3) {
  275                 start+=3;
  276                 bitStream += 3 << bitCount;
  277                 bitCount += 2;
  278             }
  279             bitStream += (symbol-start) << bitCount;
  280             bitCount += 2;
  281             if (bitCount>16) {
  282                 if ((!writeIsSafe) && (out > oend - 2))
  283                     return ERROR(dstSize_tooSmall);   /* Buffer overflow */
  284                 out[0] = (BYTE)bitStream;
  285                 out[1] = (BYTE)(bitStream>>8);
  286                 out += 2;
  287                 bitStream >>= 16;
  288                 bitCount -= 16;
  289         }   }
  290         {   int count = normalizedCounter[symbol++];
  291             int const max = (2*threshold-1) - remaining;
  292             remaining -= count < 0 ? -count : count;
  293             count++;   /* +1 for extra accuracy */
  294             if (count>=threshold)
  295                 count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
  296             bitStream += count << bitCount;
  297             bitCount  += nbBits;
  298             bitCount  -= (count<max);
  299             previousIs0  = (count==1);
  300             if (remaining<1) return ERROR(GENERIC);
  301             while (remaining<threshold) { nbBits--; threshold>>=1; }
  302         }
  303         if (bitCount>16) {
  304             if ((!writeIsSafe) && (out > oend - 2))
  305                 return ERROR(dstSize_tooSmall);   /* Buffer overflow */
  306             out[0] = (BYTE)bitStream;
  307             out[1] = (BYTE)(bitStream>>8);
  308             out += 2;
  309             bitStream >>= 16;
  310             bitCount -= 16;
  311     }   }
  312 
  313     if (remaining != 1)
  314         return ERROR(GENERIC);  /* incorrect normalized distribution */
  315     assert(symbol <= alphabetSize);
  316 
  317     /* flush remaining bitStream */
  318     if ((!writeIsSafe) && (out > oend - 2))
  319         return ERROR(dstSize_tooSmall);   /* Buffer overflow */
  320     out[0] = (BYTE)bitStream;
  321     out[1] = (BYTE)(bitStream>>8);
  322     out+= (bitCount+7) /8;
  323 
  324     return (out-ostart);
  325 }
  326 
  327 
  328 size_t FSE_writeNCount (void* buffer, size_t bufferSize,
  329                   const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
  330 {
  331     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
  332     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
  333 
  334     if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
  335         return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
  336 
  337     return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
  338 }
  339 
  340 
  341 /*-**************************************************************
  342 *  FSE Compression Code
  343 ****************************************************************/
  344 
  345 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
  346 {
  347     size_t size;
  348     if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
  349     size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
  350     return (FSE_CTable*)ZSTD_malloc(size);
  351 }
  352 
  353 void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
  354 
  355 /* provides the minimum logSize to safely represent a distribution */
  356 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
  357 {
  358     U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
  359     U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
  360     U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
  361     assert(srcSize > 1); /* Not supported, RLE should be used instead */
  362     return minBits;
  363 }
  364 
  365 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
  366 {
  367     U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
  368     U32 tableLog = maxTableLog;
  369     U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
  370     assert(srcSize > 1); /* Not supported, RLE should be used instead */
  371     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
  372     if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
  373     if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
  374     if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
  375     if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
  376     return tableLog;
  377 }
  378 
  379 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
  380 {
  381     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
  382 }
  383 
  384 /* Secondary normalization method.
  385    To be used when primary method fails. */
  386 
  387 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
  388 {
  389     short const NOT_YET_ASSIGNED = -2;
  390     U32 s;
  391     U32 distributed = 0;
  392     U32 ToDistribute;
  393 
  394     /* Init */
  395     U32 const lowThreshold = (U32)(total >> tableLog);
  396     U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
  397 
  398     for (s=0; s<=maxSymbolValue; s++) {
  399         if (count[s] == 0) {
  400             norm[s]=0;
  401             continue;
  402         }
  403         if (count[s] <= lowThreshold) {
  404             norm[s] = lowProbCount;
  405             distributed++;
  406             total -= count[s];
  407             continue;
  408         }
  409         if (count[s] <= lowOne) {
  410             norm[s] = 1;
  411             distributed++;
  412             total -= count[s];
  413             continue;
  414         }
  415 
  416         norm[s]=NOT_YET_ASSIGNED;
  417     }
  418     ToDistribute = (1 << tableLog) - distributed;
  419 
  420     if (ToDistribute == 0)
  421         return 0;
  422 
  423     if ((total / ToDistribute) > lowOne) {
  424         /* risk of rounding to zero */
  425         lowOne = (U32)((total * 3) / (ToDistribute * 2));
  426         for (s=0; s<=maxSymbolValue; s++) {
  427             if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
  428                 norm[s] = 1;
  429                 distributed++;
  430                 total -= count[s];
  431                 continue;
  432         }   }
  433         ToDistribute = (1 << tableLog) - distributed;
  434     }
  435 
  436     if (distributed == maxSymbolValue+1) {
  437         /* all values are pretty poor;
  438            probably incompressible data (should have already been detected);
  439            find max, then give all remaining points to max */
  440         U32 maxV = 0, maxC = 0;
  441         for (s=0; s<=maxSymbolValue; s++)
  442             if (count[s] > maxC) { maxV=s; maxC=count[s]; }
  443         norm[maxV] += (short)ToDistribute;
  444         return 0;
  445     }
  446 
  447     if (total == 0) {
  448         /* all of the symbols were low enough for the lowOne or lowThreshold */
  449         for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
  450             if (norm[s] > 0) { ToDistribute--; norm[s]++; }
  451         return 0;
  452     }
  453 
  454     {   U64 const vStepLog = 62 - tableLog;
  455         U64 const mid = (1ULL << (vStepLog-1)) - 1;
  456         U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total);   /* scale on remaining */
  457         U64 tmpTotal = mid;
  458         for (s=0; s<=maxSymbolValue; s++) {
  459             if (norm[s]==NOT_YET_ASSIGNED) {
  460                 U64 const end = tmpTotal + (count[s] * rStep);
  461                 U32 const sStart = (U32)(tmpTotal >> vStepLog);
  462                 U32 const sEnd = (U32)(end >> vStepLog);
  463                 U32 const weight = sEnd - sStart;
  464                 if (weight < 1)
  465                     return ERROR(GENERIC);
  466                 norm[s] = (short)weight;
  467                 tmpTotal = end;
  468     }   }   }
  469 
  470     return 0;
  471 }
  472 
  473 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
  474                            const unsigned* count, size_t total,
  475                            unsigned maxSymbolValue, unsigned useLowProbCount)
  476 {
  477     /* Sanity checks */
  478     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
  479     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
  480     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
  481     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
  482 
  483     {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
  484         short const lowProbCount = useLowProbCount ? -1 : 1;
  485         U64 const scale = 62 - tableLog;
  486         U64 const step = ZSTD_div64((U64)1<<62, (U32)total);   /* <== here, one division ! */
  487         U64 const vStep = 1ULL<<(scale-20);
  488         int stillToDistribute = 1<<tableLog;
  489         unsigned s;
  490         unsigned largest=0;
  491         short largestP=0;
  492         U32 lowThreshold = (U32)(total >> tableLog);
  493 
  494         for (s=0; s<=maxSymbolValue; s++) {
  495             if (count[s] == total) return 0;   /* rle special case */
  496             if (count[s] == 0) { normalizedCounter[s]=0; continue; }
  497             if (count[s] <= lowThreshold) {
  498                 normalizedCounter[s] = lowProbCount;
  499                 stillToDistribute--;
  500             } else {
  501                 short proba = (short)((count[s]*step) >> scale);
  502                 if (proba<8) {
  503                     U64 restToBeat = vStep * rtbTable[proba];
  504                     proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
  505                 }
  506                 if (proba > largestP) { largestP=proba; largest=s; }
  507                 normalizedCounter[s] = proba;
  508                 stillToDistribute -= proba;
  509         }   }
  510         if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
  511             /* corner case, need another normalization method */
  512             size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
  513             if (FSE_isError(errorCode)) return errorCode;
  514         }
  515         else normalizedCounter[largest] += (short)stillToDistribute;
  516     }
  517 
  518 #if 0
  519     {   /* Print Table (debug) */
  520         U32 s;
  521         U32 nTotal = 0;
  522         for (s=0; s<=maxSymbolValue; s++)
  523             RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
  524         for (s=0; s<=maxSymbolValue; s++)
  525             nTotal += abs(normalizedCounter[s]);
  526         if (nTotal != (1U<<tableLog))
  527             RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
  528         getchar();
  529     }
  530 #endif
  531 
  532     return tableLog;
  533 }
  534 
  535 
  536 /* fake FSE_CTable, for raw (uncompressed) input */
  537 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
  538 {
  539     const unsigned tableSize = 1 << nbBits;
  540     const unsigned tableMask = tableSize - 1;
  541     const unsigned maxSymbolValue = tableMask;
  542     void* const ptr = ct;
  543     U16* const tableU16 = ( (U16*) ptr) + 2;
  544     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
  545     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
  546     unsigned s;
  547 
  548     /* Sanity checks */
  549     if (nbBits < 1) return ERROR(GENERIC);             /* min size */
  550 
  551     /* header */
  552     tableU16[-2] = (U16) nbBits;
  553     tableU16[-1] = (U16) maxSymbolValue;
  554 
  555     /* Build table */
  556     for (s=0; s<tableSize; s++)
  557         tableU16[s] = (U16)(tableSize + s);
  558 
  559     /* Build Symbol Transformation Table */
  560     {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
  561         for (s=0; s<=maxSymbolValue; s++) {
  562             symbolTT[s].deltaNbBits = deltaNbBits;
  563             symbolTT[s].deltaFindState = s-1;
  564     }   }
  565 
  566     return 0;
  567 }
  568 
  569 /* fake FSE_CTable, for rle input (always same symbol) */
  570 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
  571 {
  572     void* ptr = ct;
  573     U16* tableU16 = ( (U16*) ptr) + 2;
  574     void* FSCTptr = (U32*)ptr + 2;
  575     FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
  576 
  577     /* header */
  578     tableU16[-2] = (U16) 0;
  579     tableU16[-1] = (U16) symbolValue;
  580 
  581     /* Build table */
  582     tableU16[0] = 0;
  583     tableU16[1] = 0;   /* just in case */
  584 
  585     /* Build Symbol Transformation Table */
  586     symbolTT[symbolValue].deltaNbBits = 0;
  587     symbolTT[symbolValue].deltaFindState = 0;
  588 
  589     return 0;
  590 }
  591 
  592 
  593 static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
  594                            const void* src, size_t srcSize,
  595                            const FSE_CTable* ct, const unsigned fast)
  596 {
  597     const BYTE* const istart = (const BYTE*) src;
  598     const BYTE* const iend = istart + srcSize;
  599     const BYTE* ip=iend;
  600 
  601     BIT_CStream_t bitC;
  602     FSE_CState_t CState1, CState2;
  603 
  604     /* init */
  605     if (srcSize <= 2) return 0;
  606     { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
  607       if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
  608 
  609 #define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
  610 
  611     if (srcSize & 1) {
  612         FSE_initCState2(&CState1, ct, *--ip);
  613         FSE_initCState2(&CState2, ct, *--ip);
  614         FSE_encodeSymbol(&bitC, &CState1, *--ip);
  615         FSE_FLUSHBITS(&bitC);
  616     } else {
  617         FSE_initCState2(&CState2, ct, *--ip);
  618         FSE_initCState2(&CState1, ct, *--ip);
  619     }
  620 
  621     /* join to mod 4 */
  622     srcSize -= 2;
  623     if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
  624         FSE_encodeSymbol(&bitC, &CState2, *--ip);
  625         FSE_encodeSymbol(&bitC, &CState1, *--ip);
  626         FSE_FLUSHBITS(&bitC);
  627     }
  628 
  629     /* 2 or 4 encoding per loop */
  630     while ( ip>istart ) {
  631 
  632         FSE_encodeSymbol(&bitC, &CState2, *--ip);
  633 
  634         if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
  635             FSE_FLUSHBITS(&bitC);
  636 
  637         FSE_encodeSymbol(&bitC, &CState1, *--ip);
  638 
  639         if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
  640             FSE_encodeSymbol(&bitC, &CState2, *--ip);
  641             FSE_encodeSymbol(&bitC, &CState1, *--ip);
  642         }
  643 
  644         FSE_FLUSHBITS(&bitC);
  645     }
  646 
  647     FSE_flushCState(&bitC, &CState2);
  648     FSE_flushCState(&bitC, &CState1);
  649     return BIT_closeCStream(&bitC);
  650 }
  651 
  652 size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
  653                            const void* src, size_t srcSize,
  654                            const FSE_CTable* ct)
  655 {
  656     unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
  657 
  658     if (fast)
  659         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
  660     else
  661         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
  662 }
  663 
  664 
  665 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
  666 
  667 #ifndef ZSTD_NO_UNUSED_FUNCTIONS
  668 /* FSE_compress_wksp() :
  669  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
  670  * `wkspSize` size must be `(1<<tableLog)`.
  671  */
  672 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
  673 {
  674     BYTE* const ostart = (BYTE*) dst;
  675     BYTE* op = ostart;
  676     BYTE* const oend = ostart + dstSize;
  677 
  678     unsigned count[FSE_MAX_SYMBOL_VALUE+1];
  679     S16   norm[FSE_MAX_SYMBOL_VALUE+1];
  680     FSE_CTable* CTable = (FSE_CTable*)workSpace;
  681     size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
  682     void* scratchBuffer = (void*)(CTable + CTableSize);
  683     size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
  684 
  685     /* init conditions */
  686     if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
  687     if (srcSize <= 1) return 0;  /* Not compressible */
  688     if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
  689     if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
  690 
  691     /* Scan input and build symbol stats */
  692     {   CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
  693         if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
  694         if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
  695         if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
  696     }
  697 
  698     tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
  699     CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
  700 
  701     /* Write table description header */
  702     {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
  703         op += nc_err;
  704     }
  705 
  706     /* Compress */
  707     CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
  708     {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
  709         if (cSize == 0) return 0;   /* not enough space for compressed data */
  710         op += cSize;
  711     }
  712 
  713     /* check compressibility */
  714     if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
  715 
  716     return op-ostart;
  717 }
  718 
  719 typedef struct {
  720     FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
  721     union {
  722       U32 hist_wksp[HIST_WKSP_SIZE_U32];
  723       BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
  724     } workspace;
  725 } fseWkspMax_t;
  726 
  727 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
  728 {
  729     fseWkspMax_t scratchBuffer;
  730     DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
  731     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
  732     return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
  733 }
  734 
  735 size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
  736 {
  737     return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
  738 }
  739 #endif
  740 
  741 #endif   /* FSE_COMMONDEFS_ONLY */

Cache object: 6da73d3e8e92fb9ff4eb2ccd4c8a89fc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.