FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zfs/lz4.c


    1 /*
    2    LZ4 - Fast LZ compression algorithm
    3    Copyright (C) 2011-present, Yann Collet.
    4 
    5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
    6 
    7    Redistribution and use in source and binary forms, with or without
    8    modification, are permitted provided that the following conditions are
    9    met:
   10 
   11        * Redistributions of source code must retain the above copyright
   12    notice, this list of conditions and the following disclaimer.
   13        * Redistributions in binary form must reproduce the above
   14    copyright notice, this list of conditions and the following disclaimer
   15    in the documentation and/or other materials provided with the
   16    distribution.
   17 
   18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   29 
   30    You can contact the author at :
   31     - LZ4 homepage : http://www.lz4.org
   32     - LZ4 source repository : https://github.com/lz4/lz4
   33 */
   34 
   35 /*
   36  * This file contains unmodified code from lz4 1.9.3's decompressor, plus
   37  * associated macros and constants.
   38  *
   39  * It also contains a couple of defines from the old lz4.c to make things
   40  * fit together smoothly.
   41  *
   42  */
   43 
   44 #include <sys/zfs_context.h>
   45 
   46 int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
   47     int isize, int maxOutputSize);
   48 
   49 /*
   50  * Tuning parameters
   51  */
   52 
   53 /*
    54  * COMPRESSIONLEVEL: Increasing this value improves the compression ratio.
    55  *      Lowering this value reduces memory usage. Reduced memory usage
    56  *      typically improves speed, due to cache effects (ex: L1 32KB for Intel,
   57  *      L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
   58  *      (examples : 12 -> 16KB ; 17 -> 512KB)
   59  */
   60 #define COMPRESSIONLEVEL 12
   61 
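/*
 * Illustrative sketch, not part of the upstream file: the memory-usage
 * formula above maps a level N to a hash table of 2^(N+2) bytes, so the
 * default of 12 gives 2^14 = 16384 bytes (16 KB) and 17 gives 512 KB.
 * The helper below is hypothetical and guarded out of the build; it only
 * spells out that arithmetic.
 */
#if 0
static size_t lz4_hashtable_bytes(unsigned level)
{
    /* 2^(N+2) bytes, e.g. 12 -> 16 KB, 17 -> 512 KB */
    return ((size_t)1 << (level + 2));
}
#endif
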
   62 /*
    63  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
    64  *      algorithm skip data segments considered "incompressible" sooner.
    65  *      This may decrease the compression ratio dramatically, but will be
    66  *      faster on incompressible data. Increasing this value will make
    67  *      the algorithm search longer before declaring a segment "incompressible".
    68  *      This could improve compression a bit, but will be slower on
    69  *      incompressible data. The default value (6) is recommended.
   70  */
   71 #define NOTCOMPRESSIBLE_CONFIRMATION 6
   72 
   73 /*
   74  * Little Endian or Big Endian?
   75  * Note: overwrite the below #define if you know your architecture endianness.
   76  */
   77 #if defined(_ZFS_BIG_ENDIAN)
   78 #define LZ4_BIG_ENDIAN 1
   79 #else
   80 /*
    81  * Little Endian assumed. PDP Endian and other very rare endian formats
    82  * are unsupported.
   83  */
   84 #undef LZ4_BIG_ENDIAN
   85 #endif
   86 
   87 /*-************************************
   88 *  CPU Feature Detection
   89 **************************************/
   90 /* LZ4_FORCE_MEMORY_ACCESS
   91  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
   92  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
    93  * The switch below allows selecting a different access method for improved performance.
    94  * Method 0 (default) : use `memcpy()`. Safe and portable.
    95  * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
    96  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
    97  * Method 2 : direct access. This method is portable but violates the C standard.
    98  *            It can generate buggy code on targets whose assembly generation depends on alignment.
    99  *            But in some circumstances, it's the only known way to get the most performance (i.e., GCC + ARMv6).
  100  * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
  101  * Prefer these methods in priority order (0 > 1 > 2)
  102  */
  103 #ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
  104 #  if defined(__GNUC__) && \
  105   ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
  106   || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
  107 #    define LZ4_FORCE_MEMORY_ACCESS 2
  108 #  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
  109 #    define LZ4_FORCE_MEMORY_ACCESS 1
  110 #  endif
  111 #endif
  112 
  113 /*
  114  * LZ4_FORCE_SW_BITCOUNT
  115  * Define this parameter if your target system or compiler does not support hardware bit count
  116  */
  117 /*
  118  * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
  119  * kernel
  120  * Linux : we can use GCC's __builtin_ctz family of builtins in the
  121  * kernel
  122  */
  123 #undef  LZ4_FORCE_SW_BITCOUNT
  124 #if defined(__sunos__)
  125 #define LZ4_FORCE_SW_BITCOUNT
  126 #endif
  127 
  128 /*
  129  * Compiler Options
  130  */
  131 /* Disable restrict */
  132 #define restrict
  133 
  134 /*
  135  * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
  136  * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
  137  */
  138 #ifdef GCC_VERSION
  139 #undef GCC_VERSION
  140 #endif
  141 
  142 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
  143 
  144 #ifndef LZ4_FORCE_INLINE
  145 #  ifdef _MSC_VER    /* Visual Studio */
  146 #    define LZ4_FORCE_INLINE static __forceinline
  147 #  else
  148 #    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
  149 #      ifdef __GNUC__
  150 #        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
  151 #      else
  152 #        define LZ4_FORCE_INLINE static inline
  153 #      endif
  154 #    else
  155 #      define LZ4_FORCE_INLINE static
  156 #    endif /* __STDC_VERSION__ */
  157 #  endif  /* _MSC_VER */
  158 #endif /* LZ4_FORCE_INLINE */
  159 
  160 /* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
  161  * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
  162  * together with a simple 8-byte copy loop as a fall-back path.
  163  * However, this optimization hurts the decompression speed by >30%,
  164  * because the execution does not go to the optimized loop
  165  * for typical compressible data, and all of the preamble checks
  166  * before going to the fall-back path become useless overhead.
  167  * This optimization happens only with the -O3 flag, and -O2 generates
  168  * a simple 8-byte copy loop.
  169  * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
  170  * functions are annotated with __attribute__((optimize("O2"))),
  171  * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
  172  * of LZ4_wildCopy8 does not affect the compression speed.
  173  */
  174 #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
  175 #  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
  176 #  undef LZ4_FORCE_INLINE
  177 #  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
  178 #else
  179 #  define LZ4_FORCE_O2
  180 #endif
  181 
  182 #ifndef expect
  183 #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
  184 #  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
  185 #else
  186 #  define expect(expr,value)    (expr)
  187 #endif
  188 #endif
  189 
  190 #ifndef likely
  191 #define likely(expr)    expect((expr) != 0, 1)
  192 #endif
  193 
  194 #ifndef unlikely
  195 #define unlikely(expr)  expect((expr) != 0, 0)
  196 #endif
  197 
  198 #ifndef _KERNEL
  199 #include <stdlib.h>   /* malloc, calloc, free */
  200 #include <string.h>   /* memset, memcpy */
  201 #endif
  202 #define ALLOC(s)          malloc(s)
  203 #define ALLOC_AND_ZERO(s) calloc(1,s)
  204 #define FREEMEM(p)        free(p)
  205 
  206 #define MEM_INIT(p,v,s)   memset((p),(v),(s))
  207 
  208 
  209 /*-************************************
  210 *  Common Constants
  211 **************************************/
  212 #define MINMATCH 4
  213 
  214 #define WILDCOPYLENGTH 8
  215 #define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
  216 #define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
  217 #define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
  218 #define FASTLOOP_SAFE_DISTANCE 64
  219 
  220 #define KB *(1 <<10)
  221 #define MB *(1 <<20)
  222 #define GB *(1U<<30)
  223 
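/*
 * Illustrative note, not part of the upstream file: KB, MB and GB above are
 * postfix multipliers, so a size written as "64 KB" expands to
 * "64 *(1 <<10)" (65536). The decoder below relies on this in expressions
 * such as "dictSize < (int)(64 KB)". A guarded, hypothetical sketch of the
 * expansion:
 */
#if 0
static const int example_sixty_four_kb = (64 KB);  /* 64 * (1 << 10) == 65536   */
static const int example_two_mb        = (2 MB);   /* 2 * (1 << 20)  == 2097152 */
#endif
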
  224 #ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
  225 #  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
  226 #endif
  227 
  228 #define LZ4_DISTANCE_ABSOLUTE_MAX 65535
  229 #if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
  230 #  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
  231 #endif
  232 
  233 #define ML_BITS  4
  234 #define ML_MASK  ((1U<<ML_BITS)-1)
  235 #define RUN_BITS (8-ML_BITS)
  236 #define RUN_MASK ((1U<<RUN_BITS)-1)
  237 
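/*
 * Illustrative sketch, not part of the upstream file: every LZ4 sequence
 * begins with a one-byte token. Its high RUN_BITS (4) bits hold the literal
 * length and its low ML_BITS (4) bits hold the match-length field; a field
 * equal to its mask (15) means additional length bytes follow. For example,
 * token 0x4B splits into a literal length of 4 and a match-length field of
 * 11 (final match length 11 + MINMATCH = 15). The helper below is
 * hypothetical and guarded out of the build.
 */
#if 0
static void lz4_split_token(unsigned token,
    unsigned *literal_len, unsigned *match_field)
{
    *literal_len = token >> ML_BITS;    /* 0x4B >> 4  == 4  */
    *match_field = token & ML_MASK;     /* 0x4B & 0xF == 11 */
}
#endif
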
  238 #define DEBUGLOG(l, ...) {}    /* disabled */
  239 
  240 #ifndef assert
  241 #define assert ASSERT
  242 #endif
  243 
  244 /*-************************************
  245 *  Types
  246 **************************************/
  247 #ifndef _KERNEL
  248 #include <limits.h>
  249 #endif
  250 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
  251 #ifndef _KERNEL
  252 #include <stdint.h>
  253 #endif
  254   typedef  uint8_t BYTE;
  255   typedef uint16_t U16;
  256   typedef uint32_t U32;
  257   typedef  int32_t S32;
  258   typedef uint64_t U64;
  259   typedef uintptr_t uptrval;
  260 #else
  261 # if UINT_MAX != 4294967295UL
  262 #   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
  263 # endif
  264   typedef unsigned char       BYTE;
  265   typedef unsigned short      U16;
  266   typedef unsigned int        U32;
  267   typedef   signed int        S32;
  268   typedef unsigned long long  U64;
  269   typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
  270 #endif
  271 
  272 #if defined(__x86_64__)
  273   typedef U64    reg_t;   /* 64-bits in x32 mode */
  274 #else
  275   typedef size_t reg_t;   /* 32-bits in x32 mode */
  276 #endif
  277 
  278 typedef enum {
  279     notLimited = 0,
  280     limitedOutput = 1,
  281     fillOutput = 2
  282 } limitedOutput_directive;
  283 
  284 
  285 /*-************************************
  286 *  Reading and writing into memory
  287 **************************************/
  288 
  289 /**
  290  * LZ4 relies on memcpy with a constant size being inlined. In freestanding
  291  * environments, the compiler can't assume the implementation of memcpy() is
  292  * standard compliant, so it can't apply its specialized memcpy() inlining
  293  * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
  294  * memcpy() as if it were standard compliant, so it can inline it in freestanding
  295  * environments. This is needed when decompressing the Linux Kernel, for example.
  296  */
  297 #if defined(__GNUC__) && (__GNUC__ >= 4)
  298 #define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
  299 #else
  300 #define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
  301 #endif
  302 
  303 static unsigned LZ4_isLittleEndian(void)
  304 {
  305     const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
  306     return one.c[0];
  307 }
  308 
  309 
  310 #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
  311 /* lie to the compiler about data alignment; use with caution */
  312 
  313 static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
  314 
  315 static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
  316 static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
  317 
  318 #elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
  319 
  320 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
  321 /* currently only defined for gcc and icc */
  322 typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;
  323 
  324 static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
  325 
  326 static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
  327 
  328 #else  /* safe and portable access using memcpy() */
  329 
  330 static U16 LZ4_read16(const void* memPtr)
  331 {
  332     U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
  333 }
  334 
  335 static void LZ4_write32(void* memPtr, U32 value)
  336 {
  337     LZ4_memcpy(memPtr, &value, sizeof(value));
  338 }
  339 
  340 #endif /* LZ4_FORCE_MEMORY_ACCESS */
  341 
  342 static U16 LZ4_readLE16(const void* memPtr)
  343 {
  344     if (LZ4_isLittleEndian()) {
  345         return LZ4_read16(memPtr);
  346     } else {
  347         const BYTE* p = (const BYTE*)memPtr;
  348         return (U16)((U16)p[0] + (p[1]<<8));
  349     }
  350 }
  351 
  352 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
  353 LZ4_FORCE_INLINE
  354 void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
  355 {
  356     BYTE* d = (BYTE*)dstPtr;
  357     const BYTE* s = (const BYTE*)srcPtr;
  358     BYTE* const e = (BYTE*)dstEnd;
  359 
  360     do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
  361 }
  362 
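/*
 * Illustrative sketch, not part of the upstream file: because LZ4_wildCopy8
 * copies in whole 8-byte chunks, it can write a few bytes past dstEnd (see
 * the comment above). Callers therefore use it only when enough slack
 * remains in the output buffer and fall back to an exact byte copy
 * otherwise, as the decoder below does near the end of a block. A guarded
 * sketch of that pattern, with hypothetical names:
 */
#if 0
static void copy_literals_example(BYTE *op, const BYTE *ip, size_t len,
    BYTE *oend)
{
    BYTE *cpy = op + len;

    if (cpy <= oend - WILDCOPYLENGTH) {
        /* fast path: slack available, may overshoot cpy */
        LZ4_wildCopy8(op, ip, cpy);
    } else {
        /* safe path: exact copy, never writes past cpy */
        while (op < cpy)
            *op++ = *ip++;
    }
}
#endif
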
  363 static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
  364 static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
  365 
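/*
 * Illustrative note, not part of the upstream file: matches whose offset is
 * smaller than 8 overlap the bytes currently being written; offset 1, for
 * instance, just repeats the previous byte. inc32table and dec64table let
 * the hot paths below reproduce that behaviour with a pair of 4-byte copies
 * and a pointer adjustment instead of a byte loop. The reference semantics
 * are simply the following (hypothetical, guarded out of the build):
 */
#if 0
static void overlap_copy_reference(BYTE *op, size_t offset, size_t length)
{
    /* naive per-byte copy from "offset" bytes behind the write cursor */
    const BYTE *match = op - offset;

    while (length--)
        *op++ = *match++;
}
#endif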
  366 
  367 #ifndef LZ4_FAST_DEC_LOOP
  368 #  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
  369 #    define LZ4_FAST_DEC_LOOP 1
  370 #  elif defined(__aarch64__) && !defined(__clang__)
  371      /* On aarch64, we disable this optimization for clang because on certain
  372       * mobile chipsets, performance is reduced with clang. For information
  373       * refer to https://github.com/lz4/lz4/pull/707 */
  374 #    define LZ4_FAST_DEC_LOOP 1
  375 #  else
  376 #    define LZ4_FAST_DEC_LOOP 0
  377 #  endif
  378 #endif
  379 
  380 #if LZ4_FAST_DEC_LOOP
  381 
  382 LZ4_FORCE_INLINE void
  383 LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
  384 {
  385     assert(srcPtr + offset == dstPtr);
  386     if (offset < 8) {
  387         LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
  388         dstPtr[0] = srcPtr[0];
  389         dstPtr[1] = srcPtr[1];
  390         dstPtr[2] = srcPtr[2];
  391         dstPtr[3] = srcPtr[3];
  392         srcPtr += inc32table[offset];
  393         LZ4_memcpy(dstPtr+4, srcPtr, 4);
  394         srcPtr -= dec64table[offset];
  395         dstPtr += 8;
  396     } else {
  397         LZ4_memcpy(dstPtr, srcPtr, 8);
  398         dstPtr += 8;
  399         srcPtr += 8;
  400     }
  401 
  402     LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
  403 }
  404 
  405 /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
   406  * this version copies 16 bytes twice (instead of 32 bytes at once)
  407  * because it must be compatible with offsets >= 16. */
  408 LZ4_FORCE_INLINE void
  409 LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
  410 {
  411     BYTE* d = (BYTE*)dstPtr;
  412     const BYTE* s = (const BYTE*)srcPtr;
  413     BYTE* const e = (BYTE*)dstEnd;
  414 
  415     do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
  416 }
  417 
  418 /* LZ4_memcpy_using_offset()  presumes :
  419  * - dstEnd >= dstPtr + MINMATCH
   420  * - there are at least 8 bytes available to write after dstEnd */
  421 LZ4_FORCE_INLINE void
  422 LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
  423 {
  424     BYTE v[8];
  425 
  426     assert(dstEnd >= dstPtr + MINMATCH);
  427 
  428     switch(offset) {
  429     case 1:
  430         MEM_INIT(v, *srcPtr, 8);
  431         break;
  432     case 2:
  433         LZ4_memcpy(v, srcPtr, 2);
  434         LZ4_memcpy(&v[2], srcPtr, 2);
  435         LZ4_memcpy(&v[4], v, 4);
  436         break;
  437     case 4:
  438         LZ4_memcpy(v, srcPtr, 4);
  439         LZ4_memcpy(&v[4], srcPtr, 4);
  440         break;
  441     default:
  442         LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
  443         return;
  444     }
  445 
  446     LZ4_memcpy(dstPtr, v, 8);
  447     dstPtr += 8;
  448     while (dstPtr < dstEnd) {
  449         LZ4_memcpy(dstPtr, v, 8);
  450         dstPtr += 8;
  451     }
  452 }
  453 #endif
  454 
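/*
 * Illustrative note, not part of the upstream file: for the small offsets
 * handled by LZ4_memcpy_using_offset() above, the 8-byte staging buffer v
 * holds one full period of the repeating match. With offset 2 and the two
 * preceding output bytes "AB", v becomes "ABABABAB", and stamping it in
 * 8-byte steps reproduces the overlapped match. A hypothetical, guarded
 * sketch of that expansion:
 */
#if 0
static void offset2_pattern_example(void)
{
    const BYTE prev[2] = { 'A', 'B' };  /* last two bytes already output */
    BYTE v[8];

    LZ4_memcpy(v, prev, 2);
    LZ4_memcpy(&v[2], prev, 2);
    LZ4_memcpy(&v[4], v, 4);            /* v is now "ABABABAB" */
}
#endif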
  455 
  456 /*-************************************
  457 *  Local Structures and types
  458 **************************************/
  459 typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
  460 
  461 /**
  462  * This enum distinguishes several different modes of accessing previous
  463  * content in the stream.
  464  *
  465  * - noDict        : There is no preceding content.
   466  * - withPrefix64k : Table entries up to ctx->dictSize before the current
   467  *                   blob being compressed are valid and refer to the preceding
   468  *                   content (of length ctx->dictSize), which is available
   469  *                   contiguously in memory immediately preceding the content
   470  *                   currently being compressed.
  471  * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
  472  *                   else in memory, starting at ctx->dictionary with length
  473  *                   ctx->dictSize.
  474  * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
  475  *                   content is in a separate context, pointed to by
  476  *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
  477  *                   entries in the current context that refer to positions
  478  *                   preceding the beginning of the current compression are
  479  *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
  480  *                   ->dictSize describe the location and size of the preceding
  481  *                   content, and matches are found by looking in the ctx
  482  *                   ->dictCtx->hashTable.
  483  */
  484 typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
  485 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
  486 
  487 /*-*******************************
  488  *  Decompression functions
  489  ********************************/
  490 
  491 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
  492 typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
  493 
  494 typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
  495 
  496 LZ4_FORCE_INLINE unsigned
  497 read_variable_length(const BYTE**ip, const BYTE* lencheck,
  498                      int loop_check, int initial_check,
  499                      variable_length_error* error)
  500 {
  501     U32 length = 0;
  502     U32 s;
  503     if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
  504         *error = initial_error;
  505         return length;
  506     }
  507     do {
  508         s = **ip;
  509         (*ip)++;
  510         length += s;
  511         if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
  512             *error = loop_error;
  513             return length;
  514         }
  515     } while (s==255);
  516 
  517     return length;
  518 }
  519 
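/*
 * Illustrative sketch, not part of the upstream file: when a token's length
 * field equals its mask (15), the real length continues in the following
 * bytes, each adding 0..255, and stops at the first byte below 255. The
 * byte pair 255, 12 therefore adds 267, so a literal-length field of 15
 * followed by those bytes encodes 15 + 255 + 12 = 282 literals. A guarded,
 * hypothetical walk-through using the helper above:
 */
#if 0
static unsigned decode_length_example(void)
{
    static const BYTE extra[] = { 255, 12, 0 /* next token, never read */ };
    const BYTE *ip = extra;
    variable_length_error err = ok;

    /* 15 from the token field plus the bytes consumed here */
    return (15 + read_variable_length(&ip, extra + sizeof (extra),
        1, 1, &err));                   /* == 282, err stays ok */
}
#endif
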
  520 #define LZ4_STATIC_ASSERT(c)    ASSERT(c)
  521 
  522 
  523 /*! LZ4_decompress_generic() :
  524  *  This generic decompression function covers all use cases.
  525  *  It shall be instantiated several times, using different sets of directives.
   526  *  Note that it is important for performance that this function really gets inlined,
  527  *  in order to remove useless branches during compilation optimization.
  528  */
  529 LZ4_FORCE_INLINE int
  530 LZ4_decompress_generic(
  531                  const char* const src,
  532                  char* const dst,
  533                  int srcSize,
  534                  int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
  535 
  536                  endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
  537                  earlyEnd_directive partialDecoding,  /* full, partial */
  538                  dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
  539                  const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
  540                  const BYTE* const dictStart,  /* only if dict==usingExtDict */
  541                  const size_t dictSize         /* note : = 0 if noDict */
  542                  )
  543 {
  544     if ((src == NULL) || (outputSize < 0)) { return -1; }
  545 
  546     {   const BYTE* ip = (const BYTE*) src;
  547         const BYTE* const iend = ip + srcSize;
  548 
  549         BYTE* op = (BYTE*) dst;
  550         BYTE* const oend = op + outputSize;
  551         BYTE* cpy;
  552 
  553         const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
  554 
  555         const int safeDecode = (endOnInput==endOnInputSize);
  556         const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
  557 
  558 
  559         /* Set up the "end" pointers for the shortcut. */
  560         const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
  561         const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
  562 
  563         const BYTE* match;
  564         size_t offset;
  565         unsigned token;
  566         size_t length;
  567 
  568 
  569         DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
  570 
  571         /* Special cases */
  572         assert(lowPrefix <= op);
  573         if ((endOnInput) && (unlikely(outputSize==0))) {
  574             /* Empty output buffer */
  575             if (partialDecoding) return 0;
  576             return ((srcSize==1) && (*ip==0)) ? 0 : -1;
  577         }
  578         if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
  579         if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
  580 
  581         /* Currently the fast loop shows a regression on Qualcomm ARM chips. */
  582 #if LZ4_FAST_DEC_LOOP
  583         if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
  584             DEBUGLOG(6, "skip fast decode loop");
  585             goto safe_decode;
  586         }
  587 
  588         /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
  589         while (1) {
  590             /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
  591             assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
  592             if (endOnInput) { assert(ip < iend); }
  593             token = *ip++;
  594             length = token >> ML_BITS;  /* literal length */
  595 
  596             assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
  597 
  598             /* decode literal length */
  599             if (length == RUN_MASK) {
  600                 variable_length_error error = ok;
  601                 length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
  602                 if (error == initial_error) { goto _output_error; }
  603                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
  604                 if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
  605 
  606                 /* copy literals */
  607                 cpy = op+length;
  608                 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
  609                 if (endOnInput) {  /* LZ4_decompress_safe() */
  610                     if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
  611                     LZ4_wildCopy32(op, ip, cpy);
  612                 } else {   /* LZ4_decompress_fast() */
  613                     if (cpy>oend-8) { goto safe_literal_copy; }
  614                     LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
  615                                                  * it doesn't know input length, and only relies on end-of-block properties */
  616                 }
  617                 ip += length; op = cpy;
  618             } else {
  619                 cpy = op+length;
  620                 if (endOnInput) {  /* LZ4_decompress_safe() */
  621                     DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
  622                     /* We don't need to check oend, since we check it once for each loop below */
  623                     if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
  624                     /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
  625                     LZ4_memcpy(op, ip, 16);
  626                 } else {  /* LZ4_decompress_fast() */
  627                     /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
  628                      * it doesn't know input length, and relies on end-of-block properties */
  629                     LZ4_memcpy(op, ip, 8);
  630                     if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
  631                 }
  632                 ip += length; op = cpy;
  633             }
  634 
  635             /* get offset */
  636             offset = LZ4_readLE16(ip); ip+=2;
  637             match = op - offset;
  638             assert(match <= op);
  639 
  640             /* get matchlength */
  641             length = token & ML_MASK;
  642 
  643             if (length == ML_MASK) {
  644                 variable_length_error error = ok;
  645                 if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
  646                 length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
  647                 if (error != ok) { goto _output_error; }
  648                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
  649                 length += MINMATCH;
  650                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
  651                     goto safe_match_copy;
  652                 }
  653             } else {
  654                 length += MINMATCH;
  655                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
  656                     goto safe_match_copy;
  657                 }
  658 
  659                 /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
  660                 if ((dict == withPrefix64k) || (match >= lowPrefix)) {
  661                     if (offset >= 8) {
  662                         assert(match >= lowPrefix);
  663                         assert(match <= op);
  664                         assert(op + 18 <= oend);
  665 
  666                         LZ4_memcpy(op, match, 8);
  667                         LZ4_memcpy(op+8, match+8, 8);
  668                         LZ4_memcpy(op+16, match+16, 2);
  669                         op += length;
  670                         continue;
  671             }   }   }
  672 
  673             if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
  674             /* match starting within external dictionary */
  675             if ((dict==usingExtDict) && (match < lowPrefix)) {
  676                 if (unlikely(op+length > oend-LASTLITERALS)) {
  677                     if (partialDecoding) {
  678                         DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
  679                         length = MIN(length, (size_t)(oend-op));
  680                     } else {
  681                         goto _output_error;  /* end-of-block condition violated */
  682                 }   }
  683 
  684                 if (length <= (size_t)(lowPrefix-match)) {
  685                     /* match fits entirely within external dictionary : just copy */
  686                     memmove(op, dictEnd - (lowPrefix-match), length);
  687                     op += length;
  688                 } else {
  689                     /* match stretches into both external dictionary and current block */
  690                     size_t const copySize = (size_t)(lowPrefix - match);
  691                     size_t const restSize = length - copySize;
  692                     LZ4_memcpy(op, dictEnd - copySize, copySize);
  693                     op += copySize;
  694                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
  695                         BYTE* const endOfMatch = op + restSize;
  696                         const BYTE* copyFrom = lowPrefix;
  697                         while (op < endOfMatch) { *op++ = *copyFrom++; }
  698                     } else {
  699                         LZ4_memcpy(op, lowPrefix, restSize);
  700                         op += restSize;
  701                 }   }
  702                 continue;
  703             }
  704 
  705             /* copy match within block */
  706             cpy = op + length;
  707 
  708             assert((op <= oend) && (oend-op >= 32));
  709             if (unlikely(offset<16)) {
  710                 LZ4_memcpy_using_offset(op, match, cpy, offset);
  711             } else {
  712                 LZ4_wildCopy32(op, match, cpy);
  713             }
  714 
  715             op = cpy;   /* wildcopy correction */
  716         }
  717     safe_decode:
  718 #endif
  719 
  720         /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
  721         while (1) {
  722             token = *ip++;
  723             length = token >> ML_BITS;  /* literal length */
  724 
  725             assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
  726 
  727             /* A two-stage shortcut for the most common case:
  728              * 1) If the literal length is 0..14, and there is enough space,
  729              * enter the shortcut and copy 16 bytes on behalf of the literals
  730              * (in the fast mode, only 8 bytes can be safely copied this way).
  731              * 2) Further if the match length is 4..18, copy 18 bytes in a similar
  732              * manner; but we ensure that there's enough space in the output for
  733              * those 18 bytes earlier, upon entering the shortcut (in other words,
  734              * there is a combined check for both stages).
  735              */
  736             if ( (endOnInput ? length != RUN_MASK : length <= 8)
  737                 /* strictly "less than" on input, to re-enter the loop with at least one byte */
  738               && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
  739                 /* Copy the literals */
  740                 LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
  741                 op += length; ip += length;
  742 
  743                 /* The second stage: prepare for match copying, decode full info.
  744                  * If it doesn't work out, the info won't be wasted. */
  745                 length = token & ML_MASK; /* match length */
  746                 offset = LZ4_readLE16(ip); ip += 2;
  747                 match = op - offset;
  748                 assert(match <= op); /* check overflow */
  749 
  750                 /* Do not deal with overlapping matches. */
  751                 if ( (length != ML_MASK)
  752                   && (offset >= 8)
  753                   && (dict==withPrefix64k || match >= lowPrefix) ) {
  754                     /* Copy the match. */
  755                     LZ4_memcpy(op + 0, match + 0, 8);
  756                     LZ4_memcpy(op + 8, match + 8, 8);
  757                     LZ4_memcpy(op +16, match +16, 2);
  758                     op += length + MINMATCH;
  759                     /* Both stages worked, load the next token. */
  760                     continue;
  761                 }
  762 
  763                 /* The second stage didn't work out, but the info is ready.
  764                  * Propel it right to the point of match copying. */
  765                 goto _copy_match;
  766             }
  767 
  768             /* decode literal length */
  769             if (length == RUN_MASK) {
  770                 variable_length_error error = ok;
  771                 length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
  772                 if (error == initial_error) { goto _output_error; }
  773                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
  774                 if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
  775             }
  776 
  777             /* copy literals */
  778             cpy = op+length;
  779 #if LZ4_FAST_DEC_LOOP
  780         safe_literal_copy:
  781 #endif
  782             LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
  783             if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
  784               || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
  785             {
  786                 /* We've either hit the input parsing restriction or the output parsing restriction.
  787                  * In the normal scenario, decoding a full block, it must be the last sequence,
  788                  * otherwise it's an error (invalid input or dimensions).
  789                  * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
  790                  */
  791                 if (partialDecoding) {
  792                     /* Since we are partial decoding we may be in this block because of the output parsing
  793                      * restriction, which is not valid since the output buffer is allowed to be undersized.
  794                      */
  795                     assert(endOnInput);
  796                     DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
  797                     DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
  798                     DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
  799                     DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
  800                     /* Finishing in the middle of a literals segment,
  801                      * due to lack of input.
  802                      */
  803                     if (ip+length > iend) {
  804                         length = (size_t)(iend-ip);
  805                         cpy = op + length;
  806                     }
  807                     /* Finishing in the middle of a literals segment,
  808                      * due to lack of output space.
  809                      */
  810                     if (cpy > oend) {
  811                         cpy = oend;
  812                         assert(op<=oend);
  813                         length = (size_t)(oend-op);
  814                     }
  815                 } else {
  816                     /* We must be on the last sequence because of the parsing limitations so check
  817                      * that we exactly regenerate the original size (must be exact when !endOnInput).
  818                      */
  819                     if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
  820                      /* We must be on the last sequence (or invalid) because of the parsing limitations
  821                       * so check that we exactly consume the input and don't overrun the output buffer.
  822                       */
  823                     if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
  824                         DEBUGLOG(6, "should have been last run of literals")
  825                         DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
  826                         DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
  827                         goto _output_error;
  828                     }
  829                 }
  830                 memmove(op, ip, length);  /* supports overlapping memory regions; only matters for in-place decompression scenarios */
  831                 ip += length;
  832                 op += length;
  833                 /* Necessarily EOF when !partialDecoding.
  834                  * When partialDecoding, it is EOF if we've either
  835                  * filled the output buffer or
  836                  * can't proceed with reading an offset for following match.
  837                  */
  838                 if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
  839                     break;
  840                 }
  841             } else {
  842                 LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
  843                 ip += length; op = cpy;
  844             }
  845 
  846             /* get offset */
  847             offset = LZ4_readLE16(ip); ip+=2;
  848             match = op - offset;
  849 
  850             /* get matchlength */
  851             length = token & ML_MASK;
  852 
  853     _copy_match:
  854             if (length == ML_MASK) {
  855               variable_length_error error = ok;
  856               length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
  857               if (error != ok) goto _output_error;
  858                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
  859             }
  860             length += MINMATCH;
  861 
  862 #if LZ4_FAST_DEC_LOOP
  863         safe_match_copy:
  864 #endif
  865             if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
  866             /* match starting within external dictionary */
  867             if ((dict==usingExtDict) && (match < lowPrefix)) {
  868                 if (unlikely(op+length > oend-LASTLITERALS)) {
  869                     if (partialDecoding) length = MIN(length, (size_t)(oend-op));
  870                     else goto _output_error;   /* doesn't respect parsing restriction */
  871                 }
  872 
  873                 if (length <= (size_t)(lowPrefix-match)) {
  874                     /* match fits entirely within external dictionary : just copy */
  875                     memmove(op, dictEnd - (lowPrefix-match), length);
  876                     op += length;
  877                 } else {
  878                     /* match stretches into both external dictionary and current block */
  879                     size_t const copySize = (size_t)(lowPrefix - match);
  880                     size_t const restSize = length - copySize;
  881                     LZ4_memcpy(op, dictEnd - copySize, copySize);
  882                     op += copySize;
  883                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
  884                         BYTE* const endOfMatch = op + restSize;
  885                         const BYTE* copyFrom = lowPrefix;
  886                         while (op < endOfMatch) *op++ = *copyFrom++;
  887                     } else {
  888                         LZ4_memcpy(op, lowPrefix, restSize);
  889                         op += restSize;
  890                 }   }
  891                 continue;
  892             }
  893             assert(match >= lowPrefix);
  894 
  895             /* copy match within block */
  896             cpy = op + length;
  897 
  898             /* partialDecoding : may end anywhere within the block */
  899             assert(op<=oend);
  900             if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
  901                 size_t const mlen = MIN(length, (size_t)(oend-op));
  902                 const BYTE* const matchEnd = match + mlen;
  903                 BYTE* const copyEnd = op + mlen;
  904                 if (matchEnd > op) {   /* overlap copy */
  905                     while (op < copyEnd) { *op++ = *match++; }
  906                 } else {
  907                     LZ4_memcpy(op, match, mlen);
  908                 }
  909                 op = copyEnd;
  910                 if (op == oend) { break; }
  911                 continue;
  912             }
  913 
  914             if (unlikely(offset<8)) {
  915                 LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
  916                 op[0] = match[0];
  917                 op[1] = match[1];
  918                 op[2] = match[2];
  919                 op[3] = match[3];
  920                 match += inc32table[offset];
  921                 LZ4_memcpy(op+4, match, 4);
  922                 match -= dec64table[offset];
  923             } else {
  924                 LZ4_memcpy(op, match, 8);
  925                 match += 8;
  926             }
  927             op += 8;
  928 
  929             if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
  930                 BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
  931                 if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
  932                 if (op < oCopyLimit) {
  933                     LZ4_wildCopy8(op, match, oCopyLimit);
  934                     match += oCopyLimit - op;
  935                     op = oCopyLimit;
  936                 }
  937                 while (op < cpy) { *op++ = *match++; }
  938             } else {
  939                 LZ4_memcpy(op, match, 8);
  940                 if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
  941             }
  942             op = cpy;   /* wildcopy correction */
  943         }
  944 
  945         /* end of decoding */
  946         if (endOnInput) {
  947             DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
  948            return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
  949        } else {
  950            return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
  951        }
  952 
  953         /* Overflow error detected */
  954     _output_error:
  955         return (int) (-(((const char*)ip)-src))-1;
  956     }
  957 }
  958 
  959 /*
  960  * LZ4_uncompress_unknownOutputSize() :
   961  *      isize  : the input size, i.e. the compressed size
   962  *      maxOutputSize : the size of the destination buffer (which must
   963  *              already be allocated)
   964  *      return : the number of bytes decoded in the destination buffer
   965  *              (necessarily <= maxOutputSize). If the source stream is
   966  *              malformed, the function will stop decoding and return a
   967  *              negative result, indicating the byte position of the faulty
   968  *              input. This function never writes beyond dest +
   969  *              maxOutputSize, and is therefore protected against malicious
   970  *              data packets.
   971  *      note   : The destination buffer must already be allocated.
  972  *              This version is slightly slower than real_LZ4_uncompress()
  973  *
  974  */
  975 
  976 /*
  977  * Note: In upstream code, LZ4_uncompress_unknownOutputSize is now a legacy
  978  *       wrapper for LZ4_decompress_safe which is a wrapper for
  979  *       LZ4_decompress_generic; this wrapper flattens that, rather than
  980  *       rewriting the callers.
  981  */
  982 int LZ4_uncompress_unknownOutputSize(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
  983 {
  984     return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
  985                                   endOnInputSize, decode_full_block, noDict,
  986                                   (BYTE*)dest, NULL, 0);
  987 }
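
/*
 * Illustrative sketch, not part of the upstream file: a minimal example of
 * how a caller might use the entry point above. The names and sizes are
 * hypothetical; the only contract relied on is the one documented above:
 * the return value is the number of bytes decoded, or a negative value on
 * malformed input, and the function never writes beyond
 * dest + maxOutputSize.
 */
#if 0
static int
decompress_record_example(const char *src, int src_len, char *dst,
    int dst_capacity)
{
    int n = LZ4_uncompress_unknownOutputSize(src, dst, src_len,
        dst_capacity);

    if (n < 0)
        return (-1);    /* corrupt or truncated input */
    return (n);         /* number of bytes produced in dst */
}
#endif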

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.