FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zfs/lz4.c


    1 /*
    2    LZ4 - Fast LZ compression algorithm
    3    Copyright (C) 2011-present, Yann Collet.
    4 
    5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
    6 
    7    Redistribution and use in source and binary forms, with or without
    8    modification, are permitted provided that the following conditions are
    9    met:
   10 
   11        * Redistributions of source code must retain the above copyright
   12    notice, this list of conditions and the following disclaimer.
   13        * Redistributions in binary form must reproduce the above
   14    copyright notice, this list of conditions and the following disclaimer
   15    in the documentation and/or other materials provided with the
   16    distribution.
   17 
   18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   29 
   30    You can contact the author at :
   31     - LZ4 homepage : http://www.lz4.org
   32     - LZ4 source repository : https://github.com/lz4/lz4
   33 */
   34 
   35 /*
   36  * This file contains unmodified code from lz4 1.9.3's decompressor, plus
   37  * associated macros and constants.
   38  *
   39  * It also contains a couple of defines from the old lz4.c to make things
   40  * fit together smoothly.
   41  *
   42  */
   43 
   44 #include <sys/zfs_context.h>
   45 
   46 int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
   47     int isize, int maxOutputSize);
   48 
   49 /*
   50  * Tuning parameters
   51  */
   52 
   53 /*
    54  * COMPRESSIONLEVEL: Increasing this value improves the compression ratio.
    55  *      Lowering this value reduces memory usage. Reduced memory usage
    56  *      typically improves speed, due to cache effects (ex: L1 32KB for Intel,
   57  *      L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
   58  *      (examples : 12 -> 16KB ; 17 -> 512KB)
   59  */
   60 #define COMPRESSIONLEVEL 12
   61 
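/*
 * Illustrative sketch, not part of the upstream file: the memory-usage
 * formula above maps a level N to a hash table of 2^(N+2) bytes, so the
 * default of 12 gives 2^14 = 16384 bytes (16 KB) and 17 gives 512 KB.
 * The helper below is hypothetical and guarded out of the build; it only
 * spells out that arithmetic.
 */
#if 0
static size_t lz4_hashtable_bytes(unsigned level)
{
    /* 2^(N+2) bytes, e.g. 12 -> 16 KB, 17 -> 512 KB */
    return ((size_t)1 << (level + 2));
}
#endif
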
   62 /*
    63  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
    64  *      algorithm skip data segments considered "incompressible" sooner.
    65  *      This may decrease the compression ratio dramatically, but will be
    66  *      faster on incompressible data. Increasing this value will make
    67  *      the algorithm search longer before declaring a segment "incompressible".
    68  *      This could improve compression a bit, but will be slower on
    69  *      incompressible data. The default value (6) is recommended.
   70  */
   71 #define NOTCOMPRESSIBLE_CONFIRMATION 6
   72 
   73 /*
   74  * Little Endian or Big Endian?
   75  * Note: overwrite the below #define if you know your architecture endianness.
   76  */
   77 #if defined(_ZFS_BIG_ENDIAN)
   78 #define LZ4_BIG_ENDIAN 1
   79 #else
   80 /*
    81  * Little Endian assumed. PDP Endian and other very rare endian formats
    82  * are unsupported.
   83  */
   84 #undef LZ4_BIG_ENDIAN
   85 #endif
   86 
   87 /*-************************************
   88 *  CPU Feature Detection
   89 **************************************/
   90 /* LZ4_FORCE_MEMORY_ACCESS
   91  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
   92  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
    93  * The switch below allows selecting a different access method for improved performance.
    94  * Method 0 (default) : use `memcpy()`. Safe and portable.
    95  * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
    96  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
    97  * Method 2 : direct access. This method is portable but violates the C standard.
    98  *            It can generate buggy code on targets whose assembly generation depends on alignment.
    99  *            But in some circumstances, it's the only known way to get the most performance (i.e., GCC + ARMv6).
  100  * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
  101  * Prefer these methods in priority order (0 > 1 > 2)
  102  */
  103 #ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
  104 #  if defined(__GNUC__) && \
  105   ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
  106   || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
  107 #    define LZ4_FORCE_MEMORY_ACCESS 2
  108 #  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
  109 #    define LZ4_FORCE_MEMORY_ACCESS 1
  110 #  endif
  111 #endif
  112 
  113 /*
  114  * LZ4_FORCE_SW_BITCOUNT
  115  * Define this parameter if your target system or compiler does not support hardware bit count
  116  */
  117 /*
  118  * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
  119  * kernel
  120  * Linux : we can use GCC's __builtin_ctz family of builtins in the
  121  * kernel
  122  */
  123 #undef  LZ4_FORCE_SW_BITCOUNT
  124 #if defined(__sunos__)
  125 #define LZ4_FORCE_SW_BITCOUNT
  126 #endif
  127 
  128 /*
  129  * Compiler Options
  130  */
  131 /* Disable restrict */
  132 #define restrict
  133 
  134 /*
  135  * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
  136  * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
  137  */
  138 #ifdef GCC_VERSION
  139 #undef GCC_VERSION
  140 #endif
  141 
  142 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
  143 
  144 #ifndef LZ4_FORCE_INLINE
  145 #  ifdef _MSC_VER    /* Visual Studio */
  146 #    define LZ4_FORCE_INLINE static __forceinline
  147 #  else
  148 #    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
  149 #      ifdef __GNUC__
  150 #        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
  151 #      else
  152 #        define LZ4_FORCE_INLINE static inline
  153 #      endif
  154 #    else
  155 #      define LZ4_FORCE_INLINE static
  156 #    endif /* __STDC_VERSION__ */
  157 #  endif  /* _MSC_VER */
  158 #endif /* LZ4_FORCE_INLINE */
  159 
  160 /* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
  161  * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
  162  * together with a simple 8-byte copy loop as a fall-back path.
  163  * However, this optimization hurts the decompression speed by >30%,
  164  * because the execution does not go to the optimized loop
  165  * for typical compressible data, and all of the preamble checks
  166  * before going to the fall-back path become useless overhead.
  167  * This optimization happens only with the -O3 flag, and -O2 generates
  168  * a simple 8-byte copy loop.
  169  * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
  170  * functions are annotated with __attribute__((optimize("O2"))),
  171  * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
  172  * of LZ4_wildCopy8 does not affect the compression speed.
  173  */
  174 #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
  175 #  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
  176 #  undef LZ4_FORCE_INLINE
  177 #  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
  178 #else
  179 #  define LZ4_FORCE_O2
  180 #endif
  181 
  182 #ifndef expect
  183 #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
  184 #  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
  185 #else
  186 #  define expect(expr,value)    (expr)
  187 #endif
  188 #endif
  189 
  190 #ifndef likely
  191 #define likely(expr)    expect((expr) != 0, 1)
  192 #endif
  193 
  194 #ifndef unlikely
  195 #define unlikely(expr)  expect((expr) != 0, 0)
  196 #endif
  197 
  198 #ifndef _KERNEL
  199 #include <stdlib.h>   /* malloc, calloc, free */
  200 #include <string.h>   /* memset, memcpy */
  201 #endif
  202 #define ALLOC(s)          malloc(s)
  203 #define ALLOC_AND_ZERO(s) calloc(1,s)
  204 #define FREEMEM(p)        free(p)
  205 
  206 #define MEM_INIT(p,v,s)   memset((p),(v),(s))
  207 
  208 
  209 /*-************************************
  210 *  Common Constants
  211 **************************************/
  212 #define MINMATCH 4
  213 
  214 #define WILDCOPYLENGTH 8
  215 #define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
  216 #define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
  217 #define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
  218 #define FASTLOOP_SAFE_DISTANCE 64
  219 
  220 #define KB *(1 <<10)
  221 #define MB *(1 <<20)
  222 #define GB *(1U<<30)
  223 
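/*
 * Illustrative note, not part of the upstream file: KB, MB and GB above are
 * postfix multipliers, so a size written as "64 KB" expands to
 * "64 *(1 <<10)" (65536). The decoder below relies on this in expressions
 * such as "dictSize < (int)(64 KB)". A guarded, hypothetical sketch of the
 * expansion:
 */
#if 0
static const int example_sixty_four_kb = (64 KB);  /* 64 * (1 << 10) == 65536   */
static const int example_two_mb        = (2 MB);   /* 2 * (1 << 20)  == 2097152 */
#endif
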
  224 #ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
  225 #  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
  226 #endif
  227 
  228 #define LZ4_DISTANCE_ABSOLUTE_MAX 65535
  229 #if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
  230 #  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
  231 #endif
  232 
  233 #define ML_BITS  4
  234 #define ML_MASK  ((1U<<ML_BITS)-1)
  235 #define RUN_BITS (8-ML_BITS)
  236 #define RUN_MASK ((1U<<RUN_BITS)-1)
  237 
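/*
 * Illustrative sketch, not part of the upstream file: every LZ4 sequence
 * begins with a one-byte token. Its high RUN_BITS (4) bits hold the literal
 * length and its low ML_BITS (4) bits hold the match-length field; a field
 * equal to its mask (15) means additional length bytes follow. For example,
 * token 0x4B splits into a literal length of 4 and a match-length field of
 * 11 (final match length 11 + MINMATCH = 15). The helper below is
 * hypothetical and guarded out of the build.
 */
#if 0
static void lz4_split_token(unsigned token,
    unsigned *literal_len, unsigned *match_field)
{
    *literal_len = token >> ML_BITS;    /* 0x4B >> 4  == 4  */
    *match_field = token & ML_MASK;     /* 0x4B & 0xF == 11 */
}
#endif
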
  238 #define DEBUGLOG(l, ...) {}    /* disabled */
  239 
  240 #ifndef assert
  241 #define assert ASSERT
  242 #endif
  243 
  244 /*-************************************
  245 *  Types
  246 **************************************/
  247 #ifndef _KERNEL
  248 #include <limits.h>
  249 #endif
  250 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
  251 #ifndef _KERNEL
  252 #include <stdint.h>
  253 #endif
  254   typedef  uint8_t BYTE;
  255   typedef uint16_t U16;
  256   typedef uint32_t U32;
  257   typedef  int32_t S32;
  258   typedef uint64_t U64;
  259   typedef uintptr_t uptrval;
  260 #else
  261 # if UINT_MAX != 4294967295UL
  262 #   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
  263 # endif
  264   typedef unsigned char       BYTE;
  265   typedef unsigned short      U16;
  266   typedef unsigned int        U32;
  267   typedef   signed int        S32;
  268   typedef unsigned long long  U64;
  269   typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
  270 #endif
  271 
  272 #if defined(__x86_64__)
  273   typedef U64    reg_t;   /* 64-bits in x32 mode */
  274 #else
  275   typedef size_t reg_t;   /* 32-bits in x32 mode */
  276 #endif
  277 
  278 typedef enum {
  279     notLimited = 0,
  280     limitedOutput = 1,
  281     fillOutput = 2
  282 } limitedOutput_directive;
  283 
  284 
  285 /*-************************************
  286 *  Reading and writing into memory
  287 **************************************/
  288 
  289 /**
  290  * LZ4 relies on memcpy with a constant size being inlined. In freestanding
  291  * environments, the compiler can't assume the implementation of memcpy() is
  292  * standard compliant, so it can't apply its specialized memcpy() inlining
  293  * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
  294  * memcpy() as if it were standard compliant, so it can inline it in freestanding
  295  * environments. This is needed when decompressing the Linux Kernel, for example.
  296  */
  297 #if defined(__GNUC__) && (__GNUC__ >= 4)
  298 #define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
  299 #else
  300 #define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
  301 #endif
  302 
  303 static unsigned LZ4_isLittleEndian(void)
  304 {
  305     const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
  306     return one.c[0];
  307 }
  308 
  309 
  310 #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
  311 /* lie to the compiler about data alignment; use with caution */
  312 
  313 static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
  314 
  315 static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
  316 static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
  317 
  318 #elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
  319 
  320 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
  321 /* currently only defined for gcc and icc */
  322 typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;
  323 
  324 static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
  325 
  326 static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
  327 
  328 #else  /* safe and portable access using memcpy() */
  329 
  330 static U16 LZ4_read16(const void* memPtr)
  331 {
  332     U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
  333 }
  334 
  335 static void LZ4_write32(void* memPtr, U32 value)
  336 {
  337     LZ4_memcpy(memPtr, &value, sizeof(value));
  338 }
  339 
  340 #endif /* LZ4_FORCE_MEMORY_ACCESS */
  341 
  342 static U16 LZ4_readLE16(const void* memPtr)
  343 {
  344     if (LZ4_isLittleEndian()) {
  345         return LZ4_read16(memPtr);
  346     } else {
  347         const BYTE* p = (const BYTE*)memPtr;
  348         return (U16)((U16)p[0] + (p[1]<<8));
  349     }
  350 }
  351 
  352 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
  353 LZ4_FORCE_INLINE
  354 void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
  355 {
  356     BYTE* d = (BYTE*)dstPtr;
  357     const BYTE* s = (const BYTE*)srcPtr;
  358     BYTE* const e = (BYTE*)dstEnd;
  359 
  360     do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
  361 }
  362 
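/*
 * Illustrative sketch, not part of the upstream file: because LZ4_wildCopy8
 * copies in whole 8-byte chunks, it can write a few bytes past dstEnd (see
 * the comment above). Callers therefore use it only when enough slack
 * remains in the output buffer and fall back to an exact byte copy
 * otherwise, as the decoder below does near the end of a block. A guarded
 * sketch of that pattern, with hypothetical names:
 */
#if 0
static void copy_literals_example(BYTE *op, const BYTE *ip, size_t len,
    BYTE *oend)
{
    BYTE *cpy = op + len;

    if (cpy <= oend - WILDCOPYLENGTH) {
        /* fast path: slack available, may overshoot cpy */
        LZ4_wildCopy8(op, ip, cpy);
    } else {
        /* safe path: exact copy, never writes past cpy */
        while (op < cpy)
            *op++ = *ip++;
    }
}
#endif
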
  363 static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
  364 static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
  365 
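/*
 * Illustrative note, not part of the upstream file: matches whose offset is
 * smaller than 8 overlap the bytes currently being written; offset 1, for
 * instance, just repeats the previous byte. inc32table and dec64table let
 * the hot paths below reproduce that behaviour with a pair of 4-byte copies
 * and a pointer adjustment instead of a byte loop. The reference semantics
 * are simply the following (hypothetical, guarded out of the build):
 */
#if 0
static void overlap_copy_reference(BYTE *op, size_t offset, size_t length)
{
    /* naive per-byte copy from "offset" bytes behind the write cursor */
    const BYTE *match = op - offset;

    while (length--)
        *op++ = *match++;
}
#endif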
  366 
  367 #ifndef LZ4_FAST_DEC_LOOP
  368 #  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
  369 #    define LZ4_FAST_DEC_LOOP 1
  370 #  elif defined(__aarch64__) && !defined(__clang__)
  371      /* On aarch64, we disable this optimization for clang because on certain
  372       * mobile chipsets, performance is reduced with clang. For information
  373       * refer to https://github.com/lz4/lz4/pull/707 */
  374 #    define LZ4_FAST_DEC_LOOP 1
  375 #  else
  376 #    define LZ4_FAST_DEC_LOOP 0
  377 #  endif
  378 #endif
  379 
  380 #if LZ4_FAST_DEC_LOOP
  381 
  382 LZ4_FORCE_INLINE void
  383 LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
  384 {
  385     assert(srcPtr + offset == dstPtr);
  386     if (offset < 8) {
  387         LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
  388         dstPtr[0] = srcPtr[0];
  389         dstPtr[1] = srcPtr[1];
  390         dstPtr[2] = srcPtr[2];
  391         dstPtr[3] = srcPtr[3];
  392         srcPtr += inc32table[offset];
  393         LZ4_memcpy(dstPtr+4, srcPtr, 4);
  394         srcPtr -= dec64table[offset];
  395         dstPtr += 8;
  396     } else {
  397         LZ4_memcpy(dstPtr, srcPtr, 8);
  398         dstPtr += 8;
  399         srcPtr += 8;
  400     }
  401 
  402     LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
  403 }
  404 
  405 /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
   406  * this version copies 16 bytes twice (instead of 32 bytes at once)
  407  * because it must be compatible with offsets >= 16. */
  408 LZ4_FORCE_INLINE void
  409 LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
  410 {
  411     BYTE* d = (BYTE*)dstPtr;
  412     const BYTE* s = (const BYTE*)srcPtr;
  413     BYTE* const e = (BYTE*)dstEnd;
  414 
  415     do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
  416 }
  417 
  418 /* LZ4_memcpy_using_offset()  presumes :
  419  * - dstEnd >= dstPtr + MINMATCH
   420  * - there are at least 8 bytes available to write after dstEnd */
  421 LZ4_FORCE_INLINE void
  422 LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
  423 {
  424     BYTE v[8];
  425 
  426     assert(dstEnd >= dstPtr + MINMATCH);
  427 
  428     switch(offset) {
  429     case 1:
  430         MEM_INIT(v, *srcPtr, 8);
  431         break;
  432     case 2:
  433         LZ4_memcpy(v, srcPtr, 2);
  434         LZ4_memcpy(&v[2], srcPtr, 2);
  435         LZ4_memcpy(&v[4], v, 4);
  436         break;
  437     case 4:
  438         LZ4_memcpy(v, srcPtr, 4);
  439         LZ4_memcpy(&v[4], srcPtr, 4);
  440         break;
  441     default:
  442         LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
  443         return;
  444     }
  445 
  446     LZ4_memcpy(dstPtr, v, 8);
  447     dstPtr += 8;
  448     while (dstPtr < dstEnd) {
  449         LZ4_memcpy(dstPtr, v, 8);
  450         dstPtr += 8;
  451     }
  452 }
  453 #endif
  454 
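/*
 * Illustrative note, not part of the upstream file: for the small offsets
 * handled by LZ4_memcpy_using_offset() above, the 8-byte staging buffer v
 * holds one full period of the repeating match. With offset 2 and the two
 * preceding output bytes "AB", v becomes "ABABABAB", and stamping it in
 * 8-byte steps reproduces the overlapped match. A hypothetical, guarded
 * sketch of that expansion:
 */
#if 0
static void offset2_pattern_example(void)
{
    const BYTE prev[2] = { 'A', 'B' };  /* last two bytes already output */
    BYTE v[8];

    LZ4_memcpy(v, prev, 2);
    LZ4_memcpy(&v[2], prev, 2);
    LZ4_memcpy(&v[4], v, 4);            /* v is now "ABABABAB" */
}
#endif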
  455 
  456 /*-************************************
  457 *  Local Structures and types
  458 **************************************/
  459 typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
  460 
  461 /**
  462  * This enum distinguishes several different modes of accessing previous
  463  * content in the stream.
  464  *
  465  * - noDict        : There is no preceding content.
   466  * - withPrefix64k : Table entries up to ctx->dictSize before the current
   467  *                   blob being compressed are valid and refer to the preceding
   468  *                   content (of length ctx->dictSize), which is available
   469  *                   contiguously in memory immediately preceding the content
   470  *                   currently being compressed.
  471  * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
  472  *                   else in memory, starting at ctx->dictionary with length
  473  *                   ctx->dictSize.
  474  * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
  475  *                   content is in a separate context, pointed to by
  476  *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
  477  *                   entries in the current context that refer to positions
  478  *                   preceding the beginning of the current compression are
  479  *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
  480  *                   ->dictSize describe the location and size of the preceding
  481  *                   content, and matches are found by looking in the ctx
  482  *                   ->dictCtx->hashTable.
  483  */
  484 typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
  485 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
  486 
  487 /*-*******************************
  488  *  Decompression functions
  489  ********************************/
  490 
  491 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
  492 typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
  493 
  494 typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
  495 
  496 LZ4_FORCE_INLINE unsigned
  497 read_variable_length(const BYTE**ip, const BYTE* lencheck,
  498                      int loop_check, int initial_check,
  499                      variable_length_error* error)
  500 {
  501     U32 length = 0;
  502     U32 s;
  503     if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
  504         *error = initial_error;
  505         return length;
  506     }
  507     do {
  508         s = **ip;
  509         (*ip)++;
  510         length += s;
  511         if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
  512             *error = loop_error;
  513             return length;
  514         }
  515     } while (s==255);
  516 
  517     return length;
  518 }
  519 
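/*
 * Illustrative sketch, not part of the upstream file: when a token's length
 * field equals its mask (15), the real length continues in the following
 * bytes, each adding 0..255, and stops at the first byte below 255. The
 * byte pair 255, 12 therefore adds 267, so a literal-length field of 15
 * followed by those bytes encodes 15 + 255 + 12 = 282 literals. A guarded,
 * hypothetical walk-through using the helper above:
 */
#if 0
static unsigned decode_length_example(void)
{
    static const BYTE extra[] = { 255, 12, 0 /* next token, never read */ };
    const BYTE *ip = extra;
    variable_length_error err = ok;

    /* 15 from the token field plus the bytes consumed here */
    return (15 + read_variable_length(&ip, extra + sizeof (extra),
        1, 1, &err));                   /* == 282, err stays ok */
}
#endif
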
  520 #define LZ4_STATIC_ASSERT(c)    ASSERT(c)
  521 
  522 
  523 /*! LZ4_decompress_generic() :
  524  *  This generic decompression function covers all use cases.
  525  *  It shall be instantiated several times, using different sets of directives.
   526  *  Note that it is important for performance that this function really gets inlined,
  527  *  in order to remove useless branches during compilation optimization.
  528  */
  529 LZ4_FORCE_INLINE int
  530 LZ4_decompress_generic(
  531                  const char* const src,
  532                  char* const dst,
  533                  int srcSize,
  534                  int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
  535 
  536                  endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
  537                  earlyEnd_directive partialDecoding,  /* full, partial */
  538                  dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
  539                  const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
  540                  const BYTE* const dictStart,  /* only if dict==usingExtDict */
  541                  const size_t dictSize         /* note : = 0 if noDict */
  542                  )
  543 {
  544     if ((src == NULL) || (outputSize < 0)) { return -1; }
  545 
  546     {   const BYTE* ip = (const BYTE*) src;
  547         const BYTE* const iend = ip + srcSize;
  548 
  549         BYTE* op = (BYTE*) dst;
  550         BYTE* const oend = op + outputSize;
  551         BYTE* cpy;
  552 
  553         const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
  554 
  555         const int safeDecode = (endOnInput==endOnInputSize);
  556         const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
  557 
  558 
  559         /* Set up the "end" pointers for the shortcut. */
  560         const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
  561         const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
  562 
  563         const BYTE* match;
  564         size_t offset;
  565         unsigned token;
  566         size_t length;
  567 
  568 
  569         DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
  570 
  571         /* Special cases */
  572         assert(lowPrefix <= op);
  573         if ((endOnInput) && (unlikely(outputSize==0))) {
  574             /* Empty output buffer */
  575             if (partialDecoding) return 0;
  576             return ((srcSize==1) && (*ip==0)) ? 0 : -1;
  577         }
  578         if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
  579         if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
  580 
  581         /* Currently the fast loop shows a regression on Qualcomm ARM chips. */
  582 #if LZ4_FAST_DEC_LOOP
  583         if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
  584             DEBUGLOG(6, "skip fast decode loop");
  585             goto safe_decode;
  586         }
  587 
  588         /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
  589         while (1) {
  590             /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
  591             assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
  592             if (endOnInput) { assert(ip < iend); }
  593             token = *ip++;
  594             length = token >> ML_BITS;  /* literal length */
  595 
  596             assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
  597 
  598             /* decode literal length */
  599             if (length == RUN_MASK) {
  600                 variable_length_error error = ok;
  601                 length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
  602                 if (error == initial_error) { goto _output_error; }
  603                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
  604                 if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
  605 
  606                 /* copy literals */
  607                 cpy = op+length;
  608                 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
  609                 if (endOnInput) {  /* LZ4_decompress_safe() */
  610                     if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
  611                     LZ4_wildCopy32(op, ip, cpy);
  612                 } else {   /* LZ4_decompress_fast() */
  613                     if (cpy>oend-8) { goto safe_literal_copy; }
  614                     LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
  615                                                  * it doesn't know input length, and only relies on end-of-block properties */
  616                 }
  617                 ip += length; op = cpy;
  618             } else {
  619                 cpy = op+length;
  620                 if (endOnInput) {  /* LZ4_decompress_safe() */
  621                     DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
  622                     /* We don't need to check oend, since we check it once for each loop below */
  623                     if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
  624                     /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
  625                     LZ4_memcpy(op, ip, 16);
  626                 } else {  /* LZ4_decompress_fast() */
  627                     /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
  628                      * it doesn't know input length, and relies on end-of-block properties */
  629                     LZ4_memcpy(op, ip, 8);
  630                     if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
  631                 }
  632                 ip += length; op = cpy;
  633             }
  634 
  635             /* get offset */
  636             offset = LZ4_readLE16(ip); ip+=2;
  637             match = op - offset;
  638             assert(match <= op);
  639 
  640             /* get matchlength */
  641             length = token & ML_MASK;
  642 
  643             if (length == ML_MASK) {
  644                 variable_length_error error = ok;
  645                 if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
  646                 length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
  647                 if (error != ok) { goto _output_error; }
  648                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
  649                 length += MINMATCH;
  650                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
  651                     goto safe_match_copy;
  652                 }
  653             } else {
  654                 length += MINMATCH;
  655                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
  656                     goto safe_match_copy;
  657                 }
  658 
  659                 /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
  660                 if ((dict == withPrefix64k) || (match >= lowPrefix)) {
  661                     if (offset >= 8) {
  662                         assert(match >= lowPrefix);
  663                         assert(match <= op);
  664                         assert(op + 18 <= oend);
  665 
  666                         LZ4_memcpy(op, match, 8);
  667                         LZ4_memcpy(op+8, match+8, 8);
  668                         LZ4_memcpy(op+16, match+16, 2);
  669                         op += length;
  670                         continue;
  671             }   }   }
  672 
  673             if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
  674             /* match starting within external dictionary */
  675             if ((dict==usingExtDict) && (match < lowPrefix)) {
  676                 if (unlikely(op+length > oend-LASTLITERALS)) {
  677                     if (partialDecoding) {
  678                         DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
  679                         length = MIN(length, (size_t)(oend-op));
  680                     } else {
  681                         goto _output_error;  /* end-of-block condition violated */
  682                 }   }
  683 
  684                 if (length <= (size_t)(lowPrefix-match)) {
  685                     /* match fits entirely within external dictionary : just copy */
  686                     memmove(op, dictEnd - (lowPrefix-match), length);
  687                     op += length;
  688                 } else {
  689                     /* match stretches into both external dictionary and current block */
  690                     size_t const copySize = (size_t)(lowPrefix - match);
  691                     size_t const restSize = length - copySize;
  692                     LZ4_memcpy(op, dictEnd - copySize, copySize);
  693                     op += copySize;
  694                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
  695                         BYTE* const endOfMatch = op + restSize;
  696                         const BYTE* copyFrom = lowPrefix;
  697                         while (op < endOfMatch) { *op++ = *copyFrom++; }
  698                     } else {
  699                         LZ4_memcpy(op, lowPrefix, restSize);
  700                         op += restSize;
  701                 }   }
  702                 continue;
  703             }
  704 
  705             /* copy match within block */
  706             cpy = op + length;
  707 
  708             assert((op <= oend) && (oend-op >= 32));
  709             if (unlikely(offset<16)) {
  710                 LZ4_memcpy_using_offset(op, match, cpy, offset);
  711             } else {
  712                 LZ4_wildCopy32(op, match, cpy);
  713             }
  714 
  715             op = cpy;   /* wildcopy correction */
  716         }
  717     safe_decode:
  718 #endif
  719 
  720         /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
  721         while (1) {
  722             token = *ip++;
  723             length = token >> ML_BITS;  /* literal length */
  724 
  725             assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
  726 
  727             /* A two-stage shortcut for the most common case:
  728              * 1) If the literal length is 0..14, and there is enough space,
  729              * enter the shortcut and copy 16 bytes on behalf of the literals
  730              * (in the fast mode, only 8 bytes can be safely copied this way).
  731              * 2) Further if the match length is 4..18, copy 18 bytes in a similar
  732              * manner; but we ensure that there's enough space in the output for
  733              * those 18 bytes earlier, upon entering the shortcut (in other words,
  734              * there is a combined check for both stages).
  735              */
  736             if ( (endOnInput ? length != RUN_MASK : length <= 8)
  737                 /* strictly "less than" on input, to re-enter the loop with at least one byte */
  738               && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
  739                 /* Copy the literals */
  740                 LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
  741                 op += length; ip += length;
  742 
  743                 /* The second stage: prepare for match copying, decode full info.
  744                  * If it doesn't work out, the info won't be wasted. */
  745                 length = token & ML_MASK; /* match length */
  746                 offset = LZ4_readLE16(ip); ip += 2;
  747                 match = op - offset;
  748                 assert(match <= op); /* check overflow */
  749 
  750                 /* Do not deal with overlapping matches. */
  751                 if ( (length != ML_MASK)
  752                   && (offset >= 8)
  753                   && (dict==withPrefix64k || match >= lowPrefix) ) {
  754                     /* Copy the match. */
  755                     LZ4_memcpy(op + 0, match + 0, 8);
  756                     LZ4_memcpy(op + 8, match + 8, 8);
  757                     LZ4_memcpy(op +16, match +16, 2);
  758                     op += length + MINMATCH;
  759                     /* Both stages worked, load the next token. */
  760                     continue;
  761                 }
  762 
  763                 /* The second stage didn't work out, but the info is ready.
  764                  * Propel it right to the point of match copying. */
  765                 goto _copy_match;
  766             }
  767 
  768             /* decode literal length */
  769             if (length == RUN_MASK) {
  770                 variable_length_error error = ok;
  771                 length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
  772                 if (error == initial_error) { goto _output_error; }
  773                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
  774                 if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
  775             }
  776 
  777             /* copy literals */
  778             cpy = op+length;
  779 #if LZ4_FAST_DEC_LOOP
  780         safe_literal_copy:
  781 #endif
  782             LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
  783             if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
  784               || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
  785             {
  786                 /* We've either hit the input parsing restriction or the output parsing restriction.
  787                  * In the normal scenario, decoding a full block, it must be the last sequence,
  788                  * otherwise it's an error (invalid input or dimensions).
  789                  * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
  790                  */
  791                 if (partialDecoding) {
  792                     /* Since we are partial decoding we may be in this block because of the output parsing
  793                      * restriction, which is not valid since the output buffer is allowed to be undersized.
  794                      */
  795                     assert(endOnInput);
  796                     DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
  797                     DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
  798                     DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
  799                     DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
  800                     /* Finishing in the middle of a literals segment,
  801                      * due to lack of input.
  802                      */
  803                     if (ip+length > iend) {
  804                         length = (size_t)(iend-ip);
  805                         cpy = op + length;
  806                     }
  807                     /* Finishing in the middle of a literals segment,
  808                      * due to lack of output space.
  809                      */
  810                     if (cpy > oend) {
  811                         cpy = oend;
  812                         assert(op<=oend);
  813                         length = (size_t)(oend-op);
  814                     }
  815                 } else {
  816                     /* We must be on the last sequence because of the parsing limitations so check
  817                      * that we exactly regenerate the original size (must be exact when !endOnInput).
  818                      */
  819                     if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
  820                      /* We must be on the last sequence (or invalid) because of the parsing limitations
  821                       * so check that we exactly consume the input and don't overrun the output buffer.
  822                       */
  823                     if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
  824                         DEBUGLOG(6, "should have been last run of literals")
  825                         DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
  826                         DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
  827                         goto _output_error;
  828                     }
  829                 }
  830                 memmove(op, ip, length);  /* supports overlapping memory regions; only matters for in-place decompression scenarios */
  831                 ip += length;
  832                 op += length;
  833                 /* Necessarily EOF when !partialDecoding.
  834                  * When partialDecoding, it is EOF if we've either
  835                  * filled the output buffer or
  836                  * can't proceed with reading an offset for following match.
  837                  */
  838                 if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
  839                     break;
  840                 }
  841             } else {
  842                 LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
  843                 ip += length; op = cpy;
  844             }
  845 
  846             /* get offset */
  847             offset = LZ4_readLE16(ip); ip+=2;
  848             match = op - offset;
  849 
  850             /* get matchlength */
  851             length = token & ML_MASK;
  852 
  853     _copy_match:
  854             if (length == ML_MASK) {
  855               variable_length_error error = ok;
  856               length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
  857               if (error != ok) goto _output_error;
  858                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
  859             }
  860             length += MINMATCH;
  861 
  862 #if LZ4_FAST_DEC_LOOP
  863         safe_match_copy:
  864 #endif
  865             if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
  866             /* match starting within external dictionary */
  867             if ((dict==usingExtDict) && (match < lowPrefix)) {
  868                 if (unlikely(op+length > oend-LASTLITERALS)) {
  869                     if (partialDecoding) length = MIN(length, (size_t)(oend-op));
  870                     else goto _output_error;   /* doesn't respect parsing restriction */
  871                 }
  872 
  873                 if (length <= (size_t)(lowPrefix-match)) {
  874                     /* match fits entirely within external dictionary : just copy */
  875                     memmove(op, dictEnd - (lowPrefix-match), length);
  876                     op += length;
  877                 } else {
  878                     /* match stretches into both external dictionary and current block */
  879                     size_t const copySize = (size_t)(lowPrefix - match);
  880                     size_t const restSize = length - copySize;
  881                     LZ4_memcpy(op, dictEnd - copySize, copySize);
  882                     op += copySize;
  883                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
  884                         BYTE* const endOfMatch = op + restSize;
  885                         const BYTE* copyFrom = lowPrefix;
  886                         while (op < endOfMatch) *op++ = *copyFrom++;
  887                     } else {
  888                         LZ4_memcpy(op, lowPrefix, restSize);
  889                         op += restSize;
  890                 }   }
  891                 continue;
  892             }
  893             assert(match >= lowPrefix);
  894 
  895             /* copy match within block */
  896             cpy = op + length;
  897 
  898             /* partialDecoding : may end anywhere within the block */
  899             assert(op<=oend);
  900             if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
  901                 size_t const mlen = MIN(length, (size_t)(oend-op));
  902                 const BYTE* const matchEnd = match + mlen;
  903                 BYTE* const copyEnd = op + mlen;
  904                 if (matchEnd > op) {   /* overlap copy */
  905                     while (op < copyEnd) { *op++ = *match++; }
  906                 } else {
  907                     LZ4_memcpy(op, match, mlen);
  908                 }
  909                 op = copyEnd;
  910                 if (op == oend) { break; }
  911                 continue;
  912             }
  913 
  914             if (unlikely(offset<8)) {
  915                 LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
  916                 op[0] = match[0];
  917                 op[1] = match[1];
  918                 op[2] = match[2];
  919                 op[3] = match[3];
  920                 match += inc32table[offset];
  921                 LZ4_memcpy(op+4, match, 4);
  922                 match -= dec64table[offset];
  923             } else {
  924                 LZ4_memcpy(op, match, 8);
  925                 match += 8;
  926             }
  927             op += 8;
  928 
  929             if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
  930                 BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
  931                 if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
  932                 if (op < oCopyLimit) {
  933                     LZ4_wildCopy8(op, match, oCopyLimit);
  934                     match += oCopyLimit - op;
  935                     op = oCopyLimit;
  936                 }
  937                 while (op < cpy) { *op++ = *match++; }
  938             } else {
  939                 LZ4_memcpy(op, match, 8);
  940                 if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
  941             }
  942             op = cpy;   /* wildcopy correction */
  943         }
  944 
  945         /* end of decoding */
  946         if (endOnInput) {
  947             DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
  948            return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
  949        } else {
  950            return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
  951        }
  952 
  953         /* Overflow error detected */
  954     _output_error:
  955         return (int) (-(((const char*)ip)-src))-1;
  956     }
  957 }
  958 
  959 /*
  960  * LZ4_uncompress_unknownOutputSize() :
   961  *      isize  : the input size, i.e. the compressed size
   962  *      maxOutputSize : the size of the destination buffer (which must
   963  *              already be allocated)
   964  *      return : the number of bytes decoded in the destination buffer
   965  *              (necessarily <= maxOutputSize). If the source stream is
   966  *              malformed, the function will stop decoding and return a
   967  *              negative result, indicating the byte position of the faulty
   968  *              input. This function never writes beyond dest +
   969  *              maxOutputSize, and is therefore protected against malicious
   970  *              data packets.
   971  *      note   : The destination buffer must already be allocated.
  972  *              This version is slightly slower than real_LZ4_uncompress()
  973  *
  974  */
  975 
  976 /*
  977  * Note: In upstream code, LZ4_uncompress_unknownOutputSize is now a legacy
  978  *       wrapper for LZ4_decompress_safe which is a wrapper for
  979  *       LZ4_decompress_generic; this wrapper flattens that, rather than
  980  *       rewriting the callers.
  981  */
  982 int LZ4_uncompress_unknownOutputSize(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
  983 {
  984     return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
  985                                   endOnInputSize, decode_full_block, noDict,
  986                                   (BYTE*)dest, NULL, 0);
  987 }
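
/*
 * Illustrative sketch, not part of the upstream file: a minimal example of
 * how a caller might use the entry point above. The names and sizes are
 * hypothetical; the only contract relied on is the one documented above:
 * the return value is the number of bytes decoded, or a negative value on
 * malformed input, and the function never writes beyond
 * dest + maxOutputSize.
 */
#if 0
static int
decompress_record_example(const char *src, int src_len, char *dst,
    int dst_capacity)
{
    int n = LZ4_uncompress_unknownOutputSize(src, dst, src_len,
        dst_capacity);

    if (n < 0)
        return (-1);    /* corrupt or truncated input */
    return (n);         /* number of bytes produced in dst */
}
#endif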

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.