The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/zstd/doc/educational_decoder/zstd_decompress.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) Facebook, Inc.
    3  * All rights reserved.
    4  *
    5  * This source code is licensed under both the BSD-style license (found in the
    6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
    7  * in the COPYING file in the root directory of this source tree).
    8  * You may select, at your option, one of the above-listed licenses.
    9  */
   10 
   11 /// Zstandard educational decoder implementation
   12 /// See https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
   13 
   14 #include <stdint.h>   // uint8_t, etc.
   15 #include <stdlib.h>   // malloc, free, exit
   16 #include <stdio.h>    // fprintf
   17 #include <string.h>   // memset, memcpy
   18 #include "zstd_decompress.h"
   19 
   20 
   21 /******* IMPORTANT CONSTANTS *********************************************/
   22 
   23 // Zstandard frame
   24 // "Magic_Number
   25 // 4 Bytes, little-endian format. Value : 0xFD2FB528"
   26 #define ZSTD_MAGIC_NUMBER 0xFD2FB528U
   27 
   28 // The size of `Block_Content` is limited by `Block_Maximum_Size`,
   29 #define ZSTD_BLOCK_SIZE_MAX ((size_t)128 * 1024)
   30 
   31 // literal blocks can't be larger than their block
   32 #define MAX_LITERALS_SIZE ZSTD_BLOCK_SIZE_MAX
   33 
   34 
   35 /******* UTILITY MACROS AND TYPES *********************************************/
   36 #define MAX(a, b) ((a) > (b) ? (a) : (b))
   37 #define MIN(a, b) ((a) < (b) ? (a) : (b))
   38 
   39 #if defined(ZDEC_NO_MESSAGE)
   40 #define MESSAGE(...)
   41 #else
   42 #define MESSAGE(...)  fprintf(stderr, "" __VA_ARGS__)
   43 #endif
   44 
   45 /// This decoder calls exit(1) when it encounters an error, however a production
   46 /// library should propagate error codes
   47 #define ERROR(s)                                                               \
   48     do {                                                                       \
   49         MESSAGE("Error: %s\n", s);                                     \
   50         exit(1);                                                               \
   51     } while (0)
   52 #define INP_SIZE()                                                             \
   53     ERROR("Input buffer smaller than it should be or input is "                \
   54           "corrupted")
   55 #define OUT_SIZE() ERROR("Output buffer too small for output")
   56 #define CORRUPTION() ERROR("Corruption detected while decompressing")
   57 #define BAD_ALLOC() ERROR("Memory allocation error")
   58 #define IMPOSSIBLE() ERROR("An impossibility has occurred")
   59 
   60 typedef uint8_t  u8;
   61 typedef uint16_t u16;
   62 typedef uint32_t u32;
   63 typedef uint64_t u64;
   64 
   65 typedef int8_t  i8;
   66 typedef int16_t i16;
   67 typedef int32_t i32;
   68 typedef int64_t i64;
   69 /******* END UTILITY MACROS AND TYPES *****************************************/
   70 
   71 /******* IMPLEMENTATION PRIMITIVE PROTOTYPES **********************************/
   72 /// The implementations for these functions can be found at the bottom of this
   73 /// file.  They implement low-level functionality needed for the higher level
   74 /// decompression functions.
   75 
   76 /*** IO STREAM OPERATIONS *************/
   77 
   78 /// ostream_t/istream_t are used to wrap the pointers/length data passed into
   79 /// ZSTD_decompress, so that all IO operations are safely bounds checked
   80 /// They are written/read forward, and reads are treated as little-endian
   81 /// They should be used opaquely to ensure safety
   82 typedef struct {
   83     u8 *ptr;
   84     size_t len;
   85 } ostream_t;
   86 
   87 typedef struct {
   88     const u8 *ptr;
   89     size_t len;
   90 
   91     // Input often reads a few bits at a time, so maintain an internal offset
   92     int bit_offset;
   93 } istream_t;
   94 
   95 /// The following two functions are the only ones that allow the istream to be
   96 /// non-byte aligned
   97 
   98 /// Reads `num` bits from a bitstream, and updates the internal offset
   99 static inline u64 IO_read_bits(istream_t *const in, const int num_bits);
  100 /// Backs-up the stream by `num` bits so they can be read again
  101 static inline void IO_rewind_bits(istream_t *const in, const int num_bits);
  102 /// If the remaining bits in a byte will be unused, advance to the end of the
  103 /// byte
  104 static inline void IO_align_stream(istream_t *const in);
  105 
  106 /// Write the given byte into the output stream
  107 static inline void IO_write_byte(ostream_t *const out, u8 symb);
  108 
  109 /// Returns the number of bytes left to be read in this stream.  The stream must
  110 /// be byte aligned.
  111 static inline size_t IO_istream_len(const istream_t *const in);
  112 
  113 /// Advances the stream by `len` bytes, and returns a pointer to the chunk that
  114 /// was skipped.  The stream must be byte aligned.
  115 static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len);
  116 /// Advances the stream by `len` bytes, and returns a pointer to the chunk that
  117 /// was skipped so it can be written to.
  118 static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len);
  119 
  120 /// Advance the inner state by `len` bytes.  The stream must be byte aligned.
  121 static inline void IO_advance_input(istream_t *const in, size_t len);
  122 
  123 /// Returns an `ostream_t` constructed from the given pointer and length.
  124 static inline ostream_t IO_make_ostream(u8 *out, size_t len);
  125 /// Returns an `istream_t` constructed from the given pointer and length.
  126 static inline istream_t IO_make_istream(const u8 *in, size_t len);
  127 
  128 /// Returns an `istream_t` with the same base as `in`, and length `len`.
  129 /// Then, advance `in` to account for the consumed bytes.
  130 /// `in` must be byte aligned.
  131 static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len);
  132 /*** END IO STREAM OPERATIONS *********/
  133 
  134 /*** BITSTREAM OPERATIONS *************/
  135 /// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits,
  136 /// and return them interpreted as a little-endian unsigned integer.
  137 static inline u64 read_bits_LE(const u8 *src, const int num_bits,
  138                                const size_t offset);
  139 
  140 /// Read bits from the end of a HUF or FSE bitstream.  `offset` is in bits, so
  141 /// it updates `offset` to `offset - bits`, and then reads `bits` bits from
  142 /// `src + offset`.  If the offset becomes negative, the extra bits at the
  143 /// bottom are filled in with `0` bits instead of reading from before `src`.
  144 static inline u64 STREAM_read_bits(const u8 *src, const int bits,
  145                                    i64 *const offset);
  146 /*** END BITSTREAM OPERATIONS *********/
  147 
  148 /*** BIT COUNTING OPERATIONS **********/
  149 /// Returns the index of the highest set bit in `num`, or `-1` if `num == 0`
  150 static inline int highest_set_bit(const u64 num);
  151 /*** END BIT COUNTING OPERATIONS ******/
  152 
  153 /*** HUFFMAN PRIMITIVES ***************/
  154 // Table decode method uses exponential memory, so we need to limit depth
  155 #define HUF_MAX_BITS (16)
  156 
  157 // Limit the maximum number of symbols to 256 so we can store a symbol in a byte
  158 #define HUF_MAX_SYMBS (256)
  159 
  160 /// Structure containing all tables necessary for efficient Huffman decoding
  161 typedef struct {
  162     u8 *symbols;
  163     u8 *num_bits;
  164     int max_bits;
  165 } HUF_dtable;
  166 
  167 /// Decode a single symbol and read in enough bits to refresh the state
  168 static inline u8 HUF_decode_symbol(const HUF_dtable *const dtable,
  169                                    u16 *const state, const u8 *const src,
  170                                    i64 *const offset);
  171 /// Read in a full state's worth of bits to initialize it
  172 static inline void HUF_init_state(const HUF_dtable *const dtable,
  173                                   u16 *const state, const u8 *const src,
  174                                   i64 *const offset);
  175 
  176 /// Decompresses a single Huffman stream, returns the number of bytes decoded.
  177 /// `src_len` must be the exact length of the Huffman-coded block.
  178 static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,
  179                                      ostream_t *const out, istream_t *const in);
  180 /// Same as previous but decodes 4 streams, formatted as in the Zstandard
  181 /// specification.
  182 /// `src_len` must be the exact length of the Huffman-coded block.
  183 static size_t HUF_decompress_4stream(const HUF_dtable *const dtable,
  184                                      ostream_t *const out, istream_t *const in);
  185 
  186 /// Initialize a Huffman decoding table using the table of bit counts provided
  187 static void HUF_init_dtable(HUF_dtable *const table, const u8 *const bits,
  188                             const int num_symbs);
  189 /// Initialize a Huffman decoding table using the table of weights provided
  190 /// Weights follow the definition provided in the Zstandard specification
  191 static void HUF_init_dtable_usingweights(HUF_dtable *const table,
  192                                          const u8 *const weights,
  193                                          const int num_symbs);
  194 
  195 /// Free the malloc'ed parts of a decoding table
  196 static void HUF_free_dtable(HUF_dtable *const dtable);
  197 /*** END HUFFMAN PRIMITIVES ***********/
  198 
  199 /*** FSE PRIMITIVES *******************/
  200 /// For more description of FSE see
  201 /// https://github.com/Cyan4973/FiniteStateEntropy/
  202 
  203 // FSE table decoding uses exponential memory, so limit the maximum accuracy
  204 #define FSE_MAX_ACCURACY_LOG (15)
  205 // Limit the maximum number of symbols so they can be stored in a single byte
  206 #define FSE_MAX_SYMBS (256)
  207 
  208 /// The tables needed to decode FSE encoded streams
  209 typedef struct {
  210     u8 *symbols;
  211     u8 *num_bits;
  212     u16 *new_state_base;
  213     int accuracy_log;
  214 } FSE_dtable;
  215 
  216 /// Return the symbol for the current state
  217 static inline u8 FSE_peek_symbol(const FSE_dtable *const dtable,
  218                                  const u16 state);
  219 /// Read the number of bits necessary to update state, update, and shift offset
  220 /// back to reflect the bits read
  221 static inline void FSE_update_state(const FSE_dtable *const dtable,
  222                                     u16 *const state, const u8 *const src,
  223                                     i64 *const offset);
  224 
  225 /// Combine peek and update: decode a symbol and update the state
  226 static inline u8 FSE_decode_symbol(const FSE_dtable *const dtable,
  227                                    u16 *const state, const u8 *const src,
  228                                    i64 *const offset);
  229 
  230 /// Read bits from the stream to initialize the state and shift offset back
  231 static inline void FSE_init_state(const FSE_dtable *const dtable,
  232                                   u16 *const state, const u8 *const src,
  233                                   i64 *const offset);
  234 
  235 /// Decompress two interleaved bitstreams (e.g. compressed Huffman weights)
  236 /// using an FSE decoding table.  `src_len` must be the exact length of the
  237 /// block.
  238 static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable,
  239                                           ostream_t *const out,
  240                                           istream_t *const in);
  241 
  242 /// Initialize a decoding table using normalized frequencies.
  243 static void FSE_init_dtable(FSE_dtable *const dtable,
  244                             const i16 *const norm_freqs, const int num_symbs,
  245                             const int accuracy_log);
  246 
  247 /// Decode an FSE header as defined in the Zstandard format specification and
  248 /// use the decoded frequencies to initialize a decoding table.
  249 static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in,
  250                                 const int max_accuracy_log);
  251 
  252 /// Initialize an FSE table that will always return the same symbol and consume
  253 /// 0 bits per symbol, to be used for RLE mode in sequence commands
  254 static void FSE_init_dtable_rle(FSE_dtable *const dtable, const u8 symb);
  255 
  256 /// Free the malloc'ed parts of a decoding table
  257 static void FSE_free_dtable(FSE_dtable *const dtable);
  258 /*** END FSE PRIMITIVES ***************/
  259 
  260 /******* END IMPLEMENTATION PRIMITIVE PROTOTYPES ******************************/
  261 
  262 /******* ZSTD HELPER STRUCTS AND PROTOTYPES ***********************************/
  263 
  264 /// A small structure that can be reused in various places that need to access
  265 /// frame header information
  266 typedef struct {
  267     // The size of window that we need to be able to contiguously store for
  268     // references
  269     size_t window_size;
  270     // The total output size of this compressed frame
  271     size_t frame_content_size;
  272 
  273     // The dictionary id if this frame uses one
  274     u32 dictionary_id;
  275 
  276     // Whether or not the content of this frame has a checksum
  277     int content_checksum_flag;
  278     // Whether or not the output for this frame is in a single segment
  279     int single_segment_flag;
  280 } frame_header_t;
  281 
  282 /// The context needed to decode blocks in a frame
  283 typedef struct {
  284     frame_header_t header;
  285 
  286     // The total amount of data available for backreferences, to determine if an
  287     // offset too large to be correct
  288     size_t current_total_output;
  289 
  290     const u8 *dict_content;
  291     size_t dict_content_len;
  292 
  293     // Entropy encoding tables so they can be repeated by future blocks instead
  294     // of retransmitting
  295     HUF_dtable literals_dtable;
  296     FSE_dtable ll_dtable;
  297     FSE_dtable ml_dtable;
  298     FSE_dtable of_dtable;
  299 
  300     // The last 3 offsets for the special "repeat offsets".
  301     u64 previous_offsets[3];
  302 } frame_context_t;
  303 
  304 /// The decoded contents of a dictionary so that it doesn't have to be repeated
  305 /// for each frame that uses it
  306 struct dictionary_s {
  307     // Entropy tables
  308     HUF_dtable literals_dtable;
  309     FSE_dtable ll_dtable;
  310     FSE_dtable ml_dtable;
  311     FSE_dtable of_dtable;
  312 
  313     // Raw content for backreferences
  314     u8 *content;
  315     size_t content_size;
  316 
  317     // Offset history to prepopulate the frame's history
  318     u64 previous_offsets[3];
  319 
  320     u32 dictionary_id;
  321 };
  322 
  323 /// A tuple containing the parts necessary to decode and execute a ZSTD sequence
  324 /// command
  325 typedef struct {
  326     u32 literal_length;
  327     u32 match_length;
  328     u32 offset;
  329 } sequence_command_t;
  330 
  331 /// The decoder works top-down, starting at the high level like Zstd frames, and
  332 /// working down to lower more technical levels such as blocks, literals, and
  333 /// sequences.  The high-level functions roughly follow the outline of the
  334 /// format specification:
  335 /// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
  336 
  337 /// Before the implementation of each high-level function declared here, the
  338 /// prototypes for their helper functions are defined and explained
  339 
  340 /// Decode a single Zstd frame, or error if the input is not a valid frame.
  341 /// Accepts a dict argument, which may be NULL indicating no dictionary.
  342 /// See
  343 /// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame-concatenation
  344 static void decode_frame(ostream_t *const out, istream_t *const in,
  345                          const dictionary_t *const dict);
  346 
  347 // Decode data in a compressed block
  348 static void decompress_block(frame_context_t *const ctx, ostream_t *const out,
  349                              istream_t *const in);
  350 
  351 // Decode the literals section of a block
  352 static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,
  353                               u8 **const literals);
  354 
  355 // Decode the sequences part of a block
  356 static size_t decode_sequences(frame_context_t *const ctx, istream_t *const in,
  357                                sequence_command_t **const sequences);
  358 
  359 // Execute the decoded sequences on the literals block
  360 static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
  361                               const u8 *const literals,
  362                               const size_t literals_len,
  363                               const sequence_command_t *const sequences,
  364                               const size_t num_sequences);
  365 
  366 // Copies literals and returns the total literal length that was copied
  367 static u32 copy_literals(const size_t seq, istream_t *litstream,
  368                          ostream_t *const out);
  369 
  370 // Given an offset code from a sequence command (either an actual offset value
  371 // or an index for previous offset), computes the correct offset and updates
  372 // the offset history
  373 static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist);
  374 
  375 // Given an offset, match length, and total output, as well as the frame
  376 // context for the dictionary, determines if the dictionary is used and
  377 // executes the copy operation
  378 static void execute_match_copy(frame_context_t *const ctx, size_t offset,
  379                               size_t match_length, size_t total_output,
  380                               ostream_t *const out);
  381 
  382 /******* END ZSTD HELPER STRUCTS AND PROTOTYPES *******************************/
  383 
  384 size_t ZSTD_decompress(void *const dst, const size_t dst_len,
  385                        const void *const src, const size_t src_len) {
  386     dictionary_t* const uninit_dict = create_dictionary();
  387     size_t const decomp_size = ZSTD_decompress_with_dict(dst, dst_len, src,
  388                                                          src_len, uninit_dict);
  389     free_dictionary(uninit_dict);
  390     return decomp_size;
  391 }
  392 
  393 size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
  394                                  const void *const src, const size_t src_len,
  395                                  dictionary_t* parsed_dict) {
  396 
  397     istream_t in = IO_make_istream(src, src_len);
  398     ostream_t out = IO_make_ostream(dst, dst_len);
  399 
  400     // "A content compressed by Zstandard is transformed into a Zstandard frame.
  401     // Multiple frames can be appended into a single file or stream. A frame is
  402     // totally independent, has a defined beginning and end, and a set of
  403     // parameters which tells the decoder how to decompress it."
  404 
  405     /* this decoder assumes decompression of a single frame */
  406     decode_frame(&out, &in, parsed_dict);
  407 
  408     return (size_t)(out.ptr - (u8 *)dst);
  409 }
  410 
  411 /******* FRAME DECODING ******************************************************/
  412 
  413 static void decode_data_frame(ostream_t *const out, istream_t *const in,
  414                               const dictionary_t *const dict);
  415 static void init_frame_context(frame_context_t *const context,
  416                                istream_t *const in,
  417                                const dictionary_t *const dict);
  418 static void free_frame_context(frame_context_t *const context);
  419 static void parse_frame_header(frame_header_t *const header,
  420                                istream_t *const in);
  421 static void frame_context_apply_dict(frame_context_t *const ctx,
  422                                      const dictionary_t *const dict);
  423 
  424 static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
  425                             istream_t *const in);
  426 
  427 static void decode_frame(ostream_t *const out, istream_t *const in,
  428                          const dictionary_t *const dict) {
  429     const u32 magic_number = (u32)IO_read_bits(in, 32);
  430     if (magic_number == ZSTD_MAGIC_NUMBER) {
  431         // ZSTD frame
  432         decode_data_frame(out, in, dict);
  433 
  434         return;
  435     }
  436 
  437     // not a real frame or a skippable frame
  438     ERROR("Tried to decode non-ZSTD frame");
  439 }
  440 
  441 /// Decode a frame that contains compressed data.  Not all frames do as there
  442 /// are skippable frames.
  443 /// See
  444 /// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#general-structure-of-zstandard-frame-format
  445 static void decode_data_frame(ostream_t *const out, istream_t *const in,
  446                               const dictionary_t *const dict) {
  447     frame_context_t ctx;
  448 
  449     // Initialize the context that needs to be carried from block to block
  450     init_frame_context(&ctx, in, dict);
  451 
  452     if (ctx.header.frame_content_size != 0 &&
  453         ctx.header.frame_content_size > out->len) {
  454         OUT_SIZE();
  455     }
  456 
  457     decompress_data(&ctx, out, in);
  458 
  459     free_frame_context(&ctx);
  460 }
  461 
  462 /// Takes the information provided in the header and dictionary, and initializes
  463 /// the context for this frame
  464 static void init_frame_context(frame_context_t *const context,
  465                                istream_t *const in,
  466                                const dictionary_t *const dict) {
  467     // Most fields in context are correct when initialized to 0
  468     memset(context, 0, sizeof(frame_context_t));
  469 
  470     // Parse data from the frame header
  471     parse_frame_header(&context->header, in);
  472 
  473     // Set up the offset history for the repeat offset commands
  474     context->previous_offsets[0] = 1;
  475     context->previous_offsets[1] = 4;
  476     context->previous_offsets[2] = 8;
  477 
  478     // Apply details from the dict if it exists
  479     frame_context_apply_dict(context, dict);
  480 }
  481 
  482 static void free_frame_context(frame_context_t *const context) {
  483     HUF_free_dtable(&context->literals_dtable);
  484 
  485     FSE_free_dtable(&context->ll_dtable);
  486     FSE_free_dtable(&context->ml_dtable);
  487     FSE_free_dtable(&context->of_dtable);
  488 
  489     memset(context, 0, sizeof(frame_context_t));
  490 }
  491 
  492 static void parse_frame_header(frame_header_t *const header,
  493                                istream_t *const in) {
  494     // "The first header's byte is called the Frame_Header_Descriptor. It tells
  495     // which other fields are present. Decoding this byte is enough to tell the
  496     // size of Frame_Header.
  497     //
  498     // Bit number   Field name
  499     // 7-6  Frame_Content_Size_flag
  500     // 5    Single_Segment_flag
  501     // 4    Unused_bit
  502     // 3    Reserved_bit
  503     // 2    Content_Checksum_flag
  504     // 1-0  Dictionary_ID_flag"
  505     const u8 descriptor = (u8)IO_read_bits(in, 8);
  506 
  507     // decode frame header descriptor into flags
  508     const u8 frame_content_size_flag = descriptor >> 6;
  509     const u8 single_segment_flag = (descriptor >> 5) & 1;
  510     const u8 reserved_bit = (descriptor >> 3) & 1;
  511     const u8 content_checksum_flag = (descriptor >> 2) & 1;
  512     const u8 dictionary_id_flag = descriptor & 3;
  513 
  514     if (reserved_bit != 0) {
  515         CORRUPTION();
  516     }
  517 
  518     header->single_segment_flag = single_segment_flag;
  519     header->content_checksum_flag = content_checksum_flag;
  520 
  521     // decode window size
  522     if (!single_segment_flag) {
  523         // "Provides guarantees on maximum back-reference distance that will be
  524         // used within compressed data. This information is important for
  525         // decoders to allocate enough memory.
  526         //
  527         // Bit numbers  7-3         2-0
  528         // Field name   Exponent    Mantissa"
  529         u8 window_descriptor = (u8)IO_read_bits(in, 8);
  530         u8 exponent = window_descriptor >> 3;
  531         u8 mantissa = window_descriptor & 7;
  532 
  533         // Use the algorithm from the specification to compute window size
  534         // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
  535         size_t window_base = (size_t)1 << (10 + exponent);
  536         size_t window_add = (window_base / 8) * mantissa;
  537         header->window_size = window_base + window_add;
  538     }
  539 
  540     // decode dictionary id if it exists
  541     if (dictionary_id_flag) {
  542         // "This is a variable size field, which contains the ID of the
  543         // dictionary required to properly decode the frame. Note that this
  544         // field is optional. When it's not present, it's up to the caller to
  545         // make sure it uses the correct dictionary. Format is little-endian."
  546         const int bytes_array[] = {0, 1, 2, 4};
  547         const int bytes = bytes_array[dictionary_id_flag];
  548 
  549         header->dictionary_id = (u32)IO_read_bits(in, bytes * 8);
  550     } else {
  551         header->dictionary_id = 0;
  552     }
  553 
  554     // decode frame content size if it exists
  555     if (single_segment_flag || frame_content_size_flag) {
  556         // "This is the original (uncompressed) size. This information is
  557         // optional. The Field_Size is provided according to value of
  558         // Frame_Content_Size_flag. The Field_Size can be equal to 0 (not
  559         // present), 1, 2, 4 or 8 bytes. Format is little-endian."
  560         //
  561         // if frame_content_size_flag == 0 but single_segment_flag is set, we
  562         // still have a 1 byte field
  563         const int bytes_array[] = {1, 2, 4, 8};
  564         const int bytes = bytes_array[frame_content_size_flag];
  565 
  566         header->frame_content_size = IO_read_bits(in, bytes * 8);
  567         if (bytes == 2) {
  568             // "When Field_Size is 2, the offset of 256 is added."
  569             header->frame_content_size += 256;
  570         }
  571     } else {
  572         header->frame_content_size = 0;
  573     }
  574 
  575     if (single_segment_flag) {
  576         // "The Window_Descriptor byte is optional. It is absent when
  577         // Single_Segment_flag is set. In this case, the maximum back-reference
  578         // distance is the content size itself, which can be any value from 1 to
  579         // 2^64-1 bytes (16 EB)."
  580         header->window_size = header->frame_content_size;
  581     }
  582 }
  583 
  584 /// Decompress the data from a frame block by block
  585 static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
  586                             istream_t *const in) {
  587     // "A frame encapsulates one or multiple blocks. Each block can be
  588     // compressed or not, and has a guaranteed maximum content size, which
  589     // depends on frame parameters. Unlike frames, each block depends on
  590     // previous blocks for proper decoding. However, each block can be
  591     // decompressed without waiting for its successor, allowing streaming
  592     // operations."
  593     int last_block = 0;
  594     do {
  595         // "Last_Block
  596         //
  597         // The lowest bit signals if this block is the last one. Frame ends
  598         // right after this block.
  599         //
  600         // Block_Type and Block_Size
  601         //
  602         // The next 2 bits represent the Block_Type, while the remaining 21 bits
  603         // represent the Block_Size. Format is little-endian."
  604         last_block = (int)IO_read_bits(in, 1);
  605         const int block_type = (int)IO_read_bits(in, 2);
  606         const size_t block_len = IO_read_bits(in, 21);
  607 
  608         switch (block_type) {
  609         case 0: {
  610             // "Raw_Block - this is an uncompressed block. Block_Size is the
  611             // number of bytes to read and copy."
  612             const u8 *const read_ptr = IO_get_read_ptr(in, block_len);
  613             u8 *const write_ptr = IO_get_write_ptr(out, block_len);
  614 
  615             // Copy the raw data into the output
  616             memcpy(write_ptr, read_ptr, block_len);
  617 
  618             ctx->current_total_output += block_len;
  619             break;
  620         }
  621         case 1: {
  622             // "RLE_Block - this is a single byte, repeated N times. In which
  623             // case, Block_Size is the size to regenerate, while the
  624             // "compressed" block is just 1 byte (the byte to repeat)."
  625             const u8 *const read_ptr = IO_get_read_ptr(in, 1);
  626             u8 *const write_ptr = IO_get_write_ptr(out, block_len);
  627 
  628             // Copy `block_len` copies of `read_ptr[0]` to the output
  629             memset(write_ptr, read_ptr[0], block_len);
  630 
  631             ctx->current_total_output += block_len;
  632             break;
  633         }
  634         case 2: {
  635             // "Compressed_Block - this is a Zstandard compressed block,
  636             // detailed in another section of this specification. Block_Size is
  637             // the compressed size.
  638 
  639             // Create a sub-stream for the block
  640             istream_t block_stream = IO_make_sub_istream(in, block_len);
  641             decompress_block(ctx, out, &block_stream);
  642             break;
  643         }
  644         case 3:
  645             // "Reserved - this is not a block. This value cannot be used with
  646             // current version of this specification."
  647             CORRUPTION();
  648             break;
  649         default:
  650             IMPOSSIBLE();
  651         }
  652     } while (!last_block);
  653 
  654     if (ctx->header.content_checksum_flag) {
  655         // This program does not support checking the checksum, so skip over it
  656         // if it's present
  657         IO_advance_input(in, 4);
  658     }
  659 }
  660 /******* END FRAME DECODING ***************************************************/
  661 
  662 /******* BLOCK DECOMPRESSION **************************************************/
  663 static void decompress_block(frame_context_t *const ctx, ostream_t *const out,
  664                              istream_t *const in) {
  665     // "A compressed block consists of 2 sections :
  666     //
  667     // Literals_Section
  668     // Sequences_Section"
  669 
  670 
  671     // Part 1: decode the literals block
  672     u8 *literals = NULL;
  673     const size_t literals_size = decode_literals(ctx, in, &literals);
  674 
  675     // Part 2: decode the sequences block
  676     sequence_command_t *sequences = NULL;
  677     const size_t num_sequences =
  678         decode_sequences(ctx, in, &sequences);
  679 
  680     // Part 3: combine literals and sequence commands to generate output
  681     execute_sequences(ctx, out, literals, literals_size, sequences,
  682                       num_sequences);
  683     free(literals);
  684     free(sequences);
  685 }
  686 /******* END BLOCK DECOMPRESSION **********************************************/
  687 
  688 /******* LITERALS DECODING ****************************************************/
  689 static size_t decode_literals_simple(istream_t *const in, u8 **const literals,
  690                                      const int block_type,
  691                                      const int size_format);
  692 static size_t decode_literals_compressed(frame_context_t *const ctx,
  693                                          istream_t *const in,
  694                                          u8 **const literals,
  695                                          const int block_type,
  696                                          const int size_format);
  697 static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in);
  698 static void fse_decode_hufweights(ostream_t *weights, istream_t *const in,
  699                                     int *const num_symbs);
  700 
  701 static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,
  702                               u8 **const literals) {
  703     // "Literals can be stored uncompressed or compressed using Huffman prefix
  704     // codes. When compressed, an optional tree description can be present,
  705     // followed by 1 or 4 streams."
  706     //
  707     // "Literals_Section_Header
  708     //
  709     // Header is in charge of describing how literals are packed. It's a
  710     // byte-aligned variable-size bitfield, ranging from 1 to 5 bytes, using
  711     // little-endian convention."
  712     //
  713     // "Literals_Block_Type
  714     //
  715     // This field uses 2 lowest bits of first byte, describing 4 different block
  716     // types"
  717     //
  718     // size_format takes between 1 and 2 bits
  719     int block_type = (int)IO_read_bits(in, 2);
  720     int size_format = (int)IO_read_bits(in, 2);
  721 
  722     if (block_type <= 1) {
  723         // Raw or RLE literals block
  724         return decode_literals_simple(in, literals, block_type,
  725                                       size_format);
  726     } else {
  727         // Huffman compressed literals
  728         return decode_literals_compressed(ctx, in, literals, block_type,
  729                                           size_format);
  730     }
  731 }
  732 
  733 /// Decodes literals blocks in raw or RLE form
  734 static size_t decode_literals_simple(istream_t *const in, u8 **const literals,
  735                                      const int block_type,
  736                                      const int size_format) {
  737     size_t size;
  738     switch (size_format) {
  739     // These cases are in the form ?0
  740     // In this case, the ? bit is actually part of the size field
  741     case 0:
  742     case 2:
  743         // "Size_Format uses 1 bit. Regenerated_Size uses 5 bits (0-31)."
  744         IO_rewind_bits(in, 1);
  745         size = IO_read_bits(in, 5);
  746         break;
  747     case 1:
  748         // "Size_Format uses 2 bits. Regenerated_Size uses 12 bits (0-4095)."
  749         size = IO_read_bits(in, 12);
  750         break;
  751     case 3:
  752         // "Size_Format uses 2 bits. Regenerated_Size uses 20 bits (0-1048575)."
  753         size = IO_read_bits(in, 20);
  754         break;
  755     default:
  756         // Size format is in range 0-3
  757         IMPOSSIBLE();
  758     }
  759 
  760     if (size > MAX_LITERALS_SIZE) {
  761         CORRUPTION();
  762     }
  763 
  764     *literals = malloc(size);
  765     if (!*literals) {
  766         BAD_ALLOC();
  767     }
  768 
  769     switch (block_type) {
  770     case 0: {
  771         // "Raw_Literals_Block - Literals are stored uncompressed."
  772         const u8 *const read_ptr = IO_get_read_ptr(in, size);
  773         memcpy(*literals, read_ptr, size);
  774         break;
  775     }
  776     case 1: {
  777         // "RLE_Literals_Block - Literals consist of a single byte value repeated N times."
  778         const u8 *const read_ptr = IO_get_read_ptr(in, 1);
  779         memset(*literals, read_ptr[0], size);
  780         break;
  781     }
  782     default:
  783         IMPOSSIBLE();
  784     }
  785 
  786     return size;
  787 }
  788 
  789 /// Decodes Huffman compressed literals
  790 static size_t decode_literals_compressed(frame_context_t *const ctx,
  791                                          istream_t *const in,
  792                                          u8 **const literals,
  793                                          const int block_type,
  794                                          const int size_format) {
  795     size_t regenerated_size, compressed_size;
  796     // Only size_format=0 has 1 stream, so default to 4
  797     int num_streams = 4;
  798     switch (size_format) {
  799     case 0:
  800         // "A single stream. Both Compressed_Size and Regenerated_Size use 10
  801         // bits (0-1023)."
  802         num_streams = 1;
  803     // Fall through as it has the same size format
  804         /* fallthrough */
  805     case 1:
  806         // "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits
  807         // (0-1023)."
  808         regenerated_size = IO_read_bits(in, 10);
  809         compressed_size = IO_read_bits(in, 10);
  810         break;
  811     case 2:
  812         // "4 streams. Both Compressed_Size and Regenerated_Size use 14 bits
  813         // (0-16383)."
  814         regenerated_size = IO_read_bits(in, 14);
  815         compressed_size = IO_read_bits(in, 14);
  816         break;
  817     case 3:
  818         // "4 streams. Both Compressed_Size and Regenerated_Size use 18 bits
  819         // (0-262143)."
  820         regenerated_size = IO_read_bits(in, 18);
  821         compressed_size = IO_read_bits(in, 18);
  822         break;
  823     default:
  824         // Impossible
  825         IMPOSSIBLE();
  826     }
  827     if (regenerated_size > MAX_LITERALS_SIZE) {
  828         CORRUPTION();
  829     }
  830 
  831     *literals = malloc(regenerated_size);
  832     if (!*literals) {
  833         BAD_ALLOC();
  834     }
  835 
  836     ostream_t lit_stream = IO_make_ostream(*literals, regenerated_size);
  837     istream_t huf_stream = IO_make_sub_istream(in, compressed_size);
  838 
  839     if (block_type == 2) {
  840         // Decode the provided Huffman table
  841         // "This section is only present when Literals_Block_Type type is
  842         // Compressed_Literals_Block (2)."
  843 
  844         HUF_free_dtable(&ctx->literals_dtable);
  845         decode_huf_table(&ctx->literals_dtable, &huf_stream);
  846     } else {
  847         // If the previous Huffman table is being repeated, ensure it exists
  848         if (!ctx->literals_dtable.symbols) {
  849             CORRUPTION();
  850         }
  851     }
  852 
  853     size_t symbols_decoded;
  854     if (num_streams == 1) {
  855         symbols_decoded = HUF_decompress_1stream(&ctx->literals_dtable, &lit_stream, &huf_stream);
  856     } else {
  857         symbols_decoded = HUF_decompress_4stream(&ctx->literals_dtable, &lit_stream, &huf_stream);
  858     }
  859 
  860     if (symbols_decoded != regenerated_size) {
  861         CORRUPTION();
  862     }
  863 
  864     return regenerated_size;
  865 }
  866 
  867 // Decode the Huffman table description
  868 static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in) {
  869     // "All literal values from zero (included) to last present one (excluded)
  870     // are represented by Weight with values from 0 to Max_Number_of_Bits."
  871 
  872     // "This is a single byte value (0-255), which describes how to decode the list of weights."
  873     const u8 header = IO_read_bits(in, 8);
  874 
  875     u8 weights[HUF_MAX_SYMBS];
  876     memset(weights, 0, sizeof(weights));
  877 
  878     int num_symbs;
  879 
  880     if (header >= 128) {
  881         // "This is a direct representation, where each Weight is written
  882         // directly as a 4 bits field (0-15). The full representation occupies
  883         // ((Number_of_Symbols+1)/2) bytes, meaning it uses a last full byte
  884         // even if Number_of_Symbols is odd. Number_of_Symbols = headerByte -
  885         // 127"
  886         num_symbs = header - 127;
  887         const size_t bytes = (num_symbs + 1) / 2;
  888 
  889         const u8 *const weight_src = IO_get_read_ptr(in, bytes);
  890 
  891         for (int i = 0; i < num_symbs; i++) {
  892             // "They are encoded forward, 2
  893             // weights to a byte with the first weight taking the top four bits
  894             // and the second taking the bottom four (e.g. the following
  895             // operations could be used to read the weights: Weight[0] =
  896             // (Byte[0] >> 4), Weight[1] = (Byte[0] & 0xf), etc.)."
  897             if (i % 2 == 0) {
  898                 weights[i] = weight_src[i / 2] >> 4;
  899             } else {
  900                 weights[i] = weight_src[i / 2] & 0xf;
  901             }
  902         }
  903     } else {
  904         // The weights are FSE encoded, decode them before we can construct the
  905         // table
  906         istream_t fse_stream = IO_make_sub_istream(in, header);
  907         ostream_t weight_stream = IO_make_ostream(weights, HUF_MAX_SYMBS);
  908         fse_decode_hufweights(&weight_stream, &fse_stream, &num_symbs);
  909     }
  910 
  911     // Construct the table using the decoded weights
  912     HUF_init_dtable_usingweights(dtable, weights, num_symbs);
  913 }
  914 
  915 static void fse_decode_hufweights(ostream_t *weights, istream_t *const in,
  916                                     int *const num_symbs) {
  917     const int MAX_ACCURACY_LOG = 7;
  918 
  919     FSE_dtable dtable;
  920 
  921     // "An FSE bitstream starts by a header, describing probabilities
  922     // distribution. It will create a Decoding Table. For a list of Huffman
  923     // weights, maximum accuracy is 7 bits."
  924     FSE_decode_header(&dtable, in, MAX_ACCURACY_LOG);
  925 
  926     // Decode the weights
  927     *num_symbs = FSE_decompress_interleaved2(&dtable, weights, in);
  928 
  929     FSE_free_dtable(&dtable);
  930 }
  931 /******* END LITERALS DECODING ************************************************/
  932 
  933 /******* SEQUENCE DECODING ****************************************************/
  934 /// The combination of FSE states needed to decode sequences
  935 typedef struct {
  936     FSE_dtable ll_table;
  937     FSE_dtable of_table;
  938     FSE_dtable ml_table;
  939 
  940     u16 ll_state;
  941     u16 of_state;
  942     u16 ml_state;
  943 } sequence_states_t;
  944 
  945 /// Different modes to signal to decode_seq_tables what to do
  946 typedef enum {
  947     seq_literal_length = 0,
  948     seq_offset = 1,
  949     seq_match_length = 2,
  950 } seq_part_t;
  951 
  952 typedef enum {
  953     seq_predefined = 0,
  954     seq_rle = 1,
  955     seq_fse = 2,
  956     seq_repeat = 3,
  957 } seq_mode_t;
  958 
  959 /// The predefined FSE distribution tables for `seq_predefined` mode
  960 static const i16 SEQ_LITERAL_LENGTH_DEFAULT_DIST[36] = {
  961     4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1,  1,  2,  2,
  962     2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1};
  963 static const i16 SEQ_OFFSET_DEFAULT_DIST[29] = {
  964     1, 1, 1, 1, 1, 1, 2, 2, 2, 1,  1,  1,  1,  1, 1,
  965     1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1};
  966 static const i16 SEQ_MATCH_LENGTH_DEFAULT_DIST[53] = {
  967     1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1,  1,  1,  1,  1,  1,  1, 1,
  968     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  1,  1,  1,  1,  1,  1, 1,
  969     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1};
  970 
  971 /// The sequence decoding baseline and number of additional bits to read/add
  972 /// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets
  973 static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = {
  974     0,  1,  2,   3,   4,   5,    6,    7,    8,    9,     10,    11,
  975     12, 13, 14,  15,  16,  18,   20,   22,   24,   28,    32,    40,
  976     48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536};
  977 static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = {
  978     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  1,  1,
  979     1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  980 
  981 static const u32 SEQ_MATCH_LENGTH_BASELINES[53] = {
  982     3,  4,   5,   6,   7,    8,    9,    10,   11,    12,    13,   14, 15, 16,
  983     17, 18,  19,  20,  21,   22,   23,   24,   25,    26,    27,   28, 29, 30,
  984     31, 32,  33,  34,  35,   37,   39,   41,   43,    47,    51,   59, 67, 83,
  985     99, 131, 259, 515, 1027, 2051, 4099, 8195, 16387, 32771, 65539};
  986 static const u8 SEQ_MATCH_LENGTH_EXTRA_BITS[53] = {
  987     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0, 0,
  988     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  1,  1,  1, 1,
  989     2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  990 
  991 /// Offset decoding is simpler so we just need a maximum code value
  992 static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52};
  993 
  994 static void decompress_sequences(frame_context_t *const ctx,
  995                                  istream_t *const in,
  996                                  sequence_command_t *const sequences,
  997                                  const size_t num_sequences);
  998 static sequence_command_t decode_sequence(sequence_states_t *const state,
  999                                           const u8 *const src,
 1000                                           i64 *const offset);
 1001 static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
 1002                                const seq_part_t type, const seq_mode_t mode);
 1003 
 1004 static size_t decode_sequences(frame_context_t *const ctx, istream_t *in,
 1005                                sequence_command_t **const sequences) {
 1006     // "A compressed block is a succession of sequences . A sequence is a
 1007     // literal copy command, followed by a match copy command. A literal copy
 1008     // command specifies a length. It is the number of bytes to be copied (or
 1009     // extracted) from the literal section. A match copy command specifies an
 1010     // offset and a length. The offset gives the position to copy from, which
 1011     // can be within a previous block."
 1012 
 1013     size_t num_sequences;
 1014 
 1015     // "Number_of_Sequences
 1016     //
 1017     // This is a variable size field using between 1 and 3 bytes. Let's call its
 1018     // first byte byte0."
 1019     u8 header = IO_read_bits(in, 8);
 1020     if (header == 0) {
 1021         // "There are no sequences. The sequence section stops there.
 1022         // Regenerated content is defined entirely by literals section."
 1023         *sequences = NULL;
 1024         return 0;
 1025     } else if (header < 128) {
 1026         // "Number_of_Sequences = byte0 . Uses 1 byte."
 1027         num_sequences = header;
 1028     } else if (header < 255) {
 1029         // "Number_of_Sequences = ((byte0-128) << 8) + byte1 . Uses 2 bytes."
 1030         num_sequences = ((header - 128) << 8) + IO_read_bits(in, 8);
 1031     } else {
 1032         // "Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00 . Uses 3 bytes."
 1033         num_sequences = IO_read_bits(in, 16) + 0x7F00;
 1034     }
 1035 
 1036     *sequences = malloc(num_sequences * sizeof(sequence_command_t));
 1037     if (!*sequences) {
 1038         BAD_ALLOC();
 1039     }
 1040 
 1041     decompress_sequences(ctx, in, *sequences, num_sequences);
 1042     return num_sequences;
 1043 }
 1044 
 1045 /// Decompress the FSE encoded sequence commands
 1046 static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
 1047                                  sequence_command_t *const sequences,
 1048                                  const size_t num_sequences) {
 1049     // "The Sequences_Section regroup all symbols required to decode commands.
 1050     // There are 3 symbol types : literals lengths, offsets and match lengths.
 1051     // They are encoded together, interleaved, in a single bitstream."
 1052 
 1053     // "Symbol compression modes
 1054     //
 1055     // This is a single byte, defining the compression mode of each symbol
 1056     // type."
 1057     //
 1058     // Bit number : Field name
 1059     // 7-6        : Literals_Lengths_Mode
 1060     // 5-4        : Offsets_Mode
 1061     // 3-2        : Match_Lengths_Mode
 1062     // 1-0        : Reserved
 1063     u8 compression_modes = IO_read_bits(in, 8);
 1064 
 1065     if ((compression_modes & 3) != 0) {
 1066         // Reserved bits set
 1067         CORRUPTION();
 1068     }
 1069 
 1070     // "Following the header, up to 3 distribution tables can be described. When
 1071     // present, they are in this order :
 1072     //
 1073     // Literals lengths
 1074     // Offsets
 1075     // Match Lengths"
 1076     // Update the tables we have stored in the context
 1077     decode_seq_table(&ctx->ll_dtable, in, seq_literal_length,
 1078                      (compression_modes >> 6) & 3);
 1079 
 1080     decode_seq_table(&ctx->of_dtable, in, seq_offset,
 1081                      (compression_modes >> 4) & 3);
 1082 
 1083     decode_seq_table(&ctx->ml_dtable, in, seq_match_length,
 1084                      (compression_modes >> 2) & 3);
 1085 
 1086 
 1087     sequence_states_t states;
 1088 
 1089     // Initialize the decoding tables
 1090     {
 1091         states.ll_table = ctx->ll_dtable;
 1092         states.of_table = ctx->of_dtable;
 1093         states.ml_table = ctx->ml_dtable;
 1094     }
 1095 
 1096     const size_t len = IO_istream_len(in);
 1097     const u8 *const src = IO_get_read_ptr(in, len);
 1098 
 1099     // "After writing the last bit containing information, the compressor writes
 1100     // a single 1-bit and then fills the byte with 0-7 0 bits of padding."
 1101     const int padding = 8 - highest_set_bit(src[len - 1]);
 1102     // The offset starts at the end because FSE streams are read backwards
 1103     i64 bit_offset = (i64)(len * 8 - (size_t)padding);
 1104 
 1105     // "The bitstream starts with initial state values, each using the required
 1106     // number of bits in their respective accuracy, decoded previously from
 1107     // their normalized distribution.
 1108     //
 1109     // It starts by Literals_Length_State, followed by Offset_State, and finally
 1110     // Match_Length_State."
 1111     FSE_init_state(&states.ll_table, &states.ll_state, src, &bit_offset);
 1112     FSE_init_state(&states.of_table, &states.of_state, src, &bit_offset);
 1113     FSE_init_state(&states.ml_table, &states.ml_state, src, &bit_offset);
 1114 
 1115     for (size_t i = 0; i < num_sequences; i++) {
 1116         // Decode sequences one by one
 1117         sequences[i] = decode_sequence(&states, src, &bit_offset);
 1118     }
 1119 
 1120     if (bit_offset != 0) {
 1121         CORRUPTION();
 1122     }
 1123 }
 1124 
 1125 // Decode a single sequence and update the state
 1126 static sequence_command_t decode_sequence(sequence_states_t *const states,
 1127                                           const u8 *const src,
 1128                                           i64 *const offset) {
 1129     // "Each symbol is a code in its own context, which specifies Baseline and
 1130     // Number_of_Bits to add. Codes are FSE compressed, and interleaved with raw
 1131     // additional bits in the same bitstream."
 1132 
 1133     // Decode symbols, but don't update states
 1134     const u8 of_code = FSE_peek_symbol(&states->of_table, states->of_state);
 1135     const u8 ll_code = FSE_peek_symbol(&states->ll_table, states->ll_state);
 1136     const u8 ml_code = FSE_peek_symbol(&states->ml_table, states->ml_state);
 1137 
 1138     // Offset doesn't need a max value as it's not decoded using a table
 1139     if (ll_code > SEQ_MAX_CODES[seq_literal_length] ||
 1140         ml_code > SEQ_MAX_CODES[seq_match_length]) {
 1141         CORRUPTION();
 1142     }
 1143 
 1144     // Read the interleaved bits
 1145     sequence_command_t seq;
 1146     // "Decoding starts by reading the Number_of_Bits required to decode Offset.
 1147     // It then does the same for Match_Length, and then for Literals_Length."
 1148     seq.offset = ((u32)1 << of_code) + STREAM_read_bits(src, of_code, offset);
 1149 
 1150     seq.match_length =
 1151         SEQ_MATCH_LENGTH_BASELINES[ml_code] +
 1152         STREAM_read_bits(src, SEQ_MATCH_LENGTH_EXTRA_BITS[ml_code], offset);
 1153 
 1154     seq.literal_length =
 1155         SEQ_LITERAL_LENGTH_BASELINES[ll_code] +
 1156         STREAM_read_bits(src, SEQ_LITERAL_LENGTH_EXTRA_BITS[ll_code], offset);
 1157 
 1158     // "If it is not the last sequence in the block, the next operation is to
 1159     // update states. Using the rules pre-calculated in the decoding tables,
 1160     // Literals_Length_State is updated, followed by Match_Length_State, and
 1161     // then Offset_State."
 1162     // If the stream is complete don't read bits to update state
 1163     if (*offset != 0) {
 1164         FSE_update_state(&states->ll_table, &states->ll_state, src, offset);
 1165         FSE_update_state(&states->ml_table, &states->ml_state, src, offset);
 1166         FSE_update_state(&states->of_table, &states->of_state, src, offset);
 1167     }
 1168 
 1169     return seq;
 1170 }
 1171 
 1172 /// Given a sequence part and table mode, decode the FSE distribution
 1173 /// Errors if the mode is `seq_repeat` without a pre-existing table in `table`
 1174 static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
 1175                              const seq_part_t type, const seq_mode_t mode) {
 1176     // Constant arrays indexed by seq_part_t
 1177     const i16 *const default_distributions[] = {SEQ_LITERAL_LENGTH_DEFAULT_DIST,
 1178                                                 SEQ_OFFSET_DEFAULT_DIST,
 1179                                                 SEQ_MATCH_LENGTH_DEFAULT_DIST};
 1180     const size_t default_distribution_lengths[] = {36, 29, 53};
 1181     const size_t default_distribution_accuracies[] = {6, 5, 6};
 1182 
 1183     const size_t max_accuracies[] = {9, 8, 9};
 1184 
 1185     if (mode != seq_repeat) {
 1186         // Free old one before overwriting
 1187         FSE_free_dtable(table);
 1188     }
 1189 
 1190     switch (mode) {
 1191     case seq_predefined: {
 1192         // "Predefined_Mode : uses a predefined distribution table."
 1193         const i16 *distribution = default_distributions[type];
 1194         const size_t symbs = default_distribution_lengths[type];
 1195         const size_t accuracy_log = default_distribution_accuracies[type];
 1196 
 1197         FSE_init_dtable(table, distribution, symbs, accuracy_log);
 1198         break;
 1199     }
 1200     case seq_rle: {
 1201         // "RLE_Mode : it's a single code, repeated Number_of_Sequences times."
 1202         const u8 symb = IO_get_read_ptr(in, 1)[0];
 1203         FSE_init_dtable_rle(table, symb);
 1204         break;
 1205     }
 1206     case seq_fse: {
 1207         // "FSE_Compressed_Mode : standard FSE compression. A distribution table
 1208         // will be present "
 1209         FSE_decode_header(table, in, max_accuracies[type]);
 1210         break;
 1211     }
 1212     case seq_repeat:
 1213         // "Repeat_Mode : re-use distribution table from previous compressed
 1214         // block."
 1215         // Nothing to do here, table will be unchanged
 1216         if (!table->symbols) {
 1217             // This mode is invalid if we don't already have a table
 1218             CORRUPTION();
 1219         }
 1220         break;
 1221     default:
 1222         // Impossible, as mode is from 0-3
 1223         IMPOSSIBLE();
 1224         break;
 1225     }
 1226 
 1227 }
 1228 /******* END SEQUENCE DECODING ************************************************/
 1229 
 1230 /******* SEQUENCE EXECUTION ***************************************************/
 1231 static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
 1232                               const u8 *const literals,
 1233                               const size_t literals_len,
 1234                               const sequence_command_t *const sequences,
 1235                               const size_t num_sequences) {
 1236     istream_t litstream = IO_make_istream(literals, literals_len);
 1237 
 1238     u64 *const offset_hist = ctx->previous_offsets;
 1239     size_t total_output = ctx->current_total_output;
 1240 
 1241     for (size_t i = 0; i < num_sequences; i++) {
 1242         const sequence_command_t seq = sequences[i];
 1243         {
 1244             const u32 literals_size = copy_literals(seq.literal_length, &litstream, out);
 1245             total_output += literals_size;
 1246         }
 1247 
 1248         size_t const offset = compute_offset(seq, offset_hist);
 1249 
 1250         size_t const match_length = seq.match_length;
 1251 
 1252         execute_match_copy(ctx, offset, match_length, total_output, out);
 1253 
 1254         total_output += match_length;
 1255     }
 1256 
 1257     // Copy any leftover literals
 1258     {
 1259         size_t len = IO_istream_len(&litstream);
 1260         copy_literals(len, &litstream, out);
 1261         total_output += len;
 1262     }
 1263 
 1264     ctx->current_total_output = total_output;
 1265 }
 1266 
 1267 static u32 copy_literals(const size_t literal_length, istream_t *litstream,
 1268                          ostream_t *const out) {
 1269     // If the sequence asks for more literals than are left, the
 1270     // sequence must be corrupted
 1271     if (literal_length > IO_istream_len(litstream)) {
 1272         CORRUPTION();
 1273     }
 1274 
 1275     u8 *const write_ptr = IO_get_write_ptr(out, literal_length);
 1276     const u8 *const read_ptr =
 1277          IO_get_read_ptr(litstream, literal_length);
 1278     // Copy literals to output
 1279     memcpy(write_ptr, read_ptr, literal_length);
 1280 
 1281     return literal_length;
 1282 }
 1283 
 1284 static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist) {
 1285     size_t offset;
 1286     // Offsets are special, we need to handle the repeat offsets
 1287     if (seq.offset <= 3) {
 1288         // "The first 3 values define a repeated offset and we will call
 1289         // them Repeated_Offset1, Repeated_Offset2, and Repeated_Offset3.
 1290         // They are sorted in recency order, with Repeated_Offset1 meaning
 1291         // 'most recent one'".
 1292 
 1293         // Use 0 indexing for the array
 1294         u32 idx = seq.offset - 1;
 1295         if (seq.literal_length == 0) {
 1296             // "There is an exception though, when current sequence's
 1297             // literals length is 0. In this case, repeated offsets are
 1298             // shifted by one, so Repeated_Offset1 becomes Repeated_Offset2,
 1299             // Repeated_Offset2 becomes Repeated_Offset3, and
 1300             // Repeated_Offset3 becomes Repeated_Offset1 - 1_byte."
 1301             idx++;
 1302         }
 1303 
 1304         if (idx == 0) {
 1305             offset = offset_hist[0];
 1306         } else {
 1307             // If idx == 3 then literal length was 0 and the offset was 3,
 1308             // as per the exception listed above
 1309             offset = idx < 3 ? offset_hist[idx] : offset_hist[0] - 1;
 1310 
 1311             // If idx == 1 we don't need to modify offset_hist[2], since
 1312             // we're using the second-most recent code
 1313             if (idx > 1) {
 1314                 offset_hist[2] = offset_hist[1];
 1315             }
 1316             offset_hist[1] = offset_hist[0];
 1317             offset_hist[0] = offset;
 1318         }
 1319     } else {
 1320         // When it's not a repeat offset:
 1321         // "if (Offset_Value > 3) offset = Offset_Value - 3;"
 1322         offset = seq.offset - 3;
 1323 
 1324         // Shift back history
 1325         offset_hist[2] = offset_hist[1];
 1326         offset_hist[1] = offset_hist[0];
 1327         offset_hist[0] = offset;
 1328     }
 1329     return offset;
 1330 }
 1331 
 1332 static void execute_match_copy(frame_context_t *const ctx, size_t offset,
 1333                               size_t match_length, size_t total_output,
 1334                               ostream_t *const out) {
 1335     u8 *write_ptr = IO_get_write_ptr(out, match_length);
 1336     if (total_output <= ctx->header.window_size) {
 1337         // In this case offset might go back into the dictionary
 1338         if (offset > total_output + ctx->dict_content_len) {
 1339             // The offset goes beyond even the dictionary
 1340             CORRUPTION();
 1341         }
 1342 
 1343         if (offset > total_output) {
 1344             // "The rest of the dictionary is its content. The content act
 1345             // as a "past" in front of data to compress or decompress, so it
 1346             // can be referenced in sequence commands."
 1347             const size_t dict_copy =
 1348                 MIN(offset - total_output, match_length);
 1349             const size_t dict_offset =
 1350                 ctx->dict_content_len - (offset - total_output);
 1351 
 1352             memcpy(write_ptr, ctx->dict_content + dict_offset, dict_copy);
 1353             write_ptr += dict_copy;
 1354             match_length -= dict_copy;
 1355         }
 1356     } else if (offset > ctx->header.window_size) {
 1357         CORRUPTION();
 1358     }
 1359 
 1360     // We must copy byte by byte because the match length might be larger
 1361     // than the offset
 1362     // ex: if the output so far was "abc", a command with offset=3 and
 1363     // match_length=6 would produce "abcabcabc" as the new output
 1364     for (size_t j = 0; j < match_length; j++) {
 1365         *write_ptr = *(write_ptr - offset);
 1366         write_ptr++;
 1367     }
 1368 }
 1369 /******* END SEQUENCE EXECUTION ***********************************************/
 1370 
 1371 /******* OUTPUT SIZE COUNTING *************************************************/
 1372 /// Get the decompressed size of an input stream so memory can be allocated in
 1373 /// advance.
 1374 /// This implementation assumes `src` points to a single ZSTD-compressed frame
 1375 size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
 1376     istream_t in = IO_make_istream(src, src_len);
 1377 
 1378     // get decompressed size from ZSTD frame header
 1379     {
 1380         const u32 magic_number = (u32)IO_read_bits(&in, 32);
 1381 
 1382         if (magic_number == ZSTD_MAGIC_NUMBER) {
 1383             // ZSTD frame
 1384             frame_header_t header;
 1385             parse_frame_header(&header, &in);
 1386 
 1387             if (header.frame_content_size == 0 && !header.single_segment_flag) {
 1388                 // Content size not provided, we can't tell
 1389                 return (size_t)-1;
 1390             }
 1391 
 1392             return header.frame_content_size;
 1393         } else {
 1394             // not a real frame or skippable frame
 1395             ERROR("ZSTD frame magic number did not match");
 1396         }
 1397     }
 1398 }
 1399 /******* END OUTPUT SIZE COUNTING *********************************************/
 1400 
 1401 /******* DICTIONARY PARSING ***************************************************/
 1402 dictionary_t* create_dictionary() {
 1403     dictionary_t* const dict = calloc(1, sizeof(dictionary_t));
 1404     if (!dict) {
 1405         BAD_ALLOC();
 1406     }
 1407     return dict;
 1408 }
 1409 
 1410 /// Free an allocated dictionary
 1411 void free_dictionary(dictionary_t *const dict) {
 1412     HUF_free_dtable(&dict->literals_dtable);
 1413     FSE_free_dtable(&dict->ll_dtable);
 1414     FSE_free_dtable(&dict->of_dtable);
 1415     FSE_free_dtable(&dict->ml_dtable);
 1416 
 1417     free(dict->content);
 1418 
 1419     memset(dict, 0, sizeof(dictionary_t));
 1420 
 1421     free(dict);
 1422 }
 1423 
 1424 
 1425 #if !defined(ZDEC_NO_DICTIONARY)
 1426 #define DICT_SIZE_ERROR() ERROR("Dictionary size cannot be less than 8 bytes")
 1427 #define NULL_SRC() ERROR("Tried to create dictionary with pointer to null src");
 1428 
 1429 static void init_dictionary_content(dictionary_t *const dict,
 1430                                     istream_t *const in);
 1431 
 1432 void parse_dictionary(dictionary_t *const dict, const void *src,
 1433                              size_t src_len) {
 1434     const u8 *byte_src = (const u8 *)src;
 1435     memset(dict, 0, sizeof(dictionary_t));
 1436     if (src == NULL) { /* cannot initialize dictionary with null src */
 1437         NULL_SRC();
 1438     }
 1439     if (src_len < 8) {
 1440         DICT_SIZE_ERROR();
 1441     }
 1442 
 1443     istream_t in = IO_make_istream(byte_src, src_len);
 1444 
 1445     const u32 magic_number = IO_read_bits(&in, 32);
 1446     if (magic_number != 0xEC30A437) {
 1447         // raw content dict
 1448         IO_rewind_bits(&in, 32);
 1449         init_dictionary_content(dict, &in);
 1450         return;
 1451     }
 1452 
 1453     dict->dictionary_id = IO_read_bits(&in, 32);
 1454 
 1455     // "Entropy_Tables : following the same format as the tables in compressed
 1456     // blocks. They are stored in following order : Huffman tables for literals,
 1457     // FSE table for offsets, FSE table for match lengths, and FSE table for
 1458     // literals lengths. It's finally followed by 3 offset values, populating
 1459     // recent offsets (instead of using {1,4,8}), stored in order, 4-bytes
 1460     // little-endian each, for a total of 12 bytes. Each recent offset must have
 1461     // a value < dictionary size."
 1462     decode_huf_table(&dict->literals_dtable, &in);
 1463     decode_seq_table(&dict->of_dtable, &in, seq_offset, seq_fse);
 1464     decode_seq_table(&dict->ml_dtable, &in, seq_match_length, seq_fse);
 1465     decode_seq_table(&dict->ll_dtable, &in, seq_literal_length, seq_fse);
 1466 
 1467     // Read in the previous offset history
 1468     dict->previous_offsets[0] = IO_read_bits(&in, 32);
 1469     dict->previous_offsets[1] = IO_read_bits(&in, 32);
 1470     dict->previous_offsets[2] = IO_read_bits(&in, 32);
 1471 
 1472     // Ensure the provided offsets aren't too large
 1473     // "Each recent offset must have a value < dictionary size."
 1474     for (int i = 0; i < 3; i++) {
 1475         if (dict->previous_offsets[i] > src_len) {
 1476             ERROR("Dictionary corrupted");
 1477         }
 1478     }
 1479 
 1480     // "Content : The rest of the dictionary is its content. The content act as
 1481     // a "past" in front of data to compress or decompress, so it can be
 1482     // referenced in sequence commands."
 1483     init_dictionary_content(dict, &in);
 1484 }
 1485 
 1486 static void init_dictionary_content(dictionary_t *const dict,
 1487                                     istream_t *const in) {
 1488     // Copy in the content
 1489     dict->content_size = IO_istream_len(in);
 1490     dict->content = malloc(dict->content_size);
 1491     if (!dict->content) {
 1492         BAD_ALLOC();
 1493     }
 1494 
 1495     const u8 *const content = IO_get_read_ptr(in, dict->content_size);
 1496 
 1497     memcpy(dict->content, content, dict->content_size);
 1498 }
 1499 
 1500 static void HUF_copy_dtable(HUF_dtable *const dst,
 1501                             const HUF_dtable *const src) {
 1502     if (src->max_bits == 0) {
 1503         memset(dst, 0, sizeof(HUF_dtable));
 1504         return;
 1505     }
 1506 
 1507     const size_t size = (size_t)1 << src->max_bits;
 1508     dst->max_bits = src->max_bits;
 1509 
 1510     dst->symbols = malloc(size);
 1511     dst->num_bits = malloc(size);
 1512     if (!dst->symbols || !dst->num_bits) {
 1513         BAD_ALLOC();
 1514     }
 1515 
 1516     memcpy(dst->symbols, src->symbols, size);
 1517     memcpy(dst->num_bits, src->num_bits, size);
 1518 }
 1519 
 1520 static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src) {
 1521     if (src->accuracy_log == 0) {
 1522         memset(dst, 0, sizeof(FSE_dtable));
 1523         return;
 1524     }
 1525 
 1526     size_t size = (size_t)1 << src->accuracy_log;
 1527     dst->accuracy_log = src->accuracy_log;
 1528 
 1529     dst->symbols = malloc(size);
 1530     dst->num_bits = malloc(size);
 1531     dst->new_state_base = malloc(size * sizeof(u16));
 1532     if (!dst->symbols || !dst->num_bits || !dst->new_state_base) {
 1533         BAD_ALLOC();
 1534     }
 1535 
 1536     memcpy(dst->symbols, src->symbols, size);
 1537     memcpy(dst->num_bits, src->num_bits, size);
 1538     memcpy(dst->new_state_base, src->new_state_base, size * sizeof(u16));
 1539 }
 1540 
 1541 /// A dictionary acts as initializing values for the frame context before
 1542 /// decompression, so we implement it by applying it's predetermined
 1543 /// tables and content to the context before beginning decompression
 1544 static void frame_context_apply_dict(frame_context_t *const ctx,
 1545                                      const dictionary_t *const dict) {
 1546     // If the content pointer is NULL then it must be an empty dict
 1547     if (!dict || !dict->content)
 1548         return;
 1549 
 1550     // If the requested dictionary_id is non-zero, the correct dictionary must
 1551     // be present
 1552     if (ctx->header.dictionary_id != 0 &&
 1553         ctx->header.dictionary_id != dict->dictionary_id) {
 1554         ERROR("Wrong dictionary provided");
 1555     }
 1556 
 1557     // Copy the dict content to the context for references during sequence
 1558     // execution
 1559     ctx->dict_content = dict->content;
 1560     ctx->dict_content_len = dict->content_size;
 1561 
 1562     // If it's a formatted dict copy the precomputed tables in so they can
 1563     // be used in the table repeat modes
 1564     if (dict->dictionary_id != 0) {
 1565         // Deep copy the entropy tables so they can be freed independently of
 1566         // the dictionary struct
 1567         HUF_copy_dtable(&ctx->literals_dtable, &dict->literals_dtable);
 1568         FSE_copy_dtable(&ctx->ll_dtable, &dict->ll_dtable);
 1569         FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable);
 1570         FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable);
 1571 
 1572         // Copy the repeated offsets
 1573         memcpy(ctx->previous_offsets, dict->previous_offsets,
 1574                sizeof(ctx->previous_offsets));
 1575     }
 1576 }
 1577 
 1578 #else  // ZDEC_NO_DICTIONARY is defined
 1579 
 1580 static void frame_context_apply_dict(frame_context_t *const ctx,
 1581                                      const dictionary_t *const dict) {
 1582     (void)ctx;
 1583     if (dict && dict->content) ERROR("dictionary not supported");
 1584 }
 1585 
 1586 #endif
 1587 /******* END DICTIONARY PARSING ***********************************************/
 1588 
 1589 /******* IO STREAM OPERATIONS *************************************************/
 1590 
 1591 /// Reads `num` bits from a bitstream, and updates the internal offset
 1592 static inline u64 IO_read_bits(istream_t *const in, const int num_bits) {
 1593     if (num_bits > 64 || num_bits <= 0) {
 1594         ERROR("Attempt to read an invalid number of bits");
 1595     }
 1596 
 1597     const size_t bytes = (num_bits + in->bit_offset + 7) / 8;
 1598     const size_t full_bytes = (num_bits + in->bit_offset) / 8;
 1599     if (bytes > in->len) {
 1600         INP_SIZE();
 1601     }
 1602 
 1603     const u64 result = read_bits_LE(in->ptr, num_bits, in->bit_offset);
 1604 
 1605     in->bit_offset = (num_bits + in->bit_offset) % 8;
 1606     in->ptr += full_bytes;
 1607     in->len -= full_bytes;
 1608 
 1609     return result;
 1610 }
 1611 
 1612 /// If a non-zero number of bits have been read from the current byte, advance
 1613 /// the offset to the next byte
 1614 static inline void IO_rewind_bits(istream_t *const in, int num_bits) {
 1615     if (num_bits < 0) {
 1616         ERROR("Attempting to rewind stream by a negative number of bits");
 1617     }
 1618 
 1619     // move the offset back by `num_bits` bits
 1620     const int new_offset = in->bit_offset - num_bits;
 1621     // determine the number of whole bytes we have to rewind, rounding up to an
 1622     // integer number (e.g. if `new_offset == -5`, `bytes == 1`)
 1623     const i64 bytes = -(new_offset - 7) / 8;
 1624 
 1625     in->ptr -= bytes;
 1626     in->len += bytes;
 1627     // make sure the resulting `bit_offset` is positive, as mod in C does not
 1628     // convert numbers from negative to positive (e.g. -22 % 8 == -6)
 1629     in->bit_offset = ((new_offset % 8) + 8) % 8;
 1630 }
 1631 
 1632 /// If the remaining bits in a byte will be unused, advance to the end of the
 1633 /// byte
 1634 static inline void IO_align_stream(istream_t *const in) {
 1635     if (in->bit_offset != 0) {
 1636         if (in->len == 0) {
 1637             INP_SIZE();
 1638         }
 1639         in->ptr++;
 1640         in->len--;
 1641         in->bit_offset = 0;
 1642     }
 1643 }
 1644 
 1645 /// Write the given byte into the output stream
 1646 static inline void IO_write_byte(ostream_t *const out, u8 symb) {
 1647     if (out->len == 0) {
 1648         OUT_SIZE();
 1649     }
 1650 
 1651     out->ptr[0] = symb;
 1652     out->ptr++;
 1653     out->len--;
 1654 }
 1655 
 1656 /// Returns the number of bytes left to be read in this stream.  The stream must
 1657 /// be byte aligned.
 1658 static inline size_t IO_istream_len(const istream_t *const in) {
 1659     return in->len;
 1660 }
 1661 
 1662 /// Returns a pointer where `len` bytes can be read, and advances the internal
 1663 /// state.  The stream must be byte aligned.
 1664 static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len) {
 1665     if (len > in->len) {
 1666         INP_SIZE();
 1667     }
 1668     if (in->bit_offset != 0) {
 1669         ERROR("Attempting to operate on a non-byte aligned stream");
 1670     }
 1671     const u8 *const ptr = in->ptr;
 1672     in->ptr += len;
 1673     in->len -= len;
 1674 
 1675     return ptr;
 1676 }
 1677 /// Returns a pointer to write `len` bytes to, and advances the internal state
 1678 static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len) {
 1679     if (len > out->len) {
 1680         OUT_SIZE();
 1681     }
 1682     u8 *const ptr = out->ptr;
 1683     out->ptr += len;
 1684     out->len -= len;
 1685 
 1686     return ptr;
 1687 }
 1688 
 1689 /// Advance the inner state by `len` bytes
 1690 static inline void IO_advance_input(istream_t *const in, size_t len) {
 1691     if (len > in->len) {
 1692          INP_SIZE();
 1693     }
 1694     if (in->bit_offset != 0) {
 1695         ERROR("Attempting to operate on a non-byte aligned stream");
 1696     }
 1697 
 1698     in->ptr += len;
 1699     in->len -= len;
 1700 }
 1701 
 1702 /// Returns an `ostream_t` constructed from the given pointer and length
 1703 static inline ostream_t IO_make_ostream(u8 *out, size_t len) {
 1704     return (ostream_t) { out, len };
 1705 }
 1706 
 1707 /// Returns an `istream_t` constructed from the given pointer and length
 1708 static inline istream_t IO_make_istream(const u8 *in, size_t len) {
 1709     return (istream_t) { in, len, 0 };
 1710 }
 1711 
 1712 /// Returns an `istream_t` with the same base as `in`, and length `len`
 1713 /// Then, advance `in` to account for the consumed bytes
 1714 /// `in` must be byte aligned
 1715 static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len) {
 1716     // Consume `len` bytes of the parent stream
 1717     const u8 *const ptr = IO_get_read_ptr(in, len);
 1718 
 1719     // Make a substream using the pointer to those `len` bytes
 1720     return IO_make_istream(ptr, len);
 1721 }
 1722 /******* END IO STREAM OPERATIONS *********************************************/
 1723 
 1724 /******* BITSTREAM OPERATIONS *************************************************/
 1725 /// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits
 1726 static inline u64 read_bits_LE(const u8 *src, const int num_bits,
 1727                                const size_t offset) {
 1728     if (num_bits > 64) {
 1729         ERROR("Attempt to read an invalid number of bits");
 1730     }
 1731 
 1732     // Skip over bytes that aren't in range
 1733     src += offset / 8;
 1734     size_t bit_offset = offset % 8;
 1735     u64 res = 0;
 1736 
 1737     int shift = 0;
 1738     int left = num_bits;
 1739     while (left > 0) {
 1740         u64 mask = left >= 8 ? 0xff : (((u64)1 << left) - 1);
 1741         // Read the next byte, shift it to account for the offset, and then mask
 1742         // out the top part if we don't need all the bits
 1743         res += (((u64)*src++ >> bit_offset) & mask) << shift;
 1744         shift += 8 - bit_offset;
 1745         left -= 8 - bit_offset;
 1746         bit_offset = 0;
 1747     }
 1748 
 1749     return res;
 1750 }
 1751 
 1752 /// Read bits from the end of a HUF or FSE bitstream.  `offset` is in bits, so
 1753 /// it updates `offset` to `offset - bits`, and then reads `bits` bits from
 1754 /// `src + offset`.  If the offset becomes negative, the extra bits at the
 1755 /// bottom are filled in with `0` bits instead of reading from before `src`.
 1756 static inline u64 STREAM_read_bits(const u8 *const src, const int bits,
 1757                                    i64 *const offset) {
 1758     *offset = *offset - bits;
 1759     size_t actual_off = *offset;
 1760     size_t actual_bits = bits;
 1761     // Don't actually read bits from before the start of src, so if `*offset <
 1762     // 0` fix actual_off and actual_bits to reflect the quantity to read
 1763     if (*offset < 0) {
 1764         actual_bits += *offset;
 1765         actual_off = 0;
 1766     }
 1767     u64 res = read_bits_LE(src, actual_bits, actual_off);
 1768 
 1769     if (*offset < 0) {
 1770         // Fill in the bottom "overflowed" bits with 0's
 1771         res = -*offset >= 64 ? 0 : (res << -*offset);
 1772     }
 1773     return res;
 1774 }
 1775 /******* END BITSTREAM OPERATIONS *********************************************/
 1776 
 1777 /******* BIT COUNTING OPERATIONS **********************************************/
 1778 /// Returns `x`, where `2^x` is the largest power of 2 less than or equal to
 1779 /// `num`, or `-1` if `num == 0`.
 1780 static inline int highest_set_bit(const u64 num) {
 1781     for (int i = 63; i >= 0; i--) {
 1782         if (((u64)1 << i) <= num) {
 1783             return i;
 1784         }
 1785     }
 1786     return -1;
 1787 }
 1788 /******* END BIT COUNTING OPERATIONS ******************************************/
 1789 
 1790 /******* HUFFMAN PRIMITIVES ***************************************************/
 1791 static inline u8 HUF_decode_symbol(const HUF_dtable *const dtable,
 1792                                    u16 *const state, const u8 *const src,
 1793                                    i64 *const offset) {
 1794     // Look up the symbol and number of bits to read
 1795     const u8 symb = dtable->symbols[*state];
 1796     const u8 bits = dtable->num_bits[*state];
 1797     const u16 rest = STREAM_read_bits(src, bits, offset);
 1798     // Shift `bits` bits out of the state, keeping the low order bits that
 1799     // weren't necessary to determine this symbol.  Then add in the new bits
 1800     // read from the stream.
 1801     *state = ((*state << bits) + rest) & (((u16)1 << dtable->max_bits) - 1);
 1802 
 1803     return symb;
 1804 }
 1805 
 1806 static inline void HUF_init_state(const HUF_dtable *const dtable,
 1807                                   u16 *const state, const u8 *const src,
 1808                                   i64 *const offset) {
 1809     // Read in a full `dtable->max_bits` bits to initialize the state
 1810     const u8 bits = dtable->max_bits;
 1811     *state = STREAM_read_bits(src, bits, offset);
 1812 }
 1813 
 1814 static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,
 1815                                      ostream_t *const out,
 1816                                      istream_t *const in) {
 1817     const size_t len = IO_istream_len(in);
 1818     if (len == 0) {
 1819         INP_SIZE();
 1820     }
 1821     const u8 *const src = IO_get_read_ptr(in, len);
 1822 
 1823     // "Each bitstream must be read backward, that is starting from the end down
 1824     // to the beginning. Therefore it's necessary to know the size of each
 1825     // bitstream.
 1826     //
 1827     // It's also necessary to know exactly which bit is the latest. This is
 1828     // detected by a final bit flag : the highest bit of latest byte is a
 1829     // final-bit-flag. Consequently, a last byte of 0 is not possible. And the
 1830     // final-bit-flag itself is not part of the useful bitstream. Hence, the
 1831     // last byte contains between 0 and 7 useful bits."
 1832     const int padding = 8 - highest_set_bit(src[len - 1]);
 1833 
 1834     // Offset starts at the end because HUF streams are read backwards
 1835     i64 bit_offset = len * 8 - padding;
 1836     u16 state;
 1837 
 1838     HUF_init_state(dtable, &state, src, &bit_offset);
 1839 
 1840     size_t symbols_written = 0;
 1841     while (bit_offset > -dtable->max_bits) {
 1842         // Iterate over the stream, decoding one symbol at a time
 1843         IO_write_byte(out, HUF_decode_symbol(dtable, &state, src, &bit_offset));
 1844         symbols_written++;
 1845     }
 1846     // "The process continues up to reading the required number of symbols per
 1847     // stream. If a bitstream is not entirely and exactly consumed, hence
 1848     // reaching exactly its beginning position with all bits consumed, the
 1849     // decoding process is considered faulty."
 1850 
 1851     // When all symbols have been decoded, the final state value shouldn't have
 1852     // any data from the stream, so it should have "read" dtable->max_bits from
 1853     // before the start of `src`
 1854     // Therefore `offset`, the edge to start reading new bits at, should be
 1855     // dtable->max_bits before the start of the stream
 1856     if (bit_offset != -dtable->max_bits) {
 1857         CORRUPTION();
 1858     }
 1859 
 1860     return symbols_written;
 1861 }
 1862 
 1863 static size_t HUF_decompress_4stream(const HUF_dtable *const dtable,
 1864                                      ostream_t *const out, istream_t *const in) {
 1865     // "Compressed size is provided explicitly : in the 4-streams variant,
 1866     // bitstreams are preceded by 3 unsigned little-endian 16-bits values. Each
 1867     // value represents the compressed size of one stream, in order. The last
 1868     // stream size is deducted from total compressed size and from previously
 1869     // decoded stream sizes"
 1870     const size_t csize1 = IO_read_bits(in, 16);
 1871     const size_t csize2 = IO_read_bits(in, 16);
 1872     const size_t csize3 = IO_read_bits(in, 16);
 1873 
 1874     istream_t in1 = IO_make_sub_istream(in, csize1);
 1875     istream_t in2 = IO_make_sub_istream(in, csize2);
 1876     istream_t in3 = IO_make_sub_istream(in, csize3);
 1877     istream_t in4 = IO_make_sub_istream(in, IO_istream_len(in));
 1878 
 1879     size_t total_output = 0;
 1880     // Decode each stream independently for simplicity
 1881     // If we wanted to we could decode all 4 at the same time for speed,
 1882     // utilizing more execution units
 1883     total_output += HUF_decompress_1stream(dtable, out, &in1);
 1884     total_output += HUF_decompress_1stream(dtable, out, &in2);
 1885     total_output += HUF_decompress_1stream(dtable, out, &in3);
 1886     total_output += HUF_decompress_1stream(dtable, out, &in4);
 1887 
 1888     return total_output;
 1889 }
 1890 
 1891 /// Initializes a Huffman table using canonical Huffman codes
 1892 /// For more explanation on canonical Huffman codes see
 1893 /// http://www.cs.uofs.edu/~mccloske/courses/cmps340/huff_canonical_dec2015.html
 1894 /// Codes within a level are allocated in symbol order (i.e. smaller symbols get
 1895 /// earlier codes)
 1896 static void HUF_init_dtable(HUF_dtable *const table, const u8 *const bits,
 1897                             const int num_symbs) {
 1898     memset(table, 0, sizeof(HUF_dtable));
 1899     if (num_symbs > HUF_MAX_SYMBS) {
 1900         ERROR("Too many symbols for Huffman");
 1901     }
 1902 
 1903     u8 max_bits = 0;
 1904     u16 rank_count[HUF_MAX_BITS + 1];
 1905     memset(rank_count, 0, sizeof(rank_count));
 1906 
 1907     // Count the number of symbols for each number of bits, and determine the
 1908     // depth of the tree
 1909     for (int i = 0; i < num_symbs; i++) {
 1910         if (bits[i] > HUF_MAX_BITS) {
 1911             ERROR("Huffman table depth too large");
 1912         }
 1913         max_bits = MAX(max_bits, bits[i]);
 1914         rank_count[bits[i]]++;
 1915     }
 1916 
 1917     const size_t table_size = 1 << max_bits;
 1918     table->max_bits = max_bits;
 1919     table->symbols = malloc(table_size);
 1920     table->num_bits = malloc(table_size);
 1921 
 1922     if (!table->symbols || !table->num_bits) {
 1923         free(table->symbols);
 1924         free(table->num_bits);
 1925         BAD_ALLOC();
 1926     }
 1927 
 1928     // "Symbols are sorted by Weight. Within same Weight, symbols keep natural
 1929     // order. Symbols with a Weight of zero are removed. Then, starting from
 1930     // lowest weight, prefix codes are distributed in order."
 1931 
 1932     u32 rank_idx[HUF_MAX_BITS + 1];
 1933     // Initialize the starting codes for each rank (number of bits)
 1934     rank_idx[max_bits] = 0;
 1935     for (int i = max_bits; i >= 1; i--) {
 1936         rank_idx[i - 1] = rank_idx[i] + rank_count[i] * (1 << (max_bits - i));
 1937         // The entire range takes the same number of bits so we can memset it
 1938         memset(&table->num_bits[rank_idx[i]], i, rank_idx[i - 1] - rank_idx[i]);
 1939     }
 1940 
 1941     if (rank_idx[0] != table_size) {
 1942         CORRUPTION();
 1943     }
 1944 
 1945     // Allocate codes and fill in the table
 1946     for (int i = 0; i < num_symbs; i++) {
 1947         if (bits[i] != 0) {
 1948             // Allocate a code for this symbol and set its range in the table
 1949             const u16 code = rank_idx[bits[i]];
 1950             // Since the code doesn't care about the bottom `max_bits - bits[i]`
 1951             // bits of state, it gets a range that spans all possible values of
 1952             // the lower bits
 1953             const u16 len = 1 << (max_bits - bits[i]);
 1954             memset(&table->symbols[code], i, len);
 1955             rank_idx[bits[i]] += len;
 1956         }
 1957     }
 1958 }
 1959 
 1960 static void HUF_init_dtable_usingweights(HUF_dtable *const table,
 1961                                          const u8 *const weights,
 1962                                          const int num_symbs) {
 1963     // +1 because the last weight is not transmitted in the header
 1964     if (num_symbs + 1 > HUF_MAX_SYMBS) {
 1965         ERROR("Too many symbols for Huffman");
 1966     }
 1967 
 1968     u8 bits[HUF_MAX_SYMBS];
 1969 
 1970     u64 weight_sum = 0;
 1971     for (int i = 0; i < num_symbs; i++) {
 1972         // Weights are in the same range as bit count
 1973         if (weights[i] > HUF_MAX_BITS) {
 1974             CORRUPTION();
 1975         }
 1976         weight_sum += weights[i] > 0 ? (u64)1 << (weights[i] - 1) : 0;
 1977     }
 1978 
 1979     // Find the first power of 2 larger than the sum
 1980     const int max_bits = highest_set_bit(weight_sum) + 1;
 1981     const u64 left_over = ((u64)1 << max_bits) - weight_sum;
 1982     // If the left over isn't a power of 2, the weights are invalid
 1983     if (left_over & (left_over - 1)) {
 1984         CORRUPTION();
 1985     }
 1986 
 1987     // left_over is used to find the last weight as it's not transmitted
 1988     // by inverting 2^(weight - 1) we can determine the value of last_weight
 1989     const int last_weight = highest_set_bit(left_over) + 1;
 1990 
 1991     for (int i = 0; i < num_symbs; i++) {
 1992         // "Number_of_Bits = Number_of_Bits ? Max_Number_of_Bits + 1 - Weight : 0"
 1993         bits[i] = weights[i] > 0 ? (max_bits + 1 - weights[i]) : 0;
 1994     }
 1995     bits[num_symbs] =
 1996         max_bits + 1 - last_weight; // Last weight is always non-zero
 1997 
 1998     HUF_init_dtable(table, bits, num_symbs + 1);
 1999 }
 2000 
 2001 static void HUF_free_dtable(HUF_dtable *const dtable) {
 2002     free(dtable->symbols);
 2003     free(dtable->num_bits);
 2004     memset(dtable, 0, sizeof(HUF_dtable));
 2005 }
 2006 /******* END HUFFMAN PRIMITIVES ***********************************************/
 2007 
 2008 /******* FSE PRIMITIVES *******************************************************/
 2009 /// For more description of FSE see
 2010 /// https://github.com/Cyan4973/FiniteStateEntropy/
 2011 
 2012 /// Allow a symbol to be decoded without updating state
 2013 static inline u8 FSE_peek_symbol(const FSE_dtable *const dtable,
 2014                                  const u16 state) {
 2015     return dtable->symbols[state];
 2016 }
 2017 
 2018 /// Consumes bits from the input and uses the current state to determine the
 2019 /// next state
 2020 static inline void FSE_update_state(const FSE_dtable *const dtable,
 2021                                     u16 *const state, const u8 *const src,
 2022                                     i64 *const offset) {
 2023     const u8 bits = dtable->num_bits[*state];
 2024     const u16 rest = STREAM_read_bits(src, bits, offset);
 2025     *state = dtable->new_state_base[*state] + rest;
 2026 }
 2027 
 2028 /// Decodes a single FSE symbol and updates the offset
 2029 static inline u8 FSE_decode_symbol(const FSE_dtable *const dtable,
 2030                                    u16 *const state, const u8 *const src,
 2031                                    i64 *const offset) {
 2032     const u8 symb = FSE_peek_symbol(dtable, *state);
 2033     FSE_update_state(dtable, state, src, offset);
 2034     return symb;
 2035 }
 2036 
 2037 static inline void FSE_init_state(const FSE_dtable *const dtable,
 2038                                   u16 *const state, const u8 *const src,
 2039                                   i64 *const offset) {
 2040     // Read in a full `accuracy_log` bits to initialize the state
 2041     const u8 bits = dtable->accuracy_log;
 2042     *state = STREAM_read_bits(src, bits, offset);
 2043 }
 2044 
 2045 static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable,
 2046                                           ostream_t *const out,
 2047                                           istream_t *const in) {
 2048     const size_t len = IO_istream_len(in);
 2049     if (len == 0) {
 2050         INP_SIZE();
 2051     }
 2052     const u8 *const src = IO_get_read_ptr(in, len);
 2053 
 2054     // "Each bitstream must be read backward, that is starting from the end down
 2055     // to the beginning. Therefore it's necessary to know the size of each
 2056     // bitstream.
 2057     //
 2058     // It's also necessary to know exactly which bit is the latest. This is
 2059     // detected by a final bit flag : the highest bit of latest byte is a
 2060     // final-bit-flag. Consequently, a last byte of 0 is not possible. And the
 2061     // final-bit-flag itself is not part of the useful bitstream. Hence, the
 2062     // last byte contains between 0 and 7 useful bits."
 2063     const int padding = 8 - highest_set_bit(src[len - 1]);
 2064     i64 offset = len * 8 - padding;
 2065 
 2066     u16 state1, state2;
 2067     // "The first state (State1) encodes the even indexed symbols, and the
 2068     // second (State2) encodes the odd indexes. State1 is initialized first, and
 2069     // then State2, and they take turns decoding a single symbol and updating
 2070     // their state."
 2071     FSE_init_state(dtable, &state1, src, &offset);
 2072     FSE_init_state(dtable, &state2, src, &offset);
 2073 
 2074     // Decode until we overflow the stream
 2075     // Since we decode in reverse order, overflowing the stream is offset going
 2076     // negative
 2077     size_t symbols_written = 0;
 2078     while (1) {
 2079         // "The number of symbols to decode is determined by tracking bitStream
 2080         // overflow condition: If updating state after decoding a symbol would
 2081         // require more bits than remain in the stream, it is assumed the extra
 2082         // bits are 0. Then, the symbols for each of the final states are
 2083         // decoded and the process is complete."
 2084         IO_write_byte(out, FSE_decode_symbol(dtable, &state1, src, &offset));
 2085         symbols_written++;
 2086         if (offset < 0) {
 2087             // There's still a symbol to decode in state2
 2088             IO_write_byte(out, FSE_peek_symbol(dtable, state2));
 2089             symbols_written++;
 2090             break;
 2091         }
 2092 
 2093         IO_write_byte(out, FSE_decode_symbol(dtable, &state2, src, &offset));
 2094         symbols_written++;
 2095         if (offset < 0) {
 2096             // There's still a symbol to decode in state1
 2097             IO_write_byte(out, FSE_peek_symbol(dtable, state1));
 2098             symbols_written++;
 2099             break;
 2100         }
 2101     }
 2102 
 2103     return symbols_written;
 2104 }
 2105 
 2106 static void FSE_init_dtable(FSE_dtable *const dtable,
 2107                             const i16 *const norm_freqs, const int num_symbs,
 2108                             const int accuracy_log) {
 2109     if (accuracy_log > FSE_MAX_ACCURACY_LOG) {
 2110         ERROR("FSE accuracy too large");
 2111     }
 2112     if (num_symbs > FSE_MAX_SYMBS) {
 2113         ERROR("Too many symbols for FSE");
 2114     }
 2115 
 2116     dtable->accuracy_log = accuracy_log;
 2117 
 2118     const size_t size = (size_t)1 << accuracy_log;
 2119     dtable->symbols = malloc(size * sizeof(u8));
 2120     dtable->num_bits = malloc(size * sizeof(u8));
 2121     dtable->new_state_base = malloc(size * sizeof(u16));
 2122 
 2123     if (!dtable->symbols || !dtable->num_bits || !dtable->new_state_base) {
 2124         BAD_ALLOC();
 2125     }
 2126 
 2127     // Used to determine how many bits need to be read for each state,
 2128     // and where the destination range should start
 2129     // Needs to be u16 because max value is 2 * max number of symbols,
 2130     // which can be larger than a byte can store
 2131     u16 state_desc[FSE_MAX_SYMBS];
 2132 
 2133     // "Symbols are scanned in their natural order for "less than 1"
 2134     // probabilities. Symbols with this probability are being attributed a
 2135     // single cell, starting from the end of the table. These symbols define a
 2136     // full state reset, reading Accuracy_Log bits."
 2137     int high_threshold = size;
 2138     for (int s = 0; s < num_symbs; s++) {
 2139         // Scan for low probability symbols to put at the top
 2140         if (norm_freqs[s] == -1) {
 2141             dtable->symbols[--high_threshold] = s;
 2142             state_desc[s] = 1;
 2143         }
 2144     }
 2145 
 2146     // "All remaining symbols are sorted in their natural order. Starting from
 2147     // symbol 0 and table position 0, each symbol gets attributed as many cells
 2148     // as its probability. Cell allocation is spread, not linear."
 2149     // Place the rest in the table
 2150     const u16 step = (size >> 1) + (size >> 3) + 3;
 2151     const u16 mask = size - 1;
 2152     u16 pos = 0;
 2153     for (int s = 0; s < num_symbs; s++) {
 2154         if (norm_freqs[s] <= 0) {
 2155             continue;
 2156         }
 2157 
 2158         state_desc[s] = norm_freqs[s];
 2159 
 2160         for (int i = 0; i < norm_freqs[s]; i++) {
 2161             // Give `norm_freqs[s]` states to symbol s
 2162             dtable->symbols[pos] = s;
 2163             // "A position is skipped if already occupied, typically by a "less
 2164             // than 1" probability symbol."
 2165             do {
 2166                 pos = (pos + step) & mask;
 2167             } while (pos >=
 2168                      high_threshold);
 2169             // Note: no other collision checking is necessary as `step` is
 2170             // coprime to `size`, so the cycle will visit each position exactly
 2171             // once
 2172         }
 2173     }
 2174     if (pos != 0) {
 2175         CORRUPTION();
 2176     }
 2177 
 2178     // Now we can fill baseline and num bits
 2179     for (size_t i = 0; i < size; i++) {
 2180         u8 symbol = dtable->symbols[i];
 2181         u16 next_state_desc = state_desc[symbol]++;
 2182         // Fills in the table appropriately, next_state_desc increases by symbol
 2183         // over time, decreasing number of bits
 2184         dtable->num_bits[i] = (u8)(accuracy_log - highest_set_bit(next_state_desc));
 2185         // Baseline increases until the bit threshold is passed, at which point
 2186         // it resets to 0
 2187         dtable->new_state_base[i] =
 2188             ((u16)next_state_desc << dtable->num_bits[i]) - size;
 2189     }
 2190 }
 2191 
 2192 /// Decode an FSE header as defined in the Zstandard format specification and
 2193 /// use the decoded frequencies to initialize a decoding table.
 2194 static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in,
 2195                                 const int max_accuracy_log) {
 2196     // "An FSE distribution table describes the probabilities of all symbols
 2197     // from 0 to the last present one (included) on a normalized scale of 1 <<
 2198     // Accuracy_Log .
 2199     //
 2200     // It's a bitstream which is read forward, in little-endian fashion. It's
 2201     // not necessary to know its exact size, since it will be discovered and
 2202     // reported by the decoding process.
 2203     if (max_accuracy_log > FSE_MAX_ACCURACY_LOG) {
 2204         ERROR("FSE accuracy too large");
 2205     }
 2206 
 2207     // The bitstream starts by reporting on which scale it operates.
 2208     // Accuracy_Log = low4bits + 5. Note that maximum Accuracy_Log for literal
 2209     // and match lengths is 9, and for offsets is 8. Higher values are
 2210     // considered errors."
 2211     const int accuracy_log = 5 + IO_read_bits(in, 4);
 2212     if (accuracy_log > max_accuracy_log) {
 2213         ERROR("FSE accuracy too large");
 2214     }
 2215 
 2216     // "Then follows each symbol value, from 0 to last present one. The number
 2217     // of bits used by each field is variable. It depends on :
 2218     //
 2219     // Remaining probabilities + 1 : example : Presuming an Accuracy_Log of 8,
 2220     // and presuming 100 probabilities points have already been distributed, the
 2221     // decoder may read any value from 0 to 255 - 100 + 1 == 156 (inclusive).
 2222     // Therefore, it must read log2sup(156) == 8 bits.
 2223     //
 2224     // Value decoded : small values use 1 less bit : example : Presuming values
 2225     // from 0 to 156 (inclusive) are possible, 255-156 = 99 values are remaining
 2226     // in an 8-bits field. They are used this way : first 99 values (hence from
 2227     // 0 to 98) use only 7 bits, values from 99 to 156 use 8 bits. "
 2228 
 2229     i32 remaining = 1 << accuracy_log;
 2230     i16 frequencies[FSE_MAX_SYMBS];
 2231 
 2232     int symb = 0;
 2233     while (remaining > 0 && symb < FSE_MAX_SYMBS) {
 2234         // Log of the number of possible values we could read
 2235         int bits = highest_set_bit(remaining + 1) + 1;
 2236 
 2237         u16 val = IO_read_bits(in, bits);
 2238 
 2239         // Try to mask out the lower bits to see if it qualifies for the "small
 2240         // value" threshold
 2241         const u16 lower_mask = ((u16)1 << (bits - 1)) - 1;
 2242         const u16 threshold = ((u16)1 << bits) - 1 - (remaining + 1);
 2243 
 2244         if ((val & lower_mask) < threshold) {
 2245             IO_rewind_bits(in, 1);
 2246             val = val & lower_mask;
 2247         } else if (val > lower_mask) {
 2248             val = val - threshold;
 2249         }
 2250 
 2251         // "Probability is obtained from Value decoded by following formula :
 2252         // Proba = value - 1"
 2253         const i16 proba = (i16)val - 1;
 2254 
 2255         // "It means value 0 becomes negative probability -1. -1 is a special
 2256         // probability, which means "less than 1". Its effect on distribution
 2257         // table is described in next paragraph. For the purpose of calculating
 2258         // cumulated distribution, it counts as one."
 2259         remaining -= proba < 0 ? -proba : proba;
 2260 
 2261         frequencies[symb] = proba;
 2262         symb++;
 2263 
 2264         // "When a symbol has a probability of zero, it is followed by a 2-bits
 2265         // repeat flag. This repeat flag tells how many probabilities of zeroes
 2266         // follow the current one. It provides a number ranging from 0 to 3. If
 2267         // it is a 3, another 2-bits repeat flag follows, and so on."
 2268         if (proba == 0) {
 2269             // Read the next two bits to see how many more 0s
 2270             int repeat = IO_read_bits(in, 2);
 2271 
 2272             while (1) {
 2273                 for (int i = 0; i < repeat && symb < FSE_MAX_SYMBS; i++) {
 2274                     frequencies[symb++] = 0;
 2275                 }
 2276                 if (repeat == 3) {
 2277                     repeat = IO_read_bits(in, 2);
 2278                 } else {
 2279                     break;
 2280                 }
 2281             }
 2282         }
 2283     }
 2284     IO_align_stream(in);
 2285 
 2286     // "When last symbol reaches cumulated total of 1 << Accuracy_Log, decoding
 2287     // is complete. If the last symbol makes cumulated total go above 1 <<
 2288     // Accuracy_Log, distribution is considered corrupted."
 2289     if (remaining != 0 || symb >= FSE_MAX_SYMBS) {
 2290         CORRUPTION();
 2291     }
 2292 
 2293     // Initialize the decoding table using the determined weights
 2294     FSE_init_dtable(dtable, frequencies, symb, accuracy_log);
 2295 }
 2296 
 2297 static void FSE_init_dtable_rle(FSE_dtable *const dtable, const u8 symb) {
 2298     dtable->symbols = malloc(sizeof(u8));
 2299     dtable->num_bits = malloc(sizeof(u8));
 2300     dtable->new_state_base = malloc(sizeof(u16));
 2301 
 2302     if (!dtable->symbols || !dtable->num_bits || !dtable->new_state_base) {
 2303         BAD_ALLOC();
 2304     }
 2305 
 2306     // This setup will always have a state of 0, always return symbol `symb`,
 2307     // and never consume any bits
 2308     dtable->symbols[0] = symb;
 2309     dtable->num_bits[0] = 0;
 2310     dtable->new_state_base[0] = 0;
 2311     dtable->accuracy_log = 0;
 2312 }
 2313 
 2314 static void FSE_free_dtable(FSE_dtable *const dtable) {
 2315     free(dtable->symbols);
 2316     free(dtable->num_bits);
 2317     free(dtable->new_state_base);
 2318     memset(dtable, 0, sizeof(FSE_dtable));
 2319 }
 2320 /******* END FSE PRIMITIVES ***************************************************/

Cache object: 02173840ef2db4e0657dd5866b73eecc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.