The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/xz-embedded/linux/lib/xz/xz_dec_bcj.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Branch/Call/Jump (BCJ) filter decoders
    3  *
    4  * Authors: Lasse Collin <lasse.collin@tukaani.org>
    5  *          Igor Pavlov <https://7-zip.org/>
    6  *
    7  * This file has been put into the public domain.
    8  * You can do whatever you want with this file.
    9  */
   10 
   11 #include "xz_private.h"
   12 
   13 /*
   14  * The rest of the file is inside this ifdef. It makes things a little more
   15  * convenient when building without support for any BCJ filters.
   16  */
   17 #ifdef XZ_DEC_BCJ
   18 
   19 struct xz_dec_bcj {
   20         /* Type of the BCJ filter being used */
   21         enum {
   22                 BCJ_X86 = 4,        /* x86 or x86-64 */
   23                 BCJ_POWERPC = 5,    /* Big endian only */
   24                 BCJ_IA64 = 6,       /* Big or little endian */
   25                 BCJ_ARM = 7,        /* Little endian only */
   26                 BCJ_ARMTHUMB = 8,   /* Little endian only */
   27                 BCJ_SPARC = 9       /* Big or little endian */
   28         } type;
   29 
   30         /*
   31          * Return value of the next filter in the chain. We need to preserve
   32          * this information across calls, because we must not call the next
   33          * filter anymore once it has returned XZ_STREAM_END.
   34          */
   35         enum xz_ret ret;
   36 
   37         /* True if we are operating in single-call mode. */
   38         bool single_call;
   39 
   40         /*
   41          * Absolute position relative to the beginning of the uncompressed
   42          * data (in a single .xz Block). We care only about the lowest 32
   43          * bits so this doesn't need to be uint64_t even with big files.
   44          */
   45         uint32_t pos;
   46 
   47         /* x86 filter state */
   48         uint32_t x86_prev_mask;
   49 
   50         /* Temporary space to hold the variables from struct xz_buf */
   51         uint8_t *out;
   52         size_t out_pos;
   53         size_t out_size;
   54 
   55         struct {
   56                 /* Amount of already filtered data in the beginning of buf */
   57                 size_t filtered;
   58 
   59                 /* Total amount of data currently stored in buf  */
   60                 size_t size;
   61 
   62                 /*
   63                  * Buffer to hold a mix of filtered and unfiltered data. This
   64                  * needs to be big enough to hold Alignment + 2 * Look-ahead:
   65                  *
   66                  * Type         Alignment   Look-ahead
   67                  * x86              1           4
   68                  * PowerPC          4           0
   69                  * IA-64           16           0
   70                  * ARM              4           0
   71                  * ARM-Thumb        2           2
   72                  * SPARC            4           0
   73                  */
   74                 uint8_t buf[16];
   75         } temp;
   76 };
   77 
   78 #ifdef XZ_DEC_X86
   79 /*
   80  * This is used to test the most significant byte of a memory address
   81  * in an x86 instruction.
   82  */
   83 static inline int bcj_x86_test_msbyte(uint8_t b)
   84 {
   85         return b == 0x00 || b == 0xFF;
   86 }
   87 
   88 static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
   89 {
   90         static const bool mask_to_allowed_status[8]
   91                 = { true, true, true, false, true, false, false, false };
   92 
   93         static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
   94 
   95         size_t i;
   96         size_t prev_pos = (size_t)-1;
   97         uint32_t prev_mask = s->x86_prev_mask;
   98         uint32_t src;
   99         uint32_t dest;
  100         uint32_t j;
  101         uint8_t b;
  102 
  103         if (size <= 4)
  104                 return 0;
  105 
  106         size -= 4;
  107         for (i = 0; i < size; ++i) {
  108                 if ((buf[i] & 0xFE) != 0xE8)
  109                         continue;
  110 
  111                 prev_pos = i - prev_pos;
  112                 if (prev_pos > 3) {
  113                         prev_mask = 0;
  114                 } else {
  115                         prev_mask = (prev_mask << (prev_pos - 1)) & 7;
  116                         if (prev_mask != 0) {
  117                                 b = buf[i + 4 - mask_to_bit_num[prev_mask]];
  118                                 if (!mask_to_allowed_status[prev_mask]
  119                                                 || bcj_x86_test_msbyte(b)) {
  120                                         prev_pos = i;
  121                                         prev_mask = (prev_mask << 1) | 1;
  122                                         continue;
  123                                 }
  124                         }
  125                 }
  126 
  127                 prev_pos = i;
  128 
  129                 if (bcj_x86_test_msbyte(buf[i + 4])) {
  130                         src = get_unaligned_le32(buf + i + 1);
  131                         while (true) {
  132                                 dest = src - (s->pos + (uint32_t)i + 5);
  133                                 if (prev_mask == 0)
  134                                         break;
  135 
  136                                 j = mask_to_bit_num[prev_mask] * 8;
  137                                 b = (uint8_t)(dest >> (24 - j));
  138                                 if (!bcj_x86_test_msbyte(b))
  139                                         break;
  140 
  141                                 src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
  142                         }
  143 
  144                         dest &= 0x01FFFFFF;
  145                         dest |= (uint32_t)0 - (dest & 0x01000000);
  146                         put_unaligned_le32(dest, buf + i + 1);
  147                         i += 4;
  148                 } else {
  149                         prev_mask = (prev_mask << 1) | 1;
  150                 }
  151         }
  152 
  153         prev_pos = i - prev_pos;
  154         s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
  155         return i;
  156 }
  157 #endif
  158 
  159 #ifdef XZ_DEC_POWERPC
  160 static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  161 {
  162         size_t i;
  163         uint32_t instr;
  164 
  165         for (i = 0; i + 4 <= size; i += 4) {
  166                 instr = get_unaligned_be32(buf + i);
  167                 if ((instr & 0xFC000003) == 0x48000001) {
  168                         instr &= 0x03FFFFFC;
  169                         instr -= s->pos + (uint32_t)i;
  170                         instr &= 0x03FFFFFC;
  171                         instr |= 0x48000001;
  172                         put_unaligned_be32(instr, buf + i);
  173                 }
  174         }
  175 
  176         return i;
  177 }
  178 #endif
  179 
  180 #ifdef XZ_DEC_IA64
  181 static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  182 {
  183         static const uint8_t branch_table[32] = {
  184                 0, 0, 0, 0, 0, 0, 0, 0,
  185                 0, 0, 0, 0, 0, 0, 0, 0,
  186                 4, 4, 6, 6, 0, 0, 7, 7,
  187                 4, 4, 0, 0, 4, 4, 0, 0
  188         };
  189 
  190         /*
  191          * The local variables take a little bit stack space, but it's less
  192          * than what LZMA2 decoder takes, so it doesn't make sense to reduce
  193          * stack usage here without doing that for the LZMA2 decoder too.
  194          */
  195 
  196         /* Loop counters */
  197         size_t i;
  198         size_t j;
  199 
  200         /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
  201         uint32_t slot;
  202 
  203         /* Bitwise offset of the instruction indicated by slot */
  204         uint32_t bit_pos;
  205 
  206         /* bit_pos split into byte and bit parts */
  207         uint32_t byte_pos;
  208         uint32_t bit_res;
  209 
  210         /* Address part of an instruction */
  211         uint32_t addr;
  212 
  213         /* Mask used to detect which instructions to convert */
  214         uint32_t mask;
  215 
  216         /* 41-bit instruction stored somewhere in the lowest 48 bits */
  217         uint64_t instr;
  218 
  219         /* Instruction normalized with bit_res for easier manipulation */
  220         uint64_t norm;
  221 
  222         for (i = 0; i + 16 <= size; i += 16) {
  223                 mask = branch_table[buf[i] & 0x1F];
  224                 for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
  225                         if (((mask >> slot) & 1) == 0)
  226                                 continue;
  227 
  228                         byte_pos = bit_pos >> 3;
  229                         bit_res = bit_pos & 7;
  230                         instr = 0;
  231                         for (j = 0; j < 6; ++j)
  232                                 instr |= (uint64_t)(buf[i + j + byte_pos])
  233                                                 << (8 * j);
  234 
  235                         norm = instr >> bit_res;
  236 
  237                         if (((norm >> 37) & 0x0F) == 0x05
  238                                         && ((norm >> 9) & 0x07) == 0) {
  239                                 addr = (norm >> 13) & 0x0FFFFF;
  240                                 addr |= ((uint32_t)(norm >> 36) & 1) << 20;
  241                                 addr <<= 4;
  242                                 addr -= s->pos + (uint32_t)i;
  243                                 addr >>= 4;
  244 
  245                                 norm &= ~((uint64_t)0x8FFFFF << 13);
  246                                 norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
  247                                 norm |= (uint64_t)(addr & 0x100000)
  248                                                 << (36 - 20);
  249 
  250                                 instr &= (1 << bit_res) - 1;
  251                                 instr |= norm << bit_res;
  252 
  253                                 for (j = 0; j < 6; j++)
  254                                         buf[i + j + byte_pos]
  255                                                 = (uint8_t)(instr >> (8 * j));
  256                         }
  257                 }
  258         }
  259 
  260         return i;
  261 }
  262 #endif
  263 
  264 #ifdef XZ_DEC_ARM
  265 static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  266 {
  267         size_t i;
  268         uint32_t addr;
  269 
  270         for (i = 0; i + 4 <= size; i += 4) {
  271                 if (buf[i + 3] == 0xEB) {
  272                         addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
  273                                         | ((uint32_t)buf[i + 2] << 16);
  274                         addr <<= 2;
  275                         addr -= s->pos + (uint32_t)i + 8;
  276                         addr >>= 2;
  277                         buf[i] = (uint8_t)addr;
  278                         buf[i + 1] = (uint8_t)(addr >> 8);
  279                         buf[i + 2] = (uint8_t)(addr >> 16);
  280                 }
  281         }
  282 
  283         return i;
  284 }
  285 #endif
  286 
  287 #ifdef XZ_DEC_ARMTHUMB
  288 static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  289 {
  290         size_t i;
  291         uint32_t addr;
  292 
  293         for (i = 0; i + 4 <= size; i += 2) {
  294                 if ((buf[i + 1] & 0xF8) == 0xF0
  295                                 && (buf[i + 3] & 0xF8) == 0xF8) {
  296                         addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
  297                                         | ((uint32_t)buf[i] << 11)
  298                                         | (((uint32_t)buf[i + 3] & 0x07) << 8)
  299                                         | (uint32_t)buf[i + 2];
  300                         addr <<= 1;
  301                         addr -= s->pos + (uint32_t)i + 4;
  302                         addr >>= 1;
  303                         buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
  304                         buf[i] = (uint8_t)(addr >> 11);
  305                         buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
  306                         buf[i + 2] = (uint8_t)addr;
  307                         i += 2;
  308                 }
  309         }
  310 
  311         return i;
  312 }
  313 #endif
  314 
  315 #ifdef XZ_DEC_SPARC
  316 static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  317 {
  318         size_t i;
  319         uint32_t instr;
  320 
  321         for (i = 0; i + 4 <= size; i += 4) {
  322                 instr = get_unaligned_be32(buf + i);
  323                 if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
  324                         instr <<= 2;
  325                         instr -= s->pos + (uint32_t)i;
  326                         instr >>= 2;
  327                         instr = ((uint32_t)0x40000000 - (instr & 0x400000))
  328                                         | 0x40000000 | (instr & 0x3FFFFF);
  329                         put_unaligned_be32(instr, buf + i);
  330                 }
  331         }
  332 
  333         return i;
  334 }
  335 #endif
  336 
  337 /*
  338  * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
  339  * of data that got filtered.
  340  *
  341  * NOTE: This is implemented as a switch statement to avoid using function
  342  * pointers, which could be problematic in the kernel boot code, which must
  343  * avoid pointers to static data (at least on x86).
  344  */
  345 static void bcj_apply(struct xz_dec_bcj *s,
  346                       uint8_t *buf, size_t *pos, size_t size)
  347 {
  348         size_t filtered;
  349 
  350         buf += *pos;
  351         size -= *pos;
  352 
  353         switch (s->type) {
  354 #ifdef XZ_DEC_X86
  355         case BCJ_X86:
  356                 filtered = bcj_x86(s, buf, size);
  357                 break;
  358 #endif
  359 #ifdef XZ_DEC_POWERPC
  360         case BCJ_POWERPC:
  361                 filtered = bcj_powerpc(s, buf, size);
  362                 break;
  363 #endif
  364 #ifdef XZ_DEC_IA64
  365         case BCJ_IA64:
  366                 filtered = bcj_ia64(s, buf, size);
  367                 break;
  368 #endif
  369 #ifdef XZ_DEC_ARM
  370         case BCJ_ARM:
  371                 filtered = bcj_arm(s, buf, size);
  372                 break;
  373 #endif
  374 #ifdef XZ_DEC_ARMTHUMB
  375         case BCJ_ARMTHUMB:
  376                 filtered = bcj_armthumb(s, buf, size);
  377                 break;
  378 #endif
  379 #ifdef XZ_DEC_SPARC
  380         case BCJ_SPARC:
  381                 filtered = bcj_sparc(s, buf, size);
  382                 break;
  383 #endif
  384         default:
  385                 /* Never reached but silence compiler warnings. */
  386                 filtered = 0;
  387                 break;
  388         }
  389 
  390         *pos += filtered;
  391         s->pos += filtered;
  392 }
  393 
  394 /*
  395  * Flush pending filtered data from temp to the output buffer.
  396  * Move the remaining mixture of possibly filtered and unfiltered
  397  * data to the beginning of temp.
  398  */
  399 static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
  400 {
  401         size_t copy_size;
  402 
  403         copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
  404         memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
  405         b->out_pos += copy_size;
  406 
  407         s->temp.filtered -= copy_size;
  408         s->temp.size -= copy_size;
  409         memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
  410 }
  411 
  412 /*
  413  * The BCJ filter functions are primitive in sense that they process the
  414  * data in chunks of 1-16 bytes. To hide this issue, this function does
  415  * some buffering.
  416  */
  417 XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
  418                                      struct xz_dec_lzma2 *lzma2,
  419                                      struct xz_buf *b)
  420 {
  421         size_t out_start;
  422 
  423         /*
  424          * Flush pending already filtered data to the output buffer. Return
  425          * immediately if we couldn't flush everything, or if the next
  426          * filter in the chain had already returned XZ_STREAM_END.
  427          */
  428         if (s->temp.filtered > 0) {
  429                 bcj_flush(s, b);
  430                 if (s->temp.filtered > 0)
  431                         return XZ_OK;
  432 
  433                 if (s->ret == XZ_STREAM_END)
  434                         return XZ_STREAM_END;
  435         }
  436 
  437         /*
  438          * If we have more output space than what is currently pending in
  439          * temp, copy the unfiltered data from temp to the output buffer
  440          * and try to fill the output buffer by decoding more data from the
  441          * next filter in the chain. Apply the BCJ filter on the new data
  442          * in the output buffer. If everything cannot be filtered, copy it
  443          * to temp and rewind the output buffer position accordingly.
  444          *
  445          * This needs to be always run when temp.size == 0 to handle a special
  446          * case where the output buffer is full and the next filter has no
  447          * more output coming but hasn't returned XZ_STREAM_END yet.
  448          */
  449         if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
  450                 out_start = b->out_pos;
  451                 memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
  452                 b->out_pos += s->temp.size;
  453 
  454                 s->ret = xz_dec_lzma2_run(lzma2, b);
  455                 if (s->ret != XZ_STREAM_END
  456                                 && (s->ret != XZ_OK || s->single_call))
  457                         return s->ret;
  458 
  459                 bcj_apply(s, b->out, &out_start, b->out_pos);
  460 
  461                 /*
  462                  * As an exception, if the next filter returned XZ_STREAM_END,
  463                  * we can do that too, since the last few bytes that remain
  464                  * unfiltered are meant to remain unfiltered.
  465                  */
  466                 if (s->ret == XZ_STREAM_END)
  467                         return XZ_STREAM_END;
  468 
  469                 s->temp.size = b->out_pos - out_start;
  470                 b->out_pos -= s->temp.size;
  471                 memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
  472 
  473                 /*
  474                  * If there wasn't enough input to the next filter to fill
  475                  * the output buffer with unfiltered data, there's no point
  476                  * to try decoding more data to temp.
  477                  */
  478                 if (b->out_pos + s->temp.size < b->out_size)
  479                         return XZ_OK;
  480         }
  481 
  482         /*
  483          * We have unfiltered data in temp. If the output buffer isn't full
  484          * yet, try to fill the temp buffer by decoding more data from the
  485          * next filter. Apply the BCJ filter on temp. Then we hopefully can
  486          * fill the actual output buffer by copying filtered data from temp.
  487          * A mix of filtered and unfiltered data may be left in temp; it will
  488          * be taken care on the next call to this function.
  489          */
  490         if (b->out_pos < b->out_size) {
  491                 /* Make b->out{,_pos,_size} temporarily point to s->temp. */
  492                 s->out = b->out;
  493                 s->out_pos = b->out_pos;
  494                 s->out_size = b->out_size;
  495                 b->out = s->temp.buf;
  496                 b->out_pos = s->temp.size;
  497                 b->out_size = sizeof(s->temp.buf);
  498 
  499                 s->ret = xz_dec_lzma2_run(lzma2, b);
  500 
  501                 s->temp.size = b->out_pos;
  502                 b->out = s->out;
  503                 b->out_pos = s->out_pos;
  504                 b->out_size = s->out_size;
  505 
  506                 if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
  507                         return s->ret;
  508 
  509                 bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
  510 
  511                 /*
  512                  * If the next filter returned XZ_STREAM_END, we mark that
  513                  * everything is filtered, since the last unfiltered bytes
  514                  * of the stream are meant to be left as is.
  515                  */
  516                 if (s->ret == XZ_STREAM_END)
  517                         s->temp.filtered = s->temp.size;
  518 
  519                 bcj_flush(s, b);
  520                 if (s->temp.filtered > 0)
  521                         return XZ_OK;
  522         }
  523 
  524         return s->ret;
  525 }
  526 
  527 XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
  528 {
  529         struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
  530         if (s != NULL)
  531                 s->single_call = single_call;
  532 
  533         return s;
  534 }
  535 
  536 XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
  537 {
  538         switch (id) {
  539 #ifdef XZ_DEC_X86
  540         case BCJ_X86:
  541 #endif
  542 #ifdef XZ_DEC_POWERPC
  543         case BCJ_POWERPC:
  544 #endif
  545 #ifdef XZ_DEC_IA64
  546         case BCJ_IA64:
  547 #endif
  548 #ifdef XZ_DEC_ARM
  549         case BCJ_ARM:
  550 #endif
  551 #ifdef XZ_DEC_ARMTHUMB
  552         case BCJ_ARMTHUMB:
  553 #endif
  554 #ifdef XZ_DEC_SPARC
  555         case BCJ_SPARC:
  556 #endif
  557                 break;
  558 
  559         default:
  560                 /* Unsupported Filter ID */
  561                 return XZ_OPTIONS_ERROR;
  562         }
  563 
  564         s->type = id;
  565         s->ret = XZ_OK;
  566         s->pos = 0;
  567         s->x86_prev_mask = 0;
  568         s->temp.filtered = 0;
  569         s->temp.size = 0;
  570 
  571         return XZ_OK;
  572 }
  573 
  574 #endif

Cache object: f4a7257cd581056cef1bfcd85151c452


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.