The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/zstd/programs/datagen.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) Yann Collet, Facebook, Inc.
    3  * All rights reserved.
    4  *
    5  * This source code is licensed under both the BSD-style license (found in the
    6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
    7  * in the COPYING file in the root directory of this source tree).
    8  * You may select, at your option, one of the above-listed licenses.
    9  */
   10 
   11 
   12 
   13 /*-************************************
   14 *  Dependencies
   15 **************************************/
   16 #include "datagen.h"
   17 #include "platform.h"  /* SET_BINARY_MODE */
   18 #include <stdlib.h>    /* malloc, free */
   19 #include <stdio.h>     /* FILE, fwrite, fprintf */
   20 #include <string.h>    /* memcpy */
   21 #include "../lib/common/mem.h"  /* U32 */
   22 
   23 
   24 /*-************************************
   25 *  Macros
   26 **************************************/
   27 #define KB *(1 <<10)
   28 #define MIN(a,b)  ( (a) < (b) ? (a) : (b) )
   29 
   30 #define RDG_DEBUG 0
   31 #define TRACE(...)   if (RDG_DEBUG) fprintf(stderr, __VA_ARGS__ )
   32 
   33 
   34 /*-************************************
   35 *  Local constants
   36 **************************************/
   37 #define LTLOG 13
   38 #define LTSIZE (1<<LTLOG)
   39 #define LTMASK (LTSIZE-1)
   40 
   41 
   42 /*-*******************************************************
   43 *  Local Functions
   44 *********************************************************/
   45 #define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
   46 static U32 RDG_rand(U32* src)
   47 {
   48     static const U32 prime1 = 2654435761U;
   49     static const U32 prime2 = 2246822519U;
   50     U32 rand32 = *src;
   51     rand32 *= prime1;
   52     rand32 ^= prime2;
   53     rand32  = RDG_rotl32(rand32, 13);
   54     *src = rand32;
   55     return rand32 >> 5;
   56 }
   57 
   58 typedef U32 fixedPoint_24_8;
   59 
   60 static void RDG_fillLiteralDistrib(BYTE* ldt, fixedPoint_24_8 ld)
   61 {
   62     BYTE const firstChar = (ld<=0.0) ?   0 : '(';
   63     BYTE const lastChar  = (ld<=0.0) ? 255 : '}';
   64     BYTE character = (ld<=0.0) ? 0 : '';
   65     U32 u;
   66 
   67     if (ld<=0) ld = 0;
   68     for (u=0; u<LTSIZE; ) {
   69         U32 const weight = (((LTSIZE - u) * ld) >> 8) + 1;
   70         U32 const end = MIN ( u + weight , LTSIZE);
   71         while (u < end) ldt[u++] = character;
   72         character++;
   73         if (character > lastChar) character = firstChar;
   74     }
   75 }
   76 
   77 
   78 static BYTE RDG_genChar(U32* seed, const BYTE* ldt)
   79 {
   80     U32 const id = RDG_rand(seed) & LTMASK;
   81     return ldt[id];  /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with P==0.0. Checked : table is fully initialized */
   82 }
   83 
   84 
   85 static U32 RDG_rand15Bits (U32* seedPtr)
   86 {
   87     return RDG_rand(seedPtr) & 0x7FFF;
   88 }
   89 
   90 static U32 RDG_randLength(U32* seedPtr)
   91 {
   92     if (RDG_rand(seedPtr) & 7) return (RDG_rand(seedPtr) & 0xF);   /* small length */
   93     return (RDG_rand(seedPtr) & 0x1FF) + 0xF;
   94 }
   95 
   96 static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize,
   97                          double matchProba, const BYTE* ldt, U32* seedPtr)
   98 {
   99     BYTE* const buffPtr = (BYTE*)buffer;
  100     U32 const matchProba32 = (U32)(32768 * matchProba);
  101     size_t pos = prefixSize;
  102     U32 prevOffset = 1;
  103 
  104     /* special case : sparse content */
  105     while (matchProba >= 1.0) {
  106         size_t size0 = RDG_rand(seedPtr) & 3;
  107         size0  = (size_t)1 << (16 + size0 * 2);
  108         size0 += RDG_rand(seedPtr) & (size0-1);   /* because size0 is power of 2*/
  109         if (buffSize < pos + size0) {
  110             memset(buffPtr+pos, 0, buffSize-pos);
  111             return;
  112         }
  113         memset(buffPtr+pos, 0, size0);
  114         pos += size0;
  115         buffPtr[pos-1] = RDG_genChar(seedPtr, ldt);
  116         continue;
  117     }
  118 
  119     /* init */
  120     if (pos==0) buffPtr[0] = RDG_genChar(seedPtr, ldt), pos=1;
  121 
  122     /* Generate compressible data */
  123     while (pos < buffSize) {
  124         /* Select : Literal (char) or Match (within 32K) */
  125         if (RDG_rand15Bits(seedPtr) < matchProba32) {
  126             /* Copy (within 32K) */
  127             U32 const length = RDG_randLength(seedPtr) + 4;
  128             U32 const d = (U32) MIN(pos + length , buffSize);
  129             U32 const repeatOffset = (RDG_rand(seedPtr) & 15) == 2;
  130             U32 const randOffset = RDG_rand15Bits(seedPtr) + 1;
  131             U32 const offset = repeatOffset ? prevOffset : (U32) MIN(randOffset , pos);
  132             size_t match = pos - offset;
  133             while (pos < d) { buffPtr[pos++] = buffPtr[match++];   /* correctly manages overlaps */ }
  134             prevOffset = offset;
  135         } else {
  136             /* Literal (noise) */
  137             U32 const length = RDG_randLength(seedPtr);
  138             U32 const d = (U32) MIN(pos + length, buffSize);
  139             while (pos < d) { buffPtr[pos++] = RDG_genChar(seedPtr, ldt); }
  140     }   }
  141 }
  142 
  143 
  144 void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
  145 {
  146     U32 seed32 = seed;
  147     BYTE ldt[LTSIZE];
  148     memset(ldt, '', sizeof(ldt));  /* yes, character '', this is intentional */
  149     if (litProba<=0.0) litProba = matchProba / 4.5;
  150     RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001));
  151     RDG_genBlock(buffer, size, 0, matchProba, ldt, &seed32);
  152 }
  153 
  154 
  155 void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed)
  156 {
  157     U32 seed32 = seed;
  158     size_t const stdBlockSize = 128 KB;
  159     size_t const stdDictSize = 32 KB;
  160     BYTE* const buff = (BYTE*)malloc(stdDictSize + stdBlockSize);
  161     U64 total = 0;
  162     BYTE ldt[LTSIZE];   /* literals distribution table */
  163 
  164     /* init */
  165     if (buff==NULL) { perror("datagen"); exit(1); }
  166     if (litProba<=0.0) litProba = matchProba / 4.5;
  167     memset(ldt, '', sizeof(ldt));   /* yes, character '', this is intentional */
  168     RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001));
  169     SET_BINARY_MODE(stdout);
  170 
  171     /* Generate initial dict */
  172     RDG_genBlock(buff, stdDictSize, 0, matchProba, ldt, &seed32);
  173 
  174     /* Generate compressible data */
  175     while (total < size) {
  176         size_t const genBlockSize = (size_t) (MIN (stdBlockSize, size-total));
  177         RDG_genBlock(buff, stdDictSize+stdBlockSize, stdDictSize, matchProba, ldt, &seed32);
  178         total += genBlockSize;
  179         { size_t const unused = fwrite(buff, 1, genBlockSize, stdout); (void)unused; }
  180         /* update dict */
  181         memcpy(buff, buff + stdBlockSize, stdDictSize);
  182     }
  183 
  184     /* cleanup */
  185     free(buff);
  186 }

Cache object: 100cb9656a4ee174925d362d7a66c008


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.