The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/zstd/lib/dictBuilder/cover.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) Facebook, Inc.
    3  * All rights reserved.
    4  *
    5  * This source code is licensed under both the BSD-style license (found in the
    6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
    7  * in the COPYING file in the root directory of this source tree).
    8  * You may select, at your option, one of the above-listed licenses.
    9  */
   10 
   11 #ifndef ZDICT_STATIC_LINKING_ONLY
   12 #  define ZDICT_STATIC_LINKING_ONLY
   13 #endif
   14 
   15 #include <stdio.h>  /* fprintf */
   16 #include <stdlib.h> /* malloc, free, qsort */
   17 #include <string.h> /* memset */
   18 #include <time.h>   /* clock */
   19 #include "../common/mem.h" /* read */
   20 #include "../common/pool.h"
   21 #include "../common/threading.h"
   22 #include "../common/zstd_internal.h" /* includes zstd.h */
   23 #include "../zdict.h"
   24 
   25 /**
   26  * COVER_best_t is used for two purposes:
   27  * 1. Synchronizing threads.
   28  * 2. Saving the best parameters and dictionary.
   29  *
   30  * All of the methods except COVER_best_init() are thread safe if zstd is
   31  * compiled with multithreaded support.
       *
       * Usage, as described by the COVER_best_*() declarations in this header:
       * COVER_best_init() sets the object up, COVER_best_start() is called
       * before each job is launched, COVER_best_finish() when each job ends,
       * and COVER_best_wait() / COVER_best_destroy() wait for liveJobs == 0.
   32  */
   33 typedef struct COVER_best_s {
   34   ZSTD_pthread_mutex_t mutex; /* NOTE(review): presumably guards the fields below; confirm in cover.c */
   35   ZSTD_pthread_cond_t cond; /* NOTE(review): presumably what COVER_best_wait() waits on; confirm in cover.c */
   36   size_t liveJobs; /* incremented by COVER_best_start(), decremented by COVER_best_finish() */
   37   void *dict; /* best dictionary saved so far; ownership is not stated in this header */
   38   size_t dictSize; /* NOTE(review): presumably the size of `dict` in bytes; confirm */
   39   ZDICT_cover_params_t parameters; /* parameters saved together with the best dictionary */
   40   size_t compressedSize; /* NOTE(review): presumably the metric used to decide "best"; confirm in cover.c */
   41 } COVER_best_t;
   42 
   43 /**
   44  * A segment is a range in the source as well as the score of the segment.
   45  */
   46 typedef struct {
   47   U32 begin; /* start of the range in the source */
   48   U32 end; /* end of the range; NOTE(review): inclusive vs. exclusive is not stated here - confirm in cover.c */
   49   U32 score; /* score of the segment */
   50 } COVER_segment_t;
   51 
   52 /**
   53  * Number of epochs and size of each epoch.
       * This is the return type of COVER_computeEpochs().
   54  */
   55 typedef struct {
   56   U32 num; /* number of epochs */
   57   U32 size; /* size of each epoch; NOTE(review): unit (bytes vs. dmers) is not stated here - confirm */
   58 } COVER_epoch_info_t;
   59 
   60 /**
   61  * Struct used for the dictionary selection function.
       *
       * Returned by COVER_selectDict() and COVER_dictSelectionError(), tested
       * with COVER_dictSelectionIsError() and released with
       * COVER_dictSelectionFree().
   62  */
   63 typedef struct COVER_dictSelection {
   64   BYTE* dictContent; /* NOTE(review): presumably the memory released by COVER_dictSelectionFree(); confirm */
   65   size_t dictSize; /* NOTE(review): presumably the size of dictContent in bytes; confirm */
   66   size_t totalCompressedSize; /* holds a ZSTD error in a struct built by COVER_dictSelectionError() */
   67 } COVER_dictSelection_t;
   68 
   69 /**
   70  * Computes the number of epochs and the size of each epoch.
   71  * We will make sure that each epoch gets at least 10 * k bytes.
   72  *
   73  * The COVER algorithms divide the data up into epochs of equal size and
   74  * select one segment from each epoch.
   75  *
   76  * @param maxDictSize The maximum allowed dictionary size.
   77  * @param nbDmers     The number of dmers we are training on.
   78  * @param k           The parameter k (segment size).
   79  * @param passes      The target number of passes over the dmer corpus.
   80  *                    More passes means a better dictionary.
       *
       * @return A COVER_epoch_info_t holding the number of epochs (num) and the
       *         size of each epoch (size).
   81  */
   82 COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
   83                                        U32 k, U32 passes);
   84 
   85 /**
   86  * Warns the user when their corpus is too small.
       *
       * NOTE(review): the parameter notes below are inferred from the names and
       * from the matching COVER_computeEpochs() parameters; confirm in cover.c.
       * @param maxDictSize  presumably the maximum allowed dictionary size.
       * @param nbDmers      presumably the number of dmers being trained on.
       * @param displayLevel presumably controls whether the warning is printed.
   87  */
   88 void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel);
   89 
   90 /**
   91  * Checks total compressed size of a dictionary
       *
       * NOTE(review): this header does not state what the size_t result
       * encodes. Presumably it is the total compressed size of the samples when
       * using `dict`, or a ZSTD error code on failure - confirm in cover.c
       * before relying on either.
       * NOTE(review): `offsets`, `nbTrainSamples` and `nbSamples` are not
       * described here; which samples are measured must be checked in the
       * definition.
   92  */
   93 size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
   94                                       const size_t *samplesSizes, const BYTE *samples,
   95                                       size_t *offsets,
   96                                       size_t nbTrainSamples, size_t nbSamples,
   97                                       BYTE *const dict, size_t dictBufferCapacity);
   98 
   99 /**
  100  * Returns the sum of the sample sizes.
       *
       * @param samplesSizes Array of sample sizes to add up.
       * @param nbSamples    Presumably the number of entries of samplesSizes to
       *                     sum - confirm in cover.c.
  101  */
  102 size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ;
  103 
  104 /**
  105  * Initialize the `COVER_best_t`.
       *
       * Per the COVER_best_t description, this is the one COVER_best_*()
       * function that is not thread safe.
  106  */
  107 void COVER_best_init(COVER_best_t *best);
  108 
  109 /**
  110  * Wait until liveJobs == 0.
       *
       * liveJobs is incremented by COVER_best_start() and decremented by
       * COVER_best_finish(), so this waits for every started job to finish.
  111  */
  112 void COVER_best_wait(COVER_best_t *best);
  113 
  114 /**
  115  * Call COVER_best_wait() and then destroy the COVER_best_t.
       *
       * NOTE(review): whether the saved dictionary buffer is freed here is not
       * stated in this header; confirm in cover.c.
  116  */
  117 void COVER_best_destroy(COVER_best_t *best);
  118 
  119 /**
  120  * Called when a thread is about to be launched.
  121  * Increments liveJobs.
       *
       * Each call is balanced by a COVER_best_finish() call, which decrements
       * liveJobs again.
  122  */
  123 void COVER_best_start(COVER_best_t *best);
  124 
  125 /**
  126  * Called when a thread finishes executing, on both error and success.
  127  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
  128  * If this dictionary is the best so far save it and its parameters.
       *
       * @param best       The shared COVER_best_t to update.
       * @param parameters The parameters saved if this dictionary is the best.
       * @param selection  The job's dictionary selection. NOTE(review): an
       *                   error selection (see COVER_dictSelectionIsError()) is
       *                   presumably never saved as the best - confirm.
  129  */
  130 void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
  131                        COVER_dictSelection_t selection);
  132 /**
  133  * Error function for COVER_selectDict function. Checks if the return
  134  * value is an error.
       *
       * @param selection A value returned by COVER_selectDict().
       * @return NOTE(review): presumably nonzero when `selection` is an error
       *         and 0 otherwise - confirm in cover.c.
  135  */
  136 unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
  137 
  138  /**
  139   * Error function for COVER_selectDict function. Returns a struct whose
  140   * totalCompressedSize field is a ZSTD error.
        *
        * @param error NOTE(review): presumably the ZSTD error code to store in
        *              totalCompressedSize - confirm in cover.c.
  141   */
  142 COVER_dictSelection_t COVER_dictSelectionError(size_t error);
  143 
  144 /**
  145  * Always call after selectDict is called to free up used memory from
  146  * newly created dictionary.
       *
       * @param selection The value returned by COVER_selectDict().
  147  */
  148 void COVER_dictSelectionFree(COVER_dictSelection_t selection);
  149 
  150 /**
  151  * Called to finalize the dictionary and select one based on whether or not
  152  * the shrink-dict flag was enabled. If enabled the dictionary used is the
  153  * smallest dictionary within a specified regression of the compressed size
  154  * from the largest dictionary.
       *
       * @return A COVER_dictSelection_t. Test it with
       *         COVER_dictSelectionIsError() and always release it with
       *         COVER_dictSelectionFree().
       *
       * NOTE(review): the individual parameters are not described in this
       * header. Presumably the shrink-dict flag and the allowed regression are
       * carried in `params`; confirm against cover.c and zdict.h.
  155  */
  156  COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
  157                        size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
  158                        size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);

Cache object: e89d9180b72eaa54f6595a5f5509998a


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.