#include /* fprintf */ #include /* malloc, free, qsort */ #include /* memset */ #include /* clock */ #include "mem.h" /* read */ #include "pool.h" #include "threading.h" #include "zstd_internal.h" /* includes zstd.h */ #ifndef ZDICT_STATIC_LINKING_ONLY #define ZDICT_STATIC_LINKING_ONLY #endif #include "zdict.h" /** * COVER_best_t is used for two purposes: * 1. Synchronizing threads. * 2. Saving the best parameters and dictionary. * * All of the methods except COVER_best_init() are thread safe if zstd is * compiled with multithreaded support. */ typedef struct COVER_best_s { ZSTD_pthread_mutex_t mutex; ZSTD_pthread_cond_t cond; size_t liveJobs; void *dict; size_t dictSize; ZDICT_cover_params_t parameters; size_t compressedSize; } COVER_best_t; /** * A segment is a range in the source as well as the score of the segment. */ typedef struct { U32 begin; U32 end; U32 score; } COVER_segment_t; /** *Number of epochs and size of each epoch. */ typedef struct { U32 num; U32 size; } COVER_epoch_info_t; /** * Computes the number of epochs and the size of each epoch. * We will make sure that each epoch gets at least 10 * k bytes. * * The COVER algorithms divide the data up into epochs of equal size and * select one segment from each epoch. * * @param maxDictSize The maximum allowed dictionary size. * @param nbDmers The number of dmers we are training on. * @param k The parameter k (segment size). * @param passes The target number of passes over the dmer corpus. * More passes means a better dictionary. */ COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers, U32 k, U32 passes); /** * Warns the user when their corpus is too small. */ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); /** * Checks total compressed size of a dictionary */ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, const size_t *samplesSizes, const BYTE *samples, size_t *offsets, size_t nbTrainSamples, size_t nbSamples, BYTE *const dict, size_t dictBufferCapacity); /** * Returns the sum of the sample sizes. */ size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ; /** * Initialize the `COVER_best_t`. */ void COVER_best_init(COVER_best_t *best); /** * Wait until liveJobs == 0. */ void COVER_best_wait(COVER_best_t *best); /** * Call COVER_best_wait() and then destroy the COVER_best_t. */ void COVER_best_destroy(COVER_best_t *best); /** * Called when a thread is about to be launched. * Increments liveJobs. */ void COVER_best_start(COVER_best_t *best); /** * Called when a thread finishes executing, both on error or success. * Decrements liveJobs and signals any waiting threads if liveJobs == 0. * If this dictionary is the best so far save it and its parameters. */ void COVER_best_finish(COVER_best_t *best, size_t compressedSize, ZDICT_cover_params_t parameters, void *dict, size_t dictSize);