tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

metablock_inc.h (7636B)


      1 /* NOLINT(build/header_guard) */
      2 /* Copyright 2015 Google Inc. All Rights Reserved.
      3 
      4   Distributed under MIT license.
      5   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      6 */
      7 
/* template parameters: FN */
/* NOTE(review): FN is not defined in this file; presumably the including
   source defines it as a name-decorating macro before each inclusion, once
   per block category -- confirm against the includer. */

/* Histogram type of the category this file is instantiated for. Local to this
   file: it is #undef'ed again on the last line. */
#define HistogramType FN(Histogram)
     11 
/* Greedy block splitter for one block category (literal, command or distance).
   Symbols are accumulated into a "current" histogram; each time enough symbols
   have been collected, the accumulated block is either emitted as a block of
   its own or merged into one of the previous two block types. */
typedef struct FN(BlockSplitter) {
 /* Alphabet size of particular block category. */
 size_t alphabet_size_;
 /* We collect at least this many symbols for each block. */
 size_t min_block_size_;
 /* We merge histograms A and B if
      entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
    where A is the current histogram and B is the histogram of the last or the
    second last block type. */
 double split_threshold_;

 /* The number of blocks emitted so far; copied to split_->num_blocks when the
    final block is finished. */
 size_t num_blocks_;
 /* Destination block split: its types / lengths arrays and num_types are
    filled in as blocks are emitted. */
 BlockSplit* split_;  /* not owned */
 /* Histogram array allocated by InitBlockSplitter; indexed by
    curr_histogram_ix_ and last_histogram_ix_. */
 HistogramType* histograms_;  /* not owned */
 /* Points at the element count of histograms_: the allocated count after
    initialization, the number of block types after the final block. */
 size_t* histograms_size_;  /* not owned */

 /* Temporary storage for BlockSplitterFinishBlock. */
 HistogramType combined_histo[2];

 /* The number of symbols that we want to collect before deciding on whether
    or not to merge the block with a previous one or emit a new block. */
 size_t target_block_size_;
 /* The number of symbols in the current histogram. */
 size_t block_size_;
 /* Offset of the current histogram. */
 size_t curr_histogram_ix_;
 /* Offset of the histograms of the previous two block types
    ([0] = last, [1] = second last). */
 size_t last_histogram_ix_[2];
 /* Entropy of the previous two block types, in the same order as
    last_histogram_ix_. */
 double last_entropy_[2];
 /* The number of times we merged the current block with the last one. */
 size_t merge_last_count_;
} FN(BlockSplitter);
     47 
     48 static void FN(InitBlockSplitter)(
     49    MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
     50    size_t min_block_size, double split_threshold, size_t num_symbols,
     51    BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
     52  size_t max_num_blocks = num_symbols / min_block_size + 1;
     53  /* We have to allocate one more histogram than the maximum number of block
     54     types for the current histogram when the meta-block is too big. */
     55  size_t max_num_types =
     56      BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
     57  self->alphabet_size_ = alphabet_size;
     58  self->min_block_size_ = min_block_size;
     59  self->split_threshold_ = split_threshold;
     60  self->num_blocks_ = 0;
     61  self->split_ = split;
     62  self->histograms_size_ = histograms_size;
     63  self->target_block_size_ = min_block_size;
     64  self->block_size_ = 0;
     65  self->curr_histogram_ix_ = 0;
     66  self->merge_last_count_ = 0;
     67  BROTLI_ENSURE_CAPACITY(m, uint8_t,
     68      split->types, split->types_alloc_size, max_num_blocks);
     69  BROTLI_ENSURE_CAPACITY(m, uint32_t,
     70      split->lengths, split->lengths_alloc_size, max_num_blocks);
     71  if (BROTLI_IS_OOM(m)) return;
     72  self->split_->num_blocks = max_num_blocks;
     73  BROTLI_DCHECK(*histograms == 0);
     74  *histograms_size = max_num_types;
     75  *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
     76  self->histograms_ = *histograms;
     77  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
     78  /* Clear only current histogram. */
     79  FN(HistogramClear)(&self->histograms_[0]);
     80  self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
     81 }
     82 
     83 /* Does either of three things:
     84     (1) emits the current block with a new block type;
     85     (2) emits the current block with the type of the second last block;
     86     (3) merges the current block with the last block. */
     87 static void FN(BlockSplitterFinishBlock)(
     88    FN(BlockSplitter)* self, BROTLI_BOOL is_final) {
     89  BlockSplit* split = self->split_;
     90  double* last_entropy = self->last_entropy_;
     91  HistogramType* histograms = self->histograms_;
     92  self->block_size_ =
     93      BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
     94  if (self->num_blocks_ == 0) {
     95    /* Create first block. */
     96    split->lengths[0] = (uint32_t)self->block_size_;
     97    split->types[0] = 0;
     98    last_entropy[0] =
     99        BrotliBitsEntropy(histograms[0].data_, self->alphabet_size_);
    100    last_entropy[1] = last_entropy[0];
    101    ++self->num_blocks_;
    102    ++split->num_types;
    103    ++self->curr_histogram_ix_;
    104    if (self->curr_histogram_ix_ < *self->histograms_size_)
    105      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
    106    self->block_size_ = 0;
    107  } else if (self->block_size_ > 0) {
    108    double entropy = BrotliBitsEntropy(
    109        histograms[self->curr_histogram_ix_].data_, self->alphabet_size_);
    110    double combined_entropy[2];
    111    double diff[2];
    112    size_t j;
    113    for (j = 0; j < 2; ++j) {
    114      size_t last_histogram_ix = self->last_histogram_ix_[j];
    115      self->combined_histo[j] = histograms[self->curr_histogram_ix_];
    116      FN(HistogramAddHistogram)(&self->combined_histo[j],
    117          &histograms[last_histogram_ix]);
    118      combined_entropy[j] = BrotliBitsEntropy(
    119          &self->combined_histo[j].data_[0], self->alphabet_size_);
    120      diff[j] = combined_entropy[j] - entropy - last_entropy[j];
    121    }
    122 
    123    if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
    124        diff[0] > self->split_threshold_ &&
    125        diff[1] > self->split_threshold_) {
    126      /* Create new block. */
    127      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
    128      split->types[self->num_blocks_] = (uint8_t)split->num_types;
    129      self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
    130      self->last_histogram_ix_[0] = (uint8_t)split->num_types;
    131      last_entropy[1] = last_entropy[0];
    132      last_entropy[0] = entropy;
    133      ++self->num_blocks_;
    134      ++split->num_types;
    135      ++self->curr_histogram_ix_;
    136      if (self->curr_histogram_ix_ < *self->histograms_size_)
    137        FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
    138      self->block_size_ = 0;
    139      self->merge_last_count_ = 0;
    140      self->target_block_size_ = self->min_block_size_;
    141    } else if (diff[1] < diff[0] - 20.0) {
    142      /* Combine this block with second last block. */
    143      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
    144      split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
    145      BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
    146      histograms[self->last_histogram_ix_[0]] = self->combined_histo[1];
    147      last_entropy[1] = last_entropy[0];
    148      last_entropy[0] = combined_entropy[1];
    149      ++self->num_blocks_;
    150      self->block_size_ = 0;
    151      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
    152      self->merge_last_count_ = 0;
    153      self->target_block_size_ = self->min_block_size_;
    154    } else {
    155      /* Combine this block with last block. */
    156      split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
    157      histograms[self->last_histogram_ix_[0]] = self->combined_histo[0];
    158      last_entropy[0] = combined_entropy[0];
    159      if (split->num_types == 1) {
    160        last_entropy[1] = last_entropy[0];
    161      }
    162      self->block_size_ = 0;
    163      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
    164      if (++self->merge_last_count_ > 1) {
    165        self->target_block_size_ += self->min_block_size_;
    166      }
    167    }
    168  }
    169  if (is_final) {
    170    *self->histograms_size_ = split->num_types;
    171    split->num_blocks = self->num_blocks_;
    172  }
    173 }
    174 
    175 /* Adds the next symbol to the current histogram. When the current histogram
    176   reaches the target size, decides on merging the block. */
    177 static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
    178  FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);
    179  ++self->block_size_;
    180  if (self->block_size_ == self->target_block_size_) {
    181    FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE);
    182  }
    183 }
    184 
    185 #undef HistogramType