metablock_inc.h (7636B)
1 /* NOLINT(build/header_guard) */ 2 /* Copyright 2015 Google Inc. All Rights Reserved. 3 4 Distributed under MIT license. 5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 6 */ 7 8 /* template parameters: FN */ 9 10 #define HistogramType FN(Histogram) 11 12 /* Greedy block splitter for one block category (literal, command or distance). 13 */ 14 typedef struct FN(BlockSplitter) { 15 /* Alphabet size of particular block category. */ 16 size_t alphabet_size_; 17 /* We collect at least this many symbols for each block. */ 18 size_t min_block_size_; 19 /* We merge histograms A and B if 20 entropy(A+B) < entropy(A) + entropy(B) + split_threshold_, 21 where A is the current histogram and B is the histogram of the last or the 22 second last block type. */ 23 double split_threshold_; 24 25 size_t num_blocks_; 26 BlockSplit* split_; /* not owned */ 27 HistogramType* histograms_; /* not owned */ 28 size_t* histograms_size_; /* not owned */ 29 30 /* Temporary storage for BlockSplitterFinishBlock. */ 31 HistogramType combined_histo[2]; 32 33 /* The number of symbols that we want to collect before deciding on whether 34 or not to merge the block with a previous one or emit a new block. */ 35 size_t target_block_size_; 36 /* The number of symbols in the current histogram. */ 37 size_t block_size_; 38 /* Offset of the current histogram. */ 39 size_t curr_histogram_ix_; 40 /* Offset of the histograms of the previous two block types. */ 41 size_t last_histogram_ix_[2]; 42 /* Entropy of the previous two block types. */ 43 double last_entropy_[2]; 44 /* The number of times we merged the current block with the last one. */ 45 size_t merge_last_count_; 46 } FN(BlockSplitter); 47 48 static void FN(InitBlockSplitter)( 49 MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size, 50 size_t min_block_size, double split_threshold, size_t num_symbols, 51 BlockSplit* split, HistogramType** histograms, size_t* histograms_size) { 52 size_t max_num_blocks = num_symbols / min_block_size + 1; 53 /* We have to allocate one more histogram than the maximum number of block 54 types for the current histogram when the meta-block is too big. */ 55 size_t max_num_types = 56 BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1); 57 self->alphabet_size_ = alphabet_size; 58 self->min_block_size_ = min_block_size; 59 self->split_threshold_ = split_threshold; 60 self->num_blocks_ = 0; 61 self->split_ = split; 62 self->histograms_size_ = histograms_size; 63 self->target_block_size_ = min_block_size; 64 self->block_size_ = 0; 65 self->curr_histogram_ix_ = 0; 66 self->merge_last_count_ = 0; 67 BROTLI_ENSURE_CAPACITY(m, uint8_t, 68 split->types, split->types_alloc_size, max_num_blocks); 69 BROTLI_ENSURE_CAPACITY(m, uint32_t, 70 split->lengths, split->lengths_alloc_size, max_num_blocks); 71 if (BROTLI_IS_OOM(m)) return; 72 self->split_->num_blocks = max_num_blocks; 73 BROTLI_DCHECK(*histograms == 0); 74 *histograms_size = max_num_types; 75 *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size); 76 self->histograms_ = *histograms; 77 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return; 78 /* Clear only current histogram. */ 79 FN(HistogramClear)(&self->histograms_[0]); 80 self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0; 81 } 82 83 /* Does either of three things: 84 (1) emits the current block with a new block type; 85 (2) emits the current block with the type of the second last block; 86 (3) merges the current block with the last block. */ 87 static void FN(BlockSplitterFinishBlock)( 88 FN(BlockSplitter)* self, BROTLI_BOOL is_final) { 89 BlockSplit* split = self->split_; 90 double* last_entropy = self->last_entropy_; 91 HistogramType* histograms = self->histograms_; 92 self->block_size_ = 93 BROTLI_MAX(size_t, self->block_size_, self->min_block_size_); 94 if (self->num_blocks_ == 0) { 95 /* Create first block. */ 96 split->lengths[0] = (uint32_t)self->block_size_; 97 split->types[0] = 0; 98 last_entropy[0] = 99 BrotliBitsEntropy(histograms[0].data_, self->alphabet_size_); 100 last_entropy[1] = last_entropy[0]; 101 ++self->num_blocks_; 102 ++split->num_types; 103 ++self->curr_histogram_ix_; 104 if (self->curr_histogram_ix_ < *self->histograms_size_) 105 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 106 self->block_size_ = 0; 107 } else if (self->block_size_ > 0) { 108 double entropy = BrotliBitsEntropy( 109 histograms[self->curr_histogram_ix_].data_, self->alphabet_size_); 110 double combined_entropy[2]; 111 double diff[2]; 112 size_t j; 113 for (j = 0; j < 2; ++j) { 114 size_t last_histogram_ix = self->last_histogram_ix_[j]; 115 self->combined_histo[j] = histograms[self->curr_histogram_ix_]; 116 FN(HistogramAddHistogram)(&self->combined_histo[j], 117 &histograms[last_histogram_ix]); 118 combined_entropy[j] = BrotliBitsEntropy( 119 &self->combined_histo[j].data_[0], self->alphabet_size_); 120 diff[j] = combined_entropy[j] - entropy - last_entropy[j]; 121 } 122 123 if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES && 124 diff[0] > self->split_threshold_ && 125 diff[1] > self->split_threshold_) { 126 /* Create new block. */ 127 split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; 128 split->types[self->num_blocks_] = (uint8_t)split->num_types; 129 self->last_histogram_ix_[1] = self->last_histogram_ix_[0]; 130 self->last_histogram_ix_[0] = (uint8_t)split->num_types; 131 last_entropy[1] = last_entropy[0]; 132 last_entropy[0] = entropy; 133 ++self->num_blocks_; 134 ++split->num_types; 135 ++self->curr_histogram_ix_; 136 if (self->curr_histogram_ix_ < *self->histograms_size_) 137 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 138 self->block_size_ = 0; 139 self->merge_last_count_ = 0; 140 self->target_block_size_ = self->min_block_size_; 141 } else if (diff[1] < diff[0] - 20.0) { 142 /* Combine this block with second last block. */ 143 split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; 144 split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2]; 145 BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1); 146 histograms[self->last_histogram_ix_[0]] = self->combined_histo[1]; 147 last_entropy[1] = last_entropy[0]; 148 last_entropy[0] = combined_entropy[1]; 149 ++self->num_blocks_; 150 self->block_size_ = 0; 151 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 152 self->merge_last_count_ = 0; 153 self->target_block_size_ = self->min_block_size_; 154 } else { 155 /* Combine this block with last block. */ 156 split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_; 157 histograms[self->last_histogram_ix_[0]] = self->combined_histo[0]; 158 last_entropy[0] = combined_entropy[0]; 159 if (split->num_types == 1) { 160 last_entropy[1] = last_entropy[0]; 161 } 162 self->block_size_ = 0; 163 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 164 if (++self->merge_last_count_ > 1) { 165 self->target_block_size_ += self->min_block_size_; 166 } 167 } 168 } 169 if (is_final) { 170 *self->histograms_size_ = split->num_types; 171 split->num_blocks = self->num_blocks_; 172 } 173 } 174 175 /* Adds the next symbol to the current histogram. When the current histogram 176 reaches the target size, decides on merging the block. */ 177 static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) { 178 FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol); 179 ++self->block_size_; 180 if (self->block_size_ == self->target_block_size_) { 181 FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE); 182 } 183 } 184 185 #undef HistogramType