block_splitter.c (7330B)
1 /* Copyright 2013 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5 */ 6 7 /* Block split point selection utilities. */ 8 9 #include "block_splitter.h" 10 11 #include "../common/platform.h" 12 #include "bit_cost.h" 13 #include "cluster.h" 14 #include "command.h" 15 #include "fast_log.h" 16 #include "histogram.h" 17 #include "memory.h" 18 #include "quality.h" 19 20 #if defined(__cplusplus) || defined(c_plusplus) 21 extern "C" { 22 #endif 23 24 static const size_t kMaxLiteralHistograms = 100; 25 static const size_t kMaxCommandHistograms = 50; 26 static const double kLiteralBlockSwitchCost = 28.1; 27 static const double kCommandBlockSwitchCost = 13.5; 28 static const double kDistanceBlockSwitchCost = 14.6; 29 static const size_t kLiteralStrideLength = 70; 30 static const size_t kCommandStrideLength = 40; 31 static const size_t kDistanceStrideLength = 40; 32 static const size_t kSymbolsPerLiteralHistogram = 544; 33 static const size_t kSymbolsPerCommandHistogram = 530; 34 static const size_t kSymbolsPerDistanceHistogram = 544; 35 static const size_t kMinLengthForBlockSplitting = 128; 36 static const size_t kIterMulForRefining = 2; 37 static const size_t kMinItersForRefining = 100; 38 39 static size_t CountLiterals(const Command* cmds, const size_t num_commands) { 40 /* Count how many we have. */ 41 size_t total_length = 0; 42 size_t i; 43 for (i = 0; i < num_commands; ++i) { 44 total_length += cmds[i].insert_len_; 45 } 46 return total_length; 47 } 48 49 static void CopyLiteralsToByteArray(const Command* cmds, 50 const size_t num_commands, 51 const uint8_t* data, 52 const size_t offset, 53 const size_t mask, 54 uint8_t* literals) { 55 size_t pos = 0; 56 size_t from_pos = offset & mask; 57 size_t i; 58 for (i = 0; i < num_commands; ++i) { 59 size_t insert_len = cmds[i].insert_len_; 60 if (from_pos + insert_len > mask) { 61 size_t head_size = mask + 1 - from_pos; 62 memcpy(literals + pos, data + from_pos, head_size); 63 from_pos = 0; 64 pos += head_size; 65 insert_len -= head_size; 66 } 67 if (insert_len > 0) { 68 memcpy(literals + pos, data + from_pos, insert_len); 69 pos += insert_len; 70 } 71 from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask; 72 } 73 } 74 75 static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) { 76 /* Initial seed should be 7. In this case, loop length is (1 << 29). */ 77 *seed *= 16807U; 78 return *seed; 79 } 80 81 static BROTLI_INLINE double BitCost(size_t count) { 82 return count == 0 ? -2.0 : FastLog2(count); 83 } 84 85 #define HISTOGRAMS_PER_BATCH 64 86 #define CLUSTERS_PER_BATCH 16 87 88 #define FN(X) X ## Literal 89 #define DataType uint8_t 90 /* NOLINTNEXTLINE(build/include) */ 91 #include "block_splitter_inc.h" 92 #undef DataType 93 #undef FN 94 95 #define FN(X) X ## Command 96 #define DataType uint16_t 97 /* NOLINTNEXTLINE(build/include) */ 98 #include "block_splitter_inc.h" 99 #undef FN 100 101 #define FN(X) X ## Distance 102 /* NOLINTNEXTLINE(build/include) */ 103 #include "block_splitter_inc.h" 104 #undef DataType 105 #undef FN 106 107 void BrotliInitBlockSplit(BlockSplit* self) { 108 self->num_types = 0; 109 self->num_blocks = 0; 110 self->types = 0; 111 self->lengths = 0; 112 self->types_alloc_size = 0; 113 self->lengths_alloc_size = 0; 114 } 115 116 void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) { 117 BROTLI_FREE(m, self->types); 118 BROTLI_FREE(m, self->lengths); 119 } 120 121 /* Extracts literals, command distance and prefix codes, then applies 122 * SplitByteVector to create partitioning. */ 123 void BrotliSplitBlock(MemoryManager* m, 124 const Command* cmds, 125 const size_t num_commands, 126 const uint8_t* data, 127 const size_t pos, 128 const size_t mask, 129 const BrotliEncoderParams* params, 130 BlockSplit* literal_split, 131 BlockSplit* insert_and_copy_split, 132 BlockSplit* dist_split) { 133 { 134 size_t literals_count = CountLiterals(cmds, num_commands); 135 uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count); 136 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return; 137 /* Create a continuous array of literals. */ 138 CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals); 139 /* Create the block split on the array of literals. 140 * Literal histograms can have alphabet size up to 256. 141 * Though, to accommodate context modeling, less than half of maximum size 142 * is allowed. */ 143 SplitByteVectorLiteral( 144 m, literals, literals_count, 145 kSymbolsPerLiteralHistogram, kMaxLiteralHistograms, 146 kLiteralStrideLength, kLiteralBlockSwitchCost, params, 147 literal_split); 148 if (BROTLI_IS_OOM(m)) return; 149 BROTLI_FREE(m, literals); 150 /* NB: this might be a good place for injecting extra splitting without 151 * increasing encoder complexity; however, output partition would be less 152 * optimal than one produced with forced splitting inside 153 * SplitByteVector (FindBlocks / ClusterBlocks). */ 154 } 155 156 { 157 /* Compute prefix codes for commands. */ 158 uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands); 159 size_t i; 160 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return; 161 for (i = 0; i < num_commands; ++i) { 162 insert_and_copy_codes[i] = cmds[i].cmd_prefix_; 163 } 164 /* Create the block split on the array of command prefixes. */ 165 SplitByteVectorCommand( 166 m, insert_and_copy_codes, num_commands, 167 kSymbolsPerCommandHistogram, kMaxCommandHistograms, 168 kCommandStrideLength, kCommandBlockSwitchCost, params, 169 insert_and_copy_split); 170 if (BROTLI_IS_OOM(m)) return; 171 /* TODO(eustas): reuse for distances? */ 172 BROTLI_FREE(m, insert_and_copy_codes); 173 } 174 175 { 176 /* Create a continuous array of distance prefixes. */ 177 uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands); 178 size_t j = 0; 179 size_t i; 180 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return; 181 for (i = 0; i < num_commands; ++i) { 182 const Command* cmd = &cmds[i]; 183 if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { 184 distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF; 185 } 186 } 187 /* Create the block split on the array of distance prefixes. */ 188 SplitByteVectorDistance( 189 m, distance_prefixes, j, 190 kSymbolsPerDistanceHistogram, kMaxCommandHistograms, 191 kDistanceStrideLength, kDistanceBlockSwitchCost, params, 192 dist_split); 193 if (BROTLI_IS_OOM(m)) return; 194 BROTLI_FREE(m, distance_prefixes); 195 } 196 } 197 198 #if defined(BROTLI_TEST) 199 size_t BrotliCountLiteralsForTest(const Command*, size_t); 200 size_t BrotliCountLiteralsForTest(const Command* cmds, size_t num_commands) { 201 return CountLiterals(cmds, num_commands); 202 } 203 void BrotliCopyLiteralsToByteArrayForTest( 204 const Command*, size_t, const uint8_t*, size_t, size_t, uint8_t*); 205 void BrotliCopyLiteralsToByteArrayForTest(const Command* cmds, 206 size_t num_commands, const uint8_t* data, size_t offset, size_t mask, 207 uint8_t* literals) { 208 CopyLiteralsToByteArray(cmds, num_commands, data, offset, mask, literals); 209 } 210 #endif 211 212 #if defined(__cplusplus) || defined(c_plusplus) 213 } /* extern "C" */ 214 #endif