tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

block_splitter.c (7330B)


      1 /* Copyright 2013 Google Inc. All Rights Reserved.
      2 
      3   Distributed under MIT license.
      4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      5 */
      6 
      7 /* Block split point selection utilities. */
      8 
      9 #include "block_splitter.h"
     10 
     11 #include "../common/platform.h"
     12 #include "bit_cost.h"
     13 #include "cluster.h"
     14 #include "command.h"
     15 #include "fast_log.h"
     16 #include "histogram.h"
     17 #include "memory.h"
     18 #include "quality.h"
     19 
     20 #if defined(__cplusplus) || defined(c_plusplus)
     21 extern "C" {
     22 #endif
     23 
     24 static const size_t kMaxLiteralHistograms = 100;
     25 static const size_t kMaxCommandHistograms = 50;
     26 static const double kLiteralBlockSwitchCost = 28.1;
     27 static const double kCommandBlockSwitchCost = 13.5;
     28 static const double kDistanceBlockSwitchCost = 14.6;
     29 static const size_t kLiteralStrideLength = 70;
     30 static const size_t kCommandStrideLength = 40;
     31 static const size_t kDistanceStrideLength = 40;
     32 static const size_t kSymbolsPerLiteralHistogram = 544;
     33 static const size_t kSymbolsPerCommandHistogram = 530;
     34 static const size_t kSymbolsPerDistanceHistogram = 544;
     35 static const size_t kMinLengthForBlockSplitting = 128;
     36 static const size_t kIterMulForRefining = 2;
     37 static const size_t kMinItersForRefining = 100;
     38 
     39 static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
     40  /* Count how many we have. */
     41  size_t total_length = 0;
     42  size_t i;
     43  for (i = 0; i < num_commands; ++i) {
     44    total_length += cmds[i].insert_len_;
     45  }
     46  return total_length;
     47 }
     48 
     49 static void CopyLiteralsToByteArray(const Command* cmds,
     50                                    const size_t num_commands,
     51                                    const uint8_t* data,
     52                                    const size_t offset,
     53                                    const size_t mask,
     54                                    uint8_t* literals) {
     55  size_t pos = 0;
     56  size_t from_pos = offset & mask;
     57  size_t i;
     58  for (i = 0; i < num_commands; ++i) {
     59    size_t insert_len = cmds[i].insert_len_;
     60    if (from_pos + insert_len > mask) {
     61      size_t head_size = mask + 1 - from_pos;
     62      memcpy(literals + pos, data + from_pos, head_size);
     63      from_pos = 0;
     64      pos += head_size;
     65      insert_len -= head_size;
     66    }
     67    if (insert_len > 0) {
     68      memcpy(literals + pos, data + from_pos, insert_len);
     69      pos += insert_len;
     70    }
     71    from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
     72  }
     73 }
     74 
     75 static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
     76  /* Initial seed should be 7. In this case, loop length is (1 << 29). */
     77  *seed *= 16807U;
     78  return *seed;
     79 }
     80 
     81 static BROTLI_INLINE double BitCost(size_t count) {
     82  return count == 0 ? -2.0 : FastLog2(count);
     83 }
     84 
     85 #define HISTOGRAMS_PER_BATCH 64
     86 #define CLUSTERS_PER_BATCH 16
     87 
     88 #define FN(X) X ## Literal
     89 #define DataType uint8_t
     90 /* NOLINTNEXTLINE(build/include) */
     91 #include "block_splitter_inc.h"
     92 #undef DataType
     93 #undef FN
     94 
     95 #define FN(X) X ## Command
     96 #define DataType uint16_t
     97 /* NOLINTNEXTLINE(build/include) */
     98 #include "block_splitter_inc.h"
     99 #undef FN
    100 
    101 #define FN(X) X ## Distance
    102 /* NOLINTNEXTLINE(build/include) */
    103 #include "block_splitter_inc.h"
    104 #undef DataType
    105 #undef FN
    106 
    107 void BrotliInitBlockSplit(BlockSplit* self) {
    108  self->num_types = 0;
    109  self->num_blocks = 0;
    110  self->types = 0;
    111  self->lengths = 0;
    112  self->types_alloc_size = 0;
    113  self->lengths_alloc_size = 0;
    114 }
    115 
    116 void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
    117  BROTLI_FREE(m, self->types);
    118  BROTLI_FREE(m, self->lengths);
    119 }
    120 
    121 /* Extracts literals, command distance and prefix codes, then applies
    122 * SplitByteVector to create partitioning. */
    123 void BrotliSplitBlock(MemoryManager* m,
    124                      const Command* cmds,
    125                      const size_t num_commands,
    126                      const uint8_t* data,
    127                      const size_t pos,
    128                      const size_t mask,
    129                      const BrotliEncoderParams* params,
    130                      BlockSplit* literal_split,
    131                      BlockSplit* insert_and_copy_split,
    132                      BlockSplit* dist_split) {
    133  {
    134    size_t literals_count = CountLiterals(cmds, num_commands);
    135    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
    136    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return;
    137    /* Create a continuous array of literals. */
    138    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
    139    /* Create the block split on the array of literals.
    140     * Literal histograms can have alphabet size up to 256.
    141     * Though, to accommodate context modeling, less than half of maximum size
    142     * is allowed. */
    143    SplitByteVectorLiteral(
    144        m, literals, literals_count,
    145        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
    146        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
    147        literal_split);
    148    if (BROTLI_IS_OOM(m)) return;
    149    BROTLI_FREE(m, literals);
    150    /* NB: this might be a good place for injecting extra splitting without
    151     *     increasing encoder complexity; however, output partition would be less
    152     *     optimal than one produced with forced splitting inside
    153     *     SplitByteVector (FindBlocks / ClusterBlocks). */
    154  }
    155 
    156  {
    157    /* Compute prefix codes for commands. */
    158    uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
    159    size_t i;
    160    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return;
    161    for (i = 0; i < num_commands; ++i) {
    162      insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
    163    }
    164    /* Create the block split on the array of command prefixes. */
    165    SplitByteVectorCommand(
    166        m, insert_and_copy_codes, num_commands,
    167        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
    168        kCommandStrideLength, kCommandBlockSwitchCost, params,
    169        insert_and_copy_split);
    170    if (BROTLI_IS_OOM(m)) return;
    171    /* TODO(eustas): reuse for distances? */
    172    BROTLI_FREE(m, insert_and_copy_codes);
    173  }
    174 
    175  {
    176    /* Create a continuous array of distance prefixes. */
    177    uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
    178    size_t j = 0;
    179    size_t i;
    180    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return;
    181    for (i = 0; i < num_commands; ++i) {
    182      const Command* cmd = &cmds[i];
    183      if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
    184        distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;
    185      }
    186    }
    187    /* Create the block split on the array of distance prefixes. */
    188    SplitByteVectorDistance(
    189        m, distance_prefixes, j,
    190        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
    191        kDistanceStrideLength, kDistanceBlockSwitchCost, params,
    192        dist_split);
    193    if (BROTLI_IS_OOM(m)) return;
    194    BROTLI_FREE(m, distance_prefixes);
    195  }
    196 }
    197 
    198 #if defined(BROTLI_TEST)
    199 size_t BrotliCountLiteralsForTest(const Command*, size_t);
    200 size_t BrotliCountLiteralsForTest(const Command* cmds, size_t num_commands) {
    201  return CountLiterals(cmds, num_commands);
    202 }
    203 void BrotliCopyLiteralsToByteArrayForTest(
    204    const Command*, size_t, const uint8_t*, size_t, size_t, uint8_t*);
    205 void BrotliCopyLiteralsToByteArrayForTest(const Command* cmds,
    206    size_t num_commands, const uint8_t* data, size_t offset, size_t mask,
    207    uint8_t* literals) {
    208  CopyLiteralsToByteArray(cmds, num_commands, data, offset, mask, literals);
    209 }
    210 #endif
    211 
    212 #if defined(__cplusplus) || defined(c_plusplus)
    213 }  /* extern "C" */
    214 #endif