tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

entenc.c (11333B)


      1 /*
      2 * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <stdlib.h>
     13 #include <string.h>
     14 #include <math.h>
     15 #include <assert.h>
     16 #include "aom_dsp/entenc.h"
     17 #include "aom_dsp/prob.h"
     18 
     19 #if OD_MEASURE_EC_OVERHEAD
     20 #if !defined(M_LOG2E)
     21 #define M_LOG2E (1.4426950408889634073599246810019)
     22 #endif
     23 #define OD_LOG2(x) (M_LOG2E * log(x))
     24 #endif  // OD_MEASURE_EC_OVERHEAD
     25 
     26 /*A range encoder.
     27  See entdec.c and the references for implementation details \cite{Mar79,MNW98}.
     28 
     29  @INPROCEEDINGS{Mar79,
     30   author="Martin, G.N.N.",
     31   title="Range encoding: an algorithm for removing redundancy from a digitised
     32    message",
     33   booktitle="Video \& Data Recording Conference",
     34   year=1979,
     35   address="Southampton",
     36   month=Jul,
     37   URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
     38  }
     39  @ARTICLE{MNW98,
     40   author="Alistair Moffat and Radford Neal and Ian H. Witten",
     41   title="Arithmetic Coding Revisited",
     42   journal="{ACM} Transactions on Information Systems",
     43   year=1998,
     44   volume=16,
     45   number=3,
     46   pages="256--294",
     47   month=Jul,
     48   URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
     49  }*/
     50 
     51 /*Takes updated low and range values, renormalizes them so that
     52   32768 <= rng < 65536 (flushing bytes from low to the output buffer if
     53   necessary), and stores them back in the encoder context.
     54  low: The new value of low.
     55  rng: The new value of the range.*/
     56 static void od_ec_enc_normalize(od_ec_enc *enc, od_ec_enc_window low,
     57                                unsigned rng) {
     58  int d;
     59  int c;
     60  int s;
     61  if (enc->error) return;
     62  c = enc->cnt;
     63  assert(rng <= 65535U);
     64  /*The number of leading zeros in the 16-bit binary representation of rng.*/
     65  d = 16 - OD_ILOG_NZ(rng);
     66  s = c + d;
     67 
     68  /* We flush every time "low" cannot safely and efficiently accommodate any
     69     more data. Overall, c must not exceed 63 at the time of byte flush out. To
     70     facilitate this, "s" cannot exceed 56-bits because we have to keep 1 byte
     71     for carry. Also, we need to subtract 16 because we want to keep room for
     72     the next symbol worth "d"-bits (max 15). An alternate condition would be if
     73     (e < d), where e = number of leading zeros in "low", indicating there is
     74     not enough rooom to accommodate "rng" worth of "d"-bits in "low". However,
     75     this approach needs additional computations: (i) compute "e", (ii) push
     76     the leading 0x00's as a special case.
     77  */
     78  if (s >= 40) {  // 56 - 16
     79    unsigned char *out = enc->buf;
     80    uint32_t storage = enc->storage;
     81    uint32_t offs = enc->offs;
     82    if (offs + 8 > storage) {
     83      storage = 2 * storage + 8;
     84      out = (unsigned char *)realloc(out, sizeof(*out) * storage);
     85      if (out == NULL) {
     86        enc->error = -1;
     87        return;
     88      }
     89      enc->buf = out;
     90      enc->storage = storage;
     91    }
     92    // Need to add 1 byte here since enc->cnt always counts 1 byte less
     93    // (enc->cnt = -9) to ensure correct operation
     94    uint8_t num_bytes_ready = (s >> 3) + 1;
     95 
     96    // Update "c" to contain the number of non-ready bits in "low". Since "low"
     97    // has 64-bit capacity, we need to add the (64 - 40) cushion bits and take
     98    // off the number of ready bits.
     99    c += 24 - (num_bytes_ready << 3);
    100 
    101    // Prepare "output" and update "low"
    102    uint64_t output = low >> c;
    103    low = low & (((uint64_t)1 << c) - 1);
    104 
    105    // Prepare data and carry mask
    106    uint64_t mask = (uint64_t)1 << (num_bytes_ready << 3);
    107    uint64_t carry = output & mask;
    108 
    109    mask = mask - 0x01;
    110    output = output & mask;
    111 
    112    // Write data in a single operation
    113    write_enc_data_to_out_buf(out, offs, output, carry, &enc->offs,
    114                              num_bytes_ready);
    115 
    116    // Update state of the encoder: enc->cnt to contain the number of residual
    117    // bits
    118    s = c + d - 24;
    119  }
    120  enc->low = low << d;
    121  enc->rng = rng << d;
    122  enc->cnt = s;
    123 }
    124 
    125 /*Initializes the encoder.
    126  size: The initial size of the buffer, in bytes.*/
    127 void od_ec_enc_init(od_ec_enc *enc, uint32_t size) {
    128  od_ec_enc_reset(enc);
    129  enc->buf = (unsigned char *)malloc(sizeof(*enc->buf) * size);
    130  enc->storage = size;
    131  if (size > 0 && enc->buf == NULL) {
    132    enc->storage = 0;
    133    enc->error = -1;
    134  }
    135 }
    136 
    137 /*Reinitializes the encoder.*/
    138 void od_ec_enc_reset(od_ec_enc *enc) {
    139  enc->offs = 0;
    140  enc->low = 0;
    141  enc->rng = 0x8000;
    142  /*This is initialized to -9 so that it crosses zero after we've accumulated
    143     one byte + one carry bit.*/
    144  enc->cnt = -9;
    145  enc->error = 0;
    146 #if OD_MEASURE_EC_OVERHEAD
    147  enc->entropy = 0;
    148  enc->nb_symbols = 0;
    149 #endif
    150 }
    151 
    152 /*Frees the buffers used by the encoder.*/
    153 void od_ec_enc_clear(od_ec_enc *enc) { free(enc->buf); }
    154 
    155 /*Encodes a symbol given its frequency in Q15.
    156  fl: CDF_PROB_TOP minus the cumulative frequency of all symbols that come
    157  before the one to be encoded.
    158  fh: CDF_PROB_TOP minus the cumulative frequency of all symbols up to and
    159  including the one to be encoded.*/
    160 static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh, int s,
    161                             int nsyms) {
    162  od_ec_enc_window l;
    163  unsigned r;
    164  unsigned u;
    165  unsigned v;
    166  l = enc->low;
    167  r = enc->rng;
    168  assert(32768U <= r);
    169  assert(fh <= fl);
    170  assert(fl <= 32768U);
    171  assert(7 - EC_PROB_SHIFT >= 0);
    172  const int N = nsyms - 1;
    173  if (fl < CDF_PROB_TOP) {
    174    u = ((r >> 8) * (uint32_t)(fl >> EC_PROB_SHIFT) >> (7 - EC_PROB_SHIFT)) +
    175        EC_MIN_PROB * (N - (s - 1));
    176    v = ((r >> 8) * (uint32_t)(fh >> EC_PROB_SHIFT) >> (7 - EC_PROB_SHIFT)) +
    177        EC_MIN_PROB * (N - (s + 0));
    178    l += r - u;
    179    r = u - v;
    180  } else {
    181    r -= ((r >> 8) * (uint32_t)(fh >> EC_PROB_SHIFT) >> (7 - EC_PROB_SHIFT)) +
    182         EC_MIN_PROB * (N - (s + 0));
    183  }
    184  od_ec_enc_normalize(enc, l, r);
    185 #if OD_MEASURE_EC_OVERHEAD
    186  enc->entropy -= OD_LOG2((double)(OD_ICDF(fh) - OD_ICDF(fl)) / CDF_PROB_TOP.);
    187  enc->nb_symbols++;
    188 #endif
    189 }
    190 
    191 /*Encode a single binary value.
    192  val: The value to encode (0 or 1).
    193  f: The probability that the val is one, scaled by 32768.*/
    194 void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned f) {
    195  od_ec_enc_window l;
    196  unsigned r;
    197  unsigned v;
    198  assert(0 < f);
    199  assert(f < 32768U);
    200  l = enc->low;
    201  r = enc->rng;
    202  assert(32768U <= r);
    203  v = ((r >> 8) * (uint32_t)(f >> EC_PROB_SHIFT) >> (7 - EC_PROB_SHIFT));
    204  v += EC_MIN_PROB;
    205  if (val) l += r - v;
    206  r = val ? v : r - v;
    207  od_ec_enc_normalize(enc, l, r);
    208 #if OD_MEASURE_EC_OVERHEAD
    209  enc->entropy -= OD_LOG2((double)(val ? f : (32768 - f)) / 32768.);
    210  enc->nb_symbols++;
    211 #endif
    212 }
    213 
    214 /*Encodes a symbol given a cumulative distribution function (CDF) table in Q15.
    215  s: The index of the symbol to encode.
    216  icdf: 32768 minus the CDF, such that symbol s falls in the range
    217         [s > 0 ? (32768 - icdf[s - 1]) : 0, 32768 - icdf[s]).
    218        The values must be monotonically decreasing, and icdf[nsyms - 1] must
    219         be 0.
    220  nsyms: The number of symbols in the alphabet.
    221         This should be at most 16.*/
    222 void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, const uint16_t *icdf,
    223                          int nsyms) {
    224  (void)nsyms;
    225  assert(s >= 0);
    226  assert(s < nsyms);
    227  assert(icdf[nsyms - 1] == OD_ICDF(CDF_PROB_TOP));
    228  od_ec_encode_q15(enc, s > 0 ? icdf[s - 1] : OD_ICDF(0), icdf[s], s, nsyms);
    229 }
    230 
    231 #if OD_MEASURE_EC_OVERHEAD
    232 #include <stdio.h>
    233 #endif
    234 
    235 /*Indicates that there are no more symbols to encode.
    236  All remaining output bytes are flushed to the output buffer.
    237  od_ec_enc_reset() should be called before using the encoder again.
    238  bytes: Returns the size of the encoded data in the returned buffer.
    239  Return: A pointer to the start of the final buffer, or NULL if there was an
    240           encoding error.*/
    241 unsigned char *od_ec_enc_done(od_ec_enc *enc, uint32_t *nbytes) {
    242  unsigned char *out;
    243  uint32_t storage;
    244  uint32_t offs;
    245  od_ec_enc_window m;
    246  od_ec_enc_window e;
    247  od_ec_enc_window l;
    248  int c;
    249  int s;
    250  if (enc->error) return NULL;
    251 #if OD_MEASURE_EC_OVERHEAD
    252  {
    253    uint32_t tell;
    254    /* Don't count the 1 bit we lose to raw bits as overhead. */
    255    tell = od_ec_enc_tell(enc) - 1;
    256    fprintf(stderr, "overhead: %f%%\n",
    257            100 * (tell - enc->entropy) / enc->entropy);
    258    fprintf(stderr, "efficiency: %f bits/symbol\n",
    259            (double)tell / enc->nb_symbols);
    260  }
    261 #endif
    262 
    263  l = enc->low;
    264  c = enc->cnt;
    265  s = 10;
    266  m = 0x3FFF;
    267  e = ((l + m) & ~m) | (m + 1);
    268  s += c;
    269  offs = enc->offs;
    270 
    271  /*Make sure there's enough room for the entropy-coded bits.*/
    272  out = enc->buf;
    273  storage = enc->storage;
    274  const int s_bits = (s + 7) >> 3;
    275  int b = OD_MAXI(s_bits, 0);
    276  if (offs + b > storage) {
    277    storage = offs + b;
    278    out = (unsigned char *)realloc(out, sizeof(*out) * storage);
    279    if (out == NULL) {
    280      enc->error = -1;
    281      return NULL;
    282    }
    283    enc->buf = out;
    284    enc->storage = storage;
    285  }
    286 
    287  /*We output the minimum number of bits that ensures that the symbols encoded
    288     thus far will be decoded correctly regardless of the bits that follow.*/
    289  if (s > 0) {
    290    uint64_t n;
    291    n = ((uint64_t)1 << (c + 16)) - 1;
    292    do {
    293      assert(offs < storage);
    294      uint16_t val = (uint16_t)(e >> (c + 16));
    295      out[offs] = (unsigned char)(val & 0x00FF);
    296      if (val & 0x0100) {
    297        assert(offs > 0);
    298        propagate_carry_bwd(out, offs - 1);
    299      }
    300      offs++;
    301 
    302      e &= n;
    303      s -= 8;
    304      c -= 8;
    305      n >>= 8;
    306    } while (s > 0);
    307  }
    308  *nbytes = offs;
    309 
    310  return out;
    311 }
    312 
    313 /*Returns the number of bits "used" by the encoded symbols so far.
    314  This same number can be computed in either the encoder or the decoder, and is
    315   suitable for making coding decisions.
    316  Warning: The value returned by this function can decrease compared to an
    317   earlier call, even after encoding more data, if there is an encoding error
    318   (i.e., a failure to allocate enough space for the output buffer).
    319  Return: The number of bits.
    320          This will always be slightly larger than the exact value (e.g., all
    321           rounding error is in the positive direction).*/
    322 int od_ec_enc_tell(const od_ec_enc *enc) {
    323  /*The 10 here counteracts the offset of -9 baked into cnt, and adds 1 extra
    324     bit, which we reserve for terminating the stream.*/
    325  return (enc->cnt + 10) + enc->offs * 8;
    326 }
    327 
    328 /*Returns the number of bits "used" by the encoded symbols so far.
    329  This same number can be computed in either the encoder or the decoder, and is
    330   suitable for making coding decisions.
    331  Warning: The value returned by this function can decrease compared to an
    332   earlier call, even after encoding more data, if there is an encoding error
    333   (i.e., a failure to allocate enough space for the output buffer).
    334  Return: The number of bits scaled by 2**OD_BITRES.
    335          This will always be slightly larger than the exact value (e.g., all
    336           rounding error is in the positive direction).*/
    337 uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc) {
    338  return od_ec_tell_frac(od_ec_enc_tell(enc), enc->rng);
    339 }