tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

backward_references_enc.c (38883B)


      1 // Copyright 2012 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Author: Jyrki Alakuijala (jyrki@google.com)
     11 //
     12 
     13 #include "src/enc/backward_references_enc.h"
     14 
     15 #include <assert.h>
     16 #include <string.h>
     17 
     18 #include "src/dsp/cpu.h"
     19 #include "src/dsp/lossless.h"
     20 #include "src/dsp/lossless_common.h"
     21 #include "src/enc/histogram_enc.h"
     22 #include "src/enc/vp8i_enc.h"
     23 #include "src/utils/color_cache_utils.h"
     24 #include "src/utils/utils.h"
     25 #include "src/webp/encode.h"
     26 #include "src/webp/format_constants.h"
     27 #include "src/webp/types.h"
     28 
     29 #define MIN_BLOCK_SIZE 256  // minimum block size for backward references
     30 
     31 // 1M window (4M bytes) minus 120 special codes for short distances.
     32 #define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120)
     33 
     34 // Minimum number of pixels for which it is cheaper to encode a
     35 // distance + length instead of each pixel as a literal.
     36 #define MIN_LENGTH 4
     37 
     38 // -----------------------------------------------------------------------------
     39 
     40 static const uint8_t plane_to_code_lut[128] = {
     41 96,   73,  55,  39,  23,  13,   5,  1,  255, 255, 255, 255, 255, 255, 255, 255,
     42 101,  78,  58,  42,  26,  16,   8,  2,    0,   3,  9,   17,  27,  43,  59,  79,
     43 102,  86,  62,  46,  32,  20,  10,  6,    4,   7,  11,  21,  33,  47,  63,  87,
     44 105,  90,  70,  52,  37,  28,  18,  14,  12,  15,  19,  29,  38,  53,  71,  91,
     45 110,  99,  82,  66,  48,  35,  30,  24,  22,  25,  31,  36,  49,  67,  83, 100,
     46 115, 108,  94,  76,  64,  50,  44,  40,  34,  41,  45,  51,  65,  77,  95, 109,
     47 118, 113, 103,  92,  80,  68,  60,  56,  54,  57,  61,  69,  81,  93, 104, 114,
     48 119, 116, 111, 106,  97,  88,  84,  74,  72,  75,  85,  89,  98, 107, 112, 117
     49 };
     50 
     51 extern int VP8LDistanceToPlaneCode(int xsize, int dist);
     52 int VP8LDistanceToPlaneCode(int xsize, int dist) {
     53  const int yoffset = dist / xsize;
     54  const int xoffset = dist - yoffset * xsize;
     55  if (xoffset <= 8 && yoffset < 8) {
     56    return plane_to_code_lut[yoffset * 16 + 8 - xoffset] + 1;
     57  } else if (xoffset > xsize - 8 && yoffset < 7) {
     58    return plane_to_code_lut[(yoffset + 1) * 16 + 8 + (xsize - xoffset)] + 1;
     59  }
     60  return dist + 120;
     61 }
     62 
     63 // Returns the exact index where array1 and array2 are different. For an index
     64 // inferior or equal to best_len_match, the return value just has to be strictly
     65 // inferior to best_len_match. The current behavior is to return 0 if this index
     66 // is best_len_match, and the index itself otherwise.
     67 // If no two elements are the same, it returns max_limit.
     68 static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
     69                                       const uint32_t* const array2,
     70                                       int best_len_match, int max_limit) {
     71  // Before 'expensive' linear match, check if the two arrays match at the
     72  // current best length index.
     73  if (array1[best_len_match] != array2[best_len_match]) return 0;
     74 
     75  return VP8LVectorMismatch(array1, array2, max_limit);
     76 }
     77 
     78 // -----------------------------------------------------------------------------
     79 //  VP8LBackwardRefs
     80 
     81 struct PixOrCopyBlock {
     82  PixOrCopyBlock* next;   // next block (or NULL)
     83  PixOrCopy* start;       // data start
     84  int size;               // currently used size
     85 };
     86 
     87 extern void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs);
     88 void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) {
     89  assert(refs != NULL);
     90  if (refs->tail != NULL) {
     91    *refs->tail = refs->free_blocks;  // recycle all blocks at once
     92  }
     93  refs->free_blocks = refs->refs;
     94  refs->tail = &refs->refs;
     95  refs->last_block = NULL;
     96  refs->refs = NULL;
     97 }
     98 
     99 void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
    100  assert(refs != NULL);
    101  VP8LClearBackwardRefs(refs);
    102  while (refs->free_blocks != NULL) {
    103    PixOrCopyBlock* const next = refs->free_blocks->next;
    104    WebPSafeFree(refs->free_blocks);
    105    refs->free_blocks = next;
    106  }
    107 }
    108 
    109 // Swaps the content of two VP8LBackwardRefs.
    110 static void BackwardRefsSwap(VP8LBackwardRefs* const refs1,
    111                             VP8LBackwardRefs* const refs2) {
    112  const int point_to_refs1 =
    113      (refs1->tail != NULL && refs1->tail == &refs1->refs);
    114  const int point_to_refs2 =
    115      (refs2->tail != NULL && refs2->tail == &refs2->refs);
    116  const VP8LBackwardRefs tmp = *refs1;
    117  *refs1 = *refs2;
    118  *refs2 = tmp;
    119  if (point_to_refs2) refs1->tail = &refs1->refs;
    120  if (point_to_refs1) refs2->tail = &refs2->refs;
    121 }
    122 
    123 void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
    124  assert(refs != NULL);
    125  memset(refs, 0, sizeof(*refs));
    126  refs->tail = &refs->refs;
    127  refs->block_size =
    128      (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size;
    129 }
    130 
    131 VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
    132  VP8LRefsCursor c;
    133  c.cur_block = refs->refs;
    134  if (refs->refs != NULL) {
    135    c.cur_pos = c.cur_block->start;
    136    c.last_pos = c.cur_pos + c.cur_block->size;
    137  } else {
    138    c.cur_pos = NULL;
    139    c.last_pos = NULL;
    140  }
    141  return c;
    142 }
    143 
    144 void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
    145  PixOrCopyBlock* const b = c->cur_block->next;
    146  c->cur_pos = (b == NULL) ? NULL : b->start;
    147  c->last_pos = (b == NULL) ? NULL : b->start + b->size;
    148  c->cur_block = b;
    149 }
    150 
    151 // Create a new block, either from the free list or allocated
    152 static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
    153  PixOrCopyBlock* b = refs->free_blocks;
    154  if (b == NULL) {   // allocate new memory chunk
    155    const size_t total_size =
    156        sizeof(*b) + refs->block_size * sizeof(*b->start);
    157    b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
    158    if (b == NULL) {
    159      refs->error |= 1;
    160      return NULL;
    161    }
    162    b->start = (PixOrCopy*)((uint8_t*)b + sizeof(*b));  // not always aligned
    163  } else {  // recycle from free-list
    164    refs->free_blocks = b->next;
    165  }
    166  *refs->tail = b;
    167  refs->tail = &b->next;
    168  refs->last_block = b;
    169  b->next = NULL;
    170  b->size = 0;
    171  return b;
    172 }
    173 
    174 // Return 1 on success, 0 on error.
    175 static int BackwardRefsClone(const VP8LBackwardRefs* const from,
    176                             VP8LBackwardRefs* const to) {
    177  const PixOrCopyBlock* block_from = from->refs;
    178  VP8LClearBackwardRefs(to);
    179  while (block_from != NULL) {
    180    PixOrCopyBlock* const block_to = BackwardRefsNewBlock(to);
    181    if (block_to == NULL) return 0;
    182    memcpy(block_to->start, block_from->start,
    183           block_from->size * sizeof(PixOrCopy));
    184    block_to->size = block_from->size;
    185    block_from = block_from->next;
    186  }
    187  return 1;
    188 }
    189 
    190 extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
    191                                      const PixOrCopy v);
    192 void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
    193                               const PixOrCopy v) {
    194  PixOrCopyBlock* b = refs->last_block;
    195  if (b == NULL || b->size == refs->block_size) {
    196    b = BackwardRefsNewBlock(refs);
    197    if (b == NULL) return;   // refs->error is set
    198  }
    199  b->start[b->size++] = v;
    200 }
    201 
    202 // -----------------------------------------------------------------------------
    203 // Hash chains
    204 
    205 int VP8LHashChainInit(VP8LHashChain* const p, int size) {
    206  assert(p->size == 0);
    207  assert(p->offset_length == NULL);
    208  assert(size > 0);
    209  p->offset_length =
    210      (uint32_t*)WebPSafeMalloc(size, sizeof(*p->offset_length));
    211  if (p->offset_length == NULL) return 0;
    212  p->size = size;
    213 
    214  return 1;
    215 }
    216 
    217 void VP8LHashChainClear(VP8LHashChain* const p) {
    218  assert(p != NULL);
    219  WebPSafeFree(p->offset_length);
    220 
    221  p->size = 0;
    222  p->offset_length = NULL;
    223 }
    224 
    225 // -----------------------------------------------------------------------------
    226 
    227 static const uint32_t kHashMultiplierHi = 0xc6a4a793u;
    228 static const uint32_t kHashMultiplierLo = 0x5bd1e996u;
    229 
    230 static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
    231 uint32_t GetPixPairHash64(const uint32_t* const argb) {
    232  uint32_t key;
    233  key  = argb[1] * kHashMultiplierHi;
    234  key += argb[0] * kHashMultiplierLo;
    235  key = key >> (32 - HASH_BITS);
    236  return key;
    237 }
    238 
    239 // Returns the maximum number of hash chain lookups to do for a
    240 // given compression quality. Return value in range [8, 86].
    241 static int GetMaxItersForQuality(int quality) {
    242  return 8 + (quality * quality) / 128;
    243 }
    244 
    245 static int GetWindowSizeForHashChain(int quality, int xsize) {
    246  const int max_window_size = (quality > 75) ? WINDOW_SIZE
    247                            : (quality > 50) ? (xsize << 8)
    248                            : (quality > 25) ? (xsize << 6)
    249                            : (xsize << 4);
    250  assert(xsize > 0);
    251  return (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE : max_window_size;
    252 }
    253 
    254 static WEBP_INLINE int MaxFindCopyLength(int len) {
    255  return (len < MAX_LENGTH) ? len : MAX_LENGTH;
    256 }
    257 
    258 int VP8LHashChainFill(VP8LHashChain* const p, int quality,
    259                      const uint32_t* const argb, int xsize, int ysize,
    260                      int low_effort, const WebPPicture* const pic,
    261                      int percent_range, int* const percent) {
    262  const int size = xsize * ysize;
    263  const int iter_max = GetMaxItersForQuality(quality);
    264  const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize);
    265  int remaining_percent = percent_range;
    266  int percent_start = *percent;
    267  int pos;
    268  int argb_comp;
    269  uint32_t base_position;
    270  int32_t* hash_to_first_index;
    271  // Temporarily use the p->offset_length as a hash chain.
    272  int32_t* chain = (int32_t*)p->offset_length;
    273  assert(size > 0);
    274  assert(p->size != 0);
    275  assert(p->offset_length != NULL);
    276 
    277  if (size <= 2) {
    278    p->offset_length[0] = p->offset_length[size - 1] = 0;
    279    return 1;
    280  }
    281 
    282  hash_to_first_index =
    283      (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index));
    284  if (hash_to_first_index == NULL) {
    285    return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
    286  }
    287 
    288  percent_range = remaining_percent / 2;
    289  remaining_percent -= percent_range;
    290 
    291  // Set the int32_t array to -1.
    292  memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index));
    293  // Fill the chain linking pixels with the same hash.
    294  argb_comp = (argb[0] == argb[1]);
    295  for (pos = 0; pos < size - 2;) {
    296    uint32_t hash_code;
    297    const int argb_comp_next = (argb[pos + 1] == argb[pos + 2]);
    298    if (argb_comp && argb_comp_next) {
    299      // Consecutive pixels with the same color will share the same hash.
    300      // We therefore use a different hash: the color and its repetition
    301      // length.
    302      uint32_t tmp[2];
    303      uint32_t len = 1;
    304      tmp[0] = argb[pos];
    305      // Figure out how far the pixels are the same.
    306      // The last pixel has a different 64 bit hash, as its next pixel does
    307      // not have the same color, so we just need to get to the last pixel equal
    308      // to its follower.
    309      while (pos + (int)len + 2 < size && argb[pos + len + 2] == argb[pos]) {
    310        ++len;
    311      }
    312      if (len > MAX_LENGTH) {
    313        // Skip the pixels that match for distance=1 and length>MAX_LENGTH
    314        // because they are linked to their predecessor and we automatically
    315        // check that in the main for loop below. Skipping means setting no
    316        // predecessor in the chain, hence -1.
    317        memset(chain + pos, 0xff, (len - MAX_LENGTH) * sizeof(*chain));
    318        pos += len - MAX_LENGTH;
    319        len = MAX_LENGTH;
    320      }
    321      // Process the rest of the hash chain.
    322      while (len) {
    323        tmp[1] = len--;
    324        hash_code = GetPixPairHash64(tmp);
    325        chain[pos] = hash_to_first_index[hash_code];
    326        hash_to_first_index[hash_code] = pos++;
    327      }
    328      argb_comp = 0;
    329    } else {
    330      // Just move one pixel forward.
    331      hash_code = GetPixPairHash64(argb + pos);
    332      chain[pos] = hash_to_first_index[hash_code];
    333      hash_to_first_index[hash_code] = pos++;
    334      argb_comp = argb_comp_next;
    335    }
    336 
    337    if (!WebPReportProgress(
    338            pic, percent_start + percent_range * pos / (size - 2), percent)) {
    339      WebPSafeFree(hash_to_first_index);
    340      return 0;
    341    }
    342  }
    343  // Process the penultimate pixel.
    344  chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)];
    345 
    346  WebPSafeFree(hash_to_first_index);
    347 
    348  percent_start += percent_range;
    349  if (!WebPReportProgress(pic, percent_start, percent)) return 0;
    350  percent_range = remaining_percent;
    351 
    352  // Find the best match interval at each pixel, defined by an offset to the
    353  // pixel and a length. The right-most pixel cannot match anything to the right
    354  // (hence a best length of 0) and the left-most pixel nothing to the left
    355  // (hence an offset of 0).
    356  assert(size > 2);
    357  p->offset_length[0] = p->offset_length[size - 1] = 0;
    358  for (base_position = size - 2; base_position > 0;) {
    359    const int max_len = MaxFindCopyLength(size - 1 - base_position);
    360    const uint32_t* const argb_start = argb + base_position;
    361    int iter = iter_max;
    362    int best_length = 0;
    363    uint32_t best_distance = 0;
    364    uint32_t best_argb;
    365    const int min_pos =
    366        (base_position > window_size) ? base_position - window_size : 0;
    367    const int length_max = (max_len < 256) ? max_len : 256;
    368    uint32_t max_base_position;
    369 
    370    pos = chain[base_position];
    371    if (!low_effort) {
    372      int curr_length;
    373      // Heuristic: use the comparison with the above line as an initialization.
    374      if (base_position >= (uint32_t)xsize) {
    375        curr_length = FindMatchLength(argb_start - xsize, argb_start,
    376                                      best_length, max_len);
    377        if (curr_length > best_length) {
    378          best_length = curr_length;
    379          best_distance = xsize;
    380        }
    381        --iter;
    382      }
    383      // Heuristic: compare to the previous pixel.
    384      curr_length =
    385          FindMatchLength(argb_start - 1, argb_start, best_length, max_len);
    386      if (curr_length > best_length) {
    387        best_length = curr_length;
    388        best_distance = 1;
    389      }
    390      --iter;
    391      // Skip the for loop if we already have the maximum.
    392      if (best_length == MAX_LENGTH) pos = min_pos - 1;
    393    }
    394    best_argb = argb_start[best_length];
    395 
    396    for (; pos >= min_pos && --iter; pos = chain[pos]) {
    397      int curr_length;
    398      assert(base_position > (uint32_t)pos);
    399 
    400      if (argb[pos + best_length] != best_argb) continue;
    401 
    402      curr_length = VP8LVectorMismatch(argb + pos, argb_start, max_len);
    403      if (best_length < curr_length) {
    404        best_length = curr_length;
    405        best_distance = base_position - pos;
    406        best_argb = argb_start[best_length];
    407        // Stop if we have reached a good enough length.
    408        if (best_length >= length_max) break;
    409      }
    410    }
    411    // We have the best match but in case the two intervals continue matching
    412    // to the left, we have the best matches for the left-extended pixels.
    413    max_base_position = base_position;
    414    while (1) {
    415      assert(best_length <= MAX_LENGTH);
    416      assert(best_distance <= WINDOW_SIZE);
    417      p->offset_length[base_position] =
    418          (best_distance << MAX_LENGTH_BITS) | (uint32_t)best_length;
    419      --base_position;
    420      // Stop if we don't have a match or if we are out of bounds.
    421      if (best_distance == 0 || base_position == 0) break;
    422      // Stop if we cannot extend the matching intervals to the left.
    423      if (base_position < best_distance ||
    424          argb[base_position - best_distance] != argb[base_position]) {
    425        break;
    426      }
    427      // Stop if we are matching at its limit because there could be a closer
    428      // matching interval with the same maximum length. Then again, if the
    429      // matching interval is as close as possible (best_distance == 1), we will
    430      // never find anything better so let's continue.
    431      if (best_length == MAX_LENGTH && best_distance != 1 &&
    432          base_position + MAX_LENGTH < max_base_position) {
    433        break;
    434      }
    435      if (best_length < MAX_LENGTH) {
    436        ++best_length;
    437        max_base_position = base_position;
    438      }
    439    }
    440 
    441    if (!WebPReportProgress(pic,
    442                            percent_start + percent_range *
    443                                                (size - 2 - base_position) /
    444                                                (size - 2),
    445                            percent)) {
    446      return 0;
    447    }
    448  }
    449 
    450  return WebPReportProgress(pic, percent_start + percent_range, percent);
    451 }
    452 
    453 static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,
    454                                         VP8LColorCache* const hashers,
    455                                         VP8LBackwardRefs* const refs) {
    456  PixOrCopy v;
    457  if (use_color_cache) {
    458    const uint32_t key = VP8LColorCacheGetIndex(hashers, pixel);
    459    if (VP8LColorCacheLookup(hashers, key) == pixel) {
    460      v = PixOrCopyCreateCacheIdx(key);
    461    } else {
    462      v = PixOrCopyCreateLiteral(pixel);
    463      VP8LColorCacheSet(hashers, key, pixel);
    464    }
    465  } else {
    466    v = PixOrCopyCreateLiteral(pixel);
    467  }
    468  VP8LBackwardRefsCursorAdd(refs, v);
    469 }
    470 
    471 static int BackwardReferencesRle(int xsize, int ysize,
    472                                 const uint32_t* const argb,
    473                                 int cache_bits, VP8LBackwardRefs* const refs) {
    474  const int pix_count = xsize * ysize;
    475  int i, k;
    476  const int use_color_cache = (cache_bits > 0);
    477  VP8LColorCache hashers;
    478 
    479  if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) {
    480    return 0;
    481  }
    482  VP8LClearBackwardRefs(refs);
    483  // Add first pixel as literal.
    484  AddSingleLiteral(argb[0], use_color_cache, &hashers, refs);
    485  i = 1;
    486  while (i < pix_count) {
    487    const int max_len = MaxFindCopyLength(pix_count - i);
    488    const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len);
    489    const int prev_row_len = (i < xsize) ? 0 :
    490        FindMatchLength(argb + i, argb + i - xsize, 0, max_len);
    491    if (rle_len >= prev_row_len && rle_len >= MIN_LENGTH) {
    492      VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len));
    493      // We don't need to update the color cache here since it is always the
    494      // same pixel being copied, and that does not change the color cache
    495      // state.
    496      i += rle_len;
    497    } else if (prev_row_len >= MIN_LENGTH) {
    498      VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len));
    499      if (use_color_cache) {
    500        for (k = 0; k < prev_row_len; ++k) {
    501          VP8LColorCacheInsert(&hashers, argb[i + k]);
    502        }
    503      }
    504      i += prev_row_len;
    505    } else {
    506      AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
    507      i++;
    508    }
    509  }
    510  if (use_color_cache) VP8LColorCacheClear(&hashers);
    511  return !refs->error;
    512 }
    513 
    514 static int BackwardReferencesLz77(int xsize, int ysize,
    515                                  const uint32_t* const argb, int cache_bits,
    516                                  const VP8LHashChain* const hash_chain,
    517                                  VP8LBackwardRefs* const refs) {
    518  int i;
    519  int i_last_check = -1;
    520  int ok = 0;
    521  int cc_init = 0;
    522  const int use_color_cache = (cache_bits > 0);
    523  const int pix_count = xsize * ysize;
    524  VP8LColorCache hashers;
    525 
    526  if (use_color_cache) {
    527    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    528    if (!cc_init) goto Error;
    529  }
    530  VP8LClearBackwardRefs(refs);
    531  for (i = 0; i < pix_count;) {
    532    // Alternative#1: Code the pixels starting at 'i' using backward reference.
    533    int offset = 0;
    534    int len = 0;
    535    int j;
    536    VP8LHashChainFindCopy(hash_chain, i, &offset, &len);
    537    if (len >= MIN_LENGTH) {
    538      const int len_ini = len;
    539      int max_reach = 0;
    540      const int j_max =
    541          (i + len_ini >= pix_count) ? pix_count - 1 : i + len_ini;
    542      // Only start from what we have not checked already.
    543      i_last_check = (i > i_last_check) ? i : i_last_check;
    544      // We know the best match for the current pixel but we try to find the
    545      // best matches for the current pixel AND the next one combined.
    546      // The naive method would use the intervals:
    547      // [i,i+len) + [i+len, length of best match at i+len)
    548      // while we check if we can use:
    549      // [i,j) (where j<=i+len) + [j, length of best match at j)
    550      for (j = i_last_check + 1; j <= j_max; ++j) {
    551        const int len_j = VP8LHashChainFindLength(hash_chain, j);
    552        const int reach =
    553            j + (len_j >= MIN_LENGTH ? len_j : 1);  // 1 for single literal.
    554        if (reach > max_reach) {
    555          len = j - i;
    556          max_reach = reach;
    557          if (max_reach >= pix_count) break;
    558        }
    559      }
    560    } else {
    561      len = 1;
    562    }
    563    // Go with literal or backward reference.
    564    assert(len > 0);
    565    if (len == 1) {
    566      AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
    567    } else {
    568      VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
    569      if (use_color_cache) {
    570        for (j = i; j < i + len; ++j) VP8LColorCacheInsert(&hashers, argb[j]);
    571      }
    572    }
    573    i += len;
    574  }
    575 
    576  ok = !refs->error;
    577 Error:
    578  if (cc_init) VP8LColorCacheClear(&hashers);
    579  return ok;
    580 }
    581 
    582 // Compute an LZ77 by forcing matches to happen within a given distance cost.
    583 // We therefore limit the algorithm to the lowest 32 values in the PlaneCode
    584 // definition.
    585 #define WINDOW_OFFSETS_SIZE_MAX 32
    586 static int BackwardReferencesLz77Box(int xsize, int ysize,
    587                                     const uint32_t* const argb, int cache_bits,
    588                                     const VP8LHashChain* const hash_chain_best,
    589                                     VP8LHashChain* hash_chain,
    590                                     VP8LBackwardRefs* const refs) {
    591  int i;
    592  const int pix_count = xsize * ysize;
    593  uint16_t* counts;
    594  int window_offsets[WINDOW_OFFSETS_SIZE_MAX] = {0};
    595  int window_offsets_new[WINDOW_OFFSETS_SIZE_MAX] = {0};
    596  int window_offsets_size = 0;
    597  int window_offsets_new_size = 0;
    598  uint16_t* const counts_ini =
    599      (uint16_t*)WebPSafeMalloc(xsize * ysize, sizeof(*counts_ini));
    600  int best_offset_prev = -1, best_length_prev = -1;
    601  if (counts_ini == NULL) return 0;
    602 
    603  // counts[i] counts how many times a pixel is repeated starting at position i.
    604  i = pix_count - 2;
    605  counts = counts_ini + i;
    606  counts[1] = 1;
    607  for (; i >= 0; --i, --counts) {
    608    if (argb[i] == argb[i + 1]) {
    609      // Max out the counts to MAX_LENGTH.
    610      counts[0] = counts[1] + (counts[1] != MAX_LENGTH);
    611    } else {
    612      counts[0] = 1;
    613    }
    614  }
    615 
    616  // Figure out the window offsets around a pixel. They are stored in a
    617  // spiraling order around the pixel as defined by VP8LDistanceToPlaneCode.
    618  {
    619    int x, y;
    620    for (y = 0; y <= 6; ++y) {
    621      for (x = -6; x <= 6; ++x) {
    622        const int offset = y * xsize + x;
    623        int plane_code;
    624        // Ignore offsets that bring us after the pixel.
    625        if (offset <= 0) continue;
    626        plane_code = VP8LDistanceToPlaneCode(xsize, offset) - 1;
    627        if (plane_code >= WINDOW_OFFSETS_SIZE_MAX) continue;
    628        window_offsets[plane_code] = offset;
    629      }
    630    }
    631    // For narrow images, not all plane codes are reached, so remove those.
    632    for (i = 0; i < WINDOW_OFFSETS_SIZE_MAX; ++i) {
    633      if (window_offsets[i] == 0) continue;
    634      window_offsets[window_offsets_size++] = window_offsets[i];
    635    }
    636    // Given a pixel P, find the offsets that reach pixels unreachable from P-1
    637    // with any of the offsets in window_offsets[].
    638    for (i = 0; i < window_offsets_size; ++i) {
    639      int j;
    640      int is_reachable = 0;
    641      for (j = 0; j < window_offsets_size && !is_reachable; ++j) {
    642        is_reachable |= (window_offsets[i] == window_offsets[j] + 1);
    643      }
    644      if (!is_reachable) {
    645        window_offsets_new[window_offsets_new_size] = window_offsets[i];
    646        ++window_offsets_new_size;
    647      }
    648    }
    649  }
    650 
    651  hash_chain->offset_length[0] = 0;
    652  for (i = 1; i < pix_count; ++i) {
    653    int ind;
    654    int best_length = VP8LHashChainFindLength(hash_chain_best, i);
    655    int best_offset;
    656    int do_compute = 1;
    657 
    658    if (best_length >= MAX_LENGTH) {
    659      // Do not recompute the best match if we already have a maximal one in the
    660      // window.
    661      best_offset = VP8LHashChainFindOffset(hash_chain_best, i);
    662      for (ind = 0; ind < window_offsets_size; ++ind) {
    663        if (best_offset == window_offsets[ind]) {
    664          do_compute = 0;
    665          break;
    666        }
    667      }
    668    }
    669    if (do_compute) {
    670      // Figure out if we should use the offset/length from the previous pixel
    671      // as an initial guess and therefore only inspect the offsets in
    672      // window_offsets_new[].
    673      const int use_prev =
    674          (best_length_prev > 1) && (best_length_prev < MAX_LENGTH);
    675      const int num_ind =
    676          use_prev ? window_offsets_new_size : window_offsets_size;
    677      best_length = use_prev ? best_length_prev - 1 : 0;
    678      best_offset = use_prev ? best_offset_prev : 0;
    679      // Find the longest match in a window around the pixel.
    680      for (ind = 0; ind < num_ind; ++ind) {
    681        int curr_length = 0;
    682        int j = i;
    683        int j_offset =
    684            use_prev ? i - window_offsets_new[ind] : i - window_offsets[ind];
    685        if (j_offset < 0 || argb[j_offset] != argb[i]) continue;
    686        // The longest match is the sum of how many times each pixel is
    687        // repeated.
    688        do {
    689          const int counts_j_offset = counts_ini[j_offset];
    690          const int counts_j = counts_ini[j];
    691          if (counts_j_offset != counts_j) {
    692            curr_length +=
    693                (counts_j_offset < counts_j) ? counts_j_offset : counts_j;
    694            break;
    695          }
    696          // The same color is repeated counts_pos times at j_offset and j.
    697          curr_length += counts_j_offset;
    698          j_offset += counts_j_offset;
    699          j += counts_j_offset;
    700        } while (curr_length <= MAX_LENGTH && j < pix_count &&
    701                 argb[j_offset] == argb[j]);
    702        if (best_length < curr_length) {
    703          best_offset =
    704              use_prev ? window_offsets_new[ind] : window_offsets[ind];
    705          if (curr_length >= MAX_LENGTH) {
    706            best_length = MAX_LENGTH;
    707            break;
    708          } else {
    709            best_length = curr_length;
    710          }
    711        }
    712      }
    713    }
    714 
    715    assert(i + best_length <= pix_count);
    716    assert(best_length <= MAX_LENGTH);
    717    if (best_length <= MIN_LENGTH) {
    718      hash_chain->offset_length[i] = 0;
    719      best_offset_prev = 0;
    720      best_length_prev = 0;
    721    } else {
    722      hash_chain->offset_length[i] =
    723          (best_offset << MAX_LENGTH_BITS) | (uint32_t)best_length;
    724      best_offset_prev = best_offset;
    725      best_length_prev = best_length;
    726    }
    727  }
    728  hash_chain->offset_length[0] = 0;
    729  WebPSafeFree(counts_ini);
    730 
    731  return BackwardReferencesLz77(xsize, ysize, argb, cache_bits, hash_chain,
    732                                refs);
    733 }
    734 
    735 // -----------------------------------------------------------------------------
    736 
    737 static void BackwardReferences2DLocality(int xsize,
    738                                         const VP8LBackwardRefs* const refs) {
    739  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
    740  while (VP8LRefsCursorOk(&c)) {
    741    if (PixOrCopyIsCopy(c.cur_pos)) {
    742      const int dist = c.cur_pos->argb_or_distance;
    743      const int transformed_dist = VP8LDistanceToPlaneCode(xsize, dist);
    744      c.cur_pos->argb_or_distance = transformed_dist;
    745    }
    746    VP8LRefsCursorNext(&c);
    747  }
    748 }
    749 
    750 // Evaluate optimal cache bits for the local color cache.
    751 // The input *best_cache_bits sets the maximum cache bits to use (passing 0
    752 // implies disabling the local color cache). The local color cache is also
    753 // disabled for the lower (<= 25) quality.
    754 // Returns 0 in case of memory error.
    755 static int CalculateBestCacheSize(const uint32_t* argb, int quality,
    756                                  const VP8LBackwardRefs* const refs,
    757                                  int* const best_cache_bits) {
    758  int i;
    759  const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits;
    760  uint64_t entropy_min = WEBP_UINT64_MAX;
    761  int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };
    762  VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
    763  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
    764  VP8LHistogram* histos[MAX_COLOR_CACHE_BITS + 1] = { NULL };
    765  int ok = 0;
    766 
    767  assert(cache_bits_max >= 0 && cache_bits_max <= MAX_COLOR_CACHE_BITS);
    768 
    769  if (cache_bits_max == 0) {
    770    *best_cache_bits = 0;
    771    // Local color cache is disabled.
    772    return 1;
    773  }
    774 
    775  // Allocate data.
    776  for (i = 0; i <= cache_bits_max; ++i) {
    777    histos[i] = VP8LAllocateHistogram(i);
    778    if (histos[i] == NULL) goto Error;
    779    VP8LHistogramInit(histos[i], i, /*init_arrays=*/ 1);
    780    if (i == 0) continue;
    781    cc_init[i] = VP8LColorCacheInit(&hashers[i], i);
    782    if (!cc_init[i]) goto Error;
    783  }
    784 
    785  // Find the cache_bits giving the lowest entropy. The search is done in a
    786  // brute-force way as the function (entropy w.r.t cache_bits) can be
    787  // anything in practice.
    788  while (VP8LRefsCursorOk(&c)) {
    789    const PixOrCopy* const v = c.cur_pos;
    790    if (PixOrCopyIsLiteral(v)) {
    791      const uint32_t pix = *argb++;
    792      const uint32_t a = (pix >> 24) & 0xff;
    793      const uint32_t r = (pix >> 16) & 0xff;
    794      const uint32_t g = (pix >>  8) & 0xff;
    795      const uint32_t b = (pix >>  0) & 0xff;
    796      // The keys of the caches can be derived from the longest one.
    797      int key = VP8LHashPix(pix, 32 - cache_bits_max);
    798      // Do not use the color cache for cache_bits = 0.
    799      ++histos[0]->blue[b];
    800      ++histos[0]->literal[g];
    801      ++histos[0]->red[r];
    802      ++histos[0]->alpha[a];
    803      // Deal with cache_bits > 0.
    804      for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
    805        if (VP8LColorCacheLookup(&hashers[i], key) == pix) {
    806          ++histos[i]->literal[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
    807        } else {
    808          VP8LColorCacheSet(&hashers[i], key, pix);
    809          ++histos[i]->blue[b];
    810          ++histos[i]->literal[g];
    811          ++histos[i]->red[r];
    812          ++histos[i]->alpha[a];
    813        }
    814      }
    815    } else {
    816      int code, extra_bits, extra_bits_value;
    817      // We should compute the contribution of the (distance,length)
    818      // histograms but those are the same independently from the cache size.
    819      // As those constant contributions are in the end added to the other
    820      // histogram contributions, we can ignore them, except for the length
    821      // prefix that is part of the 'literal' histogram.
    822      int len = PixOrCopyLength(v);
    823      uint32_t argb_prev = *argb ^ 0xffffffffu;
    824      VP8LPrefixEncode(len, &code, &extra_bits, &extra_bits_value);
    825      for (i = 0; i <= cache_bits_max; ++i) {
    826        ++histos[i]->literal[NUM_LITERAL_CODES + code];
    827      }
    828      // Update the color caches.
    829      do {
    830        if (*argb != argb_prev) {
    831          // Efficiency: insert only if the color changes.
    832          int key = VP8LHashPix(*argb, 32 - cache_bits_max);
    833          for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
    834            hashers[i].colors[key] = *argb;
    835          }
    836          argb_prev = *argb;
    837        }
    838        argb++;
    839      } while (--len != 0);
    840    }
    841    VP8LRefsCursorNext(&c);
    842  }
    843 
    844  for (i = 0; i <= cache_bits_max; ++i) {
    845    const uint64_t entropy = VP8LHistogramEstimateBits(histos[i]);
    846    if (i == 0 || entropy < entropy_min) {
    847      entropy_min = entropy;
    848      *best_cache_bits = i;
    849    }
    850  }
    851  ok = 1;
    852 Error:
    853  for (i = 0; i <= cache_bits_max; ++i) {
    854    if (cc_init[i]) VP8LColorCacheClear(&hashers[i]);
    855    VP8LFreeHistogram(histos[i]);
    856  }
    857  return ok;
    858 }
    859 
    860 // Update (in-place) backward references for specified cache_bits.
    861 static int BackwardRefsWithLocalCache(const uint32_t* const argb,
    862                                      int cache_bits,
    863                                      VP8LBackwardRefs* const refs) {
    864  int pixel_index = 0;
    865  VP8LColorCache hashers;
    866  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
    867  if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0;
    868 
    869  while (VP8LRefsCursorOk(&c)) {
    870    PixOrCopy* const v = c.cur_pos;
    871    if (PixOrCopyIsLiteral(v)) {
    872      const uint32_t argb_literal = v->argb_or_distance;
    873      const int ix = VP8LColorCacheContains(&hashers, argb_literal);
    874      if (ix >= 0) {
    875        // hashers contains argb_literal
    876        *v = PixOrCopyCreateCacheIdx(ix);
    877      } else {
    878        VP8LColorCacheInsert(&hashers, argb_literal);
    879      }
    880      ++pixel_index;
    881    } else {
    882      // refs was created without local cache, so it can not have cache indexes.
    883      int k;
    884      assert(PixOrCopyIsCopy(v));
    885      for (k = 0; k < v->len; ++k) {
    886        VP8LColorCacheInsert(&hashers, argb[pixel_index++]);
    887      }
    888    }
    889    VP8LRefsCursorNext(&c);
    890  }
    891  VP8LColorCacheClear(&hashers);
    892  return 1;
    893 }
    894 
    895 static VP8LBackwardRefs* GetBackwardReferencesLowEffort(
    896    int width, int height, const uint32_t* const argb,
    897    int* const cache_bits, const VP8LHashChain* const hash_chain,
    898    VP8LBackwardRefs* const refs_lz77) {
    899  *cache_bits = 0;
    900  if (!BackwardReferencesLz77(width, height, argb, 0, hash_chain, refs_lz77)) {
    901    return NULL;
    902  }
    903  BackwardReferences2DLocality(width, refs_lz77);
    904  return refs_lz77;
    905 }
    906 
    907 extern int VP8LBackwardReferencesTraceBackwards(
    908    int xsize, int ysize, const uint32_t* const argb, int cache_bits,
    909    const VP8LHashChain* const hash_chain,
    910    const VP8LBackwardRefs* const refs_src, VP8LBackwardRefs* const refs_dst);
    911 static int GetBackwardReferences(int width, int height,
    912                                 const uint32_t* const argb, int quality,
    913                                 int lz77_types_to_try, int cache_bits_max,
    914                                 int do_no_cache,
    915                                 const VP8LHashChain* const hash_chain,
    916                                 VP8LBackwardRefs* const refs,
    917                                 int* const cache_bits_best) {
    918  VP8LHistogram* histo = NULL;
    919  int i, lz77_type;
    920  // Index 0 is for a color cache, index 1 for no cache (if needed).
    921  int lz77_types_best[2] = {0, 0};
    922  uint64_t bit_costs_best[2] = {WEBP_UINT64_MAX, WEBP_UINT64_MAX};
    923  VP8LHashChain hash_chain_box;
    924  VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1];
    925  int status = 0;
    926  memset(&hash_chain_box, 0, sizeof(hash_chain_box));
    927 
    928  histo = VP8LAllocateHistogram(MAX_COLOR_CACHE_BITS);
    929  if (histo == NULL) goto Error;
    930 
    931  for (lz77_type = 1; lz77_types_to_try;
    932       lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
    933    int res = 0;
    934    uint64_t bit_cost = 0u;
    935    if ((lz77_types_to_try & lz77_type) == 0) continue;
    936    switch (lz77_type) {
    937      case kLZ77RLE:
    938        res = BackwardReferencesRle(width, height, argb, 0, refs_tmp);
    939        break;
    940      case kLZ77Standard:
    941        // Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color
    942        // cache is not that different in practice.
    943        res = BackwardReferencesLz77(width, height, argb, 0, hash_chain,
    944                                     refs_tmp);
    945        break;
    946      case kLZ77Box:
    947        if (!VP8LHashChainInit(&hash_chain_box, width * height)) goto Error;
    948        res = BackwardReferencesLz77Box(width, height, argb, 0, hash_chain,
    949                                        &hash_chain_box, refs_tmp);
    950        break;
    951      default:
    952        assert(0);
    953    }
    954    if (!res) goto Error;
    955 
    956    // Start with the no color cache case.
    957    for (i = 1; i >= 0; --i) {
    958      int cache_bits = (i == 1) ? 0 : cache_bits_max;
    959 
    960      if (i == 1 && !do_no_cache) continue;
    961 
    962      if (i == 0) {
    963        // Try with a color cache.
    964        if (!CalculateBestCacheSize(argb, quality, refs_tmp, &cache_bits)) {
    965          goto Error;
    966        }
    967        if (cache_bits > 0) {
    968          if (!BackwardRefsWithLocalCache(argb, cache_bits, refs_tmp)) {
    969            goto Error;
    970          }
    971        }
    972      }
    973 
    974      if (i == 0 && do_no_cache && cache_bits == 0) {
    975        // No need to re-compute bit_cost as it was computed at i == 1.
    976      } else {
    977        VP8LHistogramCreate(histo, refs_tmp, cache_bits);
    978        bit_cost = VP8LHistogramEstimateBits(histo);
    979      }
    980 
    981      if (bit_cost < bit_costs_best[i]) {
    982        if (i == 1) {
    983          // Do not swap as the full cache analysis would have the wrong
    984          // VP8LBackwardRefs to start with.
    985          if (!BackwardRefsClone(refs_tmp, &refs[1])) goto Error;
    986        } else {
    987          BackwardRefsSwap(refs_tmp, &refs[0]);
    988        }
    989        bit_costs_best[i] = bit_cost;
    990        lz77_types_best[i] = lz77_type;
    991        if (i == 0) *cache_bits_best = cache_bits;
    992      }
    993    }
    994  }
    995  assert(lz77_types_best[0] > 0);
    996  assert(!do_no_cache || lz77_types_best[1] > 0);
    997 
    998  // Improve on simple LZ77 but only for high quality (TraceBackwards is
    999  // costly).
   1000  for (i = 1; i >= 0; --i) {
   1001    if (i == 1 && !do_no_cache) continue;
   1002    if ((lz77_types_best[i] == kLZ77Standard ||
   1003         lz77_types_best[i] == kLZ77Box) &&
   1004        quality >= 25) {
   1005      const VP8LHashChain* const hash_chain_tmp =
   1006          (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box;
   1007      const int cache_bits = (i == 1) ? 0 : *cache_bits_best;
   1008      uint64_t bit_cost_trace;
   1009      if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
   1010                                                hash_chain_tmp, &refs[i],
   1011                                                refs_tmp)) {
   1012        goto Error;
   1013      }
   1014      VP8LHistogramCreate(histo, refs_tmp, cache_bits);
   1015      bit_cost_trace = VP8LHistogramEstimateBits(histo);
   1016      if (bit_cost_trace < bit_costs_best[i]) {
   1017        BackwardRefsSwap(refs_tmp, &refs[i]);
   1018      }
   1019    }
   1020 
   1021    BackwardReferences2DLocality(width, &refs[i]);
   1022 
   1023    if (i == 1 && lz77_types_best[0] == lz77_types_best[1] &&
   1024        *cache_bits_best == 0) {
   1025      // If the best cache size is 0 and we have the same best LZ77, just copy
   1026      // the data over and stop here.
   1027      if (!BackwardRefsClone(&refs[1], &refs[0])) goto Error;
   1028      break;
   1029    }
   1030  }
   1031  status = 1;
   1032 
   1033 Error:
   1034  VP8LHashChainClear(&hash_chain_box);
   1035  VP8LFreeHistogram(histo);
   1036  return status;
   1037 }
   1038 
   1039 int VP8LGetBackwardReferences(
   1040    int width, int height, const uint32_t* const argb, int quality,
   1041    int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
   1042    const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
   1043    int* const cache_bits_best, const WebPPicture* const pic, int percent_range,
   1044    int* const percent) {
   1045  if (low_effort) {
   1046    VP8LBackwardRefs* refs_best;
   1047    *cache_bits_best = cache_bits_max;
   1048    refs_best = GetBackwardReferencesLowEffort(
   1049        width, height, argb, cache_bits_best, hash_chain, refs);
   1050    if (refs_best == NULL) {
   1051      return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
   1052    }
   1053    // Set it in first position.
   1054    BackwardRefsSwap(refs_best, &refs[0]);
   1055  } else {
   1056    if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try,
   1057                               cache_bits_max, do_no_cache, hash_chain, refs,
   1058                               cache_bits_best)) {
   1059      return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
   1060    }
   1061  }
   1062 
   1063  return WebPReportProgress(pic, *percent + percent_range, percent);
   1064 }