tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

wedge_utils.c (4102B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <assert.h>
     13 
     14 #include "aom/aom_integer.h"
     15 
     16 #include "aom_ports/mem.h"
     17 
     18 #include "aom_dsp/aom_dsp_common.h"
     19 
     20 #include "av1/common/reconinter.h"
     21 
     22 #define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
     23 
     24 /**
     25 * Computes SSE of a compound predictor constructed from 2 fundamental
     26 * predictors p0 and p1 using blending with mask.
     27 *
     28 * r1:  Residuals of p1.
     29 *      (source - p1)
     30 * d:   Difference of p1 and p0.
     31 *      (p1 - p0)
     32 * m:   The blending mask
     33 * N:   Number of pixels
     34 *
     35 * 'r1', 'd', and 'm' are contiguous.
     36 *
     37 * Computes:
     38 *  Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to:
     39 *  Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2),
     40 *    where r0 is (source - p0), and r1 is (source - p1), which is in turn
     41 *    is equivalent to:
     42 *  Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2),
     43 *    which is the SSE of the residuals of the compound predictor scaled up by
     44 *    MAX_MASK_VALUE**2.
     45 *
     46 * Note that we clamp the partial term in the loop to 16 bits signed. This is
     47 * to facilitate equivalent SIMD implementation. It should have no effect if
     48 * residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always
     49 * holds for 8 bit input, and on real input, it should hold practically always,
     50 * as residuals are expected to be small.
     51 */
     52 uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
     53                                        const uint8_t *m, int N) {
     54  uint64_t csse = 0;
     55  int i;
     56 
     57  for (i = 0; i < N; i++) {
     58    int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
     59    t = clamp(t, INT16_MIN, INT16_MAX);
     60    csse += t * t;
     61  }
     62  return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
     63 }
     64 
     65 /**
     66 * Choose the mask sign for a compound predictor.
     67 *
     68 * ds:    Difference of the squares of the residuals.
     69 *        r0**2 - r1**2
     70 * m:     The blending mask
     71 * N:     Number of pixels
     72 * limit: Pre-computed threshold value.
     73 *        MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
     74 *
     75 * 'ds' and 'm' are contiguous.
     76 *
     77 * Returns true if the negated mask has lower SSE compared to the positive
     78 * mask. Computation is based on:
     79 *  Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2)
     80 *                                     >
     81 *                                Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2)
     82 *
     83 *  which can be simplified to:
     84 *
     85 *  Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
     86 *
     87 *  The right hand side does not depend on the mask, and needs to be passed as
     88 *  the 'limit' parameter.
     89 *
     90 *  After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left
     91 *  hand side is simply a scalar product between an int16_t and uint8_t vector.
     92 *
     93 *  Note that for efficiency, ds is stored on 16 bits. Real input residuals
     94 *  being small, this should not cause a noticeable issue.
     95 */
     96 int8_t av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m,
     97                                       int N, int64_t limit) {
     98  int64_t acc = 0;
     99 
    100  do {
    101    acc += *ds++ * *m++;
    102  } while (--N);
    103 
    104  return acc > limit;
    105 }
    106 
    107 /**
    108 * Compute the element-wise difference of the squares of 2 arrays.
    109 *
    110 * d: Difference of the squares of the inputs: a**2 - b**2
    111 * a: First input array
    112 * b: Second input array
    113 * N: Number of elements
    114 *
    115 * 'd', 'a', and 'b' are contiguous.
    116 *
    117 * The result is saturated to signed 16 bits.
    118 */
    119 void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
    120                                       const int16_t *b, int N) {
    121  int i;
    122 
    123  for (i = 0; i < N; i++)
    124    d[i] = clamp(a[i] * a[i] - b[i] * b[i], INT16_MIN, INT16_MAX);
    125 }