tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

avg_sve.c (2298B)


      1 /*
      2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <arm_neon.h>
     13 #include <assert.h>
     14 
     15 #include "config/aom_config.h"
     16 #include "config/aom_dsp_rtcd.h"
     17 #include "aom/aom_integer.h"
     18 #include "aom_dsp/arm/aom_neon_sve_bridge.h"
     19 #include "aom_dsp/arm/mem_neon.h"
     20 #include "aom_ports/mem.h"
     21 
     22 int aom_vector_var_sve(const int16_t *ref, const int16_t *src, int bwl) {
     23  assert(bwl >= 2 && bwl <= 5);
     24  int width = 4 << bwl;
     25 
     26  int64x2_t sse_s64[2] = { vdupq_n_s64(0), vdupq_n_s64(0) };
     27  int16x8_t v_mean[2] = { vdupq_n_s16(0), vdupq_n_s16(0) };
     28 
     29  do {
     30    int16x8_t r0 = vld1q_s16(ref);
     31    int16x8_t s0 = vld1q_s16(src);
     32 
     33    // diff: dynamic range [-510, 510] 10 (signed) bits.
     34    int16x8_t diff0 = vsubq_s16(r0, s0);
     35    // v_mean: dynamic range 16 * diff -> [-8160, 8160], 14 (signed) bits.
     36    v_mean[0] = vaddq_s16(v_mean[0], diff0);
     37 
     38    // v_sse: dynamic range 2 * 16 * diff^2 -> [0, 8,323,200], 24 (signed) bits.
     39    sse_s64[0] = aom_sdotq_s16(sse_s64[0], diff0, diff0);
     40 
     41    int16x8_t r1 = vld1q_s16(ref + 8);
     42    int16x8_t s1 = vld1q_s16(src + 8);
     43 
     44    // diff: dynamic range [-510, 510] 10 (signed) bits.
     45    int16x8_t diff1 = vsubq_s16(r1, s1);
     46    // v_mean: dynamic range 16 * diff -> [-8160, 8160], 14 (signed) bits.
     47    v_mean[1] = vaddq_s16(v_mean[1], diff1);
     48 
     49    // v_sse: dynamic range 2 * 16 * diff^2 -> [0, 8,323,200], 24 (signed) bits.
     50    sse_s64[1] = aom_sdotq_s16(sse_s64[1], diff1, diff1);
     51 
     52    ref += 16;
     53    src += 16;
     54    width -= 16;
     55  } while (width != 0);
     56 
     57  // Dynamic range [0, 65280], 16 (unsigned) bits.
     58  const uint32_t mean_abs = abs(vaddlvq_s16(vaddq_s16(v_mean[0], v_mean[1])));
     59  const int64_t sse = vaddvq_s64(vaddq_s64(sse_s64[0], sse_s64[1]));
     60 
     61  // (mean_abs * mean_abs): dynamic range 32 (unsigned) bits.
     62  return (int)(sse - ((mean_abs * mean_abs) >> (bwl + 2)));
     63 }