tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

aom_neon_sve_bridge.h (2624B)


      1 /*
      2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #ifndef AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_
     13 #define AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_
     14 
     15 #include <arm_neon_sve_bridge.h>
     16 
     17 #include "config/aom_dsp_rtcd.h"
     18 #include "config/aom_config.h"
     19 
     20 // We can access instructions exclusive to the SVE instruction set from a
     21 // predominantly Neon context by making use of the Neon-SVE bridge intrinsics
     22 // to reinterpret Neon vectors as SVE vectors - with the high part of the SVE
     23 // vector (if it's longer than 128 bits) being "don't care".
     24 
     25 // While sub-optimal on machines that have SVE vector length > 128-bit - as the
     26 // remainder of the vector is unused - this approach is still beneficial when
     27 // compared to a Neon-only solution.
     28 
     // Unsigned 16-bit dot product accumulated into 64-bit lanes.
     //
     // Each Neon operand is placed in the low 128 bits of an otherwise undefined
     // SVE vector, the SVE svdot_u64 intrinsic is applied, and the low 128 bits
     // of the result are handed back as a Neon vector. Per the SVE ACLE, each
     // 64-bit lane of the result is the matching lane of `acc` plus the sum of
     // the products of the four corresponding 16-bit elements of `x` and `y`.
     static inline uint64x2_t aom_udotq_u16(uint64x2_t acc, uint16x8_t x,
                                            uint16x8_t y) {
       // Only the low 128 bits are meaningful; the rest is "don't care".
       const svuint64_t acc_sve = svset_neonq_u64(svundef_u64(), acc);
       const svuint16_t x_sve = svset_neonq_u16(svundef_u16(), x);
       const svuint16_t y_sve = svset_neonq_u16(svundef_u16(), y);

       const svuint64_t dot_sve = svdot_u64(acc_sve, x_sve, y_sve);
       return svget_neonq_u64(dot_sve);
     }
     35 
     // Signed 16-bit dot product accumulated into 64-bit lanes.
     //
     // Each Neon operand is placed in the low 128 bits of an otherwise undefined
     // SVE vector, the SVE svdot_s64 intrinsic is applied, and the low 128 bits
     // of the result are handed back as a Neon vector. Per the SVE ACLE, each
     // 64-bit lane of the result is the matching lane of `acc` plus the sum of
     // the products of the four corresponding 16-bit elements of `x` and `y`.
     static inline int64x2_t aom_sdotq_s16(int64x2_t acc, int16x8_t x,
                                           int16x8_t y) {
       // Only the low 128 bits are meaningful; the rest is "don't care".
       const svint64_t acc_sve = svset_neonq_s64(svundef_s64(), acc);
       const svint16_t x_sve = svset_neonq_s16(svundef_s16(), x);
       const svint16_t y_sve = svset_neonq_s16(svundef_s16(), y);

       const svint64_t dot_sve = svdot_s64(acc_sve, x_sve, y_sve);
       return svget_neonq_s64(dot_sve);
     }
     41 
     // Indexed signed 16-bit dot product accumulated into 64-bit lanes.
     //
     // Wraps the Neon operands `acc` (int64x2_t), `x` and `y` (int16x8_t) in
     // otherwise undefined SVE vectors, applies svdot_lane_s64 and yields the low
     // 128 bits of the result as an int64x2_t. `lane` is forwarded unchanged as
     // the index argument of svdot_lane_s64, which the SVE ACLE requires to be an
     // integer constant expression - presumably the reason this is a macro
     // rather than an inline function.
     #define aom_svdot_lane_s16(acc, x, y, lane)                 \
       svget_neonq_s64(                                          \
           svdot_lane_s64(svset_neonq_s64(svundef_s64(), acc),   \
                          svset_neonq_s16(svundef_s16(), x),     \
                          svset_neonq_s16(svundef_s16(), y), lane))
     46 
     // Table lookup (permute) over unsigned 16-bit elements via SVE svtbl_u16.
     //
     // `s` supplies the data elements and `tbl` the per-lane indices into `s`;
     // both are placed in the low 128 bits of otherwise undefined SVE vectors and
     // the low 128 bits of the lookup result are returned.
     // NOTE(review): indices are presumably expected to stay in 0..7; on
     // hardware with SVE vectors longer than 128 bits a larger index would
     // appear to select from the undefined upper part - confirm against callers.
     static inline uint16x8_t aom_tbl_u16(uint16x8_t s, uint16x8_t tbl) {
       const svuint16_t data_sve = svset_neonq_u16(svundef_u16(), s);
       const svuint16_t index_sve = svset_neonq_u16(svundef_u16(), tbl);

       const svuint16_t picked_sve = svtbl_u16(data_sve, index_sve);
       return svget_neonq_u16(picked_sve);
     }
     51 
     // Table lookup (permute) over signed 16-bit elements via SVE svtbl_s16.
     //
     // `s` supplies the data elements and `tbl` the (unsigned) per-lane indices
     // into `s`; both are placed in the low 128 bits of otherwise undefined SVE
     // vectors and the low 128 bits of the lookup result are returned.
     // NOTE(review): indices are presumably expected to stay in 0..7; on
     // hardware with SVE vectors longer than 128 bits a larger index would
     // appear to select from the undefined upper part - confirm against callers.
     static inline int16x8_t aom_tbl_s16(int16x8_t s, uint16x8_t tbl) {
       const svint16_t data_sve = svset_neonq_s16(svundef_s16(), s);
       const svuint16_t index_sve = svset_neonq_u16(svundef_u16(), tbl);

       const svint16_t picked_sve = svtbl_s16(data_sve, index_sve);
       return svget_neonq_s16(picked_sve);
     }
     56 
     57 #endif  // AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_