aom_neon_sve_bridge.h (2624B)
1 /* 2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #ifndef AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_ 13 #define AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_ 14 15 #include <arm_neon_sve_bridge.h> 16 17 #include "config/aom_dsp_rtcd.h" 18 #include "config/aom_config.h" 19 20 // We can access instructions exclusive to the SVE instruction set from a 21 // predominantly Neon context by making use of the Neon-SVE bridge intrinsics 22 // to reinterpret Neon vectors as SVE vectors - with the high part of the SVE 23 // vector (if it's longer than 128 bits) being "don't care". 24 25 // While sub-optimal on machines that have SVE vector length > 128-bit - as the 26 // remainder of the vector is unused - this approach is still beneficial when 27 // compared to a Neon-only solution. 
// Every helper below has the same shape: each 128-bit Neon operand is placed
// into an otherwise undefined SVE vector with svset_neonq_*, a single SVE
// intrinsic is applied, and the result is converted back to a Neon vector with
// svget_neonq_*.

// Applies svdot_u64 to Neon operands: acc supplies the 64-bit accumulator
// lanes, x and y supply the unsigned 16-bit elements.
// NOTE(review): per the Arm ACLE each 64-bit lane should receive the sum of
// four adjacent 16-bit products added to the accumulator - confirm there.
static inline uint64x2_t aom_udotq_u16(uint64x2_t acc, uint16x8_t x,
                                       uint16x8_t y) {
  svuint64_t acc_sv = svset_neonq_u64(svundef_u64(), acc);
  svuint16_t x_sv = svset_neonq_u16(svundef_u16(), x);
  svuint16_t y_sv = svset_neonq_u16(svundef_u16(), y);
  svuint64_t dot_sv = svdot_u64(acc_sv, x_sv, y_sv);
  return svget_neonq_u64(dot_sv);
}

// Signed counterpart of aom_udotq_u16: applies svdot_s64 to a signed 64-bit
// accumulator and two vectors of signed 16-bit elements.
static inline int64x2_t aom_sdotq_s16(int64x2_t acc, int16x8_t x, int16x8_t y) {
  svint64_t acc_sv = svset_neonq_s64(svundef_s64(), acc);
  svint16_t x_sv = svset_neonq_s16(svundef_s16(), x);
  svint16_t y_sv = svset_neonq_s16(svundef_s16(), y);
  svint64_t dot_sv = svdot_s64(acc_sv, x_sv, y_sv);
  return svget_neonq_s64(dot_sv);
}

// Applies svdot_lane_s64 to Neon operands: sum is the signed 64-bit
// accumulator, s0 and f are vectors of signed 16-bit elements, and lane is
// forwarded unchanged as the intrinsic's index argument.
// NOTE(review): presumably kept as a macro because lane has to be a
// compile-time constant for svdot_lane_s64 - confirm against the ACLE.
#define aom_svdot_lane_s16(sum, s0, f, lane)           \
  svget_neonq_s64(                                     \
      svdot_lane_s64(svset_neonq_s64(svundef_s64(), sum), \
                     svset_neonq_s16(svundef_s16(), s0),  \
                     svset_neonq_s16(svundef_s16(), f), lane))

// Applies svtbl_u16 to Neon operands: s holds the unsigned 16-bit data and tbl
// holds the 16-bit indices.
// NOTE(review): indices are presumably expected to stay within the eight Neon
// lanes, since anything above them in the SVE vector is undefined - verify
// against callers.
static inline uint16x8_t aom_tbl_u16(uint16x8_t s, uint16x8_t tbl) {
  svuint16_t data_sv = svset_neonq_u16(svundef_u16(), s);
  svuint16_t index_sv = svset_neonq_u16(svundef_u16(), tbl);
  svuint16_t picked_sv = svtbl_u16(data_sv, index_sv);
  return svget_neonq_u16(picked_sv);
}

// Signed-data counterpart of aom_tbl_u16: applies svtbl_s16 to signed 16-bit
// data s using the unsigned 16-bit indices in tbl.
static inline int16x8_t aom_tbl_s16(int16x8_t s, uint16x8_t tbl) {
  svint16_t data_sv = svset_neonq_s16(svundef_s16(), s);
  svuint16_t index_sv = svset_neonq_u16(svundef_u16(), tbl);
  svint16_t picked_sv = svtbl_s16(data_sv, index_sv);
  return svget_neonq_s16(picked_sv);
}

#endif  // AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_