tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

v256_intrinsics.h (14120B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #ifndef AOM_AOM_DSP_SIMD_V256_INTRINSICS_H_
     13 #define AOM_AOM_DSP_SIMD_V256_INTRINSICS_H_
     14 
     15 #include <stdio.h>
     16 #include <stdlib.h>
     17 #include <string.h>
     18 
     19 #include "aom_dsp/simd/v256_intrinsics_c.h"
     20 #include "aom_dsp/simd/v128_intrinsics.h"
     21 #include "aom_dsp/simd/v64_intrinsics.h"
     22 
     23 /* Fallback to plain, unoptimised C. */
     24 
     25 typedef c_v256 v256;
        /* v256 is the generic 256-bit vector type of the plain-C fallback;
         * every v256_* wrapper below simply forwards to the matching c_v256_*
         * reference implementation in v256_intrinsics_c.h. */
     26 
        /* Lane extraction (low u32/u64, low v64, low/high v128 halves) and
         * construction of a v256 from two v128s, four u64s, or four v64s. */
     27 SIMD_INLINE uint32_t v256_low_u32(v256 a) { return c_v256_low_u32(a); }
     28 SIMD_INLINE v64 v256_low_v64(v256 a) { return c_v256_low_v64(a); }
     29 SIMD_INLINE uint64_t v256_low_u64(v256 a) { return c_v256_low_u64(a); }
     30 SIMD_INLINE v128 v256_low_v128(v256 a) { return c_v256_low_v128(a); }
     31 SIMD_INLINE v128 v256_high_v128(v256 a) { return c_v256_high_v128(a); }
     32 SIMD_INLINE v256 v256_from_v128(v128 hi, v128 lo) {
     33  return c_v256_from_v128(hi, lo);
     34 }
     35 SIMD_INLINE v256 v256_from_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
     36  return c_v256_from_64(a, b, c, d);
     37 }
     38 SIMD_INLINE v256 v256_from_v64(v64 a, v64 b, v64 c, v64 d) {
     39  return c_v256_from_v64(a, b, c, d);
     40 }
     41 
        /* Memory access: *_aligned variants require suitably aligned pointers
         * (alignment enforcement, if any, lives in the c_v256_* implementations);
         * *_unaligned variants accept any address. */
     42 SIMD_INLINE v256 v256_load_unaligned(const void *p) {
     43  return c_v256_load_unaligned(p);
     44 }
     45 SIMD_INLINE v256 v256_load_aligned(const void *p) {
     46  return c_v256_load_aligned(p);
     47 }
     48 
     49 SIMD_INLINE void v256_store_unaligned(void *p, v256 a) {
     50  c_v256_store_unaligned(p, a);
     51 }
     52 SIMD_INLINE void v256_store_aligned(void *p, v256 a) {
     53  c_v256_store_aligned(p, a);
     54 }
     55 
        /* Concatenated byte extraction of a:b by c (semantics defined by
         * c_v256_align; presumably palignr-style — see v256_intrinsics_c.h). */
     56 SIMD_INLINE v256 v256_align(v256 a, v256 b, unsigned int c) {
     57  return c_v256_align(a, b, c);
     58 }
     59 
        /* Constant generators: all-zero vector and broadcast (dup) of an
         * 8/16/32/64-bit scalar into every lane. */
     60 SIMD_INLINE v256 v256_zero(void) { return c_v256_zero(); }
     61 SIMD_INLINE v256 v256_dup_8(uint8_t x) { return c_v256_dup_8(x); }
     62 SIMD_INLINE v256 v256_dup_16(uint16_t x) { return c_v256_dup_16(x); }
     63 SIMD_INLINE v256 v256_dup_32(uint32_t x) { return c_v256_dup_32(x); }
     64 SIMD_INLINE v256 v256_dup_64(uint64_t x) { return c_v256_dup_64(x); }
     65 
        /* Accumulator-style reductions: each follows the pattern
         *   s = *_init(); s = op(s, a, b); ... total = *_sum(s);
         * The opaque accumulator types (c_sad256_internal etc.) come from
         * v256_intrinsics_c.h.  SAD = sum of absolute differences,
         * SSD = sum of squared differences (per the c_v256_* names). */
     66 SIMD_INLINE c_sad256_internal v256_sad_u8_init(void) {
     67  return c_v256_sad_u8_init();
     68 }
     69 SIMD_INLINE c_sad256_internal v256_sad_u8(c_sad256_internal s, v256 a, v256 b) {
     70  return c_v256_sad_u8(s, a, b);
     71 }
     72 SIMD_INLINE uint32_t v256_sad_u8_sum(c_sad256_internal s) {
     73  return c_v256_sad_u8_sum(s);
     74 }
     75 SIMD_INLINE c_ssd256_internal v256_ssd_u8_init(void) {
     76  return c_v256_ssd_u8_init();
     77 }
     78 SIMD_INLINE c_ssd256_internal v256_ssd_u8(c_ssd256_internal s, v256 a, v256 b) {
     79  return c_v256_ssd_u8(s, a, b);
     80 }
     81 SIMD_INLINE uint32_t v256_ssd_u8_sum(c_ssd256_internal s) {
     82  return c_v256_ssd_u8_sum(s);
     83 }
     84 
        /* 16-bit signed SSD; note the wider uint64_t sum vs uint32_t above. */
     85 SIMD_INLINE c_ssd256_internal_s16 v256_ssd_s16_init(void) {
     86  return c_v256_ssd_s16_init();
     87 }
     88 SIMD_INLINE c_ssd256_internal_s16 v256_ssd_s16(c_ssd256_internal_s16 s, v256 a,
     89                                               v256 b) {
     90  return c_v256_ssd_s16(s, a, b);
     91 }
     92 SIMD_INLINE uint64_t v256_ssd_s16_sum(c_ssd256_internal_s16 s) {
     93  return c_v256_ssd_s16_sum(s);
     94 }
     95 
        /* One-shot dot products and horizontal byte sum (no accumulator). */
     96 SIMD_INLINE int64_t v256_dotp_su8(v256 a, v256 b) {
     97  return c_v256_dotp_su8(a, b);
     98 }
     99 SIMD_INLINE int64_t v256_dotp_s16(v256 a, v256 b) {
    100  return c_v256_dotp_s16(a, b);
    101 }
    102 SIMD_INLINE int64_t v256_dotp_s32(v256 a, v256 b) {
    103  return c_v256_dotp_s32(a, b);
    104 }
    105 SIMD_INLINE uint64_t v256_hadd_u8(v256 a) { return c_v256_hadd_u8(a); }
    106 
        /* Bitwise logic; andn presumably computes a & ~b — confirm against
         * c_v256_andn in v256_intrinsics_c.h. */
    107 SIMD_INLINE v256 v256_or(v256 a, v256 b) { return c_v256_or(a, b); }
    108 SIMD_INLINE v256 v256_xor(v256 a, v256 b) { return c_v256_xor(a, b); }
    109 SIMD_INLINE v256 v256_and(v256 a, v256 b) { return c_v256_and(a, b); }
    110 SIMD_INLINE v256 v256_andn(v256 a, v256 b) { return c_v256_andn(a, b); }
    111 
        /* Lane-wise add/subtract at 8/16/32/64-bit widths.  The sadd_/ssub_
         * prefixes denote saturating variants and padd_ pairwise adds, going
         * by the c_v256_* naming — semantics live in v256_intrinsics_c.h. */
    112 SIMD_INLINE v256 v256_add_8(v256 a, v256 b) { return c_v256_add_8(a, b); }
    113 SIMD_INLINE v256 v256_add_16(v256 a, v256 b) { return c_v256_add_16(a, b); }
    114 SIMD_INLINE v256 v256_sadd_s8(v256 a, v256 b) { return c_v256_sadd_s8(a, b); }
    115 SIMD_INLINE v256 v256_sadd_u8(v256 a, v256 b) { return c_v256_sadd_u8(a, b); }
    116 SIMD_INLINE v256 v256_sadd_s16(v256 a, v256 b) { return c_v256_sadd_s16(a, b); }
    117 SIMD_INLINE v256 v256_add_32(v256 a, v256 b) { return c_v256_add_32(a, b); }
    118 SIMD_INLINE v256 v256_add_64(v256 a, v256 b) { return c_v256_add_64(a, b); }
    119 SIMD_INLINE v256 v256_sub_64(v256 a, v256 b) { return c_v256_sub_64(a, b); }
    120 SIMD_INLINE v256 v256_padd_u8(v256 a) { return c_v256_padd_u8(a); }
    121 SIMD_INLINE v256 v256_padd_s16(v256 a) { return c_v256_padd_s16(a); }
    122 SIMD_INLINE v256 v256_sub_8(v256 a, v256 b) { return c_v256_sub_8(a, b); }
    123 SIMD_INLINE v256 v256_ssub_u8(v256 a, v256 b) { return c_v256_ssub_u8(a, b); }
    124 SIMD_INLINE v256 v256_ssub_s8(v256 a, v256 b) { return c_v256_ssub_s8(a, b); }
    125 SIMD_INLINE v256 v256_sub_16(v256 a, v256 b) { return c_v256_sub_16(a, b); }
    126 SIMD_INLINE v256 v256_ssub_s16(v256 a, v256 b) { return c_v256_ssub_s16(a, b); }
    127 SIMD_INLINE v256 v256_ssub_u16(v256 a, v256 b) { return c_v256_ssub_u16(a, b); }
    128 SIMD_INLINE v256 v256_sub_32(v256 a, v256 b) { return c_v256_sub_32(a, b); }
    129 SIMD_INLINE v256 v256_abs_s16(v256 a) { return c_v256_abs_s16(a); }
    130 SIMD_INLINE v256 v256_abs_s8(v256 a) { return c_v256_abs_s8(a); }
    131 
        /* Multiplies.  Note v256_mul_s16 deliberately takes two v128 inputs
         * (widening multiply producing a v256) — this matches the upstream
         * libaom interface; the rest are v256-to-v256.  mullo/mulhi keep the
         * low/high half of each product; madd multiply-accumulates. */
    132 SIMD_INLINE v256 v256_mul_s16(v128 a, v128 b) { return c_v256_mul_s16(a, b); }
    133 SIMD_INLINE v256 v256_mullo_s16(v256 a, v256 b) {
    134  return c_v256_mullo_s16(a, b);
    135 }
    136 SIMD_INLINE v256 v256_mulhi_s16(v256 a, v256 b) {
    137  return c_v256_mulhi_s16(a, b);
    138 }
    139 SIMD_INLINE v256 v256_mullo_s32(v256 a, v256 b) {
    140  return c_v256_mullo_s32(a, b);
    141 }
    142 SIMD_INLINE v256 v256_madd_s16(v256 a, v256 b) { return c_v256_madd_s16(a, b); }
    143 SIMD_INLINE v256 v256_madd_us8(v256 a, v256 b) { return c_v256_madd_us8(a, b); }
    144 
        /* Byte mask extraction and mask-driven byte blend (select between a
         * and b per byte of c — exact mask convention is defined by
         * c_v256_blend_8). */
    145 SIMD_INLINE uint32_t v256_movemask_8(v256 a) { return c_v256_movemask_8(a); }
    146 SIMD_INLINE v256 v256_blend_8(v256 a, v256 b, v256 c) {
    147  return c_v256_blend_8(a, b, c);
    148 }
    149 
        /* Averages (rdavg_* are the variants with different rounding — see
         * c_v256_rdavg_* for the exact rule) and lane-wise min/max for
         * unsigned/signed 8-, 16- and 32-bit lanes. */
    150 SIMD_INLINE v256 v256_avg_u8(v256 a, v256 b) { return c_v256_avg_u8(a, b); }
    151 SIMD_INLINE v256 v256_rdavg_u8(v256 a, v256 b) { return c_v256_rdavg_u8(a, b); }
    152 SIMD_INLINE v256 v256_rdavg_u16(v256 a, v256 b) {
    153  return c_v256_rdavg_u16(a, b);
    154 }
    155 SIMD_INLINE v256 v256_avg_u16(v256 a, v256 b) { return c_v256_avg_u16(a, b); }
    156 SIMD_INLINE v256 v256_min_u8(v256 a, v256 b) { return c_v256_min_u8(a, b); }
    157 SIMD_INLINE v256 v256_max_u8(v256 a, v256 b) { return c_v256_max_u8(a, b); }
    158 SIMD_INLINE v256 v256_min_s8(v256 a, v256 b) { return c_v256_min_s8(a, b); }
    159 SIMD_INLINE v256 v256_max_s8(v256 a, v256 b) { return c_v256_max_s8(a, b); }
    160 SIMD_INLINE v256 v256_min_s16(v256 a, v256 b) { return c_v256_min_s16(a, b); }
    161 SIMD_INLINE v256 v256_max_s16(v256 a, v256 b) { return c_v256_max_s16(a, b); }
    162 SIMD_INLINE v256 v256_min_s32(v256 a, v256 b) { return c_v256_min_s32(a, b); }
    163 SIMD_INLINE v256 v256_max_s32(v256 a, v256 b) { return c_v256_max_s32(a, b); }
    164 
        /* Interleave (zip) and de-interleave (unzip) at element widths from
         * 8 bits to 128 bits.  The v128-input zip_* forms interleave two
         * 128-bit vectors into a full v256. */
    165 SIMD_INLINE v256 v256_ziplo_8(v256 a, v256 b) { return c_v256_ziplo_8(a, b); }
    166 SIMD_INLINE v256 v256_ziphi_8(v256 a, v256 b) { return c_v256_ziphi_8(a, b); }
    167 SIMD_INLINE v256 v256_ziplo_16(v256 a, v256 b) { return c_v256_ziplo_16(a, b); }
    168 SIMD_INLINE v256 v256_ziphi_16(v256 a, v256 b) { return c_v256_ziphi_16(a, b); }
    169 SIMD_INLINE v256 v256_ziplo_32(v256 a, v256 b) { return c_v256_ziplo_32(a, b); }
    170 SIMD_INLINE v256 v256_ziphi_32(v256 a, v256 b) { return c_v256_ziphi_32(a, b); }
    171 SIMD_INLINE v256 v256_ziplo_64(v256 a, v256 b) { return c_v256_ziplo_64(a, b); }
    172 SIMD_INLINE v256 v256_ziphi_64(v256 a, v256 b) { return c_v256_ziphi_64(a, b); }
    173 SIMD_INLINE v256 v256_ziplo_128(v256 a, v256 b) {
    174  return c_v256_ziplo_128(a, b);
    175 }
    176 SIMD_INLINE v256 v256_ziphi_128(v256 a, v256 b) {
    177  return c_v256_ziphi_128(a, b);
    178 }
    179 SIMD_INLINE v256 v256_zip_8(v128 a, v128 b) { return c_v256_zip_8(a, b); }
    180 SIMD_INLINE v256 v256_zip_16(v128 a, v128 b) { return c_v256_zip_16(a, b); }
    181 SIMD_INLINE v256 v256_zip_32(v128 a, v128 b) { return c_v256_zip_32(a, b); }
    182 SIMD_INLINE v256 v256_unziplo_8(v256 a, v256 b) {
    183  return c_v256_unziplo_8(a, b);
    184 }
    185 SIMD_INLINE v256 v256_unziphi_8(v256 a, v256 b) {
    186  return c_v256_unziphi_8(a, b);
    187 }
    188 SIMD_INLINE v256 v256_unziplo_16(v256 a, v256 b) {
    189  return c_v256_unziplo_16(a, b);
    190 }
    191 SIMD_INLINE v256 v256_unziphi_16(v256 a, v256 b) {
    192  return c_v256_unziphi_16(a, b);
    193 }
    194 SIMD_INLINE v256 v256_unziplo_32(v256 a, v256 b) {
    195  return c_v256_unziplo_32(a, b);
    196 }
    197 SIMD_INLINE v256 v256_unziphi_32(v256 a, v256 b) {
    198  return c_v256_unziphi_32(a, b);
    199 }
    200 SIMD_INLINE v256 v256_unziplo_64(v256 a, v256 b) {
    201  return c_v256_unziplo_64(a, b);
    202 }
    203 SIMD_INLINE v256 v256_unziphi_64(v256 a, v256 b) {
    204  return c_v256_unziphi_64(a, b);
    205 }
        /* Widening unpacks (u8/s8 -> s16, u16/s16 -> s32); the v128-input
         * forms widen a whole v128 into a v256, the *lo/*hi forms widen the
         * corresponding half of a v256. */
    206 SIMD_INLINE v256 v256_unpack_u8_s16(v128 a) { return c_v256_unpack_u8_s16(a); }
    207 SIMD_INLINE v256 v256_unpacklo_u8_s16(v256 a) {
    208  return c_v256_unpacklo_u8_s16(a);
    209 }
    210 SIMD_INLINE v256 v256_unpackhi_u8_s16(v256 a) {
    211  return c_v256_unpackhi_u8_s16(a);
    212 }
    213 SIMD_INLINE v256 v256_unpack_s8_s16(v128 a) { return c_v256_unpack_s8_s16(a); }
    214 SIMD_INLINE v256 v256_unpacklo_s8_s16(v256 a) {
    215  return c_v256_unpacklo_s8_s16(a);
    216 }
    217 SIMD_INLINE v256 v256_unpackhi_s8_s16(v256 a) {
    218  return c_v256_unpackhi_s8_s16(a);
    219 }
        /* Narrowing packs (s32 -> s16/u16, s16 -> u8/s8); presumably with
         * saturation as in the c_v256 reference — see v256_intrinsics_c.h. */
    220 SIMD_INLINE v256 v256_pack_s32_s16(v256 a, v256 b) {
    221  return c_v256_pack_s32_s16(a, b);
    222 }
    223 SIMD_INLINE v256 v256_pack_s32_u16(v256 a, v256 b) {
    224  return c_v256_pack_s32_u16(a, b);
    225 }
    226 SIMD_INLINE v256 v256_pack_s16_u8(v256 a, v256 b) {
    227  return c_v256_pack_s16_u8(a, b);
    228 }
    229 SIMD_INLINE v256 v256_pack_s16_s8(v256 a, v256 b) {
    230  return c_v256_pack_s16_s8(a, b);
    231 }
    232 SIMD_INLINE v256 v256_unpack_u16_s32(v128 a) {
    233  return c_v256_unpack_u16_s32(a);
    234 }
    235 SIMD_INLINE v256 v256_unpack_s16_s32(v128 a) {
    236  return c_v256_unpack_s16_s32(a);
    237 }
    238 SIMD_INLINE v256 v256_unpacklo_u16_s32(v256 a) {
    239  return c_v256_unpacklo_u16_s32(a);
    240 }
    241 SIMD_INLINE v256 v256_unpacklo_s16_s32(v256 a) {
    242  return c_v256_unpacklo_s16_s32(a);
    243 }
    244 SIMD_INLINE v256 v256_unpackhi_u16_s32(v256 a) {
    245  return c_v256_unpackhi_u16_s32(a);
    246 }
    247 SIMD_INLINE v256 v256_unpackhi_s16_s32(v256 a) {
    248  return c_v256_unpackhi_s16_s32(a);
    249 }
        /* Byte shuffles driven by a pattern vector; the exact index encoding
         * (and the wideshuffle two-source form) is defined by the c_v256_*
         * reference implementations. */
    250 SIMD_INLINE v256 v256_shuffle_8(v256 a, v256 pattern) {
    251  return c_v256_shuffle_8(a, pattern);
    252 }
    253 SIMD_INLINE v256 v256_wideshuffle_8(v256 a, v256 b, v256 pattern) {
    254  return c_v256_wideshuffle_8(a, b, pattern);
    255 }
    256 SIMD_INLINE v256 v256_pshuffle_8(v256 a, v256 pattern) {
    257  return c_v256_pshuffle_8(a, pattern);
    258 }
    259 
        /* Lane-wise comparisons producing per-lane masks (signed greater-than /
         * less-than, equality) at 8-, 16- and 32-bit widths. */
    260 SIMD_INLINE v256 v256_cmpgt_s8(v256 a, v256 b) { return c_v256_cmpgt_s8(a, b); }
    261 SIMD_INLINE v256 v256_cmplt_s8(v256 a, v256 b) { return c_v256_cmplt_s8(a, b); }
    262 SIMD_INLINE v256 v256_cmpeq_8(v256 a, v256 b) { return c_v256_cmpeq_8(a, b); }
    263 SIMD_INLINE v256 v256_cmpgt_s16(v256 a, v256 b) {
    264  return c_v256_cmpgt_s16(a, b);
    265 }
    266 SIMD_INLINE v256 v256_cmplt_s16(v256 a, v256 b) {
    267  return c_v256_cmplt_s16(a, b);
    268 }
    269 SIMD_INLINE v256 v256_cmpeq_16(v256 a, v256 b) { return c_v256_cmpeq_16(a, b); }
    270 SIMD_INLINE v256 v256_cmpeq_32(v256 a, v256 b) { return c_v256_cmpeq_32(a, b); }
    271 
    272 SIMD_INLINE v256 v256_cmpgt_s32(v256 a, v256 b) {
    273  return c_v256_cmpgt_s32(a, b);
    274 }
    275 SIMD_INLINE v256 v256_cmplt_s32(v256 a, v256 b) {
    276  return c_v256_cmplt_s32(a, b);
    277 }
        /* Per-lane shifts by a run-time count c: shl = left, shr_u = logical
         * right, shr_s = arithmetic right, at 8/16/32/64-bit lane widths. */
    278 SIMD_INLINE v256 v256_shl_8(v256 a, unsigned int c) {
    279  return c_v256_shl_8(a, c);
    280 }
    281 SIMD_INLINE v256 v256_shr_u8(v256 a, unsigned int c) {
    282  return c_v256_shr_u8(a, c);
    283 }
    284 SIMD_INLINE v256 v256_shr_s8(v256 a, unsigned int c) {
    285  return c_v256_shr_s8(a, c);
    286 }
    287 SIMD_INLINE v256 v256_shl_16(v256 a, unsigned int c) {
    288  return c_v256_shl_16(a, c);
    289 }
    290 SIMD_INLINE v256 v256_shr_u16(v256 a, unsigned int c) {
    291  return c_v256_shr_u16(a, c);
    292 }
    293 SIMD_INLINE v256 v256_shr_s16(v256 a, unsigned int c) {
    294  return c_v256_shr_s16(a, c);
    295 }
    296 SIMD_INLINE v256 v256_shl_32(v256 a, unsigned int c) {
    297  return c_v256_shl_32(a, c);
    298 }
    299 SIMD_INLINE v256 v256_shr_u32(v256 a, unsigned int c) {
    300  return c_v256_shr_u32(a, c);
    301 }
    302 SIMD_INLINE v256 v256_shr_s32(v256 a, unsigned int c) {
    303  return c_v256_shr_s32(a, c);
    304 }
    305 SIMD_INLINE v256 v256_shl_64(v256 a, unsigned int c) {
    306  return c_v256_shl_64(a, c);
    307 }
    308 SIMD_INLINE v256 v256_shr_u64(v256 a, unsigned int c) {
    309  return c_v256_shr_u64(a, c);
    310 }
    311 SIMD_INLINE v256 v256_shr_s64(v256 a, unsigned int c) {
    312  return c_v256_shr_s64(a, c);
    313 }
    314 
        /* "_n_" shift variants: in optimized backends n must be a
         * compile-time constant; this C fallback just forwards the run-time
         * value to the same c_v256_* helpers.  *_byte/*_word shift the whole
         * 256-bit vector by n bytes/words; the rest shift per lane. */
    315 SIMD_INLINE v256 v256_shr_n_byte(v256 a, unsigned int n) {
    316  return c_v256_shr_n_byte(a, n);
    317 }
    318 SIMD_INLINE v256 v256_shl_n_byte(v256 a, unsigned int n) {
    319  return c_v256_shl_n_byte(a, n);
    320 }
    321 SIMD_INLINE v256 v256_shl_n_8(v256 a, unsigned int n) {
    322  return c_v256_shl_n_8(a, n);
    323 }
    324 SIMD_INLINE v256 v256_shl_n_16(v256 a, unsigned int n) {
    325  return c_v256_shl_n_16(a, n);
    326 }
    327 SIMD_INLINE v256 v256_shl_n_32(v256 a, unsigned int n) {
    328  return c_v256_shl_n_32(a, n);
    329 }
    330 SIMD_INLINE v256 v256_shl_n_64(v256 a, unsigned int n) {
    331  return c_v256_shl_n_64(a, n);
    332 }
    333 SIMD_INLINE v256 v256_shr_n_u8(v256 a, unsigned int n) {
    334  return c_v256_shr_n_u8(a, n);
    335 }
    336 SIMD_INLINE v256 v256_shr_n_u16(v256 a, unsigned int n) {
    337  return c_v256_shr_n_u16(a, n);
    338 }
    339 SIMD_INLINE v256 v256_shr_n_u32(v256 a, unsigned int n) {
    340  return c_v256_shr_n_u32(a, n);
    341 }
    342 SIMD_INLINE v256 v256_shr_n_u64(v256 a, unsigned int n) {
    343  return c_v256_shr_n_u64(a, n);
    344 }
    345 SIMD_INLINE v256 v256_shr_n_s8(v256 a, unsigned int n) {
    346  return c_v256_shr_n_s8(a, n);
    347 }
    348 SIMD_INLINE v256 v256_shr_n_s16(v256 a, unsigned int n) {
    349  return c_v256_shr_n_s16(a, n);
    350 }
    351 SIMD_INLINE v256 v256_shr_n_s32(v256 a, unsigned int n) {
    352  return c_v256_shr_n_s32(a, n);
    353 }
    354 SIMD_INLINE v256 v256_shr_n_s64(v256 a, unsigned int n) {
    355  return c_v256_shr_n_s64(a, n);
    356 }
    357 
    358 SIMD_INLINE v256 v256_shr_n_word(v256 a, unsigned int n) {
    359  return c_v256_shr_n_word(a, n);
    360 }
    361 SIMD_INLINE v256 v256_shl_n_word(v256 a, unsigned int n) {
    362  return c_v256_shl_n_word(a, n);
    363 }
    364 
        /* 16-bit SAD accumulator; same init/accumulate/sum pattern as the u8
         * reductions above, but the fallback's accumulator is a plain
         * uint32_t rather than an opaque struct. */
    365 typedef uint32_t sad256_internal_u16;
    366 SIMD_INLINE sad256_internal_u16 v256_sad_u16_init(void) {
    367  return c_v256_sad_u16_init();
    368 }
    369 SIMD_INLINE sad256_internal_u16 v256_sad_u16(sad256_internal_u16 s, v256 a,
    370                                             v256 b) {
    371  return c_v256_sad_u16(s, a, b);
    372 }
    373 SIMD_INLINE uint32_t v256_sad_u16_sum(sad256_internal_u16 s) {
    374  return c_v256_sad_u16_sum(s);
    375 }
    376 
    377 #endif  // AOM_AOM_DSP_SIMD_V256_INTRINSICS_H_