v256_intrinsics.h (14120B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #ifndef AOM_AOM_DSP_SIMD_V256_INTRINSICS_H_ 13 #define AOM_AOM_DSP_SIMD_V256_INTRINSICS_H_ 14 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 19 #include "aom_dsp/simd/v256_intrinsics_c.h" 20 #include "aom_dsp/simd/v128_intrinsics.h" 21 #include "aom_dsp/simd/v64_intrinsics.h" 22 23 /* Fallback to plain, unoptimised C. */ 24 25 typedef c_v256 v256; 26 27 SIMD_INLINE uint32_t v256_low_u32(v256 a) { return c_v256_low_u32(a); } 28 SIMD_INLINE v64 v256_low_v64(v256 a) { return c_v256_low_v64(a); } 29 SIMD_INLINE uint64_t v256_low_u64(v256 a) { return c_v256_low_u64(a); } 30 SIMD_INLINE v128 v256_low_v128(v256 a) { return c_v256_low_v128(a); } 31 SIMD_INLINE v128 v256_high_v128(v256 a) { return c_v256_high_v128(a); } 32 SIMD_INLINE v256 v256_from_v128(v128 hi, v128 lo) { 33 return c_v256_from_v128(hi, lo); 34 } 35 SIMD_INLINE v256 v256_from_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d) { 36 return c_v256_from_64(a, b, c, d); 37 } 38 SIMD_INLINE v256 v256_from_v64(v64 a, v64 b, v64 c, v64 d) { 39 return c_v256_from_v64(a, b, c, d); 40 } 41 42 SIMD_INLINE v256 v256_load_unaligned(const void *p) { 43 return c_v256_load_unaligned(p); 44 } 45 SIMD_INLINE v256 v256_load_aligned(const void *p) { 46 return c_v256_load_aligned(p); 47 } 48 49 SIMD_INLINE void v256_store_unaligned(void *p, v256 a) { 50 c_v256_store_unaligned(p, a); 51 } 52 SIMD_INLINE void v256_store_aligned(void *p, v256 a) { 53 c_v256_store_aligned(p, a); 54 } 55 56 SIMD_INLINE v256 v256_align(v256 a, v256 b, unsigned int c) { 57 return c_v256_align(a, b, c); 58 } 59 60 SIMD_INLINE v256 v256_zero(void) { return c_v256_zero(); } 61 SIMD_INLINE v256 v256_dup_8(uint8_t x) { return c_v256_dup_8(x); } 62 SIMD_INLINE v256 v256_dup_16(uint16_t x) { return c_v256_dup_16(x); } 63 SIMD_INLINE v256 v256_dup_32(uint32_t x) { return c_v256_dup_32(x); } 64 SIMD_INLINE v256 v256_dup_64(uint64_t x) { return c_v256_dup_64(x); } 65 66 SIMD_INLINE c_sad256_internal v256_sad_u8_init(void) { 67 return c_v256_sad_u8_init(); 68 } 69 SIMD_INLINE c_sad256_internal v256_sad_u8(c_sad256_internal s, v256 a, v256 b) { 70 return c_v256_sad_u8(s, a, b); 71 } 72 SIMD_INLINE uint32_t v256_sad_u8_sum(c_sad256_internal s) { 73 return c_v256_sad_u8_sum(s); 74 } 75 SIMD_INLINE c_ssd256_internal v256_ssd_u8_init(void) { 76 return c_v256_ssd_u8_init(); 77 } 78 SIMD_INLINE c_ssd256_internal v256_ssd_u8(c_ssd256_internal s, v256 a, v256 b) { 79 return c_v256_ssd_u8(s, a, b); 80 } 81 SIMD_INLINE uint32_t v256_ssd_u8_sum(c_ssd256_internal s) { 82 return c_v256_ssd_u8_sum(s); 83 } 84 85 SIMD_INLINE c_ssd256_internal_s16 v256_ssd_s16_init(void) { 86 return c_v256_ssd_s16_init(); 87 } 88 SIMD_INLINE c_ssd256_internal_s16 v256_ssd_s16(c_ssd256_internal_s16 s, v256 a, 89 v256 b) { 90 return c_v256_ssd_s16(s, a, b); 91 } 92 SIMD_INLINE uint64_t v256_ssd_s16_sum(c_ssd256_internal_s16 s) { 93 return c_v256_ssd_s16_sum(s); 94 } 95 96 SIMD_INLINE int64_t v256_dotp_su8(v256 a, v256 b) { 97 return c_v256_dotp_su8(a, b); 98 } 99 SIMD_INLINE int64_t v256_dotp_s16(v256 a, v256 b) { 100 return c_v256_dotp_s16(a, b); 101 } 102 SIMD_INLINE int64_t v256_dotp_s32(v256 a, v256 b) { 103 return c_v256_dotp_s32(a, b); 104 } 105 SIMD_INLINE uint64_t v256_hadd_u8(v256 a) { return c_v256_hadd_u8(a); } 106 107 SIMD_INLINE v256 v256_or(v256 a, v256 b) { return c_v256_or(a, b); } 108 SIMD_INLINE v256 v256_xor(v256 a, v256 b) { return c_v256_xor(a, b); } 109 SIMD_INLINE v256 v256_and(v256 a, v256 b) { return c_v256_and(a, b); } 110 SIMD_INLINE v256 v256_andn(v256 a, v256 b) { return c_v256_andn(a, b); } 111 112 SIMD_INLINE v256 v256_add_8(v256 a, v256 b) { return c_v256_add_8(a, b); } 113 SIMD_INLINE v256 v256_add_16(v256 a, v256 b) { return c_v256_add_16(a, b); } 114 SIMD_INLINE v256 v256_sadd_s8(v256 a, v256 b) { return c_v256_sadd_s8(a, b); } 115 SIMD_INLINE v256 v256_sadd_u8(v256 a, v256 b) { return c_v256_sadd_u8(a, b); } 116 SIMD_INLINE v256 v256_sadd_s16(v256 a, v256 b) { return c_v256_sadd_s16(a, b); } 117 SIMD_INLINE v256 v256_add_32(v256 a, v256 b) { return c_v256_add_32(a, b); } 118 SIMD_INLINE v256 v256_add_64(v256 a, v256 b) { return c_v256_add_64(a, b); } 119 SIMD_INLINE v256 v256_sub_64(v256 a, v256 b) { return c_v256_sub_64(a, b); } 120 SIMD_INLINE v256 v256_padd_u8(v256 a) { return c_v256_padd_u8(a); } 121 SIMD_INLINE v256 v256_padd_s16(v256 a) { return c_v256_padd_s16(a); } 122 SIMD_INLINE v256 v256_sub_8(v256 a, v256 b) { return c_v256_sub_8(a, b); } 123 SIMD_INLINE v256 v256_ssub_u8(v256 a, v256 b) { return c_v256_ssub_u8(a, b); } 124 SIMD_INLINE v256 v256_ssub_s8(v256 a, v256 b) { return c_v256_ssub_s8(a, b); } 125 SIMD_INLINE v256 v256_sub_16(v256 a, v256 b) { return c_v256_sub_16(a, b); } 126 SIMD_INLINE v256 v256_ssub_s16(v256 a, v256 b) { return c_v256_ssub_s16(a, b); } 127 SIMD_INLINE v256 v256_ssub_u16(v256 a, v256 b) { return c_v256_ssub_u16(a, b); } 128 SIMD_INLINE v256 v256_sub_32(v256 a, v256 b) { return c_v256_sub_32(a, b); } 129 SIMD_INLINE v256 v256_abs_s16(v256 a) { return c_v256_abs_s16(a); } 130 SIMD_INLINE v256 v256_abs_s8(v256 a) { return c_v256_abs_s8(a); } 131 132 SIMD_INLINE v256 v256_mul_s16(v128 a, v128 b) { return c_v256_mul_s16(a, b); } 133 SIMD_INLINE v256 v256_mullo_s16(v256 a, v256 b) { 134 return c_v256_mullo_s16(a, b); 135 } 136 SIMD_INLINE v256 v256_mulhi_s16(v256 a, v256 b) { 137 return c_v256_mulhi_s16(a, b); 138 } 139 SIMD_INLINE v256 v256_mullo_s32(v256 a, v256 b) { 140 return c_v256_mullo_s32(a, b); 141 } 142 SIMD_INLINE v256 v256_madd_s16(v256 a, v256 b) { return c_v256_madd_s16(a, b); } 143 SIMD_INLINE v256 v256_madd_us8(v256 a, v256 b) { return c_v256_madd_us8(a, b); } 144 145 SIMD_INLINE uint32_t v256_movemask_8(v256 a) { return c_v256_movemask_8(a); } 146 SIMD_INLINE v256 v256_blend_8(v256 a, v256 b, v256 c) { 147 return c_v256_blend_8(a, b, c); 148 } 149 150 SIMD_INLINE v256 v256_avg_u8(v256 a, v256 b) { return c_v256_avg_u8(a, b); } 151 SIMD_INLINE v256 v256_rdavg_u8(v256 a, v256 b) { return c_v256_rdavg_u8(a, b); } 152 SIMD_INLINE v256 v256_rdavg_u16(v256 a, v256 b) { 153 return c_v256_rdavg_u16(a, b); 154 } 155 SIMD_INLINE v256 v256_avg_u16(v256 a, v256 b) { return c_v256_avg_u16(a, b); } 156 SIMD_INLINE v256 v256_min_u8(v256 a, v256 b) { return c_v256_min_u8(a, b); } 157 SIMD_INLINE v256 v256_max_u8(v256 a, v256 b) { return c_v256_max_u8(a, b); } 158 SIMD_INLINE v256 v256_min_s8(v256 a, v256 b) { return c_v256_min_s8(a, b); } 159 SIMD_INLINE v256 v256_max_s8(v256 a, v256 b) { return c_v256_max_s8(a, b); } 160 SIMD_INLINE v256 v256_min_s16(v256 a, v256 b) { return c_v256_min_s16(a, b); } 161 SIMD_INLINE v256 v256_max_s16(v256 a, v256 b) { return c_v256_max_s16(a, b); } 162 SIMD_INLINE v256 v256_min_s32(v256 a, v256 b) { return c_v256_min_s32(a, b); } 163 SIMD_INLINE v256 v256_max_s32(v256 a, v256 b) { return c_v256_max_s32(a, b); } 164 165 SIMD_INLINE v256 v256_ziplo_8(v256 a, v256 b) { return c_v256_ziplo_8(a, b); } 166 SIMD_INLINE v256 v256_ziphi_8(v256 a, v256 b) { return c_v256_ziphi_8(a, b); } 167 SIMD_INLINE v256 v256_ziplo_16(v256 a, v256 b) { return c_v256_ziplo_16(a, b); } 168 SIMD_INLINE v256 v256_ziphi_16(v256 a, v256 b) { return c_v256_ziphi_16(a, b); } 169 SIMD_INLINE v256 v256_ziplo_32(v256 a, v256 b) { return c_v256_ziplo_32(a, b); } 170 SIMD_INLINE v256 v256_ziphi_32(v256 a, v256 b) { return c_v256_ziphi_32(a, b); } 171 SIMD_INLINE v256 v256_ziplo_64(v256 a, v256 b) { return c_v256_ziplo_64(a, b); } 172 SIMD_INLINE v256 v256_ziphi_64(v256 a, v256 b) { return c_v256_ziphi_64(a, b); } 173 SIMD_INLINE v256 v256_ziplo_128(v256 a, v256 b) { 174 return c_v256_ziplo_128(a, b); 175 } 176 SIMD_INLINE v256 v256_ziphi_128(v256 a, v256 b) { 177 return c_v256_ziphi_128(a, b); 178 } 179 SIMD_INLINE v256 v256_zip_8(v128 a, v128 b) { return c_v256_zip_8(a, b); } 180 SIMD_INLINE v256 v256_zip_16(v128 a, v128 b) { return c_v256_zip_16(a, b); } 181 SIMD_INLINE v256 v256_zip_32(v128 a, v128 b) { return c_v256_zip_32(a, b); } 182 SIMD_INLINE v256 v256_unziplo_8(v256 a, v256 b) { 183 return c_v256_unziplo_8(a, b); 184 } 185 SIMD_INLINE v256 v256_unziphi_8(v256 a, v256 b) { 186 return c_v256_unziphi_8(a, b); 187 } 188 SIMD_INLINE v256 v256_unziplo_16(v256 a, v256 b) { 189 return c_v256_unziplo_16(a, b); 190 } 191 SIMD_INLINE v256 v256_unziphi_16(v256 a, v256 b) { 192 return c_v256_unziphi_16(a, b); 193 } 194 SIMD_INLINE v256 v256_unziplo_32(v256 a, v256 b) { 195 return c_v256_unziplo_32(a, b); 196 } 197 SIMD_INLINE v256 v256_unziphi_32(v256 a, v256 b) { 198 return c_v256_unziphi_32(a, b); 199 } 200 SIMD_INLINE v256 v256_unziplo_64(v256 a, v256 b) { 201 return c_v256_unziplo_64(a, b); 202 } 203 SIMD_INLINE v256 v256_unziphi_64(v256 a, v256 b) { 204 return c_v256_unziphi_64(a, b); 205 } 206 SIMD_INLINE v256 v256_unpack_u8_s16(v128 a) { return c_v256_unpack_u8_s16(a); } 207 SIMD_INLINE v256 v256_unpacklo_u8_s16(v256 a) { 208 return c_v256_unpacklo_u8_s16(a); 209 } 210 SIMD_INLINE v256 v256_unpackhi_u8_s16(v256 a) { 211 return c_v256_unpackhi_u8_s16(a); 212 } 213 SIMD_INLINE v256 v256_unpack_s8_s16(v128 a) { return c_v256_unpack_s8_s16(a); } 214 SIMD_INLINE v256 v256_unpacklo_s8_s16(v256 a) { 215 return c_v256_unpacklo_s8_s16(a); 216 } 217 SIMD_INLINE v256 v256_unpackhi_s8_s16(v256 a) { 218 return c_v256_unpackhi_s8_s16(a); 219 } 220 SIMD_INLINE v256 v256_pack_s32_s16(v256 a, v256 b) { 221 return c_v256_pack_s32_s16(a, b); 222 } 223 SIMD_INLINE v256 v256_pack_s32_u16(v256 a, v256 b) { 224 return c_v256_pack_s32_u16(a, b); 225 } 226 SIMD_INLINE v256 v256_pack_s16_u8(v256 a, v256 b) { 227 return c_v256_pack_s16_u8(a, b); 228 } 229 SIMD_INLINE v256 v256_pack_s16_s8(v256 a, v256 b) { 230 return c_v256_pack_s16_s8(a, b); 231 } 232 SIMD_INLINE v256 v256_unpack_u16_s32(v128 a) { 233 return c_v256_unpack_u16_s32(a); 234 } 235 SIMD_INLINE v256 v256_unpack_s16_s32(v128 a) { 236 return c_v256_unpack_s16_s32(a); 237 } 238 SIMD_INLINE v256 v256_unpacklo_u16_s32(v256 a) { 239 return c_v256_unpacklo_u16_s32(a); 240 } 241 SIMD_INLINE v256 v256_unpacklo_s16_s32(v256 a) { 242 return c_v256_unpacklo_s16_s32(a); 243 } 244 SIMD_INLINE v256 v256_unpackhi_u16_s32(v256 a) { 245 return c_v256_unpackhi_u16_s32(a); 246 } 247 SIMD_INLINE v256 v256_unpackhi_s16_s32(v256 a) { 248 return c_v256_unpackhi_s16_s32(a); 249 } 250 SIMD_INLINE v256 v256_shuffle_8(v256 a, v256 pattern) { 251 return c_v256_shuffle_8(a, pattern); 252 } 253 SIMD_INLINE v256 v256_wideshuffle_8(v256 a, v256 b, v256 pattern) { 254 return c_v256_wideshuffle_8(a, b, pattern); 255 } 256 SIMD_INLINE v256 v256_pshuffle_8(v256 a, v256 pattern) { 257 return c_v256_pshuffle_8(a, pattern); 258 } 259 260 SIMD_INLINE v256 v256_cmpgt_s8(v256 a, v256 b) { return c_v256_cmpgt_s8(a, b); } 261 SIMD_INLINE v256 v256_cmplt_s8(v256 a, v256 b) { return c_v256_cmplt_s8(a, b); } 262 SIMD_INLINE v256 v256_cmpeq_8(v256 a, v256 b) { return c_v256_cmpeq_8(a, b); } 263 SIMD_INLINE v256 v256_cmpgt_s16(v256 a, v256 b) { 264 return c_v256_cmpgt_s16(a, b); 265 } 266 SIMD_INLINE v256 v256_cmplt_s16(v256 a, v256 b) { 267 return c_v256_cmplt_s16(a, b); 268 } 269 SIMD_INLINE v256 v256_cmpeq_16(v256 a, v256 b) { return c_v256_cmpeq_16(a, b); } 270 SIMD_INLINE v256 v256_cmpeq_32(v256 a, v256 b) { return c_v256_cmpeq_32(a, b); } 271 272 SIMD_INLINE v256 v256_cmpgt_s32(v256 a, v256 b) { 273 return c_v256_cmpgt_s32(a, b); 274 } 275 SIMD_INLINE v256 v256_cmplt_s32(v256 a, v256 b) { 276 return c_v256_cmplt_s32(a, b); 277 } 278 SIMD_INLINE v256 v256_shl_8(v256 a, unsigned int c) { 279 return c_v256_shl_8(a, c); 280 } 281 SIMD_INLINE v256 v256_shr_u8(v256 a, unsigned int c) { 282 return c_v256_shr_u8(a, c); 283 } 284 SIMD_INLINE v256 v256_shr_s8(v256 a, unsigned int c) { 285 return c_v256_shr_s8(a, c); 286 } 287 SIMD_INLINE v256 v256_shl_16(v256 a, unsigned int c) { 288 return c_v256_shl_16(a, c); 289 } 290 SIMD_INLINE v256 v256_shr_u16(v256 a, unsigned int c) { 291 return c_v256_shr_u16(a, c); 292 } 293 SIMD_INLINE v256 v256_shr_s16(v256 a, unsigned int c) { 294 return c_v256_shr_s16(a, c); 295 } 296 SIMD_INLINE v256 v256_shl_32(v256 a, unsigned int c) { 297 return c_v256_shl_32(a, c); 298 } 299 SIMD_INLINE v256 v256_shr_u32(v256 a, unsigned int c) { 300 return c_v256_shr_u32(a, c); 301 } 302 SIMD_INLINE v256 v256_shr_s32(v256 a, unsigned int c) { 303 return c_v256_shr_s32(a, c); 304 } 305 SIMD_INLINE v256 v256_shl_64(v256 a, unsigned int c) { 306 return c_v256_shl_64(a, c); 307 } 308 SIMD_INLINE v256 v256_shr_u64(v256 a, unsigned int c) { 309 return c_v256_shr_u64(a, c); 310 } 311 SIMD_INLINE v256 v256_shr_s64(v256 a, unsigned int c) { 312 return c_v256_shr_s64(a, c); 313 } 314 315 SIMD_INLINE v256 v256_shr_n_byte(v256 a, unsigned int n) { 316 return c_v256_shr_n_byte(a, n); 317 } 318 SIMD_INLINE v256 v256_shl_n_byte(v256 a, unsigned int n) { 319 return c_v256_shl_n_byte(a, n); 320 } 321 SIMD_INLINE v256 v256_shl_n_8(v256 a, unsigned int n) { 322 return c_v256_shl_n_8(a, n); 323 } 324 SIMD_INLINE v256 v256_shl_n_16(v256 a, unsigned int n) { 325 return c_v256_shl_n_16(a, n); 326 } 327 SIMD_INLINE v256 v256_shl_n_32(v256 a, unsigned int n) { 328 return c_v256_shl_n_32(a, n); 329 } 330 SIMD_INLINE v256 v256_shl_n_64(v256 a, unsigned int n) { 331 return c_v256_shl_n_64(a, n); 332 } 333 SIMD_INLINE v256 v256_shr_n_u8(v256 a, unsigned int n) { 334 return c_v256_shr_n_u8(a, n); 335 } 336 SIMD_INLINE v256 v256_shr_n_u16(v256 a, unsigned int n) { 337 return c_v256_shr_n_u16(a, n); 338 } 339 SIMD_INLINE v256 v256_shr_n_u32(v256 a, unsigned int n) { 340 return c_v256_shr_n_u32(a, n); 341 } 342 SIMD_INLINE v256 v256_shr_n_u64(v256 a, unsigned int n) { 343 return c_v256_shr_n_u64(a, n); 344 } 345 SIMD_INLINE v256 v256_shr_n_s8(v256 a, unsigned int n) { 346 return c_v256_shr_n_s8(a, n); 347 } 348 SIMD_INLINE v256 v256_shr_n_s16(v256 a, unsigned int n) { 349 return c_v256_shr_n_s16(a, n); 350 } 351 SIMD_INLINE v256 v256_shr_n_s32(v256 a, unsigned int n) { 352 return c_v256_shr_n_s32(a, n); 353 } 354 SIMD_INLINE v256 v256_shr_n_s64(v256 a, unsigned int n) { 355 return c_v256_shr_n_s64(a, n); 356 } 357 358 SIMD_INLINE v256 v256_shr_n_word(v256 a, unsigned int n) { 359 return c_v256_shr_n_word(a, n); 360 } 361 SIMD_INLINE v256 v256_shl_n_word(v256 a, unsigned int n) { 362 return c_v256_shl_n_word(a, n); 363 } 364 365 typedef uint32_t sad256_internal_u16; 366 SIMD_INLINE sad256_internal_u16 v256_sad_u16_init(void) { 367 return c_v256_sad_u16_init(); 368 } 369 SIMD_INLINE sad256_internal_u16 v256_sad_u16(sad256_internal_u16 s, v256 a, 370 v256 b) { 371 return c_v256_sad_u16(s, a, b); 372 } 373 SIMD_INLINE uint32_t v256_sad_u16_sum(sad256_internal_u16 s) { 374 return c_v256_sad_u16_sum(s); 375 } 376 377 #endif // AOM_AOM_DSP_SIMD_V256_INTRINSICS_H_