av1_txfm_sse4.h (2393B)
1 /* 2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #ifndef AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_ 13 #define AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_ 14 15 #include <smmintrin.h> 16 17 #ifdef __cplusplus 18 extern "C" { 19 #endif 20 21 static inline __m128i av1_round_shift_32_sse4_1(__m128i vec, int bit) { 22 __m128i tmp, round; 23 round = _mm_set1_epi32(1 << (bit - 1)); 24 tmp = _mm_add_epi32(vec, round); 25 return _mm_srai_epi32(tmp, bit); 26 } 27 28 static inline void av1_round_shift_array_32_sse4_1(const __m128i *input, 29 __m128i *output, 30 const int size, 31 const int bit) { 32 if (bit > 0) { 33 int i; 34 for (i = 0; i < size; i++) { 35 output[i] = av1_round_shift_32_sse4_1(input[i], bit); 36 } 37 } else { 38 int i; 39 for (i = 0; i < size; i++) { 40 output[i] = _mm_slli_epi32(input[i], -bit); 41 } 42 } 43 } 44 45 static inline void av1_round_shift_rect_array_32_sse4_1(const __m128i *input, 46 __m128i *output, 47 const int size, 48 const int bit, 49 const int val) { 50 const __m128i sqrt2 = _mm_set1_epi32(val); 51 if (bit > 0) { 52 int i; 53 for (i = 0; i < size; i++) { 54 const __m128i r0 = av1_round_shift_32_sse4_1(input[i], bit); 55 const __m128i r1 = _mm_mullo_epi32(sqrt2, r0); 56 output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits); 57 } 58 } else { 59 int i; 60 for (i = 0; i < size; i++) { 61 const __m128i r0 = _mm_slli_epi32(input[i], -bit); 62 const __m128i r1 = _mm_mullo_epi32(sqrt2, r0); 63 output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits); 64 } 65 } 66 } 67 68 #ifdef __cplusplus 69 } 70 #endif 71 72 #endif // AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_