tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

av1_txfm_sse4.h (2393B)


      1 /*
      2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #ifndef AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_
     13 #define AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_
     14 
     15 #include <smmintrin.h>
     16 
     17 #ifdef __cplusplus
     18 extern "C" {
     19 #endif
     20 
     21 static inline __m128i av1_round_shift_32_sse4_1(__m128i vec, int bit) {
     22  __m128i tmp, round;
     23  round = _mm_set1_epi32(1 << (bit - 1));
     24  tmp = _mm_add_epi32(vec, round);
     25  return _mm_srai_epi32(tmp, bit);
     26 }
     27 
     28 static inline void av1_round_shift_array_32_sse4_1(const __m128i *input,
     29                                                   __m128i *output,
     30                                                   const int size,
     31                                                   const int bit) {
     32  if (bit > 0) {
     33    int i;
     34    for (i = 0; i < size; i++) {
     35      output[i] = av1_round_shift_32_sse4_1(input[i], bit);
     36    }
     37  } else {
     38    int i;
     39    for (i = 0; i < size; i++) {
     40      output[i] = _mm_slli_epi32(input[i], -bit);
     41    }
     42  }
     43 }
     44 
     45 static inline void av1_round_shift_rect_array_32_sse4_1(const __m128i *input,
     46                                                        __m128i *output,
     47                                                        const int size,
     48                                                        const int bit,
     49                                                        const int val) {
     50  const __m128i sqrt2 = _mm_set1_epi32(val);
     51  if (bit > 0) {
     52    int i;
     53    for (i = 0; i < size; i++) {
     54      const __m128i r0 = av1_round_shift_32_sse4_1(input[i], bit);
     55      const __m128i r1 = _mm_mullo_epi32(sqrt2, r0);
     56      output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits);
     57    }
     58  } else {
     59    int i;
     60    for (i = 0; i < size; i++) {
     61      const __m128i r0 = _mm_slli_epi32(input[i], -bit);
     62      const __m128i r1 = _mm_mullo_epi32(sqrt2, r0);
     63      output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits);
     64    }
     65  }
     66 }
     67 
     68 #ifdef __cplusplus
     69 }
     70 #endif
     71 
     72 #endif  // AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_