tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

av1_fwd_txfm_avx2.h (4197B)


      1 /*
      2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #ifndef AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_
     13 #define AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_
     14 #include <immintrin.h>
     15 
     16 // out0 = in0*w0 + in1*w1
     17 // out1 = -in1*w0 + in0*w1
     18 static inline void btf_32_avx2_type0(const int32_t w0, const int32_t w1,
     19                                     __m256i *in0, __m256i *in1,
     20                                     const __m256i _r, const int32_t cos_bit) {
     21  __m256i _in0 = *in0;
     22  __m256i _in1 = *in1;
     23  const __m256i ww0 = _mm256_set1_epi32(w0);
     24  const __m256i ww1 = _mm256_set1_epi32(w1);
     25  const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
     26  const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
     27  __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
     28  temp0 = _mm256_add_epi32(temp0, _r);
     29  *in0 = _mm256_srai_epi32(temp0, cos_bit);
     30  const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
     31  const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
     32  __m256i temp1 = _mm256_sub_epi32(in0_w1, in1_w0);
     33  temp1 = _mm256_add_epi32(temp1, _r);
     34  *in1 = _mm256_srai_epi32(temp1, cos_bit);
     35 }
     36 
     37 static inline void btf_32_avx2_type1(const int32_t w0, const int32_t w1,
     38                                     __m256i *in0, __m256i *in1,
     39                                     const __m256i _r, const int32_t cos_bit) {
     40  __m256i _in0 = *in0;
     41  __m256i _in1 = *in1;
     42  const __m256i ww0 = _mm256_set1_epi32(w0);
     43  const __m256i ww1 = _mm256_set1_epi32(w1);
     44  const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
     45  const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
     46  __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
     47  temp0 = _mm256_add_epi32(temp0, _r);
     48  *in0 = _mm256_srai_epi32(temp0, cos_bit);
     49  const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
     50  const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
     51  __m256i temp1 = _mm256_sub_epi32(in1_w0, in0_w1);
     52  temp1 = _mm256_add_epi32(temp1, _r);
     53  *in1 = _mm256_srai_epi32(temp1, cos_bit);
     54 }
     55 
     56 // out0 = in0*w0 + in1*w1
     57 // out1 = -in1*w0 + in0*w1
     58 static inline void btf_32_avx2_type0_new(const __m256i ww0, const __m256i ww1,
     59                                         __m256i *in0, __m256i *in1,
     60                                         const __m256i _r,
     61                                         const int32_t cos_bit) {
     62  __m256i _in0 = *in0;
     63  __m256i _in1 = *in1;
     64  const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
     65  const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
     66  __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
     67  temp0 = _mm256_add_epi32(temp0, _r);
     68  *in0 = _mm256_srai_epi32(temp0, cos_bit);
     69  const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
     70  const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
     71  __m256i temp1 = _mm256_sub_epi32(in0_w1, in1_w0);
     72  temp1 = _mm256_add_epi32(temp1, _r);
     73  *in1 = _mm256_srai_epi32(temp1, cos_bit);
     74 }
     75 
     76 // out0 = in0*w0 + in1*w1
     77 // out1 = in1*w0 - in0*w1
     78 static inline void btf_32_avx2_type1_new(const __m256i ww0, const __m256i ww1,
     79                                         __m256i *in0, __m256i *in1,
     80                                         const __m256i _r,
     81                                         const int32_t cos_bit) {
     82  __m256i _in0 = *in0;
     83  __m256i _in1 = *in1;
     84  const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
     85  const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
     86  __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
     87  temp0 = _mm256_add_epi32(temp0, _r);
     88  *in0 = _mm256_srai_epi32(temp0, cos_bit);
     89  const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
     90  const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
     91  __m256i temp1 = _mm256_sub_epi32(in1_w0, in0_w1);
     92  temp1 = _mm256_add_epi32(temp1, _r);
     93  *in1 = _mm256_srai_epi32(temp1, cos_bit);
     94 }
     95 
     96 #endif  // AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_