tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

sinc_resampler_sse.cc (2241B)


      1 /*
      2 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 // Modified from the Chromium original:
     12 // src/media/base/simd/sinc_resampler_sse.cc
     13 
     14 #include <xmmintrin.h>
     15 
     16 #include <cstddef>
     17 #include <cstdint>
     18 
     19 #include "common_audio/resampler/sinc_resampler.h"
     20 
     21 namespace webrtc {
     22 
     23 float SincResampler::Convolve_SSE(const float* input_ptr,
     24                                  const float* k1,
     25                                  const float* k2,
     26                                  double kernel_interpolation_factor) {
     27  __m128 m_input;
     28  __m128 m_sums1 = _mm_setzero_ps();
     29  __m128 m_sums2 = _mm_setzero_ps();
     30 
     31  // Based on `input_ptr` alignment, we need to use loadu or load.  Unrolling
     32  // these loops hurt performance in local testing.
     33  if (reinterpret_cast<uintptr_t>(input_ptr) & 0x0F) {
     34    for (size_t i = 0; i < kKernelSize; i += 4) {
     35      m_input = _mm_loadu_ps(input_ptr + i);
     36      m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
     37      m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
     38    }
     39  } else {
     40    for (size_t i = 0; i < kKernelSize; i += 4) {
     41      m_input = _mm_load_ps(input_ptr + i);
     42      m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
     43      m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
     44    }
     45  }
     46 
     47  // Linearly interpolate the two "convolutions".
     48  m_sums1 = _mm_mul_ps(
     49      m_sums1,
     50      _mm_set_ps1(static_cast<float>(1.0 - kernel_interpolation_factor)));
     51  m_sums2 = _mm_mul_ps(
     52      m_sums2, _mm_set_ps1(static_cast<float>(kernel_interpolation_factor)));
     53  m_sums1 = _mm_add_ps(m_sums1, m_sums2);
     54 
     55  // Sum components together.
     56  float result;
     57  m_sums2 = _mm_add_ps(_mm_movehl_ps(m_sums1, m_sums1), m_sums1);
     58  _mm_store_ss(&result,
     59               _mm_add_ss(m_sums2, _mm_shuffle_ps(m_sums2, m_sums2, 1)));
     60 
     61  return result;
     62 }
     63 
     64 }  // namespace webrtc