sinc_resampler_sse.cc (2241B)
1 /* 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 // Modified from the Chromium original: 12 // src/media/base/simd/sinc_resampler_sse.cc 13 14 #include <xmmintrin.h> 15 16 #include <cstddef> 17 #include <cstdint> 18 19 #include "common_audio/resampler/sinc_resampler.h" 20 21 namespace webrtc { 22 23 float SincResampler::Convolve_SSE(const float* input_ptr, 24 const float* k1, 25 const float* k2, 26 double kernel_interpolation_factor) { 27 __m128 m_input; 28 __m128 m_sums1 = _mm_setzero_ps(); 29 __m128 m_sums2 = _mm_setzero_ps(); 30 31 // Based on `input_ptr` alignment, we need to use loadu or load. Unrolling 32 // these loops hurt performance in local testing. 33 if (reinterpret_cast<uintptr_t>(input_ptr) & 0x0F) { 34 for (size_t i = 0; i < kKernelSize; i += 4) { 35 m_input = _mm_loadu_ps(input_ptr + i); 36 m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i))); 37 m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i))); 38 } 39 } else { 40 for (size_t i = 0; i < kKernelSize; i += 4) { 41 m_input = _mm_load_ps(input_ptr + i); 42 m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i))); 43 m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i))); 44 } 45 } 46 47 // Linearly interpolate the two "convolutions". 48 m_sums1 = _mm_mul_ps( 49 m_sums1, 50 _mm_set_ps1(static_cast<float>(1.0 - kernel_interpolation_factor))); 51 m_sums2 = _mm_mul_ps( 52 m_sums2, _mm_set_ps1(static_cast<float>(kernel_interpolation_factor))); 53 m_sums1 = _mm_add_ps(m_sums1, m_sums2); 54 55 // Sum components together. 56 float result; 57 m_sums2 = _mm_add_ps(_mm_movehl_ps(m_sums1, m_sums1), m_sums1); 58 _mm_store_ss(&result, 59 _mm_add_ss(m_sums2, _mm_shuffle_ps(m_sums2, m_sums2, 1))); 60 61 return result; 62 } 63 64 } // namespace webrtc