pitch_search.cc (3148B)
1 /* 2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/agc2/rnn_vad/pitch_search.h" 12 13 #include <cstddef> 14 15 #include "api/array_view.h" 16 #include "modules/audio_processing/agc2/cpu_features.h" 17 #include "modules/audio_processing/agc2/rnn_vad/common.h" 18 #include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" 19 #include "rtc_base/checks.h" 20 21 namespace webrtc { 22 namespace rnn_vad { 23 24 PitchEstimator::PitchEstimator(const AvailableCpuFeatures& cpu_features) 25 : cpu_features_(cpu_features), 26 y_energy_24kHz_(kRefineNumLags24kHz, 0.f), 27 pitch_buffer_12kHz_(kBufSize12kHz), 28 auto_correlation_12kHz_(kNumLags12kHz) {} 29 30 PitchEstimator::~PitchEstimator() = default; 31 32 int PitchEstimator::Estimate( 33 ArrayView<const float, kBufSize24kHz> pitch_buffer) { 34 ArrayView<float, kBufSize12kHz> pitch_buffer_12kHz_view( 35 pitch_buffer_12kHz_.data(), kBufSize12kHz); 36 RTC_DCHECK_EQ(pitch_buffer_12kHz_.size(), pitch_buffer_12kHz_view.size()); 37 ArrayView<float, kNumLags12kHz> auto_correlation_12kHz_view( 38 auto_correlation_12kHz_.data(), kNumLags12kHz); 39 RTC_DCHECK_EQ(auto_correlation_12kHz_.size(), 40 auto_correlation_12kHz_view.size()); 41 42 // TODO(bugs.chromium.org/10480): Use `cpu_features_` to estimate pitch. 43 // Perform the initial pitch search at 12 kHz. 44 Decimate2x(pitch_buffer, pitch_buffer_12kHz_view); 45 auto_corr_calculator_.ComputeOnPitchBuffer(pitch_buffer_12kHz_view, 46 auto_correlation_12kHz_view); 47 CandidatePitchPeriods pitch_periods = ComputePitchPeriod12kHz( 48 pitch_buffer_12kHz_view, auto_correlation_12kHz_view, cpu_features_); 49 // The refinement is done using the pitch buffer that contains 24 kHz samples. 50 // Therefore, adapt the inverted lags in `pitch_candidates_inv_lags` from 12 51 // to 24 kHz. 52 pitch_periods.best *= 2; 53 pitch_periods.second_best *= 2; 54 55 // Refine the initial pitch period estimation from 12 kHz to 48 kHz. 56 // Pre-compute frame energies at 24 kHz. 57 ArrayView<float, kRefineNumLags24kHz> y_energy_24kHz_view( 58 y_energy_24kHz_.data(), kRefineNumLags24kHz); 59 RTC_DCHECK_EQ(y_energy_24kHz_.size(), y_energy_24kHz_view.size()); 60 ComputeSlidingFrameSquareEnergies24kHz(pitch_buffer, y_energy_24kHz_view, 61 cpu_features_); 62 // Estimation at 48 kHz. 63 const int pitch_lag_48kHz = ComputePitchPeriod48kHz( 64 pitch_buffer, y_energy_24kHz_view, pitch_periods, cpu_features_); 65 last_pitch_48kHz_ = ComputeExtendedPitchPeriod48kHz( 66 pitch_buffer, y_energy_24kHz_view, 67 /*initial_pitch_period_48kHz=*/kMaxPitch48kHz - pitch_lag_48kHz, 68 last_pitch_48kHz_, cpu_features_); 69 return last_pitch_48kHz_.period; 70 } 71 72 } // namespace rnn_vad 73 } // namespace webrtc