speech_probability_estimator.cc (4204B)
1 /* 2 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/ns/speech_probability_estimator.h" 12 13 #include <algorithm> 14 #include <array> 15 #include <cmath> 16 #include <cstddef> 17 #include <cstdint> 18 19 #include "api/array_view.h" 20 #include "modules/audio_processing/ns/fast_math.h" 21 #include "modules/audio_processing/ns/ns_common.h" 22 #include "modules/audio_processing/ns/prior_signal_model.h" 23 #include "modules/audio_processing/ns/signal_model.h" 24 25 namespace webrtc { 26 27 SpeechProbabilityEstimator::SpeechProbabilityEstimator() { 28 speech_probability_.fill(0.f); 29 } 30 31 void SpeechProbabilityEstimator::Update( 32 int32_t num_analyzed_frames, 33 ArrayView<const float, kFftSizeBy2Plus1> prior_snr, 34 ArrayView<const float, kFftSizeBy2Plus1> post_snr, 35 ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum, 36 ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum, 37 float signal_spectral_sum, 38 float signal_energy) { 39 // Update models. 40 if (num_analyzed_frames < kLongStartupPhaseBlocks) { 41 signal_model_estimator_.AdjustNormalization(num_analyzed_frames, 42 signal_energy); 43 } 44 signal_model_estimator_.Update(prior_snr, post_snr, 45 conservative_noise_spectrum, signal_spectrum, 46 signal_spectral_sum, signal_energy); 47 48 const SignalModel& model = signal_model_estimator_.get_model(); 49 const PriorSignalModel& prior_model = 50 signal_model_estimator_.get_prior_model(); 51 52 // Width parameter in sigmoid map for prior model. 53 constexpr float kWidthPrior0 = 4.f; 54 // Width for pause region: lower range, so increase width in tanh map. 55 constexpr float kWidthPrior1 = 2.f * kWidthPrior0; 56 57 // Average LRT feature: use larger width in tanh map for pause regions. 58 float width_prior = model.lrt < prior_model.lrt ? kWidthPrior1 : kWidthPrior0; 59 60 // Compute indicator function: sigmoid map. 61 float indicator0 = 62 0.5f * (tanh(width_prior * (model.lrt - prior_model.lrt)) + 1.f); 63 64 // Spectral flatness feature: use larger width in tanh map for pause regions. 65 width_prior = model.spectral_flatness > prior_model.flatness_threshold 66 ? kWidthPrior1 67 : kWidthPrior0; 68 69 // Compute indicator function: sigmoid map. 70 float indicator1 = 71 0.5f * (tanh(1.f * width_prior * 72 (prior_model.flatness_threshold - model.spectral_flatness)) + 73 1.f); 74 75 // For template spectrum-difference : use larger width in tanh map for pause 76 // regions. 77 width_prior = model.spectral_diff < prior_model.template_diff_threshold 78 ? kWidthPrior1 79 : kWidthPrior0; 80 81 // Compute indicator function: sigmoid map. 82 float indicator2 = 83 0.5f * (tanh(width_prior * (model.spectral_diff - 84 prior_model.template_diff_threshold)) + 85 1.f); 86 87 // Combine the indicator function with the feature weights. 88 float ind_prior = prior_model.lrt_weighting * indicator0 + 89 prior_model.flatness_weighting * indicator1 + 90 prior_model.difference_weighting * indicator2; 91 92 // Compute the prior probability. 93 prior_speech_prob_ += 0.1f * (ind_prior - prior_speech_prob_); 94 95 // Make sure probabilities are within range: keep floor to 0.01. 96 prior_speech_prob_ = std::max(std::min(prior_speech_prob_, 1.f), 0.01f); 97 98 // Final speech probability: combine prior model with LR factor:. 99 float gain_prior = 100 (1.f - prior_speech_prob_) / (prior_speech_prob_ + 0.0001f); 101 102 std::array<float, kFftSizeBy2Plus1> inv_lrt; 103 ExpApproximationSignFlip(model.avg_log_lrt, inv_lrt); 104 for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 105 speech_probability_[i] = 1.f / (1.f + gain_prior * inv_lrt[i]); 106 } 107 } 108 109 } // namespace webrtc