noise_estimator.cc (8272B)
1 /* 2 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/ns/noise_estimator.h" 12 13 #include <algorithm> 14 #include <array> 15 #include <cstddef> 16 #include <cstdint> 17 #include <numbers> 18 19 #include "api/array_view.h" 20 #include "modules/audio_processing/ns/fast_math.h" 21 #include "modules/audio_processing/ns/ns_common.h" 22 #include "modules/audio_processing/ns/suppression_params.h" 23 #include "rtc_base/checks.h" 24 25 namespace webrtc { 26 27 namespace { 28 29 using std::numbers::ln10_v; 30 31 // Log(i). 32 // clang-format off 33 constexpr std::array<float, 129> log_table = { 34 0.f, 0.f, 0.f, 0.f, 0.f, 1.609438f, 1.791759f, 35 1.945910f, 2.079442f, 2.197225f, ln10_v<float>, 2.397895f, 2.484907f, 36 2.564949f, 37 2.639057f, 2.708050f, 2.772589f, 2.833213f, 2.890372f, 2.944439f, 2.995732f, 38 3.044522f, 3.091043f, 3.135494f, 3.178054f, 3.218876f, 3.258097f, 3.295837f, 39 3.332205f, 3.367296f, 3.401197f, 3.433987f, 3.465736f, 3.496507f, 3.526361f, 40 3.555348f, 3.583519f, 3.610918f, 3.637586f, 3.663562f, 3.688879f, 3.713572f, 41 3.737669f, 3.761200f, 3.784190f, 3.806663f, 3.828641f, 3.850147f, 3.871201f, 42 3.891820f, 3.912023f, 3.931826f, 3.951244f, 3.970292f, 3.988984f, 4.007333f, 43 4.025352f, 4.043051f, 4.060443f, 4.077538f, 4.094345f, 4.110874f, 4.127134f, 44 4.143135f, 4.158883f, 4.174387f, 4.189655f, 4.204693f, 4.219508f, 4.234107f, 45 4.248495f, 4.262680f, 4.276666f, 4.290460f, 4.304065f, 4.317488f, 4.330733f, 46 4.343805f, 4.356709f, 4.369448f, 4.382027f, 4.394449f, 4.406719f, 4.418841f, 47 4.430817f, 4.442651f, 4.454347f, 4.465908f, 4.477337f, 4.488636f, 4.499810f, 48 4.510859f, 4.521789f, 4.532599f, 4.543295f, 4.553877f, 4.564348f, 4.574711f, 49 4.584968f, 4.595119f, 4.605170f, 4.615121f, 4.624973f, 4.634729f, 4.644391f, 50 4.653960f, 4.663439f, 4.672829f, 4.682131f, 4.691348f, 4.700480f, 4.709530f, 51 4.718499f, 4.727388f, 4.736198f, 4.744932f, 4.753591f, 4.762174f, 4.770685f, 52 4.779124f, 4.787492f, 4.795791f, 4.804021f, 4.812184f, 4.820282f, 4.828314f, 53 4.836282f, 4.844187f, 4.852030f}; 54 // clang-format on 55 56 } // namespace 57 58 NoiseEstimator::NoiseEstimator(const SuppressionParams& suppression_params) 59 : suppression_params_(suppression_params) { 60 noise_spectrum_.fill(0.f); 61 prev_noise_spectrum_.fill(0.f); 62 conservative_noise_spectrum_.fill(0.f); 63 parametric_noise_spectrum_.fill(0.f); 64 } 65 66 void NoiseEstimator::PrepareAnalysis() { 67 std::copy(noise_spectrum_.begin(), noise_spectrum_.end(), 68 prev_noise_spectrum_.begin()); 69 } 70 71 void NoiseEstimator::PreUpdate( 72 int32_t num_analyzed_frames, 73 ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum, 74 float signal_spectral_sum) { 75 quantile_noise_estimator_.Estimate(signal_spectrum, noise_spectrum_); 76 77 if (num_analyzed_frames < kShortStartupPhaseBlocks) { 78 // Compute simplified noise model during startup. 79 const size_t kStartBand = 5; 80 float sum_log_i_log_magn = 0.f; 81 float sum_log_i = 0.f; 82 float sum_log_i_square = 0.f; 83 float sum_log_magn = 0.f; 84 for (size_t i = kStartBand; i < kFftSizeBy2Plus1; ++i) { 85 float log_i = log_table[i]; 86 sum_log_i += log_i; 87 sum_log_i_square += log_i * log_i; 88 float log_signal = LogApproximation(signal_spectrum[i]); 89 sum_log_magn += log_signal; 90 sum_log_i_log_magn += log_i * log_signal; 91 } 92 93 // Estimate the parameter for the level of the white noise. 94 constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1; 95 white_noise_level_ += signal_spectral_sum * kOneByFftSizeBy2Plus1 * 96 suppression_params_.over_subtraction_factor; 97 98 // Estimate pink noise parameters. 99 float denom = sum_log_i_square * (kFftSizeBy2Plus1 - kStartBand) - 100 sum_log_i * sum_log_i; 101 float num = 102 sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn; 103 RTC_DCHECK_NE(denom, 0.f); 104 float pink_noise_adjustment = num / denom; 105 106 // Constrain the estimated spectrum to be positive. 107 pink_noise_adjustment = std::max(pink_noise_adjustment, 0.f); 108 pink_noise_numerator_ += pink_noise_adjustment; 109 num = sum_log_i * sum_log_magn - 110 (kFftSizeBy2Plus1 - kStartBand) * sum_log_i_log_magn; 111 RTC_DCHECK_NE(denom, 0.f); 112 pink_noise_adjustment = num / denom; 113 114 // Constrain the pink noise power to be in the interval [0, 1]. 115 pink_noise_adjustment = std::max(std::min(pink_noise_adjustment, 1.f), 0.f); 116 117 pink_noise_exp_ += pink_noise_adjustment; 118 119 const float one_by_num_analyzed_frames_plus_1 = 120 1.f / (num_analyzed_frames + 1.f); 121 122 // Calculate the frequency-independent parts of parametric noise estimate. 123 float parametric_exp = 0.f; 124 float parametric_num = 0.f; 125 if (pink_noise_exp_ > 0.f) { 126 // Use pink noise estimate. 127 parametric_num = ExpApproximation(pink_noise_numerator_ * 128 one_by_num_analyzed_frames_plus_1); 129 parametric_num *= num_analyzed_frames + 1.f; 130 parametric_exp = pink_noise_exp_ * one_by_num_analyzed_frames_plus_1; 131 } 132 133 constexpr float kOneByShortStartupPhaseBlocks = 134 1.f / kShortStartupPhaseBlocks; 135 for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 136 // Estimate the background noise using the white and pink noise 137 // parameters. 138 if (pink_noise_exp_ == 0.f) { 139 // Use white noise estimate. 140 parametric_noise_spectrum_[i] = white_noise_level_; 141 } else { 142 // Use pink noise estimate. 143 float use_band = i < kStartBand ? kStartBand : i; 144 float parametric_denom = PowApproximation(use_band, parametric_exp); 145 RTC_DCHECK_NE(parametric_denom, 0.f); 146 parametric_noise_spectrum_[i] = parametric_num / parametric_denom; 147 } 148 } 149 150 // Weight quantile noise with modeled noise. 151 for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 152 noise_spectrum_[i] *= num_analyzed_frames; 153 float tmp = parametric_noise_spectrum_[i] * 154 (kShortStartupPhaseBlocks - num_analyzed_frames); 155 noise_spectrum_[i] += tmp * one_by_num_analyzed_frames_plus_1; 156 noise_spectrum_[i] *= kOneByShortStartupPhaseBlocks; 157 } 158 } 159 } 160 161 void NoiseEstimator::PostUpdate( 162 ArrayView<const float> speech_probability, 163 ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) { 164 // Time-avg parameter for noise_spectrum update. 165 constexpr float kNoiseUpdate = 0.9f; 166 167 float gamma = kNoiseUpdate; 168 for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 169 const float prob_speech = speech_probability[i]; 170 const float prob_non_speech = 1.f - prob_speech; 171 172 // Temporary noise update used for speech frames if update value is less 173 // than previous. 174 float noise_update_tmp = 175 gamma * prev_noise_spectrum_[i] + 176 (1.f - gamma) * (prob_non_speech * signal_spectrum[i] + 177 prob_speech * prev_noise_spectrum_[i]); 178 179 // Time-constant based on speech/noise_spectrum state. 180 float gamma_old = gamma; 181 182 // Increase gamma for frame likely to be seech. 183 constexpr float kProbRange = .2f; 184 gamma = prob_speech > kProbRange ? .99f : kNoiseUpdate; 185 186 // Conservative noise_spectrum update. 187 if (prob_speech < kProbRange) { 188 conservative_noise_spectrum_[i] += 189 0.05f * (signal_spectrum[i] - conservative_noise_spectrum_[i]); 190 } 191 192 // Noise_spectrum update. 193 if (gamma == gamma_old) { 194 noise_spectrum_[i] = noise_update_tmp; 195 } else { 196 noise_spectrum_[i] = 197 gamma * prev_noise_spectrum_[i] + 198 (1.f - gamma) * (prob_non_speech * signal_spectrum[i] + 199 prob_speech * prev_noise_spectrum_[i]); 200 // Allow for noise_spectrum update downwards: If noise_spectrum update 201 // decreases the noise_spectrum, it is safe, so allow it to happen. 202 noise_spectrum_[i] = std::min(noise_spectrum_[i], noise_update_tmp); 203 } 204 } 205 } 206 207 } // namespace webrtc