tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

noise_estimator.cc (8272B)


      1 /*
      2 *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/ns/noise_estimator.h"
     12 
     13 #include <algorithm>
     14 #include <array>
     15 #include <cstddef>
     16 #include <cstdint>
     17 #include <numbers>
     18 
     19 #include "api/array_view.h"
     20 #include "modules/audio_processing/ns/fast_math.h"
     21 #include "modules/audio_processing/ns/ns_common.h"
     22 #include "modules/audio_processing/ns/suppression_params.h"
     23 #include "rtc_base/checks.h"
     24 
     25 namespace webrtc {
     26 
     namespace {

     using std::numbers::ln10_v;

     // Lookup table of natural logarithms: for 5 <= i <= 128, log_table[i] holds
     // ln(i) written to six decimal places (entry 10 uses the ln10_v<float>
     // constant instead of a literal).
     //
     // Entries 0 through 4 are stored as 0.f rather than as their logarithms.
     // The only reader in this file, NoiseEstimator::PreUpdate(), starts indexing
     // at band 5, so those placeholders are never read there.
     //
     // The table has 129 entries and PreUpdate() indexes it with
     // i < kFftSizeBy2Plus1, so kFftSizeBy2Plus1 is presumably 129 -- confirm
     // against ns_common.h before changing either value.
     // clang-format off
     constexpr std::array<float, 129> log_table = {
         0.f,       0.f,       0.f,       0.f,       0.f,       1.609438f, 1.791759f,
         1.945910f, 2.079442f, 2.197225f, ln10_v<float>, 2.397895f, 2.484907f,
         2.564949f,
         2.639057f, 2.708050f, 2.772589f, 2.833213f, 2.890372f, 2.944439f, 2.995732f,
         3.044522f, 3.091043f, 3.135494f, 3.178054f, 3.218876f, 3.258097f, 3.295837f,
         3.332205f, 3.367296f, 3.401197f, 3.433987f, 3.465736f, 3.496507f, 3.526361f,
         3.555348f, 3.583519f, 3.610918f, 3.637586f, 3.663562f, 3.688879f, 3.713572f,
         3.737669f, 3.761200f, 3.784190f, 3.806663f, 3.828641f, 3.850147f, 3.871201f,
         3.891820f, 3.912023f, 3.931826f, 3.951244f, 3.970292f, 3.988984f, 4.007333f,
         4.025352f, 4.043051f, 4.060443f, 4.077538f, 4.094345f, 4.110874f, 4.127134f,
         4.143135f, 4.158883f, 4.174387f, 4.189655f, 4.204693f, 4.219508f, 4.234107f,
         4.248495f, 4.262680f, 4.276666f, 4.290460f, 4.304065f, 4.317488f, 4.330733f,
         4.343805f, 4.356709f, 4.369448f, 4.382027f, 4.394449f, 4.406719f, 4.418841f,
         4.430817f, 4.442651f, 4.454347f, 4.465908f, 4.477337f, 4.488636f, 4.499810f,
         4.510859f, 4.521789f, 4.532599f, 4.543295f, 4.553877f, 4.564348f, 4.574711f,
         4.584968f, 4.595119f, 4.605170f, 4.615121f, 4.624973f, 4.634729f, 4.644391f,
         4.653960f, 4.663439f, 4.672829f, 4.682131f, 4.691348f, 4.700480f, 4.709530f,
         4.718499f, 4.727388f, 4.736198f, 4.744932f, 4.753591f, 4.762174f, 4.770685f,
         4.779124f, 4.787492f, 4.795791f, 4.804021f, 4.812184f, 4.820282f, 4.828314f,
         4.836282f, 4.844187f, 4.852030f};
     // clang-format on

     }  // namespace
     57 
     58 NoiseEstimator::NoiseEstimator(const SuppressionParams& suppression_params)
     59    : suppression_params_(suppression_params) {
     60  noise_spectrum_.fill(0.f);
     61  prev_noise_spectrum_.fill(0.f);
     62  conservative_noise_spectrum_.fill(0.f);
     63  parametric_noise_spectrum_.fill(0.f);
     64 }
     65 
     66 void NoiseEstimator::PrepareAnalysis() {
     67  std::copy(noise_spectrum_.begin(), noise_spectrum_.end(),
     68            prev_noise_spectrum_.begin());
     69 }
     70 
     71 void NoiseEstimator::PreUpdate(
     72    int32_t num_analyzed_frames,
     73    ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
     74    float signal_spectral_sum) {
     75  quantile_noise_estimator_.Estimate(signal_spectrum, noise_spectrum_);
     76 
     77  if (num_analyzed_frames < kShortStartupPhaseBlocks) {
     78    // Compute simplified noise model during startup.
     79    const size_t kStartBand = 5;
     80    float sum_log_i_log_magn = 0.f;
     81    float sum_log_i = 0.f;
     82    float sum_log_i_square = 0.f;
     83    float sum_log_magn = 0.f;
     84    for (size_t i = kStartBand; i < kFftSizeBy2Plus1; ++i) {
     85      float log_i = log_table[i];
     86      sum_log_i += log_i;
     87      sum_log_i_square += log_i * log_i;
     88      float log_signal = LogApproximation(signal_spectrum[i]);
     89      sum_log_magn += log_signal;
     90      sum_log_i_log_magn += log_i * log_signal;
     91    }
     92 
     93    // Estimate the parameter for the level of the white noise.
     94    constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1;
     95    white_noise_level_ += signal_spectral_sum * kOneByFftSizeBy2Plus1 *
     96                          suppression_params_.over_subtraction_factor;
     97 
     98    // Estimate pink noise parameters.
     99    float denom = sum_log_i_square * (kFftSizeBy2Plus1 - kStartBand) -
    100                  sum_log_i * sum_log_i;
    101    float num =
    102        sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn;
    103    RTC_DCHECK_NE(denom, 0.f);
    104    float pink_noise_adjustment = num / denom;
    105 
    106    // Constrain the estimated spectrum to be positive.
    107    pink_noise_adjustment = std::max(pink_noise_adjustment, 0.f);
    108    pink_noise_numerator_ += pink_noise_adjustment;
    109    num = sum_log_i * sum_log_magn -
    110          (kFftSizeBy2Plus1 - kStartBand) * sum_log_i_log_magn;
    111    RTC_DCHECK_NE(denom, 0.f);
    112    pink_noise_adjustment = num / denom;
    113 
    114    // Constrain the pink noise power to be in the interval [0, 1].
    115    pink_noise_adjustment = std::max(std::min(pink_noise_adjustment, 1.f), 0.f);
    116 
    117    pink_noise_exp_ += pink_noise_adjustment;
    118 
    119    const float one_by_num_analyzed_frames_plus_1 =
    120        1.f / (num_analyzed_frames + 1.f);
    121 
    122    // Calculate the frequency-independent parts of parametric noise estimate.
    123    float parametric_exp = 0.f;
    124    float parametric_num = 0.f;
    125    if (pink_noise_exp_ > 0.f) {
    126      // Use pink noise estimate.
    127      parametric_num = ExpApproximation(pink_noise_numerator_ *
    128                                        one_by_num_analyzed_frames_plus_1);
    129      parametric_num *= num_analyzed_frames + 1.f;
    130      parametric_exp = pink_noise_exp_ * one_by_num_analyzed_frames_plus_1;
    131    }
    132 
    133    constexpr float kOneByShortStartupPhaseBlocks =
    134        1.f / kShortStartupPhaseBlocks;
    135    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
    136      // Estimate the background noise using the white and pink noise
    137      // parameters.
    138      if (pink_noise_exp_ == 0.f) {
    139        // Use white noise estimate.
    140        parametric_noise_spectrum_[i] = white_noise_level_;
    141      } else {
    142        // Use pink noise estimate.
    143        float use_band = i < kStartBand ? kStartBand : i;
    144        float parametric_denom = PowApproximation(use_band, parametric_exp);
    145        RTC_DCHECK_NE(parametric_denom, 0.f);
    146        parametric_noise_spectrum_[i] = parametric_num / parametric_denom;
    147      }
    148    }
    149 
    150    // Weight quantile noise with modeled noise.
    151    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
    152      noise_spectrum_[i] *= num_analyzed_frames;
    153      float tmp = parametric_noise_spectrum_[i] *
    154                  (kShortStartupPhaseBlocks - num_analyzed_frames);
    155      noise_spectrum_[i] += tmp * one_by_num_analyzed_frames_plus_1;
    156      noise_spectrum_[i] *= kOneByShortStartupPhaseBlocks;
    157    }
    158  }
    159 }
    160 
    161 void NoiseEstimator::PostUpdate(
    162    ArrayView<const float> speech_probability,
    163    ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
    164  // Time-avg parameter for noise_spectrum update.
    165  constexpr float kNoiseUpdate = 0.9f;
    166 
    167  float gamma = kNoiseUpdate;
    168  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
    169    const float prob_speech = speech_probability[i];
    170    const float prob_non_speech = 1.f - prob_speech;
    171 
    172    // Temporary noise update used for speech frames if update value is less
    173    // than previous.
    174    float noise_update_tmp =
    175        gamma * prev_noise_spectrum_[i] +
    176        (1.f - gamma) * (prob_non_speech * signal_spectrum[i] +
    177                         prob_speech * prev_noise_spectrum_[i]);
    178 
    179    // Time-constant based on speech/noise_spectrum state.
    180    float gamma_old = gamma;
    181 
    182    // Increase gamma for frame likely to be seech.
    183    constexpr float kProbRange = .2f;
    184    gamma = prob_speech > kProbRange ? .99f : kNoiseUpdate;
    185 
    186    // Conservative noise_spectrum update.
    187    if (prob_speech < kProbRange) {
    188      conservative_noise_spectrum_[i] +=
    189          0.05f * (signal_spectrum[i] - conservative_noise_spectrum_[i]);
    190    }
    191 
    192    // Noise_spectrum update.
    193    if (gamma == gamma_old) {
    194      noise_spectrum_[i] = noise_update_tmp;
    195    } else {
    196      noise_spectrum_[i] =
    197          gamma * prev_noise_spectrum_[i] +
    198          (1.f - gamma) * (prob_non_speech * signal_spectrum[i] +
    199                           prob_speech * prev_noise_spectrum_[i]);
    200      // Allow for noise_spectrum update downwards: If noise_spectrum update
    201      // decreases the noise_spectrum, it is safe, so allow it to happen.
    202      noise_spectrum_[i] = std::min(noise_spectrum_[i], noise_update_tmp);
    203    }
    204  }
    205 }
    206 
    207 }  // namespace webrtc