tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

speech_probability_estimator.cc (4204B)


      1 /*
      2 *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/ns/speech_probability_estimator.h"
     12 
     13 #include <algorithm>
     14 #include <array>
     15 #include <cmath>
     16 #include <cstddef>
     17 #include <cstdint>
     18 
     19 #include "api/array_view.h"
     20 #include "modules/audio_processing/ns/fast_math.h"
     21 #include "modules/audio_processing/ns/ns_common.h"
     22 #include "modules/audio_processing/ns/prior_signal_model.h"
     23 #include "modules/audio_processing/ns/signal_model.h"
     24 
     25 namespace webrtc {
     26 
     27 SpeechProbabilityEstimator::SpeechProbabilityEstimator() {
     28  speech_probability_.fill(0.f);
     29 }
     30 
     31 void SpeechProbabilityEstimator::Update(
     32    int32_t num_analyzed_frames,
     33    ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
     34    ArrayView<const float, kFftSizeBy2Plus1> post_snr,
     35    ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
     36    ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
     37    float signal_spectral_sum,
     38    float signal_energy) {
     39  // Update models.
     40  if (num_analyzed_frames < kLongStartupPhaseBlocks) {
     41    signal_model_estimator_.AdjustNormalization(num_analyzed_frames,
     42                                                signal_energy);
     43  }
     44  signal_model_estimator_.Update(prior_snr, post_snr,
     45                                 conservative_noise_spectrum, signal_spectrum,
     46                                 signal_spectral_sum, signal_energy);
     47 
     48  const SignalModel& model = signal_model_estimator_.get_model();
     49  const PriorSignalModel& prior_model =
     50      signal_model_estimator_.get_prior_model();
     51 
     52  // Width parameter in sigmoid map for prior model.
     53  constexpr float kWidthPrior0 = 4.f;
     54  // Width for pause region: lower range, so increase width in tanh map.
     55  constexpr float kWidthPrior1 = 2.f * kWidthPrior0;
     56 
     57  // Average LRT feature: use larger width in tanh map for pause regions.
     58  float width_prior = model.lrt < prior_model.lrt ? kWidthPrior1 : kWidthPrior0;
     59 
     60  // Compute indicator function: sigmoid map.
     61  float indicator0 =
     62      0.5f * (tanh(width_prior * (model.lrt - prior_model.lrt)) + 1.f);
     63 
     64  // Spectral flatness feature: use larger width in tanh map for pause regions.
     65  width_prior = model.spectral_flatness > prior_model.flatness_threshold
     66                    ? kWidthPrior1
     67                    : kWidthPrior0;
     68 
     69  // Compute indicator function: sigmoid map.
     70  float indicator1 =
     71      0.5f * (tanh(1.f * width_prior *
     72                   (prior_model.flatness_threshold - model.spectral_flatness)) +
     73              1.f);
     74 
     75  // For template spectrum-difference : use larger width in tanh map for pause
     76  // regions.
     77  width_prior = model.spectral_diff < prior_model.template_diff_threshold
     78                    ? kWidthPrior1
     79                    : kWidthPrior0;
     80 
     81  // Compute indicator function: sigmoid map.
     82  float indicator2 =
     83      0.5f * (tanh(width_prior * (model.spectral_diff -
     84                                  prior_model.template_diff_threshold)) +
     85              1.f);
     86 
     87  // Combine the indicator function with the feature weights.
     88  float ind_prior = prior_model.lrt_weighting * indicator0 +
     89                    prior_model.flatness_weighting * indicator1 +
     90                    prior_model.difference_weighting * indicator2;
     91 
     92  // Compute the prior probability.
     93  prior_speech_prob_ += 0.1f * (ind_prior - prior_speech_prob_);
     94 
     95  // Make sure probabilities are within range: keep floor to 0.01.
     96  prior_speech_prob_ = std::max(std::min(prior_speech_prob_, 1.f), 0.01f);
     97 
     98  // Final speech probability: combine prior model with LR factor:.
     99  float gain_prior =
    100      (1.f - prior_speech_prob_) / (prior_speech_prob_ + 0.0001f);
    101 
    102  std::array<float, kFftSizeBy2Plus1> inv_lrt;
    103  ExpApproximationSignFlip(model.avg_log_lrt, inv_lrt);
    104  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
    105    speech_probability_[i] = 1.f / (1.f + gain_prior * inv_lrt[i]);
    106  }
    107 }
    108 
    109 }  // namespace webrtc