tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

pitch_based_vad.cc (4322B)


      1 /*
      2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/vad/pitch_based_vad.h"
     12 
     13 #include <cstring>
     14 
     15 #include "modules/audio_processing/vad/common.h"
     16 #include "modules/audio_processing/vad/gmm.h"
     17 #include "modules/audio_processing/vad/noise_gmm_tables.h"
     18 #include "modules/audio_processing/vad/vad_circular_buffer.h"
     19 #include "modules/audio_processing/vad/voice_gmm_tables.h"
     20 
     21 namespace webrtc {
     22 
     23 static_assert(kNoiseGmmDim == kVoiceGmmDim,
     24              "noise and voice gmm dimension not equal");
     25 
     26 // These values should match MATLAB counterparts for unit-tests to pass.
     27 static const int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.
     28 static const double kInitialPriorProbability = 0.3;
     29 static const int kTransientWidthThreshold = 7;
     30 static const double kLowProbabilityThreshold = 0.2;
     31 
     32 static double LimitProbability(double p) {
     33  const double kLimHigh = 0.99;
     34  const double kLimLow = 0.01;
     35 
     36  if (p > kLimHigh)
     37    p = kLimHigh;
     38  else if (p < kLimLow)
     39    p = kLimLow;
     40  return p;
     41 }
     42 
     43 PitchBasedVad::PitchBasedVad()
     44    : p_prior_(kInitialPriorProbability),
     45      circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
     46  // Setup noise GMM.
     47  noise_gmm_.dimension = kNoiseGmmDim;
     48  noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
     49  noise_gmm_.weight = kNoiseGmmWeights;
     50  noise_gmm_.mean = &kNoiseGmmMean[0][0];
     51  noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
     52 
     53  // Setup voice GMM.
     54  voice_gmm_.dimension = kVoiceGmmDim;
     55  voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
     56  voice_gmm_.weight = kVoiceGmmWeights;
     57  voice_gmm_.mean = &kVoiceGmmMean[0][0];
     58  voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
     59 }
     60 
     61 PitchBasedVad::~PitchBasedVad() {}
     62 
     63 int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
     64                                      double* p_combined) {
     65  double p;
     66  double gmm_features[3];
     67  double pdf_features_given_voice;
     68  double pdf_features_given_noise;
     69  // These limits are the same in matlab implementation 'VoicingProbGMM().'
     70  const double kLimLowLogPitchGain = -2.0;
     71  const double kLimHighLogPitchGain = -0.9;
     72  const double kLimLowSpectralPeak = 200;
     73  const double kLimHighSpectralPeak = 2000;
     74  const double kEps = 1e-12;
     75  for (size_t n = 0; n < features.num_frames; n++) {
     76    gmm_features[0] = features.log_pitch_gain[n];
     77    gmm_features[1] = features.spectral_peak[n];
     78    gmm_features[2] = features.pitch_lag_hz[n];
     79 
     80    pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
     81    pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
     82 
     83    if (features.spectral_peak[n] < kLimLowSpectralPeak ||
     84        features.spectral_peak[n] > kLimHighSpectralPeak ||
     85        features.log_pitch_gain[n] < kLimLowLogPitchGain) {
     86      pdf_features_given_voice = kEps * pdf_features_given_noise;
     87    } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
     88      pdf_features_given_noise = kEps * pdf_features_given_voice;
     89    }
     90 
     91    p = p_prior_ * pdf_features_given_voice /
     92        (pdf_features_given_voice * p_prior_ +
     93         pdf_features_given_noise * (1 - p_prior_));
     94 
     95    p = LimitProbability(p);
     96 
     97    // Combine pitch-based probability with standalone probability, before
     98    // updating prior probabilities.
     99    double prod_active = p * p_combined[n];
    100    double prod_inactive = (1 - p) * (1 - p_combined[n]);
    101    p_combined[n] = prod_active / (prod_active + prod_inactive);
    102 
    103    if (UpdatePrior(p_combined[n]) < 0)
    104      return -1;
    105    // Limit prior probability. With a zero prior probability the posterior
    106    // probability is always zero.
    107    p_prior_ = LimitProbability(p_prior_);
    108  }
    109  return 0;
    110 }
    111 
    112 int PitchBasedVad::UpdatePrior(double p) {
    113  circular_buffer_->Insert(p);
    114  if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
    115                                        kLowProbabilityThreshold) < 0)
    116    return -1;
    117  p_prior_ = circular_buffer_->Mean();
    118  return 0;
    119 }
    120 
    121 }  // namespace webrtc