tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

voice_activity_detector.cc (3336B)


      1 /*
      2 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/vad/voice_activity_detector.h"
     12 
     13 #include <algorithm>
     14 #include <cstddef>
     15 #include <cstdint>
     16 
     17 #include "modules/audio_processing/vad/common.h"
     18 #include "modules/audio_processing/vad/standalone_vad.h"
     19 #include "rtc_base/checks.h"
     20 
     21 namespace webrtc {
     22 namespace {
     23 
     24 const size_t kNumChannels = 1;
     25 
     26 const double kDefaultVoiceValue = 1.0;
     27 const double kNeutralProbability = 0.5;
     28 const double kLowProbability = 0.01;
     29 
     30 }  // namespace
     31 
     32 VoiceActivityDetector::VoiceActivityDetector()
     33    : last_voice_probability_(kDefaultVoiceValue),
     34      standalone_vad_(StandaloneVad::Create()) {}
     35 
// Compiler-generated destruction of all members.
// NOTE(review): presumably defined out-of-line so that member types such as
// StandaloneVad are complete at this point - confirm against the header.
VoiceActivityDetector::~VoiceActivityDetector() = default;
     37 
     38 // Because ISAC has a different chunk length, it updates
     39 // `chunkwise_voice_probabilities_` and `chunkwise_rms_` when there is new data.
     40 // Otherwise it clears them.
     41 void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
     42                                         size_t length,
     43                                         int sample_rate_hz) {
     44  RTC_DCHECK_EQ(length, sample_rate_hz / 100);
     45  // TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio.
     46  // Resample to the required rate.
     47  const int16_t* resampled_ptr = audio;
     48  if (sample_rate_hz != kSampleRateHz) {
     49    RTC_CHECK_EQ(
     50        resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
     51        0);
     52    resampler_.Push(audio, length, resampled_, kLength10Ms, length);
     53    resampled_ptr = resampled_;
     54  }
     55  RTC_DCHECK_EQ(length, kLength10Ms);
     56 
     57  // Each chunk needs to be passed into `standalone_vad_`, because internally it
     58  // buffers the audio and processes it all at once when GetActivity() is
     59  // called.
     60  RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
     61 
     62  audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
     63 
     64  chunkwise_voice_probabilities_.resize(features_.num_frames);
     65  chunkwise_rms_.resize(features_.num_frames);
     66  std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
     67            chunkwise_rms_.begin());
     68  if (features_.num_frames > 0) {
     69    if (features_.silence) {
     70      // The other features are invalid, so set the voice probabilities to an
     71      // arbitrary low value.
     72      std::fill(chunkwise_voice_probabilities_.begin(),
     73                chunkwise_voice_probabilities_.end(), kLowProbability);
     74    } else {
     75      std::fill(chunkwise_voice_probabilities_.begin(),
     76                chunkwise_voice_probabilities_.end(), kNeutralProbability);
     77      RTC_CHECK_GE(
     78          standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
     79                                       chunkwise_voice_probabilities_.size()),
     80          0);
     81      RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
     82                       features_, &chunkwise_voice_probabilities_[0]),
     83                   0);
     84    }
     85    last_voice_probability_ = chunkwise_voice_probabilities_.back();
     86  }
     87 }
     88 
     89 }  // namespace webrtc