voice_activity_detector.cc (3336B)
1 /* 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/vad/voice_activity_detector.h" 12 13 #include <algorithm> 14 #include <cstddef> 15 #include <cstdint> 16 17 #include "modules/audio_processing/vad/common.h" 18 #include "modules/audio_processing/vad/standalone_vad.h" 19 #include "rtc_base/checks.h" 20 21 namespace webrtc { 22 namespace { 23 24 const size_t kNumChannels = 1; 25 26 const double kDefaultVoiceValue = 1.0; 27 const double kNeutralProbability = 0.5; 28 const double kLowProbability = 0.01; 29 30 } // namespace 31 32 VoiceActivityDetector::VoiceActivityDetector() 33 : last_voice_probability_(kDefaultVoiceValue), 34 standalone_vad_(StandaloneVad::Create()) {} 35 36 VoiceActivityDetector::~VoiceActivityDetector() = default; 37 38 // Because ISAC has a different chunk length, it updates 39 // `chunkwise_voice_probabilities_` and `chunkwise_rms_` when there is new data. 40 // Otherwise it clears them. 41 void VoiceActivityDetector::ProcessChunk(const int16_t* audio, 42 size_t length, 43 int sample_rate_hz) { 44 RTC_DCHECK_EQ(length, sample_rate_hz / 100); 45 // TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio. 46 // Resample to the required rate. 47 const int16_t* resampled_ptr = audio; 48 if (sample_rate_hz != kSampleRateHz) { 49 RTC_CHECK_EQ( 50 resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels), 51 0); 52 resampler_.Push(audio, length, resampled_, kLength10Ms, length); 53 resampled_ptr = resampled_; 54 } 55 RTC_DCHECK_EQ(length, kLength10Ms); 56 57 // Each chunk needs to be passed into `standalone_vad_`, because internally it 58 // buffers the audio and processes it all at once when GetActivity() is 59 // called. 60 RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0); 61 62 audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); 63 64 chunkwise_voice_probabilities_.resize(features_.num_frames); 65 chunkwise_rms_.resize(features_.num_frames); 66 std::copy(features_.rms, features_.rms + chunkwise_rms_.size(), 67 chunkwise_rms_.begin()); 68 if (features_.num_frames > 0) { 69 if (features_.silence) { 70 // The other features are invalid, so set the voice probabilities to an 71 // arbitrary low value. 72 std::fill(chunkwise_voice_probabilities_.begin(), 73 chunkwise_voice_probabilities_.end(), kLowProbability); 74 } else { 75 std::fill(chunkwise_voice_probabilities_.begin(), 76 chunkwise_voice_probabilities_.end(), kNeutralProbability); 77 RTC_CHECK_GE( 78 standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0], 79 chunkwise_voice_probabilities_.size()), 80 0); 81 RTC_CHECK_GE(pitch_based_vad_.VoicingProbability( 82 features_, &chunkwise_voice_probabilities_[0]), 83 0); 84 } 85 last_voice_probability_ = chunkwise_voice_probabilities_.back(); 86 } 87 } 88 89 } // namespace webrtc