vad_wrapper.h (3007B)
1 /* 2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_ 12 #define MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_ 13 14 #include <memory> 15 #include <vector> 16 17 #include "api/audio/audio_view.h" 18 #include "common_audio/resampler/include/push_resampler.h" 19 #include "modules/audio_processing/agc2/cpu_features.h" 20 21 namespace webrtc { 22 23 // Wraps a single-channel Voice Activity Detector (VAD) which is used to analyze 24 // the first channel of the input audio frames. Takes care of resampling the 25 // input frames to match the sample rate of the wrapped VAD and periodically 26 // resets the VAD. 27 class VoiceActivityDetectorWrapper { 28 public: 29 // Single channel VAD interface. 30 class MonoVad { 31 public: 32 virtual ~MonoVad() = default; 33 // Returns the sample rate (Hz) required for the input frames analyzed by 34 // `ComputeProbability`. 35 virtual int SampleRateHz() const = 0; 36 // Resets the internal state. 37 virtual void Reset() = 0; 38 // Analyzes an audio frame and returns the speech probability. 39 virtual float Analyze(MonoView<const float> frame) = 0; 40 }; 41 42 // Ctor. Uses `cpu_features` to instantiate the default VAD. 43 VoiceActivityDetectorWrapper(const AvailableCpuFeatures& cpu_features, 44 int sample_rate_hz); 45 46 // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call 47 // `MonoVad::Reset()`; it must be equal to or greater than the duration of two 48 // frames. Uses `cpu_features` to instantiate the default VAD. 49 VoiceActivityDetectorWrapper(int vad_reset_period_ms, 50 const AvailableCpuFeatures& cpu_features, 51 int sample_rate_hz); 52 // Ctor. Uses a custom `vad`. 53 VoiceActivityDetectorWrapper(int vad_reset_period_ms, 54 std::unique_ptr<MonoVad> vad, 55 int sample_rate_hz); 56 57 VoiceActivityDetectorWrapper(const VoiceActivityDetectorWrapper&) = delete; 58 VoiceActivityDetectorWrapper& operator=(const VoiceActivityDetectorWrapper&) = 59 delete; 60 ~VoiceActivityDetectorWrapper(); 61 62 // Analyzes the first channel of `frame` and returns the speech probability. 63 // `frame` must be a 10 ms frame with the sample rate specified in the last 64 // `Initialize()` call. 65 float Analyze(DeinterleavedView<const float> frame); 66 67 private: 68 const int vad_reset_period_frames_; 69 const int frame_size_; 70 int time_to_vad_reset_; 71 std::unique_ptr<MonoVad> vad_; 72 std::vector<float> resampled_buffer_; 73 PushResampler<float> resampler_; 74 }; 75 76 } // namespace webrtc 77 78 #endif // MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_