tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

vad_wrapper.h (3007B)


      1 /*
      2 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
     12 #define MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
     13 
     14 #include <memory>
     15 #include <vector>
     16 
     17 #include "api/audio/audio_view.h"
     18 #include "common_audio/resampler/include/push_resampler.h"
     19 #include "modules/audio_processing/agc2/cpu_features.h"
     20 
     21 namespace webrtc {
     22 
     23 // Wraps a single-channel Voice Activity Detector (VAD) which is used to analyze
     24 // the first channel of the input audio frames. Takes care of resampling the
     25 // input frames to match the sample rate of the wrapped VAD and periodically
     26 // resets the VAD.
     27 class VoiceActivityDetectorWrapper {
     28 public:
     29  // Single channel VAD interface implemented by the wrapped detector.
     30  class MonoVad {
     31   public:
     32    virtual ~MonoVad() = default;
     33    // Returns the sample rate (Hz) required for the input frames analyzed by
     34    // `Analyze()`.
     35    virtual int SampleRateHz() const = 0;
     36    // Resets the internal state.
     37    virtual void Reset() = 0;
     38    // Analyzes a single-channel audio frame and returns the speech probability.
     39    virtual float Analyze(MonoView<const float> frame) = 0;
     40  };
     41 
     42  // Ctor. Uses `cpu_features` to instantiate the default VAD.
     43  VoiceActivityDetectorWrapper(const AvailableCpuFeatures& cpu_features,
     44                               int sample_rate_hz);  // NOTE(review): no reset period argument; presumably a default is used - confirm in the .cc file.
     45 
     46  // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
     47  // `MonoVad::Reset()`; it must be equal to or greater than the duration of two
     48  // frames. Uses `cpu_features` to instantiate the default VAD.
     49  VoiceActivityDetectorWrapper(int vad_reset_period_ms,
     50                               const AvailableCpuFeatures& cpu_features,
     51                               int sample_rate_hz);
     52  // Ctor. Uses a custom `vad`, of which the wrapper takes ownership.
     53  VoiceActivityDetectorWrapper(int vad_reset_period_ms,
     54                               std::unique_ptr<MonoVad> vad,
     55                               int sample_rate_hz);
     56 
     57  VoiceActivityDetectorWrapper(const VoiceActivityDetectorWrapper&) = delete;  // Not copyable.
     58  VoiceActivityDetectorWrapper& operator=(const VoiceActivityDetectorWrapper&) =
     59      delete;
     60  ~VoiceActivityDetectorWrapper();
     61 
     62  // Analyzes the first channel of `frame` and returns the speech probability.
     63  // `frame` must be a 10 ms frame with the sample rate passed to the
     64  // constructor (`sample_rate_hz`).
     65  float Analyze(DeinterleavedView<const float> frame);
     66 
     67 private:
     68  const int vad_reset_period_frames_;  // VAD reset period in frames; presumably derived from `vad_reset_period_ms` - confirm.
     69  const int frame_size_;  // NOTE(review): presumably the number of samples in one 10 ms input frame - confirm.
     70  int time_to_vad_reset_;  // NOTE(review): presumably the number of frames left before the next `MonoVad::Reset()` - confirm.
     71  std::unique_ptr<MonoVad> vad_;  // The wrapped single-channel VAD, owned by this object.
     72  std::vector<float> resampled_buffer_;  // NOTE(review): presumably holds the input resampled to `vad_->SampleRateHz()` - confirm.
     73  PushResampler<float> resampler_;  // NOTE(review): presumably converts input frames to the wrapped VAD's sample rate - confirm.
     74 };
     75 
     76 }  // namespace webrtc
     77 
     78 #endif  // MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_