tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

features_extraction.cc (3777B)


      1 /*
      2 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/agc2/rnn_vad/features_extraction.h"
     12 
     13 #include <array>
     14 
     15 #include "api/array_view.h"
     16 #include "modules/audio_processing/agc2/biquad_filter.h"
     17 #include "modules/audio_processing/agc2/cpu_features.h"
     18 #include "modules/audio_processing/agc2/rnn_vad/common.h"
     19 #include "modules/audio_processing/agc2/rnn_vad/lp_residual.h"
     20 #include "rtc_base/checks.h"
     21 
     22 namespace webrtc {
     23 namespace rnn_vad {
     24 namespace {
     25 
     26 // Computed as `scipy.signal.butter(N=2, Wn=60/24000, btype='highpass')`.
     27 constexpr BiQuadFilter::Config kHpfConfig24k{
     28    .b = {0.99446179f, -1.98892358f, 0.99446179f},
     29    .a = {-1.98889291f, 0.98895425f}};
     30 
     31 }  // namespace
     32 
     33 FeaturesExtractor::FeaturesExtractor(const AvailableCpuFeatures& cpu_features)
     34    : use_high_pass_filter_(false),
     35      hpf_(kHpfConfig24k),
     36      pitch_buf_24kHz_(),
     37      pitch_buf_24kHz_view_(pitch_buf_24kHz_.GetBufferView()),
     38      lp_residual_(kBufSize24kHz),
     39      lp_residual_view_(lp_residual_.data(), kBufSize24kHz),
     40      pitch_estimator_(cpu_features),
     41      reference_frame_view_(pitch_buf_24kHz_.GetMostRecentValuesView()) {
     42  RTC_DCHECK_EQ(kBufSize24kHz, lp_residual_.size());
     43  Reset();
     44 }
     45 
     46 FeaturesExtractor::~FeaturesExtractor() = default;
     47 
     48 void FeaturesExtractor::Reset() {
     49  pitch_buf_24kHz_.Reset();
     50  spectral_features_extractor_.Reset();
     51  if (use_high_pass_filter_) {
     52    hpf_.Reset();
     53  }
     54 }
     55 
     56 bool FeaturesExtractor::CheckSilenceComputeFeatures(
     57    ArrayView<const float, kFrameSize10ms24kHz> samples,
     58    ArrayView<float, kFeatureVectorSize> feature_vector) {
     59  // Pre-processing.
     60  if (use_high_pass_filter_) {
     61    std::array<float, kFrameSize10ms24kHz> samples_filtered;
     62    hpf_.Process(samples, samples_filtered);
     63    // Feed buffer with the pre-processed version of `samples`.
     64    pitch_buf_24kHz_.Push(samples_filtered);
     65  } else {
     66    // Feed buffer with `samples`.
     67    pitch_buf_24kHz_.Push(samples);
     68  }
     69  // Extract the LP residual.
     70  float lpc_coeffs[kNumLpcCoefficients];
     71  ComputeAndPostProcessLpcCoefficients(pitch_buf_24kHz_view_, lpc_coeffs);
     72  ComputeLpResidual(lpc_coeffs, pitch_buf_24kHz_view_, lp_residual_view_);
     73  // Estimate pitch on the LP-residual and write the normalized pitch period
     74  // into the output vector (normalization based on training data stats).
     75  pitch_period_48kHz_ = pitch_estimator_.Estimate(lp_residual_view_);
     76  feature_vector[kFeatureVectorSize - 2] = 0.01f * (pitch_period_48kHz_ - 300);
     77  // Extract lagged frames (according to the estimated pitch period).
     78  RTC_DCHECK_LE(pitch_period_48kHz_ / 2, kMaxPitch24kHz);
     79  auto lagged_frame = pitch_buf_24kHz_view_.subview(
     80      kMaxPitch24kHz - pitch_period_48kHz_ / 2, kFrameSize20ms24kHz);
     81  // Analyze reference and lagged frames checking if silence has been detected
     82  // and write the feature vector.
     83  return spectral_features_extractor_.CheckSilenceComputeFeatures(
     84      reference_frame_view_, {lagged_frame.data(), kFrameSize20ms24kHz},
     85      {feature_vector.data() + kNumLowerBands, kNumBands - kNumLowerBands},
     86      {feature_vector.data(), kNumLowerBands},
     87      {feature_vector.data() + kNumBands, kNumLowerBands},
     88      {feature_vector.data() + kNumBands + kNumLowerBands, kNumLowerBands},
     89      {feature_vector.data() + kNumBands + 2 * kNumLowerBands, kNumLowerBands},
     90      &feature_vector[kFeatureVectorSize - 1]);
     91 }
     92 
     93 }  // namespace rnn_vad
     94 }  // namespace webrtc