tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

spectral_features.cc (9204B)


      1 /*
      2 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/agc2/rnn_vad/spectral_features.h"
     12 
     13 #include <algorithm>
     14 #include <array>
     15 #include <cmath>
     16 #include <limits>
     17 #include <numeric>
     18 
     19 #include "api/array_view.h"
     20 #include "modules/audio_processing/agc2/rnn_vad/common.h"
     21 #include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h"
     22 #include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h"
     23 #include "modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h"
     24 #include "modules/audio_processing/utility/pffft_wrapper.h"
     25 #include "rtc_base/checks.h"
     26 #include "rtc_base/numerics/safe_compare.h"
     27 
     28 namespace webrtc {
     29 namespace rnn_vad {
     30 namespace {
     31 
     32 constexpr float kSilenceThreshold = 0.04f;
     33 
     34 // Computes the new cepstral difference stats and pushes them into the passed
     35 // symmetric matrix buffer.
     36 void UpdateCepstralDifferenceStats(
     37    ArrayView<const float, kNumBands> new_cepstral_coeffs,
     38    const RingBuffer<float, kNumBands, kCepstralCoeffsHistorySize>& ring_buf,
     39    SymmetricMatrixBuffer<float, kCepstralCoeffsHistorySize>* sym_matrix_buf) {
     40  RTC_DCHECK(sym_matrix_buf);
     41  // Compute the new cepstral distance stats.
     42  std::array<float, kCepstralCoeffsHistorySize - 1> distances;
     43  for (int i = 0; i < kCepstralCoeffsHistorySize - 1; ++i) {
     44    const int delay = i + 1;
     45    auto old_cepstral_coeffs = ring_buf.GetArrayView(delay);
     46    distances[i] = 0.f;
     47    for (int k = 0; k < kNumBands; ++k) {
     48      const float c = new_cepstral_coeffs[k] - old_cepstral_coeffs[k];
     49      distances[i] += c * c;
     50    }
     51  }
     52  // Push the new spectral distance stats into the symmetric matrix buffer.
     53  sym_matrix_buf->Push(distances);
     54 }
     55 
     56 // Computes the first half of the Vorbis window.
     57 std::array<float, kFrameSize20ms24kHz / 2> ComputeScaledHalfVorbisWindow(
     58    float scaling = 1.f) {
     59  constexpr int kHalfSize = kFrameSize20ms24kHz / 2;
     60  std::array<float, kHalfSize> half_window{};
     61  for (int i = 0; i < kHalfSize; ++i) {
     62    half_window[i] =
     63        scaling *
     64        std::sin(0.5 * kPi * std::sin(0.5 * kPi * (i + 0.5) / kHalfSize) *
     65                 std::sin(0.5 * kPi * (i + 0.5) / kHalfSize));
     66  }
     67  return half_window;
     68 }
     69 
     70 // Computes the forward FFT on a 20 ms frame to which a given window function is
     71 // applied. The Fourier coefficient corresponding to the Nyquist frequency is
     72 // set to zero (it is never used and this allows to simplify the code).
     73 void ComputeWindowedForwardFft(
     74    ArrayView<const float, kFrameSize20ms24kHz> frame,
     75    const std::array<float, kFrameSize20ms24kHz / 2>& half_window,
     76    Pffft::FloatBuffer* fft_input_buffer,
     77    Pffft::FloatBuffer* fft_output_buffer,
     78    Pffft* fft) {
     79  RTC_DCHECK_EQ(frame.size(), 2 * half_window.size());
     80  // Apply windowing.
     81  auto in = fft_input_buffer->GetView();
     82  for (int i = 0, j = kFrameSize20ms24kHz - 1; SafeLt(i, half_window.size());
     83       ++i, --j) {
     84    in[i] = frame[i] * half_window[i];
     85    in[j] = frame[j] * half_window[i];
     86  }
     87  fft->ForwardTransform(*fft_input_buffer, fft_output_buffer, /*ordered=*/true);
     88  // Set the Nyquist frequency coefficient to zero.
     89  auto out = fft_output_buffer->GetView();
     90  out[1] = 0.f;
     91 }
     92 
     93 }  // namespace
     94 
     95 SpectralFeaturesExtractor::SpectralFeaturesExtractor()
     96    : half_window_(ComputeScaledHalfVorbisWindow(
     97          1.f / static_cast<float>(kFrameSize20ms24kHz))),
     98      fft_(kFrameSize20ms24kHz, Pffft::FftType::kReal),
     99      fft_buffer_(fft_.CreateBuffer()),
    100      reference_frame_fft_(fft_.CreateBuffer()),
    101      lagged_frame_fft_(fft_.CreateBuffer()),
    102      dct_table_(ComputeDctTable()) {}
    103 
    104 SpectralFeaturesExtractor::~SpectralFeaturesExtractor() = default;
    105 
    106 void SpectralFeaturesExtractor::Reset() {
    107  cepstral_coeffs_ring_buf_.Reset();
    108  cepstral_diffs_buf_.Reset();
    109 }
    110 
    111 bool SpectralFeaturesExtractor::CheckSilenceComputeFeatures(
    112    ArrayView<const float, kFrameSize20ms24kHz> reference_frame,
    113    ArrayView<const float, kFrameSize20ms24kHz> lagged_frame,
    114    ArrayView<float, kNumBands - kNumLowerBands> higher_bands_cepstrum,
    115    ArrayView<float, kNumLowerBands> average,
    116    ArrayView<float, kNumLowerBands> first_derivative,
    117    ArrayView<float, kNumLowerBands> second_derivative,
    118    ArrayView<float, kNumLowerBands> bands_cross_corr,
    119    float* variability) {
    120  // Compute the Opus band energies for the reference frame.
    121  ComputeWindowedForwardFft(reference_frame, half_window_, fft_buffer_.get(),
    122                            reference_frame_fft_.get(), &fft_);
    123  spectral_correlator_.ComputeAutoCorrelation(
    124      reference_frame_fft_->GetConstView(), reference_frame_bands_energy_);
    125  // Check if the reference frame has silence.
    126  const float tot_energy =
    127      std::accumulate(reference_frame_bands_energy_.begin(),
    128                      reference_frame_bands_energy_.end(), 0.f);
    129  if (tot_energy < kSilenceThreshold) {
    130    return true;
    131  }
    132  // Compute the Opus band energies for the lagged frame.
    133  ComputeWindowedForwardFft(lagged_frame, half_window_, fft_buffer_.get(),
    134                            lagged_frame_fft_.get(), &fft_);
    135  spectral_correlator_.ComputeAutoCorrelation(lagged_frame_fft_->GetConstView(),
    136                                              lagged_frame_bands_energy_);
    137  // Log of the band energies for the reference frame.
    138  std::array<float, kNumBands> log_bands_energy;
    139  ComputeSmoothedLogMagnitudeSpectrum(reference_frame_bands_energy_,
    140                                      log_bands_energy);
    141  // Reference frame cepstrum.
    142  std::array<float, kNumBands> cepstrum;
    143  ComputeDct(log_bands_energy, dct_table_, cepstrum);
    144  // Ad-hoc correction terms for the first two cepstral coefficients.
    145  cepstrum[0] -= 12.f;
    146  cepstrum[1] -= 4.f;
    147  // Update the ring buffer and the cepstral difference stats.
    148  cepstral_coeffs_ring_buf_.Push(cepstrum);
    149  UpdateCepstralDifferenceStats(cepstrum, cepstral_coeffs_ring_buf_,
    150                                &cepstral_diffs_buf_);
    151  // Write the higher bands cepstral coefficients.
    152  RTC_DCHECK_EQ(cepstrum.size() - kNumLowerBands, higher_bands_cepstrum.size());
    153  std::copy(cepstrum.begin() + kNumLowerBands, cepstrum.end(),
    154            higher_bands_cepstrum.begin());
    155  // Compute and write remaining features.
    156  ComputeAvgAndDerivatives(average, first_derivative, second_derivative);
    157  ComputeNormalizedCepstralCorrelation(bands_cross_corr);
    158  RTC_DCHECK(variability);
    159  *variability = ComputeVariability();
    160  return false;
    161 }
    162 
    163 void SpectralFeaturesExtractor::ComputeAvgAndDerivatives(
    164    ArrayView<float, kNumLowerBands> average,
    165    ArrayView<float, kNumLowerBands> first_derivative,
    166    ArrayView<float, kNumLowerBands> second_derivative) const {
    167  auto curr = cepstral_coeffs_ring_buf_.GetArrayView(0);
    168  auto prev1 = cepstral_coeffs_ring_buf_.GetArrayView(1);
    169  auto prev2 = cepstral_coeffs_ring_buf_.GetArrayView(2);
    170  RTC_DCHECK_EQ(average.size(), first_derivative.size());
    171  RTC_DCHECK_EQ(first_derivative.size(), second_derivative.size());
    172  RTC_DCHECK_LE(average.size(), curr.size());
    173  for (int i = 0; SafeLt(i, average.size()); ++i) {
    174    // Average, kernel: [1, 1, 1].
    175    average[i] = curr[i] + prev1[i] + prev2[i];
    176    // First derivative, kernel: [1, 0, - 1].
    177    first_derivative[i] = curr[i] - prev2[i];
    178    // Second derivative, Laplacian kernel: [1, -2, 1].
    179    second_derivative[i] = curr[i] - 2 * prev1[i] + prev2[i];
    180  }
    181 }
    182 
    183 void SpectralFeaturesExtractor::ComputeNormalizedCepstralCorrelation(
    184    ArrayView<float, kNumLowerBands> bands_cross_corr) {
    185  spectral_correlator_.ComputeCrossCorrelation(
    186      reference_frame_fft_->GetConstView(), lagged_frame_fft_->GetConstView(),
    187      bands_cross_corr_);
    188  // Normalize.
    189  for (int i = 0; SafeLt(i, bands_cross_corr_.size()); ++i) {
    190    bands_cross_corr_[i] =
    191        bands_cross_corr_[i] /
    192        std::sqrt(0.001f + reference_frame_bands_energy_[i] *
    193                               lagged_frame_bands_energy_[i]);
    194  }
    195  // Cepstrum.
    196  ComputeDct(bands_cross_corr_, dct_table_, bands_cross_corr);
    197  // Ad-hoc correction terms for the first two cepstral coefficients.
    198  bands_cross_corr[0] -= 1.3f;
    199  bands_cross_corr[1] -= 0.9f;
    200 }
    201 
    202 float SpectralFeaturesExtractor::ComputeVariability() const {
    203  // Compute cepstral variability score.
    204  float variability = 0.f;
    205  for (int delay1 = 0; delay1 < kCepstralCoeffsHistorySize; ++delay1) {
    206    float min_dist = std::numeric_limits<float>::max();
    207    for (int delay2 = 0; delay2 < kCepstralCoeffsHistorySize; ++delay2) {
    208      if (delay1 == delay2)  // The distance would be 0.
    209        continue;
    210      min_dist =
    211          std::min(min_dist, cepstral_diffs_buf_.GetValue(delay1, delay2));
    212    }
    213    variability += min_dist;
    214  }
    215  // Normalize (based on training set stats).
    216  // TODO(bugs.webrtc.org/10480): Isolate normalization from feature extraction.
    217  return variability / kCepstralCoeffsHistorySize - 2.1f;
    218 }
    219 
    220 }  // namespace rnn_vad
    221 }  // namespace webrtc