tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

aec_state.cc (20165B)


      1 /*
      2 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/aec3/aec_state.h"
     12 
     13 #include <algorithm>
     14 #include <array>
     15 #include <atomic>
     16 #include <cmath>
     17 #include <cstddef>
     18 #include <numeric>
     19 #include <optional>
     20 #include <vector>
     21 
     22 #include "api/array_view.h"
     23 #include "api/audio/echo_canceller3_config.h"
     24 #include "api/environment/environment.h"
     25 #include "api/field_trials_view.h"
     26 #include "modules/audio_processing/aec3/aec3_common.h"
     27 #include "modules/audio_processing/aec3/block.h"
     28 #include "modules/audio_processing/aec3/delay_estimate.h"
     29 #include "modules/audio_processing/aec3/echo_path_variability.h"
     30 #include "modules/audio_processing/aec3/render_buffer.h"
     31 #include "modules/audio_processing/aec3/reverb_model.h"
     32 #include "modules/audio_processing/aec3/spectrum_buffer.h"
     33 #include "modules/audio_processing/aec3/subtractor_output.h"
     34 #include "modules/audio_processing/aec3/transparent_mode.h"
     35 #include "modules/audio_processing/logging/apm_data_dumper.h"
     36 #include "rtc_base/checks.h"
     37 
     38 namespace webrtc {
     39 namespace {
     40 
     41 bool DeactivateInitialStateResetAtEchoPathChange(
     42    const FieldTrialsView& field_trials) {
     43  return field_trials.IsEnabled(
     44      "WebRTC-Aec3DeactivateInitialStateResetKillSwitch");
     45 }
     46 
     47 bool FullResetAtEchoPathChange(const FieldTrialsView& field_trials) {
     48  return !field_trials.IsEnabled("WebRTC-Aec3AecStateFullResetKillSwitch");
     49 }
     50 
     51 bool SubtractorAnalyzerResetAtEchoPathChange(
     52    const FieldTrialsView& field_trials) {
     53  return !field_trials.IsEnabled(
     54      "WebRTC-Aec3AecStateSubtractorAnalyzerResetKillSwitch");
     55 }
     56 
     57 void ComputeAvgRenderReverb(
     58    const SpectrumBuffer& spectrum_buffer,
     59    int delay_blocks,
     60    float reverb_decay,
     61    ReverbModel* reverb_model,
     62    ArrayView<float, kFftLengthBy2Plus1> reverb_power_spectrum) {
     63  RTC_DCHECK(reverb_model);
     64  const size_t num_render_channels = spectrum_buffer.buffer[0].size();
     65  int idx_at_delay =
     66      spectrum_buffer.OffsetIndex(spectrum_buffer.read, delay_blocks);
     67  int idx_past = spectrum_buffer.IncIndex(idx_at_delay);
     68 
     69  std::array<float, kFftLengthBy2Plus1> X2_data;
     70  ArrayView<const float> X2;
     71  if (num_render_channels > 1) {
     72    auto average_channels =
     73        [](size_t num_render_channels,
     74           ArrayView<const std::array<float, kFftLengthBy2Plus1>>
     75               spectrum_band_0,
     76           ArrayView<float, kFftLengthBy2Plus1> render_power) {
     77          std::fill(render_power.begin(), render_power.end(), 0.f);
     78          for (size_t ch = 0; ch < num_render_channels; ++ch) {
     79            for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
     80              render_power[k] += spectrum_band_0[ch][k];
     81            }
     82          }
     83          const float normalizer = 1.f / num_render_channels;
     84          for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
     85            render_power[k] *= normalizer;
     86          }
     87        };
     88    average_channels(num_render_channels, spectrum_buffer.buffer[idx_past],
     89                     X2_data);
     90    reverb_model->UpdateReverbNoFreqShaping(
     91        X2_data, /*power_spectrum_scaling=*/1.0f, reverb_decay);
     92 
     93    average_channels(num_render_channels, spectrum_buffer.buffer[idx_at_delay],
     94                     X2_data);
     95    X2 = X2_data;
     96  } else {
     97    reverb_model->UpdateReverbNoFreqShaping(
     98        spectrum_buffer.buffer[idx_past][/*channel=*/0],
     99        /*power_spectrum_scaling=*/1.0f, reverb_decay);
    100 
    101    X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0];
    102  }
    103 
    104  ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
    105      reverb_model->reverb();
    106  for (size_t k = 0; k < X2.size(); ++k) {
    107    reverb_power_spectrum[k] = X2[k] + reverb_power[k];
    108  }
    109 }
    110 
    111 }  // namespace
    112 
    113 std::atomic<int> AecState::instance_count_(0);
    114 
    115 void AecState::GetResidualEchoScaling(ArrayView<float> residual_scaling) const {
    116  bool filter_has_had_time_to_converge;
    117  if (config_.filter.conservative_initial_phase) {
    118    filter_has_had_time_to_converge =
    119        strong_not_saturated_render_blocks_ >= 1.5f * kNumBlocksPerSecond;
    120  } else {
    121    filter_has_had_time_to_converge =
    122        strong_not_saturated_render_blocks_ >= 0.8f * kNumBlocksPerSecond;
    123  }
    124  echo_audibility_.GetResidualEchoScaling(filter_has_had_time_to_converge,
    125                                          residual_scaling);
    126 }
    127 
    128 AecState::AecState(const Environment& env,
    129                   const EchoCanceller3Config& config,
    130                   size_t num_capture_channels)
    131    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
    132      config_(config),
    133      num_capture_channels_(num_capture_channels),
    134      deactivate_initial_state_reset_at_echo_path_change_(
    135          DeactivateInitialStateResetAtEchoPathChange(env.field_trials())),
    136      full_reset_at_echo_path_change_(
    137          FullResetAtEchoPathChange(env.field_trials())),
    138      subtractor_analyzer_reset_at_echo_path_change_(
    139          SubtractorAnalyzerResetAtEchoPathChange(env.field_trials())),
    140      initial_state_(config_),
    141      delay_state_(config_, num_capture_channels_),
    142      transparent_state_(TransparentMode::Create(env, config_)),
    143      filter_quality_state_(config_, num_capture_channels_),
    144      erl_estimator_(2 * kNumBlocksPerSecond),
    145      erle_estimator_(env,
    146                      2 * kNumBlocksPerSecond,
    147                      config_,
    148                      num_capture_channels_),
    149      filter_analyzer_(config_, num_capture_channels_),
    150      echo_audibility_(
    151          config_.echo_audibility.use_stationarity_properties_at_init),
    152      reverb_model_estimator_(config_, num_capture_channels_),
    153      subtractor_output_analyzer_(num_capture_channels_) {}
    154 
    155 AecState::~AecState() = default;
    156 
    157 void AecState::HandleEchoPathChange(
    158    const EchoPathVariability& echo_path_variability) {
    159  const auto full_reset = [&]() {
    160    filter_analyzer_.Reset();
    161    capture_signal_saturation_ = false;
    162    strong_not_saturated_render_blocks_ = 0;
    163    blocks_with_active_render_ = 0;
    164    if (!deactivate_initial_state_reset_at_echo_path_change_) {
    165      initial_state_.Reset();
    166    }
    167    if (transparent_state_) {
    168      transparent_state_->Reset();
    169    }
    170    erle_estimator_.Reset(true);
    171    erl_estimator_.Reset();
    172    filter_quality_state_.Reset();
    173  };
    174 
    175  // TODO(peah): Refine the reset scheme according to the type of gain and
    176  // delay adjustment.
    177 
    178  if (full_reset_at_echo_path_change_ &&
    179      echo_path_variability.delay_change !=
    180          EchoPathVariability::DelayAdjustment::kNone) {
    181    full_reset();
    182  } else if (echo_path_variability.gain_change) {
    183    erle_estimator_.Reset(false);
    184  }
    185  if (subtractor_analyzer_reset_at_echo_path_change_) {
    186    subtractor_output_analyzer_.HandleEchoPathChange();
    187  }
    188 }
    189 
    190 void AecState::Update(
    191    const std::optional<DelayEstimate>& external_delay,
    192    ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
    193        adaptive_filter_frequency_responses,
    194    ArrayView<const std::vector<float>> adaptive_filter_impulse_responses,
    195    const RenderBuffer& render_buffer,
    196    ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
    197    ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
    198    ArrayView<const SubtractorOutput> subtractor_output) {
    199  RTC_DCHECK_EQ(num_capture_channels_, Y2.size());
    200  RTC_DCHECK_EQ(num_capture_channels_, subtractor_output.size());
    201  RTC_DCHECK_EQ(num_capture_channels_,
    202                adaptive_filter_frequency_responses.size());
    203  RTC_DCHECK_EQ(num_capture_channels_,
    204                adaptive_filter_impulse_responses.size());
    205 
    206  // Analyze the filter outputs and filters.
    207  bool any_filter_converged;
    208  bool any_coarse_filter_converged;
    209  bool all_filters_diverged;
    210  subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
    211                                     &any_coarse_filter_converged,
    212                                     &all_filters_diverged);
    213 
    214  bool any_filter_consistent;
    215  float max_echo_path_gain;
    216  filter_analyzer_.Update(adaptive_filter_impulse_responses, render_buffer,
    217                          &any_filter_consistent, &max_echo_path_gain);
    218 
    219  // Estimate the direct path delay of the filter.
    220  if (config_.filter.use_linear_filter) {
    221    delay_state_.Update(filter_analyzer_.FilterDelaysBlocks(), external_delay,
    222                        strong_not_saturated_render_blocks_);
    223  }
    224 
    225  const Block& aligned_render_block =
    226      render_buffer.GetBlock(-delay_state_.MinDirectPathFilterDelay());
    227 
    228  // Update render counters.
    229  bool active_render = false;
    230  for (int ch = 0; ch < aligned_render_block.NumChannels(); ++ch) {
    231    const float render_energy =
    232        std::inner_product(aligned_render_block.begin(/*block=*/0, ch),
    233                           aligned_render_block.end(/*block=*/0, ch),
    234                           aligned_render_block.begin(/*block=*/0, ch), 0.f);
    235    if (render_energy > (config_.render_levels.active_render_limit *
    236                         config_.render_levels.active_render_limit) *
    237                            kFftLengthBy2) {
    238      active_render = true;
    239      break;
    240    }
    241  }
    242  blocks_with_active_render_ += active_render ? 1 : 0;
    243  strong_not_saturated_render_blocks_ +=
    244      active_render && !SaturatedCapture() ? 1 : 0;
    245 
    246  std::array<float, kFftLengthBy2Plus1> avg_render_spectrum_with_reverb;
    247 
    248  ComputeAvgRenderReverb(render_buffer.GetSpectrumBuffer(),
    249                         delay_state_.MinDirectPathFilterDelay(),
    250                         ReverbDecay(/*mild=*/false), &avg_render_reverb_,
    251                         avg_render_spectrum_with_reverb);
    252 
    253  if (config_.echo_audibility.use_stationarity_properties) {
    254    // Update the echo audibility evaluator.
    255    echo_audibility_.Update(render_buffer, avg_render_reverb_.reverb(),
    256                            delay_state_.MinDirectPathFilterDelay(),
    257                            delay_state_.ExternalDelayReported());
    258  }
    259 
    260  // Update the ERL and ERLE measures.
    261  if (initial_state_.TransitionTriggered()) {
    262    erle_estimator_.Reset(false);
    263  }
    264 
    265  erle_estimator_.Update(render_buffer, adaptive_filter_frequency_responses,
    266                         avg_render_spectrum_with_reverb, Y2, E2_refined,
    267                         subtractor_output_analyzer_.ConvergedFilters());
    268 
    269  erl_estimator_.Update(
    270      subtractor_output_analyzer_.ConvergedFilters(),
    271      render_buffer.Spectrum(delay_state_.MinDirectPathFilterDelay()), Y2);
    272 
    273  // Detect and flag echo saturation.
    274  if (config_.ep_strength.echo_can_saturate) {
    275    saturation_detector_.Update(aligned_render_block, SaturatedCapture(),
    276                                UsableLinearEstimate(), subtractor_output,
    277                                max_echo_path_gain);
    278  } else {
    279    RTC_DCHECK(!saturation_detector_.SaturatedEcho());
    280  }
    281 
    282  // Update the decision on whether to use the initial state parameter set.
    283  initial_state_.Update(active_render, SaturatedCapture());
    284 
    285  // Detect whether the transparent mode should be activated.
    286  if (transparent_state_) {
    287    transparent_state_->Update(
    288        delay_state_.MinDirectPathFilterDelay(), any_filter_consistent,
    289        any_filter_converged, any_coarse_filter_converged, all_filters_diverged,
    290        active_render, SaturatedCapture());
    291  }
    292 
    293  // Analyze the quality of the filter.
    294  filter_quality_state_.Update(active_render, TransparentModeActive(),
    295                               SaturatedCapture(), external_delay,
    296                               any_filter_converged);
    297 
    298  // Update the reverb estimate.
    299  const bool stationary_block =
    300      config_.echo_audibility.use_stationarity_properties &&
    301      echo_audibility_.IsBlockStationary();
    302 
    303  reverb_model_estimator_.Update(
    304      filter_analyzer_.GetAdjustedFilters(),
    305      adaptive_filter_frequency_responses,
    306      erle_estimator_.GetInstLinearQualityEstimates(),
    307      delay_state_.DirectPathFilterDelays(),
    308      filter_quality_state_.UsableLinearFilterOutputs(), stationary_block);
    309 
    310  erle_estimator_.Dump(data_dumper_);
    311  reverb_model_estimator_.Dump(data_dumper_.get());
    312  data_dumper_->DumpRaw("aec3_active_render", active_render);
    313  data_dumper_->DumpRaw("aec3_erl", Erl());
    314  data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
    315  data_dumper_->DumpRaw("aec3_erle", Erle(/*onset_compensated=*/false)[0]);
    316  data_dumper_->DumpRaw("aec3_erle_onset_compensated",
    317                        Erle(/*onset_compensated=*/true)[0]);
    318  data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
    319  data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive());
    320  data_dumper_->DumpRaw("aec3_filter_delay",
    321                        filter_analyzer_.MinFilterDelayBlocks());
    322 
    323  data_dumper_->DumpRaw("aec3_any_filter_consistent", any_filter_consistent);
    324  data_dumper_->DumpRaw("aec3_initial_state",
    325                        initial_state_.InitialStateActive());
    326  data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture());
    327  data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho());
    328  data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged);
    329  data_dumper_->DumpRaw("aec3_any_coarse_filter_converged",
    330                        any_coarse_filter_converged);
    331  data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged);
    332 
    333  data_dumper_->DumpRaw("aec3_external_delay_avaliable",
    334                        external_delay ? 1 : 0);
    335  data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est",
    336                        GetReverbFrequencyResponse());
    337  data_dumper_->DumpRaw("aec3_subtractor_y2", subtractor_output[0].y2);
    338  data_dumper_->DumpRaw("aec3_subtractor_e2_coarse",
    339                        subtractor_output[0].e2_coarse);
    340  data_dumper_->DumpRaw("aec3_subtractor_e2_refined",
    341                        subtractor_output[0].e2_refined);
    342 }
    343 
    344 AecState::InitialState::InitialState(const EchoCanceller3Config& config)
    345    : conservative_initial_phase_(config.filter.conservative_initial_phase),
    346      initial_state_seconds_(config.filter.initial_state_seconds) {
    347  Reset();
    348 }
    349 void AecState::InitialState::InitialState::Reset() {
    350  initial_state_ = true;
    351  strong_not_saturated_render_blocks_ = 0;
    352 }
    353 void AecState::InitialState::InitialState::Update(bool active_render,
    354                                                  bool saturated_capture) {
    355  strong_not_saturated_render_blocks_ +=
    356      active_render && !saturated_capture ? 1 : 0;
    357 
    358  // Flag whether the initial state is still active.
    359  bool prev_initial_state = initial_state_;
    360  if (conservative_initial_phase_) {
    361    initial_state_ =
    362        strong_not_saturated_render_blocks_ < 5 * kNumBlocksPerSecond;
    363  } else {
    364    initial_state_ = strong_not_saturated_render_blocks_ <
    365                     initial_state_seconds_ * kNumBlocksPerSecond;
    366  }
    367 
    368  // Flag whether the transition from the initial state has started.
    369  transition_triggered_ = !initial_state_ && prev_initial_state;
    370 }
    371 
    372 AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config,
    373                                   size_t num_capture_channels)
    374    : delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
    375      filter_delays_blocks_(num_capture_channels, delay_headroom_blocks_),
    376      min_filter_delay_(delay_headroom_blocks_) {}
    377 
    378 void AecState::FilterDelay::Update(
    379    ArrayView<const int> analyzer_filter_delay_estimates_blocks,
    380    const std::optional<DelayEstimate>& external_delay,
    381    size_t blocks_with_proper_filter_adaptation) {
    382  // Update the delay based on the external delay.
    383  if (external_delay &&
    384      (!external_delay_ || external_delay_->delay != external_delay->delay)) {
    385    external_delay_ = external_delay;
    386  }
    387 
    388  // Override the estimated delay if it is not certain that the filter has had
    389  // time to converge.
    390  const bool delay_estimator_may_not_have_converged =
    391      blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond;
    392  if (delay_estimator_may_not_have_converged && external_delay_) {
    393    const int delay_guess = delay_headroom_blocks_;
    394    std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(),
    395              delay_guess);
    396  } else {
    397    RTC_DCHECK_EQ(filter_delays_blocks_.size(),
    398                  analyzer_filter_delay_estimates_blocks.size());
    399    std::copy(analyzer_filter_delay_estimates_blocks.begin(),
    400              analyzer_filter_delay_estimates_blocks.end(),
    401              filter_delays_blocks_.begin());
    402  }
    403 
    404  min_filter_delay_ = *std::min_element(filter_delays_blocks_.begin(),
    405                                        filter_delays_blocks_.end());
    406 }
    407 
    408 AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer(
    409    const EchoCanceller3Config& config,
    410    size_t num_capture_channels)
    411    : use_linear_filter_(config.filter.use_linear_filter),
    412      usable_linear_filter_estimates_(num_capture_channels, false) {}
    413 
    414 void AecState::FilteringQualityAnalyzer::Reset() {
    415  std::fill(usable_linear_filter_estimates_.begin(),
    416            usable_linear_filter_estimates_.end(), false);
    417  overall_usable_linear_estimates_ = false;
    418  filter_update_blocks_since_reset_ = 0;
    419 }
    420 
    421 void AecState::FilteringQualityAnalyzer::Update(
    422    bool active_render,
    423    bool transparent_mode,
    424    bool saturated_capture,
    425    const std::optional<DelayEstimate>& external_delay,
    426    bool any_filter_converged) {
    427  // Update blocks counter.
    428  const bool filter_update = active_render && !saturated_capture;
    429  filter_update_blocks_since_reset_ += filter_update ? 1 : 0;
    430  filter_update_blocks_since_start_ += filter_update ? 1 : 0;
    431 
    432  // Store convergence flag when observed.
    433  convergence_seen_ = convergence_seen_ || any_filter_converged;
    434 
    435  // Verify requirements for achieving a decent filter. The requirements for
    436  // filter adaptation at call startup are more restrictive than after an
    437  // in-call reset.
    438  const bool sufficient_data_to_converge_at_startup =
    439      filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f;
    440  const bool sufficient_data_to_converge_at_reset =
    441      sufficient_data_to_converge_at_startup &&
    442      filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f;
    443 
    444  // The linear filter can only be used if it has had time to converge.
    445  overall_usable_linear_estimates_ = sufficient_data_to_converge_at_startup &&
    446                                     sufficient_data_to_converge_at_reset;
    447 
    448  // The linear filter can only be used if an external delay or convergence have
    449  // been identified
    450  overall_usable_linear_estimates_ =
    451      overall_usable_linear_estimates_ && (external_delay || convergence_seen_);
    452 
    453  // If transparent mode is on, deactivate usign the linear filter.
    454  overall_usable_linear_estimates_ =
    455      overall_usable_linear_estimates_ && !transparent_mode;
    456 
    457  if (use_linear_filter_) {
    458    std::fill(usable_linear_filter_estimates_.begin(),
    459              usable_linear_filter_estimates_.end(),
    460              overall_usable_linear_estimates_);
    461  }
    462 }
    463 
    464 void AecState::SaturationDetector::Update(
    465    const Block& x,
    466    bool saturated_capture,
    467    bool usable_linear_estimate,
    468    ArrayView<const SubtractorOutput> subtractor_output,
    469    float echo_path_gain) {
    470  saturated_echo_ = false;
    471  if (!saturated_capture) {
    472    return;
    473  }
    474 
    475  if (usable_linear_estimate) {
    476    constexpr float kSaturationThreshold = 20000.f;
    477    for (size_t ch = 0; ch < subtractor_output.size(); ++ch) {
    478      saturated_echo_ =
    479          saturated_echo_ ||
    480          (subtractor_output[ch].s_refined_max_abs > kSaturationThreshold ||
    481           subtractor_output[ch].s_coarse_max_abs > kSaturationThreshold);
    482    }
    483  } else {
    484    float max_sample = 0.f;
    485    for (int ch = 0; ch < x.NumChannels(); ++ch) {
    486      ArrayView<const float, kBlockSize> x_ch = x.View(/*band=*/0, ch);
    487      for (float sample : x_ch) {
    488        max_sample = std::max(max_sample, fabsf(sample));
    489      }
    490    }
    491 
    492    const float kMargin = 10.f;
    493    float peak_echo_amplitude = max_sample * echo_path_gain * kMargin;
    494    saturated_echo_ = saturated_echo_ || peak_echo_amplitude > 32000;
    495  }
    496 }
    497 
    498 }  // namespace webrtc