aec_state.cc (20165B)
1 /* 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/aec3/aec_state.h" 12 13 #include <algorithm> 14 #include <array> 15 #include <atomic> 16 #include <cmath> 17 #include <cstddef> 18 #include <numeric> 19 #include <optional> 20 #include <vector> 21 22 #include "api/array_view.h" 23 #include "api/audio/echo_canceller3_config.h" 24 #include "api/environment/environment.h" 25 #include "api/field_trials_view.h" 26 #include "modules/audio_processing/aec3/aec3_common.h" 27 #include "modules/audio_processing/aec3/block.h" 28 #include "modules/audio_processing/aec3/delay_estimate.h" 29 #include "modules/audio_processing/aec3/echo_path_variability.h" 30 #include "modules/audio_processing/aec3/render_buffer.h" 31 #include "modules/audio_processing/aec3/reverb_model.h" 32 #include "modules/audio_processing/aec3/spectrum_buffer.h" 33 #include "modules/audio_processing/aec3/subtractor_output.h" 34 #include "modules/audio_processing/aec3/transparent_mode.h" 35 #include "modules/audio_processing/logging/apm_data_dumper.h" 36 #include "rtc_base/checks.h" 37 38 namespace webrtc { 39 namespace { 40 41 bool DeactivateInitialStateResetAtEchoPathChange( 42 const FieldTrialsView& field_trials) { 43 return field_trials.IsEnabled( 44 "WebRTC-Aec3DeactivateInitialStateResetKillSwitch"); 45 } 46 47 bool FullResetAtEchoPathChange(const FieldTrialsView& field_trials) { 48 return !field_trials.IsEnabled("WebRTC-Aec3AecStateFullResetKillSwitch"); 49 } 50 51 bool SubtractorAnalyzerResetAtEchoPathChange( 52 const FieldTrialsView& field_trials) { 53 return !field_trials.IsEnabled( 54 "WebRTC-Aec3AecStateSubtractorAnalyzerResetKillSwitch"); 55 } 56 57 void ComputeAvgRenderReverb( 58 const SpectrumBuffer& spectrum_buffer, 59 int delay_blocks, 60 float reverb_decay, 61 ReverbModel* reverb_model, 62 ArrayView<float, kFftLengthBy2Plus1> reverb_power_spectrum) { 63 RTC_DCHECK(reverb_model); 64 const size_t num_render_channels = spectrum_buffer.buffer[0].size(); 65 int idx_at_delay = 66 spectrum_buffer.OffsetIndex(spectrum_buffer.read, delay_blocks); 67 int idx_past = spectrum_buffer.IncIndex(idx_at_delay); 68 69 std::array<float, kFftLengthBy2Plus1> X2_data; 70 ArrayView<const float> X2; 71 if (num_render_channels > 1) { 72 auto average_channels = 73 [](size_t num_render_channels, 74 ArrayView<const std::array<float, kFftLengthBy2Plus1>> 75 spectrum_band_0, 76 ArrayView<float, kFftLengthBy2Plus1> render_power) { 77 std::fill(render_power.begin(), render_power.end(), 0.f); 78 for (size_t ch = 0; ch < num_render_channels; ++ch) { 79 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { 80 render_power[k] += spectrum_band_0[ch][k]; 81 } 82 } 83 const float normalizer = 1.f / num_render_channels; 84 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { 85 render_power[k] *= normalizer; 86 } 87 }; 88 average_channels(num_render_channels, spectrum_buffer.buffer[idx_past], 89 X2_data); 90 reverb_model->UpdateReverbNoFreqShaping( 91 X2_data, /*power_spectrum_scaling=*/1.0f, reverb_decay); 92 93 average_channels(num_render_channels, spectrum_buffer.buffer[idx_at_delay], 94 X2_data); 95 X2 = X2_data; 96 } else { 97 reverb_model->UpdateReverbNoFreqShaping( 98 spectrum_buffer.buffer[idx_past][/*channel=*/0], 99 /*power_spectrum_scaling=*/1.0f, reverb_decay); 100 101 X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0]; 102 } 103 104 ArrayView<const float, kFftLengthBy2Plus1> reverb_power = 105 reverb_model->reverb(); 106 for (size_t k = 0; k < X2.size(); ++k) { 107 reverb_power_spectrum[k] = X2[k] + reverb_power[k]; 108 } 109 } 110 111 } // namespace 112 113 std::atomic<int> AecState::instance_count_(0); 114 115 void AecState::GetResidualEchoScaling(ArrayView<float> residual_scaling) const { 116 bool filter_has_had_time_to_converge; 117 if (config_.filter.conservative_initial_phase) { 118 filter_has_had_time_to_converge = 119 strong_not_saturated_render_blocks_ >= 1.5f * kNumBlocksPerSecond; 120 } else { 121 filter_has_had_time_to_converge = 122 strong_not_saturated_render_blocks_ >= 0.8f * kNumBlocksPerSecond; 123 } 124 echo_audibility_.GetResidualEchoScaling(filter_has_had_time_to_converge, 125 residual_scaling); 126 } 127 128 AecState::AecState(const Environment& env, 129 const EchoCanceller3Config& config, 130 size_t num_capture_channels) 131 : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)), 132 config_(config), 133 num_capture_channels_(num_capture_channels), 134 deactivate_initial_state_reset_at_echo_path_change_( 135 DeactivateInitialStateResetAtEchoPathChange(env.field_trials())), 136 full_reset_at_echo_path_change_( 137 FullResetAtEchoPathChange(env.field_trials())), 138 subtractor_analyzer_reset_at_echo_path_change_( 139 SubtractorAnalyzerResetAtEchoPathChange(env.field_trials())), 140 initial_state_(config_), 141 delay_state_(config_, num_capture_channels_), 142 transparent_state_(TransparentMode::Create(env, config_)), 143 filter_quality_state_(config_, num_capture_channels_), 144 erl_estimator_(2 * kNumBlocksPerSecond), 145 erle_estimator_(env, 146 2 * kNumBlocksPerSecond, 147 config_, 148 num_capture_channels_), 149 filter_analyzer_(config_, num_capture_channels_), 150 echo_audibility_( 151 config_.echo_audibility.use_stationarity_properties_at_init), 152 reverb_model_estimator_(config_, num_capture_channels_), 153 subtractor_output_analyzer_(num_capture_channels_) {} 154 155 AecState::~AecState() = default; 156 157 void AecState::HandleEchoPathChange( 158 const EchoPathVariability& echo_path_variability) { 159 const auto full_reset = [&]() { 160 filter_analyzer_.Reset(); 161 capture_signal_saturation_ = false; 162 strong_not_saturated_render_blocks_ = 0; 163 blocks_with_active_render_ = 0; 164 if (!deactivate_initial_state_reset_at_echo_path_change_) { 165 initial_state_.Reset(); 166 } 167 if (transparent_state_) { 168 transparent_state_->Reset(); 169 } 170 erle_estimator_.Reset(true); 171 erl_estimator_.Reset(); 172 filter_quality_state_.Reset(); 173 }; 174 175 // TODO(peah): Refine the reset scheme according to the type of gain and 176 // delay adjustment. 177 178 if (full_reset_at_echo_path_change_ && 179 echo_path_variability.delay_change != 180 EchoPathVariability::DelayAdjustment::kNone) { 181 full_reset(); 182 } else if (echo_path_variability.gain_change) { 183 erle_estimator_.Reset(false); 184 } 185 if (subtractor_analyzer_reset_at_echo_path_change_) { 186 subtractor_output_analyzer_.HandleEchoPathChange(); 187 } 188 } 189 190 void AecState::Update( 191 const std::optional<DelayEstimate>& external_delay, 192 ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>> 193 adaptive_filter_frequency_responses, 194 ArrayView<const std::vector<float>> adaptive_filter_impulse_responses, 195 const RenderBuffer& render_buffer, 196 ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined, 197 ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2, 198 ArrayView<const SubtractorOutput> subtractor_output) { 199 RTC_DCHECK_EQ(num_capture_channels_, Y2.size()); 200 RTC_DCHECK_EQ(num_capture_channels_, subtractor_output.size()); 201 RTC_DCHECK_EQ(num_capture_channels_, 202 adaptive_filter_frequency_responses.size()); 203 RTC_DCHECK_EQ(num_capture_channels_, 204 adaptive_filter_impulse_responses.size()); 205 206 // Analyze the filter outputs and filters. 207 bool any_filter_converged; 208 bool any_coarse_filter_converged; 209 bool all_filters_diverged; 210 subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged, 211 &any_coarse_filter_converged, 212 &all_filters_diverged); 213 214 bool any_filter_consistent; 215 float max_echo_path_gain; 216 filter_analyzer_.Update(adaptive_filter_impulse_responses, render_buffer, 217 &any_filter_consistent, &max_echo_path_gain); 218 219 // Estimate the direct path delay of the filter. 220 if (config_.filter.use_linear_filter) { 221 delay_state_.Update(filter_analyzer_.FilterDelaysBlocks(), external_delay, 222 strong_not_saturated_render_blocks_); 223 } 224 225 const Block& aligned_render_block = 226 render_buffer.GetBlock(-delay_state_.MinDirectPathFilterDelay()); 227 228 // Update render counters. 229 bool active_render = false; 230 for (int ch = 0; ch < aligned_render_block.NumChannels(); ++ch) { 231 const float render_energy = 232 std::inner_product(aligned_render_block.begin(/*block=*/0, ch), 233 aligned_render_block.end(/*block=*/0, ch), 234 aligned_render_block.begin(/*block=*/0, ch), 0.f); 235 if (render_energy > (config_.render_levels.active_render_limit * 236 config_.render_levels.active_render_limit) * 237 kFftLengthBy2) { 238 active_render = true; 239 break; 240 } 241 } 242 blocks_with_active_render_ += active_render ? 1 : 0; 243 strong_not_saturated_render_blocks_ += 244 active_render && !SaturatedCapture() ? 1 : 0; 245 246 std::array<float, kFftLengthBy2Plus1> avg_render_spectrum_with_reverb; 247 248 ComputeAvgRenderReverb(render_buffer.GetSpectrumBuffer(), 249 delay_state_.MinDirectPathFilterDelay(), 250 ReverbDecay(/*mild=*/false), &avg_render_reverb_, 251 avg_render_spectrum_with_reverb); 252 253 if (config_.echo_audibility.use_stationarity_properties) { 254 // Update the echo audibility evaluator. 255 echo_audibility_.Update(render_buffer, avg_render_reverb_.reverb(), 256 delay_state_.MinDirectPathFilterDelay(), 257 delay_state_.ExternalDelayReported()); 258 } 259 260 // Update the ERL and ERLE measures. 261 if (initial_state_.TransitionTriggered()) { 262 erle_estimator_.Reset(false); 263 } 264 265 erle_estimator_.Update(render_buffer, adaptive_filter_frequency_responses, 266 avg_render_spectrum_with_reverb, Y2, E2_refined, 267 subtractor_output_analyzer_.ConvergedFilters()); 268 269 erl_estimator_.Update( 270 subtractor_output_analyzer_.ConvergedFilters(), 271 render_buffer.Spectrum(delay_state_.MinDirectPathFilterDelay()), Y2); 272 273 // Detect and flag echo saturation. 274 if (config_.ep_strength.echo_can_saturate) { 275 saturation_detector_.Update(aligned_render_block, SaturatedCapture(), 276 UsableLinearEstimate(), subtractor_output, 277 max_echo_path_gain); 278 } else { 279 RTC_DCHECK(!saturation_detector_.SaturatedEcho()); 280 } 281 282 // Update the decision on whether to use the initial state parameter set. 283 initial_state_.Update(active_render, SaturatedCapture()); 284 285 // Detect whether the transparent mode should be activated. 286 if (transparent_state_) { 287 transparent_state_->Update( 288 delay_state_.MinDirectPathFilterDelay(), any_filter_consistent, 289 any_filter_converged, any_coarse_filter_converged, all_filters_diverged, 290 active_render, SaturatedCapture()); 291 } 292 293 // Analyze the quality of the filter. 294 filter_quality_state_.Update(active_render, TransparentModeActive(), 295 SaturatedCapture(), external_delay, 296 any_filter_converged); 297 298 // Update the reverb estimate. 299 const bool stationary_block = 300 config_.echo_audibility.use_stationarity_properties && 301 echo_audibility_.IsBlockStationary(); 302 303 reverb_model_estimator_.Update( 304 filter_analyzer_.GetAdjustedFilters(), 305 adaptive_filter_frequency_responses, 306 erle_estimator_.GetInstLinearQualityEstimates(), 307 delay_state_.DirectPathFilterDelays(), 308 filter_quality_state_.UsableLinearFilterOutputs(), stationary_block); 309 310 erle_estimator_.Dump(data_dumper_); 311 reverb_model_estimator_.Dump(data_dumper_.get()); 312 data_dumper_->DumpRaw("aec3_active_render", active_render); 313 data_dumper_->DumpRaw("aec3_erl", Erl()); 314 data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain()); 315 data_dumper_->DumpRaw("aec3_erle", Erle(/*onset_compensated=*/false)[0]); 316 data_dumper_->DumpRaw("aec3_erle_onset_compensated", 317 Erle(/*onset_compensated=*/true)[0]); 318 data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate()); 319 data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive()); 320 data_dumper_->DumpRaw("aec3_filter_delay", 321 filter_analyzer_.MinFilterDelayBlocks()); 322 323 data_dumper_->DumpRaw("aec3_any_filter_consistent", any_filter_consistent); 324 data_dumper_->DumpRaw("aec3_initial_state", 325 initial_state_.InitialStateActive()); 326 data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture()); 327 data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho()); 328 data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged); 329 data_dumper_->DumpRaw("aec3_any_coarse_filter_converged", 330 any_coarse_filter_converged); 331 data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged); 332 333 data_dumper_->DumpRaw("aec3_external_delay_avaliable", 334 external_delay ? 1 : 0); 335 data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est", 336 GetReverbFrequencyResponse()); 337 data_dumper_->DumpRaw("aec3_subtractor_y2", subtractor_output[0].y2); 338 data_dumper_->DumpRaw("aec3_subtractor_e2_coarse", 339 subtractor_output[0].e2_coarse); 340 data_dumper_->DumpRaw("aec3_subtractor_e2_refined", 341 subtractor_output[0].e2_refined); 342 } 343 344 AecState::InitialState::InitialState(const EchoCanceller3Config& config) 345 : conservative_initial_phase_(config.filter.conservative_initial_phase), 346 initial_state_seconds_(config.filter.initial_state_seconds) { 347 Reset(); 348 } 349 void AecState::InitialState::InitialState::Reset() { 350 initial_state_ = true; 351 strong_not_saturated_render_blocks_ = 0; 352 } 353 void AecState::InitialState::InitialState::Update(bool active_render, 354 bool saturated_capture) { 355 strong_not_saturated_render_blocks_ += 356 active_render && !saturated_capture ? 1 : 0; 357 358 // Flag whether the initial state is still active. 359 bool prev_initial_state = initial_state_; 360 if (conservative_initial_phase_) { 361 initial_state_ = 362 strong_not_saturated_render_blocks_ < 5 * kNumBlocksPerSecond; 363 } else { 364 initial_state_ = strong_not_saturated_render_blocks_ < 365 initial_state_seconds_ * kNumBlocksPerSecond; 366 } 367 368 // Flag whether the transition from the initial state has started. 369 transition_triggered_ = !initial_state_ && prev_initial_state; 370 } 371 372 AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config, 373 size_t num_capture_channels) 374 : delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize), 375 filter_delays_blocks_(num_capture_channels, delay_headroom_blocks_), 376 min_filter_delay_(delay_headroom_blocks_) {} 377 378 void AecState::FilterDelay::Update( 379 ArrayView<const int> analyzer_filter_delay_estimates_blocks, 380 const std::optional<DelayEstimate>& external_delay, 381 size_t blocks_with_proper_filter_adaptation) { 382 // Update the delay based on the external delay. 383 if (external_delay && 384 (!external_delay_ || external_delay_->delay != external_delay->delay)) { 385 external_delay_ = external_delay; 386 } 387 388 // Override the estimated delay if it is not certain that the filter has had 389 // time to converge. 390 const bool delay_estimator_may_not_have_converged = 391 blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond; 392 if (delay_estimator_may_not_have_converged && external_delay_) { 393 const int delay_guess = delay_headroom_blocks_; 394 std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(), 395 delay_guess); 396 } else { 397 RTC_DCHECK_EQ(filter_delays_blocks_.size(), 398 analyzer_filter_delay_estimates_blocks.size()); 399 std::copy(analyzer_filter_delay_estimates_blocks.begin(), 400 analyzer_filter_delay_estimates_blocks.end(), 401 filter_delays_blocks_.begin()); 402 } 403 404 min_filter_delay_ = *std::min_element(filter_delays_blocks_.begin(), 405 filter_delays_blocks_.end()); 406 } 407 408 AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer( 409 const EchoCanceller3Config& config, 410 size_t num_capture_channels) 411 : use_linear_filter_(config.filter.use_linear_filter), 412 usable_linear_filter_estimates_(num_capture_channels, false) {} 413 414 void AecState::FilteringQualityAnalyzer::Reset() { 415 std::fill(usable_linear_filter_estimates_.begin(), 416 usable_linear_filter_estimates_.end(), false); 417 overall_usable_linear_estimates_ = false; 418 filter_update_blocks_since_reset_ = 0; 419 } 420 421 void AecState::FilteringQualityAnalyzer::Update( 422 bool active_render, 423 bool transparent_mode, 424 bool saturated_capture, 425 const std::optional<DelayEstimate>& external_delay, 426 bool any_filter_converged) { 427 // Update blocks counter. 428 const bool filter_update = active_render && !saturated_capture; 429 filter_update_blocks_since_reset_ += filter_update ? 1 : 0; 430 filter_update_blocks_since_start_ += filter_update ? 1 : 0; 431 432 // Store convergence flag when observed. 433 convergence_seen_ = convergence_seen_ || any_filter_converged; 434 435 // Verify requirements for achieving a decent filter. The requirements for 436 // filter adaptation at call startup are more restrictive than after an 437 // in-call reset. 438 const bool sufficient_data_to_converge_at_startup = 439 filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f; 440 const bool sufficient_data_to_converge_at_reset = 441 sufficient_data_to_converge_at_startup && 442 filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f; 443 444 // The linear filter can only be used if it has had time to converge. 445 overall_usable_linear_estimates_ = sufficient_data_to_converge_at_startup && 446 sufficient_data_to_converge_at_reset; 447 448 // The linear filter can only be used if an external delay or convergence have 449 // been identified 450 overall_usable_linear_estimates_ = 451 overall_usable_linear_estimates_ && (external_delay || convergence_seen_); 452 453 // If transparent mode is on, deactivate usign the linear filter. 454 overall_usable_linear_estimates_ = 455 overall_usable_linear_estimates_ && !transparent_mode; 456 457 if (use_linear_filter_) { 458 std::fill(usable_linear_filter_estimates_.begin(), 459 usable_linear_filter_estimates_.end(), 460 overall_usable_linear_estimates_); 461 } 462 } 463 464 void AecState::SaturationDetector::Update( 465 const Block& x, 466 bool saturated_capture, 467 bool usable_linear_estimate, 468 ArrayView<const SubtractorOutput> subtractor_output, 469 float echo_path_gain) { 470 saturated_echo_ = false; 471 if (!saturated_capture) { 472 return; 473 } 474 475 if (usable_linear_estimate) { 476 constexpr float kSaturationThreshold = 20000.f; 477 for (size_t ch = 0; ch < subtractor_output.size(); ++ch) { 478 saturated_echo_ = 479 saturated_echo_ || 480 (subtractor_output[ch].s_refined_max_abs > kSaturationThreshold || 481 subtractor_output[ch].s_coarse_max_abs > kSaturationThreshold); 482 } 483 } else { 484 float max_sample = 0.f; 485 for (int ch = 0; ch < x.NumChannels(); ++ch) { 486 ArrayView<const float, kBlockSize> x_ch = x.View(/*band=*/0, ch); 487 for (float sample : x_ch) { 488 max_sample = std::max(max_sample, fabsf(sample)); 489 } 490 } 491 492 const float kMargin = 10.f; 493 float peak_echo_amplitude = max_sample * echo_path_gain * kMargin; 494 saturated_echo_ = saturated_echo_ || peak_echo_amplitude > 32000; 495 } 496 } 497 498 } // namespace webrtc