echo_remover.cc (23515B)
1 /* 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 #include "modules/audio_processing/aec3/echo_remover.h" 11 12 #include <algorithm> 13 #include <array> 14 #include <atomic> 15 #include <cmath> 16 #include <cstddef> 17 #include <memory> 18 #include <optional> 19 #include <vector> 20 21 #include "api/array_view.h" 22 #include "api/audio/echo_canceller3_config.h" 23 #include "api/audio/echo_control.h" 24 #include "api/environment/environment.h" 25 #include "modules/audio_processing/aec3/aec3_common.h" 26 #include "modules/audio_processing/aec3/aec3_fft.h" 27 #include "modules/audio_processing/aec3/aec_state.h" 28 #include "modules/audio_processing/aec3/block.h" 29 #include "modules/audio_processing/aec3/comfort_noise_generator.h" 30 #include "modules/audio_processing/aec3/delay_estimate.h" 31 #include "modules/audio_processing/aec3/echo_path_variability.h" 32 #include "modules/audio_processing/aec3/echo_remover_metrics.h" 33 #include "modules/audio_processing/aec3/fft_data.h" 34 #include "modules/audio_processing/aec3/render_buffer.h" 35 #include "modules/audio_processing/aec3/render_signal_analyzer.h" 36 #include "modules/audio_processing/aec3/residual_echo_estimator.h" 37 #include "modules/audio_processing/aec3/subtractor.h" 38 #include "modules/audio_processing/aec3/subtractor_output.h" 39 #include "modules/audio_processing/aec3/suppression_filter.h" 40 #include "modules/audio_processing/aec3/suppression_gain.h" 41 #include "modules/audio_processing/logging/apm_data_dumper.h" 42 #include "rtc_base/checks.h" 43 #include "rtc_base/logging.h" 44 45 namespace webrtc { 46 47 namespace { 48 49 // Maximum number of channels for which the capture channel data is stored on 50 // the stack. If the number of channels are larger than this, they are stored 51 // using scratch memory that is pre-allocated on the heap. The reason for this 52 // partitioning is not to waste heap space for handling the more common numbers 53 // of channels, while at the same time not limiting the support for higher 54 // numbers of channels by enforcing the capture channel data to be stored on the 55 // stack using a fixed maximum value. 56 constexpr size_t kMaxNumChannelsOnStack = 2; 57 58 // Chooses the number of channels to store on the heap when that is required due 59 // to the number of capture channels being larger than the pre-defined number 60 // of channels to store on the stack. 61 size_t NumChannelsOnHeap(size_t num_capture_channels) { 62 return num_capture_channels > kMaxNumChannelsOnStack ? num_capture_channels 63 : 0; 64 } 65 66 void LinearEchoPower(const FftData& E, 67 const FftData& Y, 68 std::array<float, kFftLengthBy2Plus1>* S2) { 69 for (size_t k = 0; k < E.re.size(); ++k) { 70 (*S2)[k] = (Y.re[k] - E.re[k]) * (Y.re[k] - E.re[k]) + 71 (Y.im[k] - E.im[k]) * (Y.im[k] - E.im[k]); 72 } 73 } 74 75 // Fades between two input signals using a fix-sized transition. 76 void SignalTransition(ArrayView<const float> from, 77 ArrayView<const float> to, 78 ArrayView<float> out) { 79 if (from == to) { 80 RTC_DCHECK_EQ(to.size(), out.size()); 81 std::copy(to.begin(), to.end(), out.begin()); 82 } else { 83 constexpr size_t kTransitionSize = 30; 84 constexpr float kOneByTransitionSizePlusOne = 1.f / (kTransitionSize + 1); 85 86 RTC_DCHECK_EQ(from.size(), to.size()); 87 RTC_DCHECK_EQ(from.size(), out.size()); 88 RTC_DCHECK_LE(kTransitionSize, out.size()); 89 90 for (size_t k = 0; k < kTransitionSize; ++k) { 91 float a = (k + 1) * kOneByTransitionSizePlusOne; 92 out[k] = a * to[k] + (1.f - a) * from[k]; 93 } 94 95 std::copy(to.begin() + kTransitionSize, to.end(), 96 out.begin() + kTransitionSize); 97 } 98 } 99 100 // Computes a windowed (square root Hanning) padded FFT and updates the related 101 // memory. 102 void WindowedPaddedFft(const Aec3Fft& fft, 103 ArrayView<const float> v, 104 ArrayView<float> v_old, 105 FftData* V) { 106 fft.PaddedFft(v, v_old, Aec3Fft::Window::kSqrtHanning, V); 107 std::copy(v.begin(), v.end(), v_old.begin()); 108 } 109 110 // Class for removing the echo from the capture signal. 111 class EchoRemoverImpl final : public EchoRemover { 112 public: 113 EchoRemoverImpl(const Environment& env, 114 const EchoCanceller3Config& config, 115 int sample_rate_hz, 116 size_t num_render_channels, 117 size_t num_capture_channels, 118 NeuralResidualEchoEstimator* neural_residual_echo_estimator); 119 ~EchoRemoverImpl() override; 120 EchoRemoverImpl(const EchoRemoverImpl&) = delete; 121 EchoRemoverImpl& operator=(const EchoRemoverImpl&) = delete; 122 123 void GetMetrics(EchoControl::Metrics* metrics) const override; 124 125 // Removes the echo from a block of samples from the capture signal. The 126 // supplied render signal is assumed to be pre-aligned with the capture 127 // signal. 128 void ProcessCapture(EchoPathVariability echo_path_variability, 129 bool capture_signal_saturation, 130 const std::optional<DelayEstimate>& external_delay, 131 RenderBuffer* render_buffer, 132 Block* linear_output, 133 Block* capture) override; 134 135 // Updates the status on whether echo leakage is detected in the output of the 136 // echo remover. 137 void UpdateEchoLeakageStatus(bool leakage_detected) override { 138 echo_leakage_detected_ = leakage_detected; 139 } 140 141 void SetCaptureOutputUsage(bool capture_output_used) override { 142 capture_output_used_ = capture_output_used; 143 } 144 145 private: 146 // Selects which of the coarse and refined linear filter outputs that is most 147 // appropriate to pass to the suppressor and forms the linear filter output by 148 // smoothly transition between those. 149 void FormLinearFilterOutput(const SubtractorOutput& subtractor_output, 150 ArrayView<float> output); 151 152 static std::atomic<int> instance_count_; 153 const EchoCanceller3Config config_; 154 const Aec3Fft fft_; 155 std::unique_ptr<ApmDataDumper> data_dumper_; 156 const Aec3Optimization optimization_; 157 const int sample_rate_hz_; 158 const size_t num_render_channels_; 159 const size_t num_capture_channels_; 160 const bool use_coarse_filter_output_; 161 Subtractor subtractor_; 162 SuppressionGain suppression_gain_; 163 ComfortNoiseGenerator cng_; 164 SuppressionFilter suppression_filter_; 165 RenderSignalAnalyzer render_signal_analyzer_; 166 ResidualEchoEstimator residual_echo_estimator_; 167 bool echo_leakage_detected_ = false; 168 bool capture_output_used_ = true; 169 AecState aec_state_; 170 EchoRemoverMetrics metrics_; 171 std::vector<std::array<float, kFftLengthBy2>> e_old_; 172 std::vector<std::array<float, kFftLengthBy2>> y_old_; 173 size_t block_counter_ = 0; 174 int gain_change_hangover_ = 0; 175 bool refined_filter_output_last_selected_ = true; 176 177 std::vector<std::array<float, kFftLengthBy2>> e_heap_; 178 std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_heap_; 179 std::vector<std::array<float, kFftLengthBy2Plus1>> E2_heap_; 180 std::vector<std::array<float, kFftLengthBy2Plus1>> R2_heap_; 181 std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded_heap_; 182 std::vector<std::array<float, kFftLengthBy2Plus1>> S2_linear_heap_; 183 std::vector<FftData> Y_heap_; 184 std::vector<FftData> E_heap_; 185 std::vector<FftData> comfort_noise_heap_; 186 std::vector<FftData> high_band_comfort_noise_heap_; 187 std::vector<SubtractorOutput> subtractor_output_heap_; 188 }; 189 190 std::atomic<int> EchoRemoverImpl::instance_count_(0); 191 192 EchoRemoverImpl::EchoRemoverImpl( 193 const Environment& env, 194 const EchoCanceller3Config& config, 195 int sample_rate_hz, 196 size_t num_render_channels, 197 size_t num_capture_channels, 198 NeuralResidualEchoEstimator* neural_residual_echo_estimator) 199 : config_(config), 200 fft_(), 201 data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)), 202 optimization_(DetectOptimization()), 203 sample_rate_hz_(sample_rate_hz), 204 num_render_channels_(num_render_channels), 205 num_capture_channels_(num_capture_channels), 206 use_coarse_filter_output_( 207 config_.filter.enable_coarse_filter_output_usage), 208 subtractor_(env, 209 config, 210 num_render_channels_, 211 num_capture_channels_, 212 data_dumper_.get(), 213 optimization_), 214 suppression_gain_(config_, 215 optimization_, 216 sample_rate_hz, 217 num_capture_channels), 218 cng_(config_, optimization_, num_capture_channels_), 219 suppression_filter_(optimization_, 220 sample_rate_hz_, 221 num_capture_channels_), 222 render_signal_analyzer_(config_), 223 residual_echo_estimator_(env, 224 config_, 225 num_render_channels, 226 neural_residual_echo_estimator), 227 aec_state_(env, config_, num_capture_channels_), 228 e_old_(num_capture_channels_, {0.f}), 229 y_old_(num_capture_channels_, {0.f}), 230 e_heap_(NumChannelsOnHeap(num_capture_channels_), {0.f}), 231 Y2_heap_(NumChannelsOnHeap(num_capture_channels_)), 232 E2_heap_(NumChannelsOnHeap(num_capture_channels_)), 233 R2_heap_(NumChannelsOnHeap(num_capture_channels_)), 234 R2_unbounded_heap_(NumChannelsOnHeap(num_capture_channels_)), 235 S2_linear_heap_(NumChannelsOnHeap(num_capture_channels_)), 236 Y_heap_(NumChannelsOnHeap(num_capture_channels_)), 237 E_heap_(NumChannelsOnHeap(num_capture_channels_)), 238 comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)), 239 high_band_comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)), 240 subtractor_output_heap_(NumChannelsOnHeap(num_capture_channels_)) { 241 RTC_DCHECK(ValidFullBandRate(sample_rate_hz)); 242 } 243 244 EchoRemoverImpl::~EchoRemoverImpl() = default; 245 246 void EchoRemoverImpl::GetMetrics(EchoControl::Metrics* metrics) const { 247 // Echo return loss (ERL) is inverted to go from gain to attenuation. 248 metrics->echo_return_loss = -10.0 * std::log10(aec_state_.ErlTimeDomain()); 249 metrics->echo_return_loss_enhancement = 250 Log2TodB(aec_state_.FullBandErleLog2()); 251 } 252 253 void EchoRemoverImpl::ProcessCapture( 254 EchoPathVariability echo_path_variability, 255 bool capture_signal_saturation, 256 const std::optional<DelayEstimate>& external_delay, 257 RenderBuffer* render_buffer, 258 Block* linear_output, 259 Block* capture) { 260 ++block_counter_; 261 const Block& x = render_buffer->GetBlock(0); 262 Block* y = capture; 263 RTC_DCHECK(render_buffer); 264 RTC_DCHECK(y); 265 RTC_DCHECK_EQ(x.NumBands(), NumBandsForRate(sample_rate_hz_)); 266 RTC_DCHECK_EQ(y->NumBands(), NumBandsForRate(sample_rate_hz_)); 267 RTC_DCHECK_EQ(x.NumChannels(), num_render_channels_); 268 RTC_DCHECK_EQ(y->NumChannels(), num_capture_channels_); 269 270 // Stack allocated data to use when the number of channels is low. 271 std::array<std::array<float, kFftLengthBy2>, kMaxNumChannelsOnStack> e_stack; 272 std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack> 273 Y2_stack; 274 std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack> 275 E2_stack; 276 std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack> 277 R2_stack; 278 std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack> 279 R2_unbounded_stack; 280 std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack> 281 S2_linear_stack; 282 std::array<FftData, kMaxNumChannelsOnStack> Y_stack; 283 std::array<FftData, kMaxNumChannelsOnStack> E_stack; 284 std::array<FftData, kMaxNumChannelsOnStack> comfort_noise_stack; 285 std::array<FftData, kMaxNumChannelsOnStack> high_band_comfort_noise_stack; 286 std::array<SubtractorOutput, kMaxNumChannelsOnStack> subtractor_output_stack; 287 288 ArrayView<std::array<float, kFftLengthBy2>> e(e_stack.data(), 289 num_capture_channels_); 290 ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2(Y2_stack.data(), 291 num_capture_channels_); 292 ArrayView<std::array<float, kFftLengthBy2Plus1>> E2(E2_stack.data(), 293 num_capture_channels_); 294 ArrayView<std::array<float, kFftLengthBy2Plus1>> R2(R2_stack.data(), 295 num_capture_channels_); 296 ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded( 297 R2_unbounded_stack.data(), num_capture_channels_); 298 ArrayView<std::array<float, kFftLengthBy2Plus1>> S2_linear( 299 S2_linear_stack.data(), num_capture_channels_); 300 ArrayView<FftData> Y(Y_stack.data(), num_capture_channels_); 301 ArrayView<FftData> E(E_stack.data(), num_capture_channels_); 302 ArrayView<FftData> comfort_noise(comfort_noise_stack.data(), 303 num_capture_channels_); 304 ArrayView<FftData> high_band_comfort_noise( 305 high_band_comfort_noise_stack.data(), num_capture_channels_); 306 ArrayView<SubtractorOutput> subtractor_output(subtractor_output_stack.data(), 307 num_capture_channels_); 308 if (NumChannelsOnHeap(num_capture_channels_) > 0) { 309 // If the stack-allocated space is too small, use the heap for storing the 310 // microphone data. 311 e = ArrayView<std::array<float, kFftLengthBy2>>(e_heap_.data(), 312 num_capture_channels_); 313 Y2 = ArrayView<std::array<float, kFftLengthBy2Plus1>>( 314 Y2_heap_.data(), num_capture_channels_); 315 E2 = ArrayView<std::array<float, kFftLengthBy2Plus1>>( 316 E2_heap_.data(), num_capture_channels_); 317 R2 = ArrayView<std::array<float, kFftLengthBy2Plus1>>( 318 R2_heap_.data(), num_capture_channels_); 319 R2_unbounded = ArrayView<std::array<float, kFftLengthBy2Plus1>>( 320 R2_unbounded_heap_.data(), num_capture_channels_); 321 S2_linear = ArrayView<std::array<float, kFftLengthBy2Plus1>>( 322 S2_linear_heap_.data(), num_capture_channels_); 323 Y = ArrayView<FftData>(Y_heap_.data(), num_capture_channels_); 324 E = ArrayView<FftData>(E_heap_.data(), num_capture_channels_); 325 comfort_noise = 326 ArrayView<FftData>(comfort_noise_heap_.data(), num_capture_channels_); 327 high_band_comfort_noise = ArrayView<FftData>( 328 high_band_comfort_noise_heap_.data(), num_capture_channels_); 329 subtractor_output = ArrayView<SubtractorOutput>( 330 subtractor_output_heap_.data(), num_capture_channels_); 331 } 332 333 data_dumper_->DumpWav("aec3_echo_remover_capture_input", 334 y->View(/*band=*/0, /*channel=*/0), 16000, 1); 335 data_dumper_->DumpWav("aec3_echo_remover_render_input", 336 x.View(/*band=*/0, /*channel=*/0), 16000, 1); 337 data_dumper_->DumpRaw("aec3_echo_remover_capture_input", 338 y->View(/*band=*/0, /*channel=*/0)); 339 data_dumper_->DumpRaw("aec3_echo_remover_render_input", 340 x.View(/*band=*/0, /*channel=*/0)); 341 342 aec_state_.UpdateCaptureSaturation(capture_signal_saturation); 343 344 if (echo_path_variability.AudioPathChanged()) { 345 // Ensure that the gain change is only acted on once per frame. 346 if (echo_path_variability.gain_change) { 347 if (gain_change_hangover_ == 0) { 348 constexpr int kMaxBlocksPerFrame = 3; 349 gain_change_hangover_ = kMaxBlocksPerFrame; 350 LoggingSeverity log_level = config_.delay.log_warning_on_delay_changes 351 ? LS_WARNING 352 : LS_VERBOSE; 353 RTC_LOG_V(log_level) 354 << "Gain change detected at block " << block_counter_; 355 } else { 356 echo_path_variability.gain_change = false; 357 } 358 } 359 360 subtractor_.HandleEchoPathChange(echo_path_variability); 361 aec_state_.HandleEchoPathChange(echo_path_variability); 362 363 if (echo_path_variability.delay_change != 364 EchoPathVariability::DelayAdjustment::kNone) { 365 suppression_gain_.SetInitialState(true); 366 } 367 } 368 if (gain_change_hangover_ > 0) { 369 --gain_change_hangover_; 370 } 371 372 // Analyze the render signal. 373 render_signal_analyzer_.Update(*render_buffer, 374 aec_state_.MinDirectPathFilterDelay()); 375 376 // State transition. 377 if (aec_state_.TransitionTriggered()) { 378 subtractor_.ExitInitialState(); 379 suppression_gain_.SetInitialState(false); 380 } 381 382 // Perform linear echo cancellation. 383 subtractor_.Process(*render_buffer, *y, render_signal_analyzer_, aec_state_, 384 subtractor_output); 385 386 // Compute spectra. 387 for (size_t ch = 0; ch < num_capture_channels_; ++ch) { 388 FormLinearFilterOutput(subtractor_output[ch], e[ch]); 389 WindowedPaddedFft(fft_, y->View(/*band=*/0, ch), y_old_[ch], &Y[ch]); 390 WindowedPaddedFft(fft_, e[ch], e_old_[ch], &E[ch]); 391 LinearEchoPower(E[ch], Y[ch], &S2_linear[ch]); 392 Y[ch].Spectrum(optimization_, Y2[ch]); 393 E[ch].Spectrum(optimization_, E2[ch]); 394 } 395 // `y_old_` and `e_old_` now point to the current block. Though their channel 396 // layout is already suitable for residual echo estimation, an alias is 397 // created for clarity. 398 const auto& y_current = y_old_; 399 const auto& e_current = e_old_; 400 401 // Optionally return the linear filter output. 402 if (linear_output) { 403 RTC_DCHECK_GE(1, linear_output->NumBands()); 404 RTC_DCHECK_EQ(num_capture_channels_, linear_output->NumChannels()); 405 for (size_t ch = 0; ch < num_capture_channels_; ++ch) { 406 std::copy(e[ch].begin(), e[ch].end(), 407 linear_output->begin(/*band=*/0, ch)); 408 } 409 } 410 411 // Update the AEC state information. 412 aec_state_.Update(external_delay, subtractor_.FilterFrequencyResponses(), 413 subtractor_.FilterImpulseResponses(), *render_buffer, E2, 414 Y2, subtractor_output); 415 416 // Choose the linear output. 417 const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y; 418 419 data_dumper_->DumpWav("aec3_output_linear", 420 y->View(/*band=*/0, /*channel=*/0), 16000, 1); 421 data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0][0], 16000, 1); 422 423 // Estimate the comfort noise. 424 cng_.Compute(aec_state_.SaturatedCapture(), Y2, comfort_noise, 425 high_band_comfort_noise); 426 427 // Only do the below processing if the output of the audio processing module 428 // is used. 429 std::array<float, kFftLengthBy2Plus1> G; 430 if (capture_output_used_) { 431 // Estimate the residual echo power. 432 residual_echo_estimator_.Estimate( 433 aec_state_, *render_buffer, y_current, e_current, S2_linear, Y2, E2, 434 suppression_gain_.IsDominantNearend(), R2, R2_unbounded); 435 436 // Suppressor nearend estimate. 437 if (aec_state_.UsableLinearEstimate()) { 438 // E2 is bound by Y2. 439 for (size_t ch = 0; ch < num_capture_channels_; ++ch) { 440 std::transform(E2[ch].begin(), E2[ch].end(), Y2[ch].begin(), 441 E2[ch].begin(), 442 [](float a, float b) { return std::min(a, b); }); 443 } 444 } 445 const auto& nearend_spectrum = aec_state_.UsableLinearEstimate() ? E2 : Y2; 446 447 // Suppressor echo estimate. 448 const auto& echo_spectrum = 449 aec_state_.UsableLinearEstimate() ? S2_linear : R2; 450 451 // Determine if the suppressor should assume clock drift. 452 const bool clock_drift = config_.echo_removal_control.has_clock_drift || 453 echo_path_variability.clock_drift; 454 455 // Compute preferred gains. 456 float high_bands_gain; 457 suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2, R2_unbounded, 458 cng_.NoiseSpectrum(), render_signal_analyzer_, 459 aec_state_, x, clock_drift, &high_bands_gain, &G); 460 461 suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, 462 high_bands_gain, Y_fft, y); 463 464 } else { 465 G.fill(0.f); 466 } 467 468 // Update the metrics. 469 metrics_.Update(aec_state_, cng_.NoiseSpectrum()[0], G); 470 471 // Debug outputs for the purpose of development and analysis. 472 data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize, 473 &subtractor_output[0].s_refined[0], 16000, 1); 474 data_dumper_->DumpRaw("aec3_output", y->View(/*band=*/0, /*channel=*/0)); 475 data_dumper_->DumpRaw("aec3_narrow_render", 476 render_signal_analyzer_.NarrowPeakBand() ? 1 : 0); 477 data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum()[0]); 478 data_dumper_->DumpRaw("aec3_suppressor_gain", G); 479 data_dumper_->DumpWav("aec3_output", y->View(/*band=*/0, /*channel=*/0), 480 16000, 1); 481 data_dumper_->DumpRaw("aec3_using_subtractor_output[0]", 482 aec_state_.UseLinearFilterOutput() ? 1 : 0); 483 data_dumper_->DumpRaw("aec3_E2", E2[0]); 484 data_dumper_->DumpRaw("aec3_S2_linear", S2_linear[0]); 485 data_dumper_->DumpRaw("aec3_Y2", Y2[0]); 486 data_dumper_->DumpRaw( 487 "aec3_X2", render_buffer->Spectrum( 488 aec_state_.MinDirectPathFilterDelay())[/*channel=*/0]); 489 data_dumper_->DumpRaw("aec3_R2", R2[0]); 490 data_dumper_->DumpRaw("aec3_filter_delay", 491 aec_state_.MinDirectPathFilterDelay()); 492 data_dumper_->DumpRaw("aec3_capture_saturation", 493 aec_state_.SaturatedCapture() ? 1 : 0); 494 } 495 496 void EchoRemoverImpl::FormLinearFilterOutput( 497 const SubtractorOutput& subtractor_output, 498 ArrayView<float> output) { 499 RTC_DCHECK_EQ(subtractor_output.e_refined.size(), output.size()); 500 RTC_DCHECK_EQ(subtractor_output.e_coarse.size(), output.size()); 501 bool use_refined_output = true; 502 if (use_coarse_filter_output_) { 503 // As the output of the refined adaptive filter generally should be better 504 // than the coarse filter output, add a margin and threshold for when 505 // choosing the coarse filter output. 506 if (subtractor_output.e2_coarse < 0.9f * subtractor_output.e2_refined && 507 subtractor_output.y2 > 30.f * 30.f * kBlockSize && 508 (subtractor_output.s2_refined > 60.f * 60.f * kBlockSize || 509 subtractor_output.s2_coarse > 60.f * 60.f * kBlockSize)) { 510 use_refined_output = false; 511 } else { 512 // If the refined filter is diverged, choose the filter output that has 513 // the lowest power. 514 if (subtractor_output.e2_coarse < subtractor_output.e2_refined && 515 subtractor_output.y2 < subtractor_output.e2_refined) { 516 use_refined_output = false; 517 } 518 } 519 } 520 521 SignalTransition(refined_filter_output_last_selected_ 522 ? subtractor_output.e_refined 523 : subtractor_output.e_coarse, 524 use_refined_output ? subtractor_output.e_refined 525 : subtractor_output.e_coarse, 526 output); 527 refined_filter_output_last_selected_ = use_refined_output; 528 } 529 530 } // namespace 531 532 std::unique_ptr<EchoRemover> EchoRemover::Create( 533 const Environment& env, 534 const EchoCanceller3Config& config, 535 int sample_rate_hz, 536 size_t num_render_channels, 537 size_t num_capture_channels, 538 NeuralResidualEchoEstimator* neural_residual_echo_estimator) { 539 return std::make_unique<EchoRemoverImpl>( 540 env, config, sample_rate_hz, num_render_channels, num_capture_channels, 541 neural_residual_echo_estimator); 542 } 543 544 } // namespace webrtc