residual_echo_detector.cc (8442B)
1 /* 2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/residual_echo_detector.h" 12 13 #include <algorithm> 14 #include <atomic> 15 #include <cstddef> 16 #include <numeric> 17 #include <optional> 18 19 #include "api/array_view.h" 20 #include "api/audio/audio_processing.h" 21 #include "modules/audio_processing/logging/apm_data_dumper.h" 22 #include "rtc_base/checks.h" 23 #include "rtc_base/logging.h" 24 #include "system_wrappers/include/metrics.h" 25 26 namespace { 27 28 float Power(webrtc::ArrayView<const float> input) { 29 if (input.empty()) { 30 return 0.f; 31 } 32 return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) / 33 input.size(); 34 } 35 36 constexpr size_t kLookbackFrames = 650; 37 // TODO(ivoc): Verify the size of this buffer. 38 constexpr size_t kRenderBufferSize = 30; 39 constexpr float kAlpha = 0.001f; 40 // 10 seconds of data, updated every 10 ms. 41 constexpr size_t kAggregationBufferSize = 10 * 100; 42 43 } // namespace 44 45 namespace webrtc { 46 47 std::atomic<int> ResidualEchoDetector::instance_count_(0); 48 49 ResidualEchoDetector::ResidualEchoDetector() 50 : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)), 51 render_buffer_(kRenderBufferSize), 52 render_power_(kLookbackFrames), 53 render_power_mean_(kLookbackFrames), 54 render_power_std_dev_(kLookbackFrames), 55 covariances_(kLookbackFrames), 56 recent_likelihood_max_(kAggregationBufferSize) {} 57 58 ResidualEchoDetector::~ResidualEchoDetector() = default; 59 60 void ResidualEchoDetector::AnalyzeRenderAudio( 61 ArrayView<const float> render_audio) { 62 // Dump debug data assuming 48 kHz sample rate (if this assumption is not 63 // valid the dumped audio will need to be converted offline accordingly). 64 data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(), 65 48000, 1); 66 67 if (render_buffer_.Size() == 0) { 68 frames_since_zero_buffer_size_ = 0; 69 } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) { 70 // This can happen in a few cases: at the start of a call, due to a glitch 71 // or due to clock drift. The excess capture value will be ignored. 72 // TODO(ivoc): Include how often this happens in APM stats. 73 render_buffer_.Pop(); 74 frames_since_zero_buffer_size_ = 0; 75 } 76 ++frames_since_zero_buffer_size_; 77 float power = Power(render_audio); 78 render_buffer_.Push(power); 79 } 80 81 void ResidualEchoDetector::AnalyzeCaptureAudio( 82 ArrayView<const float> capture_audio) { 83 // Dump debug data assuming 48 kHz sample rate (if this assumption is not 84 // valid the dumped audio will need to be converted offline accordingly). 85 data_dumper_->DumpWav("ed_capture", capture_audio.size(), 86 capture_audio.data(), 48000, 1); 87 88 if (first_process_call_) { 89 // On the first process call (so the start of a call), we must flush the 90 // render buffer, otherwise the render data will be delayed. 91 render_buffer_.Clear(); 92 first_process_call_ = false; 93 } 94 95 // Get the next render value. 96 const std::optional<float> buffered_render_power = render_buffer_.Pop(); 97 if (!buffered_render_power) { 98 // This can happen in a few cases: at the start of a call, due to a glitch 99 // or due to clock drift. The excess capture value will be ignored. 100 // TODO(ivoc): Include how often this happens in APM stats. 101 return; 102 } 103 // Update the render statistics, and store the statistics in circular buffers. 104 render_statistics_.Update(*buffered_render_power); 105 RTC_DCHECK_LT(next_insertion_index_, kLookbackFrames); 106 render_power_[next_insertion_index_] = *buffered_render_power; 107 render_power_mean_[next_insertion_index_] = render_statistics_.mean(); 108 render_power_std_dev_[next_insertion_index_] = 109 render_statistics_.std_deviation(); 110 111 // Get the next capture value, update capture statistics and add the relevant 112 // values to the buffers. 113 const float capture_power = Power(capture_audio); 114 capture_statistics_.Update(capture_power); 115 const float capture_mean = capture_statistics_.mean(); 116 const float capture_std_deviation = capture_statistics_.std_deviation(); 117 118 // Update the covariance values and determine the new echo likelihood. 119 echo_likelihood_ = 0.f; 120 size_t read_index = next_insertion_index_; 121 122 int best_delay = -1; 123 for (size_t delay = 0; delay < covariances_.size(); ++delay) { 124 RTC_DCHECK_LT(read_index, render_power_.size()); 125 covariances_[delay].Update(capture_power, capture_mean, 126 capture_std_deviation, render_power_[read_index], 127 render_power_mean_[read_index], 128 render_power_std_dev_[read_index]); 129 read_index = read_index > 0 ? read_index - 1 : kLookbackFrames - 1; 130 131 if (covariances_[delay].normalized_cross_correlation() > echo_likelihood_) { 132 echo_likelihood_ = covariances_[delay].normalized_cross_correlation(); 133 best_delay = static_cast<int>(delay); 134 } 135 } 136 // This is a temporary log message to help find the underlying cause for echo 137 // likelihoods > 1.0. 138 // TODO(ivoc): Remove once the issue is resolved. 139 if (echo_likelihood_ > 1.1f) { 140 // Make sure we don't spam the log. 141 if (log_counter_ < 5 && best_delay != -1) { 142 size_t read_index_high_echo = 143 kLookbackFrames + next_insertion_index_ - best_delay; 144 if (read_index_high_echo >= kLookbackFrames) { 145 read_index_high_echo -= kLookbackFrames; 146 } 147 RTC_DCHECK_LT(read_index_high_echo, render_power_.size()); 148 RTC_LOG_F(LS_ERROR) 149 << "Echo detector internal state: {" 150 "Echo likelihood: " 151 << echo_likelihood_ << ", Best Delay: " << best_delay 152 << ", Covariance: " << covariances_[best_delay].covariance() 153 << ", Last capture power: " << capture_power 154 << ", Capture mean: " << capture_mean 155 << ", Capture_standard deviation: " << capture_std_deviation 156 << ", Last render power: " << render_power_[read_index_high_echo] 157 << ", Render mean: " << render_power_mean_[read_index_high_echo] 158 << ", Render standard deviation: " 159 << render_power_std_dev_[read_index_high_echo] 160 << ", Reliability: " << reliability_ << "}"; 161 log_counter_++; 162 } 163 } 164 RTC_DCHECK_LT(echo_likelihood_, 1.1f); 165 166 reliability_ = (1.0f - kAlpha) * reliability_ + kAlpha * 1.0f; 167 echo_likelihood_ *= reliability_; 168 // This is a temporary fix to prevent echo likelihood values > 1.0. 169 // TODO(ivoc): Find the root cause of this issue and fix it. 170 echo_likelihood_ = std::min(echo_likelihood_, 1.0f); 171 int echo_percentage = static_cast<int>(echo_likelihood_ * 100); 172 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood", 173 echo_percentage, 0, 100, 100 /* number of bins */); 174 175 // Update the buffer of recent likelihood values. 176 recent_likelihood_max_.Update(echo_likelihood_); 177 178 // Update the next insertion index. 179 next_insertion_index_ = next_insertion_index_ < (kLookbackFrames - 1) 180 ? next_insertion_index_ + 1 181 : 0; 182 } 183 184 void ResidualEchoDetector::Initialize(int /*capture_sample_rate_hz*/, 185 int /*num_capture_channels*/, 186 int /*render_sample_rate_hz*/, 187 int /*num_render_channels*/) { 188 render_buffer_.Clear(); 189 std::fill(render_power_.begin(), render_power_.end(), 0.f); 190 std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f); 191 std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f); 192 render_statistics_.Clear(); 193 capture_statistics_.Clear(); 194 recent_likelihood_max_.Clear(); 195 for (auto& cov : covariances_) { 196 cov.Clear(); 197 } 198 echo_likelihood_ = 0.f; 199 next_insertion_index_ = 0; 200 reliability_ = 0.f; 201 } 202 203 EchoDetector::Metrics ResidualEchoDetector::GetMetrics() const { 204 EchoDetector::Metrics metrics; 205 metrics.echo_likelihood = echo_likelihood_; 206 metrics.echo_likelihood_recent_max = recent_likelihood_max_.max(); 207 return metrics; 208 } 209 } // namespace webrtc