transparent_mode.cc (9365B)
1 /* 2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/aec3/transparent_mode.h" 12 13 #include <cstddef> 14 #include <memory> 15 16 #include "api/audio/echo_canceller3_config.h" 17 #include "api/environment/environment.h" 18 #include "api/field_trials_view.h" 19 #include "modules/audio_processing/aec3/aec3_common.h" 20 #include "rtc_base/checks.h" 21 #include "rtc_base/logging.h" 22 23 namespace webrtc { 24 namespace { 25 26 constexpr size_t kBlocksSinceConvergencedFilterInit = 10000; 27 constexpr size_t kBlocksSinceConsistentEstimateInit = 10000; 28 constexpr float kInitialTransparentStateProbability = 0.2f; 29 30 bool DeactivateTransparentMode(const FieldTrialsView& field_trials) { 31 return field_trials.IsEnabled("WebRTC-Aec3TransparentModeKillSwitch"); 32 } 33 34 bool ActivateTransparentModeHmm(const FieldTrialsView& field_trials) { 35 return field_trials.IsEnabled("WebRTC-Aec3TransparentModeHmm"); 36 } 37 38 } // namespace 39 40 // Classifier that toggles transparent mode which reduces echo suppression when 41 // headsets are used. 42 class TransparentModeImpl : public TransparentMode { 43 public: 44 bool Active() const override { return transparency_activated_; } 45 46 void Reset() override { 47 // Determines if transparent mode is used. 48 transparency_activated_ = false; 49 50 // The estimated probability of being transparent mode. 51 prob_transparent_state_ = kInitialTransparentStateProbability; 52 } 53 54 void Update(int /* filter_delay_blocks */, 55 bool /* any_filter_consistent */, 56 bool /* any_filter_converged */, 57 bool any_coarse_filter_converged, 58 bool /* all_filters_diverged */, 59 bool active_render, 60 bool /* saturated_capture */) override { 61 // The classifier is implemented as a Hidden Markov Model (HMM) with two 62 // hidden states: "normal" and "transparent". The estimated probabilities of 63 // the two states are updated by observing filter convergence during active 64 // render. The filters are less likely to be reported as converged when 65 // there is no echo present in the microphone signal. 66 67 // The constants have been obtained by observing active_render and 68 // any_coarse_filter_converged under varying call scenarios. They 69 // have further been hand tuned to prefer normal state during uncertain 70 // regions (to avoid echo leaks). 71 72 // The model is only updated during active render. 73 if (!active_render) 74 return; 75 76 // Probability of switching from one state to the other. 77 constexpr float kSwitch = 0.000001f; 78 79 // Probability of observing converged filters in states "normal" and 80 // "transparent" during active render. 81 constexpr float kConvergedNormal = 0.01f; 82 constexpr float kConvergedTransparent = 0.001f; 83 84 // Probability of transitioning to transparent state from normal state and 85 // transparent state respectively. 86 constexpr float kA[2] = {kSwitch, 1.f - kSwitch}; 87 88 // Probability of the two observations (converged filter or not converged 89 // filter) in normal state and transparent state respectively. 90 constexpr float kB[2][2] = { 91 {1.f - kConvergedNormal, kConvergedNormal}, 92 {1.f - kConvergedTransparent, kConvergedTransparent}}; 93 94 // Probability of the two states before the update. 95 const float prob_transparent = prob_transparent_state_; 96 const float prob_normal = 1.f - prob_transparent; 97 98 // Probability of transitioning to transparent state. 99 const float prob_transition_transparent = 100 prob_normal * kA[0] + prob_transparent * kA[1]; 101 const float prob_transition_normal = 1.f - prob_transition_transparent; 102 103 // Observed output. 104 const int out = static_cast<int>(any_coarse_filter_converged); 105 106 // Joint probabilites of the observed output and respective states. 107 const float prob_joint_normal = prob_transition_normal * kB[0][out]; 108 const float prob_joint_transparent = 109 prob_transition_transparent * kB[1][out]; 110 111 // Conditional probability of transparent state and the observed output. 112 RTC_DCHECK_GT(prob_joint_normal + prob_joint_transparent, 0.f); 113 prob_transparent_state_ = 114 prob_joint_transparent / (prob_joint_normal + prob_joint_transparent); 115 116 // Transparent mode is only activated when its state probability is high. 117 // Dead zone between activation/deactivation thresholds to avoid switching 118 // back and forth. 119 if (prob_transparent_state_ > 0.95f) { 120 transparency_activated_ = true; 121 } else if (prob_transparent_state_ < 0.5f) { 122 transparency_activated_ = false; 123 } 124 } 125 126 private: 127 bool transparency_activated_ = false; 128 float prob_transparent_state_ = kInitialTransparentStateProbability; 129 }; 130 131 // Legacy classifier for toggling transparent mode. 132 class LegacyTransparentModeImpl : public TransparentMode { 133 public: 134 explicit LegacyTransparentModeImpl(const EchoCanceller3Config& config) 135 : linear_and_stable_echo_path_( 136 config.echo_removal_control.linear_and_stable_echo_path), 137 active_blocks_since_sane_filter_(kBlocksSinceConsistentEstimateInit), 138 non_converged_sequence_size_(kBlocksSinceConvergencedFilterInit) {} 139 140 bool Active() const override { return transparency_activated_; } 141 142 void Reset() override { 143 non_converged_sequence_size_ = kBlocksSinceConvergencedFilterInit; 144 diverged_sequence_size_ = 0; 145 strong_not_saturated_render_blocks_ = 0; 146 if (linear_and_stable_echo_path_) { 147 recent_convergence_during_activity_ = false; 148 } 149 } 150 151 void Update(int filter_delay_blocks, 152 bool any_filter_consistent, 153 bool any_filter_converged, 154 bool /* any_coarse_filter_converged */, 155 bool all_filters_diverged, 156 bool active_render, 157 bool saturated_capture) override { 158 ++capture_block_counter_; 159 strong_not_saturated_render_blocks_ += 160 active_render && !saturated_capture ? 1 : 0; 161 162 if (any_filter_consistent && filter_delay_blocks < 5) { 163 sane_filter_observed_ = true; 164 active_blocks_since_sane_filter_ = 0; 165 } else if (active_render) { 166 ++active_blocks_since_sane_filter_; 167 } 168 169 bool sane_filter_recently_seen; 170 if (!sane_filter_observed_) { 171 sane_filter_recently_seen = 172 capture_block_counter_ <= 5 * kNumBlocksPerSecond; 173 } else { 174 sane_filter_recently_seen = 175 active_blocks_since_sane_filter_ <= 30 * kNumBlocksPerSecond; 176 } 177 178 if (any_filter_converged) { 179 recent_convergence_during_activity_ = true; 180 active_non_converged_sequence_size_ = 0; 181 non_converged_sequence_size_ = 0; 182 ++num_converged_blocks_; 183 } else { 184 if (++non_converged_sequence_size_ > 20 * kNumBlocksPerSecond) { 185 num_converged_blocks_ = 0; 186 } 187 188 if (active_render && 189 ++active_non_converged_sequence_size_ > 60 * kNumBlocksPerSecond) { 190 recent_convergence_during_activity_ = false; 191 } 192 } 193 194 if (!all_filters_diverged) { 195 diverged_sequence_size_ = 0; 196 } else if (++diverged_sequence_size_ >= 60) { 197 // TODO(peah): Change these lines to ensure proper triggering of usable 198 // filter. 199 non_converged_sequence_size_ = kBlocksSinceConvergencedFilterInit; 200 } 201 202 if (active_non_converged_sequence_size_ > 60 * kNumBlocksPerSecond) { 203 finite_erl_recently_detected_ = false; 204 } 205 if (num_converged_blocks_ > 50) { 206 finite_erl_recently_detected_ = true; 207 } 208 209 if (finite_erl_recently_detected_) { 210 transparency_activated_ = false; 211 } else if (sane_filter_recently_seen && 212 recent_convergence_during_activity_) { 213 transparency_activated_ = false; 214 } else { 215 const bool filter_should_have_converged = 216 strong_not_saturated_render_blocks_ > 6 * kNumBlocksPerSecond; 217 transparency_activated_ = filter_should_have_converged; 218 } 219 } 220 221 private: 222 const bool linear_and_stable_echo_path_; 223 size_t capture_block_counter_ = 0; 224 bool transparency_activated_ = false; 225 size_t active_blocks_since_sane_filter_; 226 bool sane_filter_observed_ = false; 227 bool finite_erl_recently_detected_ = false; 228 size_t non_converged_sequence_size_; 229 size_t diverged_sequence_size_ = 0; 230 size_t active_non_converged_sequence_size_ = 0; 231 size_t num_converged_blocks_ = 0; 232 bool recent_convergence_during_activity_ = false; 233 size_t strong_not_saturated_render_blocks_ = 0; 234 }; 235 236 std::unique_ptr<TransparentMode> TransparentMode::Create( 237 const Environment& env, 238 const EchoCanceller3Config& config) { 239 if (config.ep_strength.bounded_erl || 240 DeactivateTransparentMode(env.field_trials())) { 241 RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Disabled"; 242 return nullptr; 243 } 244 if (ActivateTransparentModeHmm(env.field_trials())) { 245 RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: HMM"; 246 return std::make_unique<TransparentModeImpl>(); 247 } 248 RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Legacy"; 249 return std::make_unique<LegacyTransparentModeImpl>(config); 250 } 251 252 } // namespace webrtc