adaptive_digital_gain_controller.cc (9536B)
1 /* 2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h" 12 13 #include <algorithm> 14 15 #include "api/audio/audio_processing.h" 16 #include "api/audio/audio_view.h" 17 #include "common_audio/include/audio_util.h" 18 #include "modules/audio_processing/agc2/agc2_common.h" 19 #include "modules/audio_processing/logging/apm_data_dumper.h" 20 #include "rtc_base/checks.h" 21 #include "rtc_base/logging.h" 22 #include "rtc_base/numerics/safe_minmax.h" 23 #include "system_wrappers/include/metrics.h" 24 25 namespace webrtc { 26 namespace { 27 28 using AdaptiveDigitalConfig = 29 AudioProcessing::Config::GainController2::AdaptiveDigital; 30 31 constexpr int kHeadroomHistogramMin = 0; 32 constexpr int kHeadroomHistogramMax = 50; 33 constexpr int kGainDbHistogramMax = 30; 34 35 // Computes the gain for `input_level_dbfs` to reach `-config.headroom_db`. 36 // Clamps the gain in [0, `config.max_gain_db`]. `config.headroom_db` is a 37 // safety margin to allow transient peaks to exceed the target peak level 38 // without clipping. 39 float ComputeGainDb(float input_level_dbfs, 40 const AdaptiveDigitalConfig& config) { 41 // If the level is very low, apply the maximum gain. 42 if (input_level_dbfs < -(config.headroom_db + config.max_gain_db)) { 43 return config.max_gain_db; 44 } 45 // We expect to end up here most of the time: the level is below 46 // -headroom, but we can boost it to -headroom. 47 if (input_level_dbfs < -config.headroom_db) { 48 return -config.headroom_db - input_level_dbfs; 49 } 50 // The level is too high and we can't boost. 51 RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db); 52 return 0.0f; 53 } 54 55 // Returns `target_gain_db` if applying such a gain to `input_noise_level_dbfs` 56 // does not exceed `max_output_noise_level_dbfs`. Otherwise lowers and returns 57 // `target_gain_db` so that the output noise level equals 58 // `max_output_noise_level_dbfs`. 59 float LimitGainByNoise(float target_gain_db, 60 float input_noise_level_dbfs, 61 float max_output_noise_level_dbfs, 62 ApmDataDumper& apm_data_dumper) { 63 const float max_allowed_gain_db = 64 max_output_noise_level_dbfs - input_noise_level_dbfs; 65 apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db", 66 max_allowed_gain_db); 67 return std::min(target_gain_db, std::max(max_allowed_gain_db, 0.0f)); 68 } 69 70 float LimitGainByLowConfidence(float target_gain_db, 71 float last_gain_db, 72 float limiter_audio_level_dbfs, 73 bool estimate_is_confident) { 74 if (estimate_is_confident || 75 limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) { 76 return target_gain_db; 77 } 78 const float limiter_level_dbfs_before_gain = 79 limiter_audio_level_dbfs - last_gain_db; 80 81 // Compute a new gain so that `limiter_level_dbfs_before_gain` + 82 // `new_target_gain_db` is not great than `kLimiterThresholdForAgcGainDbfs`. 83 const float new_target_gain_db = std::max( 84 kLimiterThresholdForAgcGainDbfs - limiter_level_dbfs_before_gain, 0.0f); 85 return std::min(new_target_gain_db, target_gain_db); 86 } 87 88 // Computes how the gain should change during this frame. 89 // Return the gain difference in db to 'last_gain_db'. 90 float ComputeGainChangeThisFrameDb(float target_gain_db, 91 float last_gain_db, 92 bool gain_increase_allowed, 93 float max_gain_decrease_db, 94 float max_gain_increase_db) { 95 RTC_DCHECK_GT(max_gain_decrease_db, 0); 96 RTC_DCHECK_GT(max_gain_increase_db, 0); 97 float target_gain_difference_db = target_gain_db - last_gain_db; 98 if (!gain_increase_allowed) { 99 target_gain_difference_db = std::min(target_gain_difference_db, 0.0f); 100 } 101 return SafeClamp(target_gain_difference_db, -max_gain_decrease_db, 102 max_gain_increase_db); 103 } 104 105 } // namespace 106 107 AdaptiveDigitalGainController::AdaptiveDigitalGainController( 108 ApmDataDumper* apm_data_dumper, 109 const AudioProcessing::Config::GainController2::AdaptiveDigital& config, 110 int adjacent_speech_frames_threshold) 111 : apm_data_dumper_(apm_data_dumper), 112 gain_applier_( 113 /*hard_clip_samples=*/false, 114 /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)), 115 config_(config), 116 adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold), 117 max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second * 118 kFrameDurationMs / 1000.0f), 119 calls_since_last_gain_log_(0), 120 frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold), 121 last_gain_db_(config_.initial_gain_db) { 122 RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f); 123 RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1); 124 RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f); 125 RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f); 126 } 127 128 void AdaptiveDigitalGainController::Process(const FrameInfo& info, 129 DeinterleavedView<float> frame) { 130 RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f); 131 RTC_DCHECK_GE(frame.num_channels(), 1); 132 RTC_DCHECK( 133 frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 || 134 frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480) 135 << "`frame` does not look like a 10 ms frame for an APM supported sample " 136 "rate"; 137 138 // Compute the input level used to select the desired gain. 139 RTC_DCHECK_GT(info.headroom_db, 0.0f); 140 const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db; 141 142 const float target_gain_db = LimitGainByLowConfidence( 143 LimitGainByNoise(ComputeGainDb(input_level_dbfs, config_), 144 info.noise_rms_dbfs, config_.max_output_noise_level_dbfs, 145 *apm_data_dumper_), 146 last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable); 147 148 // Forbid increasing the gain until enough adjacent speech frames are 149 // observed. 150 bool first_confident_speech_frame = false; 151 if (info.speech_probability < kVadConfidenceThreshold) { 152 frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_; 153 } else if (frames_to_gain_increase_allowed_ > 0) { 154 frames_to_gain_increase_allowed_--; 155 first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0; 156 } 157 apm_data_dumper_->DumpRaw( 158 "agc2_adaptive_gain_applier_frames_to_gain_increase_allowed", 159 frames_to_gain_increase_allowed_); 160 161 const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0; 162 163 float max_gain_increase_db = max_gain_change_db_per_10ms_; 164 if (first_confident_speech_frame) { 165 // No gain increase happened while waiting for a long enough speech 166 // sequence. Therefore, temporarily allow a faster gain increase. 167 RTC_DCHECK(gain_increase_allowed); 168 max_gain_increase_db *= adjacent_speech_frames_threshold_; 169 } 170 171 const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb( 172 target_gain_db, last_gain_db_, gain_increase_allowed, 173 /*max_gain_decrease_db=*/max_gain_change_db_per_10ms_, 174 max_gain_increase_db); 175 176 apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db", 177 target_gain_db - last_gain_db_); 178 apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db", 179 gain_change_this_frame_db); 180 181 // Optimization: avoid calling math functions if gain does not 182 // change. 183 if (gain_change_this_frame_db != 0.f) { 184 gain_applier_.SetGainFactor( 185 DbToRatio(last_gain_db_ + gain_change_this_frame_db)); 186 } 187 188 gain_applier_.ApplyGain(frame); 189 190 // Remember that the gain has changed for the next iteration. 191 last_gain_db_ = last_gain_db_ + gain_change_this_frame_db; 192 apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db", 193 last_gain_db_); 194 195 // Log every 10 seconds. 196 calls_since_last_gain_log_++; 197 if (calls_since_last_gain_log_ == 1000) { 198 calls_since_last_gain_log_ = 0; 199 RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel", 200 -info.speech_level_dbfs, 0, 100, 101); 201 RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel", 202 -info.noise_rms_dbfs, 0, 100, 101); 203 RTC_HISTOGRAM_COUNTS_LINEAR( 204 "WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin, 205 kHeadroomHistogramMax, 206 kHeadroomHistogramMax - kHeadroomHistogramMin + 1); 207 RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied", 208 last_gain_db_, 0, kGainDbHistogramMax, 209 kGainDbHistogramMax + 1); 210 RTC_LOG(LS_INFO) << "AGC2 adaptive digital" 211 << " | speech_dbfs: " << info.speech_level_dbfs 212 << " | noise_dbfs: " << info.noise_rms_dbfs 213 << " | headroom_db: " << info.headroom_db 214 << " | gain_db: " << last_gain_db_; 215 } 216 } 217 218 } // namespace webrtc