gain_controller2.cc (11330B)
1 /* 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/gain_controller2.h" 12 13 #include <algorithm> 14 #include <atomic> 15 #include <cmath> 16 #include <memory> 17 #include <optional> 18 19 #include "api/audio/audio_frame.h" 20 #include "api/audio/audio_processing.h" 21 #include "api/audio/audio_view.h" 22 #include "api/environment/environment.h" 23 #include "api/field_trials_view.h" 24 #include "common_audio/include/audio_util.h" 25 #include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h" 26 #include "modules/audio_processing/agc2/agc2_common.h" 27 #include "modules/audio_processing/agc2/cpu_features.h" 28 #include "modules/audio_processing/agc2/input_volume_controller.h" 29 #include "modules/audio_processing/agc2/interpolated_gain_curve.h" 30 #include "modules/audio_processing/agc2/noise_level_estimator.h" 31 #include "modules/audio_processing/agc2/saturation_protector.h" 32 #include "modules/audio_processing/agc2/speech_level_estimator.h" 33 #include "modules/audio_processing/agc2/vad_wrapper.h" 34 #include "modules/audio_processing/audio_buffer.h" 35 #include "modules/audio_processing/logging/apm_data_dumper.h" 36 #include "rtc_base/checks.h" 37 #include "rtc_base/logging.h" 38 39 namespace webrtc { 40 namespace { 41 42 using Agc2Config = AudioProcessing::Config::GainController2; 43 using InputVolumeControllerConfig = InputVolumeController::Config; 44 45 constexpr int kLogLimiterStatsPeriodMs = 30'000; 46 constexpr int kFrameLengthMs = 10; 47 constexpr int kLogLimiterStatsPeriodNumFrames = 48 kLogLimiterStatsPeriodMs / kFrameLengthMs; 49 50 // Detects the available CPU features and applies any kill-switches. 51 AvailableCpuFeatures GetAllowedCpuFeatures( 52 const FieldTrialsView& field_trials) { 53 AvailableCpuFeatures features = GetAvailableCpuFeatures(); 54 if (field_trials.IsEnabled("WebRTC-Agc2SimdSse2KillSwitch")) { 55 features.sse2 = false; 56 } 57 if (field_trials.IsEnabled("WebRTC-Agc2SimdAvx2KillSwitch")) { 58 features.avx2 = false; 59 } 60 if (field_trials.IsEnabled("WebRTC-Agc2SimdNeonKillSwitch")) { 61 features.neon = false; 62 } 63 return features; 64 } 65 66 // Peak and RMS audio levels in dBFS. 67 struct AudioLevels { 68 float peak_dbfs; 69 float rms_dbfs; 70 }; 71 72 // Speech level info. 73 struct SpeechLevel { 74 bool is_confident; 75 float rms_dbfs; 76 }; 77 78 // Computes the audio levels for the first channel in `frame`. 79 AudioLevels ComputeAudioLevels(DeinterleavedView<float> frame, 80 ApmDataDumper& data_dumper) { 81 float peak = 0.0f; 82 float rms = 0.0f; 83 for (const auto& x : frame[0]) { 84 peak = std::max(std::fabs(x), peak); 85 rms += x * x; 86 } 87 AudioLevels levels{ 88 .peak_dbfs = FloatS16ToDbfs(peak), 89 .rms_dbfs = FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))}; 90 data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs); 91 data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs); 92 return levels; 93 } 94 95 } // namespace 96 97 std::atomic<int> GainController2::instance_count_(0); 98 99 GainController2::GainController2( 100 const Environment& env, 101 const Agc2Config& config, 102 const InputVolumeControllerConfig& input_volume_controller_config, 103 int sample_rate_hz, 104 int num_channels, 105 bool use_internal_vad) 106 : cpu_features_(GetAllowedCpuFeatures(env.field_trials())), 107 data_dumper_(instance_count_.fetch_add(1) + 1), 108 fixed_gain_applier_( 109 /*hard_clip_samples=*/false, 110 /*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)), 111 limiter_(&data_dumper_, 112 SampleRateToDefaultChannelSize(sample_rate_hz), 113 /*histogram_name_prefix=*/"Agc2"), 114 calls_since_last_limiter_log_(0) { 115 RTC_DCHECK(Validate(config)); 116 data_dumper_.InitiateNewSetOfRecordings(); 117 118 if (config.input_volume_controller.enabled || 119 config.adaptive_digital.enabled) { 120 // Create dependencies. 121 speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>( 122 &data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold); 123 if (use_internal_vad) 124 vad_ = std::make_unique<VoiceActivityDetectorWrapper>( 125 kVadResetPeriodMs, cpu_features_, sample_rate_hz); 126 } 127 128 if (config.input_volume_controller.enabled) { 129 // Create controller. 130 input_volume_controller_ = std::make_unique<InputVolumeController>( 131 num_channels, input_volume_controller_config); 132 // TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method. 133 input_volume_controller_->Initialize(); 134 } 135 136 if (config.adaptive_digital.enabled) { 137 // Create dependencies. 138 noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_); 139 saturation_protector_ = CreateSaturationProtector( 140 kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold, 141 &data_dumper_); 142 // Create controller. 143 adaptive_digital_controller_ = 144 std::make_unique<AdaptiveDigitalGainController>( 145 &data_dumper_, config.adaptive_digital, 146 kAdjacentSpeechFramesThreshold); 147 } 148 } 149 150 GainController2::~GainController2() = default; 151 152 // TODO(webrtc:7494): Pass the flag also to the other components. 153 void GainController2::SetCaptureOutputUsed(bool capture_output_used) { 154 if (input_volume_controller_) { 155 input_volume_controller_->HandleCaptureOutputUsedChange( 156 capture_output_used); 157 } 158 } 159 160 void GainController2::SetFixedGainDb(float gain_db) { 161 const float gain_factor = DbToRatio(gain_db); 162 if (fixed_gain_applier_.GetGainFactor() != gain_factor) { 163 // Reset the limiter to quickly react on abrupt level changes caused by 164 // large changes of the fixed gain. 165 limiter_.Reset(); 166 } 167 fixed_gain_applier_.SetGainFactor(gain_factor); 168 } 169 170 void GainController2::Analyze(int applied_input_volume, 171 const AudioBuffer& audio_buffer) { 172 recommended_input_volume_ = std::nullopt; 173 174 RTC_DCHECK_GE(applied_input_volume, 0); 175 RTC_DCHECK_LE(applied_input_volume, 255); 176 177 if (input_volume_controller_) { 178 input_volume_controller_->AnalyzeInputAudio(applied_input_volume, 179 audio_buffer); 180 } 181 } 182 183 void GainController2::Process(std::optional<float> speech_probability, 184 bool input_volume_changed, 185 AudioBuffer* audio) { 186 recommended_input_volume_ = std::nullopt; 187 188 data_dumper_.DumpRaw("agc2_applied_input_volume_changed", 189 input_volume_changed); 190 if (input_volume_changed) { 191 // Handle input volume changes. 192 if (speech_level_estimator_) 193 speech_level_estimator_->Reset(); 194 if (saturation_protector_) 195 saturation_protector_->Reset(); 196 } 197 198 DeinterleavedView<float> float_frame = audio->view(); 199 200 // Compute speech probability. 201 if (vad_) { 202 // When the VAD component runs, `speech_probability` should not be specified 203 // because APM should not run the same VAD twice (as an APM sub-module and 204 // internally in AGC2). 205 RTC_DCHECK(!speech_probability.has_value()); 206 speech_probability = vad_->Analyze(float_frame); 207 } 208 if (speech_probability.has_value()) { 209 RTC_DCHECK_GE(*speech_probability, 0.0f); 210 RTC_DCHECK_LE(*speech_probability, 1.0f); 211 } 212 // The speech probability may not be defined at this step (e.g., when the 213 // fixed digital controller alone is enabled). 214 if (speech_probability.has_value()) 215 data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability); 216 217 // Compute audio, noise and speech levels. 218 AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_); 219 std::optional<float> noise_rms_dbfs; 220 if (noise_level_estimator_) { 221 // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated 222 // computation in `noise_level_estimator_`. 223 noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame); 224 } 225 std::optional<SpeechLevel> speech_level; 226 if (speech_level_estimator_) { 227 RTC_DCHECK(speech_probability.has_value()); 228 speech_level_estimator_->Update( 229 audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability); 230 speech_level = 231 SpeechLevel{.is_confident = speech_level_estimator_->is_confident(), 232 .rms_dbfs = speech_level_estimator_->level_dbfs()}; 233 } 234 235 // Update the recommended input volume. 236 if (input_volume_controller_) { 237 RTC_DCHECK(speech_level.has_value()); 238 RTC_DCHECK(speech_probability.has_value()); 239 if (speech_probability.has_value()) { 240 recommended_input_volume_ = 241 input_volume_controller_->RecommendInputVolume( 242 *speech_probability, 243 speech_level->is_confident 244 ? std::optional<float>(speech_level->rms_dbfs) 245 : std::nullopt); 246 } 247 } 248 249 if (adaptive_digital_controller_) { 250 RTC_DCHECK(saturation_protector_); 251 RTC_DCHECK(speech_probability.has_value()); 252 RTC_DCHECK(speech_level.has_value()); 253 saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs, 254 speech_level->rms_dbfs); 255 float headroom_db = saturation_protector_->HeadroomDb(); 256 data_dumper_.DumpRaw("agc2_headroom_db", headroom_db); 257 float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel()); 258 data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs); 259 RTC_DCHECK(noise_rms_dbfs.has_value()); 260 adaptive_digital_controller_->Process( 261 /*info=*/{.speech_probability = *speech_probability, 262 .speech_level_dbfs = speech_level->rms_dbfs, 263 .speech_level_reliable = speech_level->is_confident, 264 .noise_rms_dbfs = *noise_rms_dbfs, 265 .headroom_db = headroom_db, 266 .limiter_envelope_dbfs = limiter_envelope_dbfs}, 267 float_frame); 268 } 269 270 // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated 271 // computation in `limiter_`. 272 fixed_gain_applier_.ApplyGain(float_frame); 273 274 limiter_.Process(float_frame); 275 276 // Periodically log limiter stats. 277 if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) { 278 calls_since_last_limiter_log_ = 0; 279 InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats(); 280 RTC_LOG(LS_INFO) << "[AGC2] limiter stats" 281 << " | identity: " << stats.look_ups_identity_region 282 << " | knee: " << stats.look_ups_knee_region 283 << " | limiter: " << stats.look_ups_limiter_region 284 << " | saturation: " << stats.look_ups_saturation_region; 285 } 286 } 287 288 bool GainController2::Validate( 289 const AudioProcessing::Config::GainController2& config) { 290 const auto& fixed = config.fixed_digital; 291 const auto& adaptive = config.adaptive_digital; 292 return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f && 293 adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f && 294 adaptive.initial_gain_db >= 0.0f && 295 adaptive.max_gain_change_db_per_second > 0.0f && 296 adaptive.max_output_noise_level_dbfs <= 0.0f; 297 } 298 299 } // namespace webrtc