clipping_predictor.cc (15690B)
1 /* 2 * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/agc2/clipping_predictor.h" 12 13 #include <algorithm> 14 #include <cmath> 15 #include <memory> 16 #include <optional> 17 #include <vector> 18 19 #include "api/audio/audio_processing.h" 20 #include "common_audio/include/audio_util.h" 21 #include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h" 22 #include "modules/audio_processing/agc2/gain_map_internal.h" 23 #include "modules/audio_processing/include/audio_frame_view.h" 24 #include "rtc_base/checks.h" 25 #include "rtc_base/logging.h" 26 #include "rtc_base/numerics/safe_minmax.h" 27 28 namespace webrtc { 29 namespace { 30 31 constexpr int kClippingPredictorMaxGainChange = 15; 32 33 // Returns an input volume in the [`min_input_volume`, `max_input_volume`] range 34 // that reduces `gain_error_db`, which is a gain error estimated when 35 // `input_volume` was applied, according to a fixed gain map. 36 int ComputeVolumeUpdate(int gain_error_db, 37 int input_volume, 38 int min_input_volume, 39 int max_input_volume) { 40 RTC_DCHECK_GE(input_volume, 0); 41 RTC_DCHECK_LE(input_volume, max_input_volume); 42 if (gain_error_db == 0) { 43 return input_volume; 44 } 45 int new_volume = input_volume; 46 if (gain_error_db > 0) { 47 while (kGainMap[new_volume] - kGainMap[input_volume] < gain_error_db && 48 new_volume < max_input_volume) { 49 ++new_volume; 50 } 51 } else { 52 while (kGainMap[new_volume] - kGainMap[input_volume] > gain_error_db && 53 new_volume > min_input_volume) { 54 --new_volume; 55 } 56 } 57 return new_volume; 58 } 59 60 float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) { 61 const float crest_factor = 62 FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average)); 63 return crest_factor; 64 } 65 66 // Crest factor-based clipping prediction and clipped level step estimation. 67 class ClippingEventPredictor : public ClippingPredictor { 68 public: 69 // ClippingEventPredictor with `num_channels` channels (limited to values 70 // higher than zero); window size `window_length` and reference window size 71 // `reference_window_length` (both referring to the number of frames in the 72 // respective sliding windows and limited to values higher than zero); 73 // reference window delay `reference_window_delay` (delay in frames, limited 74 // to values zero and higher with an additional requirement of 75 // `window_length` < `reference_window_length` + reference_window_delay`); 76 // and an estimation peak threshold `clipping_threshold` and a crest factor 77 // drop threshold `crest_factor_margin` (both in dB). 78 ClippingEventPredictor(int num_channels, 79 int window_length, 80 int reference_window_length, 81 int reference_window_delay, 82 float clipping_threshold, 83 float crest_factor_margin) 84 : window_length_(window_length), 85 reference_window_length_(reference_window_length), 86 reference_window_delay_(reference_window_delay), 87 clipping_threshold_(clipping_threshold), 88 crest_factor_margin_(crest_factor_margin) { 89 RTC_DCHECK_GT(num_channels, 0); 90 RTC_DCHECK_GT(window_length, 0); 91 RTC_DCHECK_GT(reference_window_length, 0); 92 RTC_DCHECK_GE(reference_window_delay, 0); 93 RTC_DCHECK_GT(reference_window_length + reference_window_delay, 94 window_length); 95 const int buffer_length = GetMinFramesProcessed(); 96 RTC_DCHECK_GT(buffer_length, 0); 97 for (int i = 0; i < num_channels; ++i) { 98 ch_buffers_.push_back( 99 std::make_unique<ClippingPredictorLevelBuffer>(buffer_length)); 100 } 101 } 102 103 ClippingEventPredictor(const ClippingEventPredictor&) = delete; 104 ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete; 105 ~ClippingEventPredictor() override {} 106 107 void Reset() override { 108 const int num_channels = ch_buffers_.size(); 109 for (int i = 0; i < num_channels; ++i) { 110 ch_buffers_[i]->Reset(); 111 } 112 } 113 114 // Analyzes a frame of audio and stores the framewise metrics in 115 // `ch_buffers_`. 116 void Analyze(const AudioFrameView<const float>& frame) override { 117 const int num_channels = frame.num_channels(); 118 RTC_DCHECK_EQ(num_channels, ch_buffers_.size()); 119 const int samples_per_channel = frame.samples_per_channel(); 120 RTC_DCHECK_GT(samples_per_channel, 0); 121 for (int channel = 0; channel < num_channels; ++channel) { 122 float sum_squares = 0.0f; 123 float peak = 0.0f; 124 for (const auto& sample : frame.channel(channel)) { 125 sum_squares += sample * sample; 126 peak = std::max(std::fabs(sample), peak); 127 } 128 ch_buffers_[channel]->Push( 129 {.average = sum_squares / static_cast<float>(samples_per_channel), 130 .max = peak}); 131 } 132 } 133 134 // Estimates the analog gain adjustment for channel `channel` using a 135 // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an 136 // estimate for the clipped level step equal to `default_clipped_level_step_` 137 // if at least `GetMinFramesProcessed()` frames have been processed since the 138 // last reset and a clipping event is predicted. `level`, `min_mic_level`, and 139 // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255]. 140 std::optional<int> EstimateClippedLevelStep( 141 int channel, 142 int level, 143 int default_step, 144 int min_mic_level, 145 int max_mic_level) const override { 146 RTC_CHECK_GE(channel, 0); 147 RTC_CHECK_LT(channel, ch_buffers_.size()); 148 RTC_DCHECK_GE(level, 0); 149 RTC_DCHECK_LE(level, 255); 150 RTC_DCHECK_GT(default_step, 0); 151 RTC_DCHECK_LE(default_step, 255); 152 RTC_DCHECK_GE(min_mic_level, 0); 153 RTC_DCHECK_LE(min_mic_level, 255); 154 RTC_DCHECK_GE(max_mic_level, 0); 155 RTC_DCHECK_LE(max_mic_level, 255); 156 if (level <= min_mic_level) { 157 return std::nullopt; 158 } 159 if (PredictClippingEvent(channel)) { 160 const int new_level = 161 SafeClamp(level - default_step, min_mic_level, max_mic_level); 162 const int step = level - new_level; 163 if (step > 0) { 164 return step; 165 } 166 } 167 return std::nullopt; 168 } 169 170 private: 171 int GetMinFramesProcessed() const { 172 return reference_window_delay_ + reference_window_length_; 173 } 174 175 // Predicts clipping events based on the processed audio frames. Returns 176 // true if a clipping event is likely. 177 bool PredictClippingEvent(int channel) const { 178 const auto metrics = 179 ch_buffers_[channel]->ComputePartialMetrics(0, window_length_); 180 if (!metrics.has_value() || 181 !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) { 182 return false; 183 } 184 const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics( 185 reference_window_delay_, reference_window_length_); 186 if (!reference_metrics.has_value()) { 187 return false; 188 } 189 const float crest_factor = ComputeCrestFactor(metrics.value()); 190 const float reference_crest_factor = 191 ComputeCrestFactor(reference_metrics.value()); 192 if (crest_factor < reference_crest_factor - crest_factor_margin_) { 193 return true; 194 } 195 return false; 196 } 197 198 std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_; 199 const int window_length_; 200 const int reference_window_length_; 201 const int reference_window_delay_; 202 const float clipping_threshold_; 203 const float crest_factor_margin_; 204 }; 205 206 // Performs crest factor-based clipping peak prediction. 207 class ClippingPeakPredictor : public ClippingPredictor { 208 public: 209 // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values 210 // higher than zero); window size `window_length` and reference window size 211 // `reference_window_length` (both referring to the number of frames in the 212 // respective sliding windows and limited to values higher than zero); 213 // reference window delay `reference_window_delay` (delay in frames, limited 214 // to values zero and higher with an additional requirement of 215 // `window_length` < `reference_window_length` + reference_window_delay`); 216 // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive 217 // clipped level step estimation is used if `adaptive_step_estimation` is 218 // true. 219 explicit ClippingPeakPredictor(int num_channels, 220 int window_length, 221 int reference_window_length, 222 int reference_window_delay, 223 int clipping_threshold, 224 bool adaptive_step_estimation) 225 : window_length_(window_length), 226 reference_window_length_(reference_window_length), 227 reference_window_delay_(reference_window_delay), 228 clipping_threshold_(clipping_threshold), 229 adaptive_step_estimation_(adaptive_step_estimation) { 230 RTC_DCHECK_GT(num_channels, 0); 231 RTC_DCHECK_GT(window_length, 0); 232 RTC_DCHECK_GT(reference_window_length, 0); 233 RTC_DCHECK_GE(reference_window_delay, 0); 234 RTC_DCHECK_GT(reference_window_length + reference_window_delay, 235 window_length); 236 const int buffer_length = GetMinFramesProcessed(); 237 RTC_DCHECK_GT(buffer_length, 0); 238 for (int i = 0; i < num_channels; ++i) { 239 ch_buffers_.push_back( 240 std::make_unique<ClippingPredictorLevelBuffer>(buffer_length)); 241 } 242 } 243 244 ClippingPeakPredictor(const ClippingPeakPredictor&) = delete; 245 ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete; 246 ~ClippingPeakPredictor() override {} 247 248 void Reset() override { 249 const int num_channels = ch_buffers_.size(); 250 for (int i = 0; i < num_channels; ++i) { 251 ch_buffers_[i]->Reset(); 252 } 253 } 254 255 // Analyzes a frame of audio and stores the framewise metrics in 256 // `ch_buffers_`. 257 void Analyze(const AudioFrameView<const float>& frame) override { 258 const int num_channels = frame.num_channels(); 259 RTC_DCHECK_EQ(num_channels, ch_buffers_.size()); 260 const int samples_per_channel = frame.samples_per_channel(); 261 RTC_DCHECK_GT(samples_per_channel, 0); 262 for (int channel = 0; channel < num_channels; ++channel) { 263 float sum_squares = 0.0f; 264 float peak = 0.0f; 265 for (const auto& sample : frame.channel(channel)) { 266 sum_squares += sample * sample; 267 peak = std::max(std::fabs(sample), peak); 268 } 269 ch_buffers_[channel]->Push( 270 {.average = sum_squares / static_cast<float>(samples_per_channel), 271 .max = peak}); 272 } 273 } 274 275 // Estimates the analog gain adjustment for channel `channel` using a 276 // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an 277 // estimate for the clipped level step (equal to 278 // `default_clipped_level_step_` if `adaptive_estimation_` is false) if at 279 // least `GetMinFramesProcessed()` frames have been processed since the last 280 // reset and a clipping event is predicted. `level`, `min_mic_level`, and 281 // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255]. 282 std::optional<int> EstimateClippedLevelStep( 283 int channel, 284 int level, 285 int default_step, 286 int min_mic_level, 287 int max_mic_level) const override { 288 RTC_DCHECK_GE(channel, 0); 289 RTC_DCHECK_LT(channel, ch_buffers_.size()); 290 RTC_DCHECK_GE(level, 0); 291 RTC_DCHECK_LE(level, 255); 292 RTC_DCHECK_GT(default_step, 0); 293 RTC_DCHECK_LE(default_step, 255); 294 RTC_DCHECK_GE(min_mic_level, 0); 295 RTC_DCHECK_LE(min_mic_level, 255); 296 RTC_DCHECK_GE(max_mic_level, 0); 297 RTC_DCHECK_LE(max_mic_level, 255); 298 if (level <= min_mic_level) { 299 return std::nullopt; 300 } 301 std::optional<float> estimate_db = EstimatePeakValue(channel); 302 if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) { 303 int step = 0; 304 if (!adaptive_step_estimation_) { 305 step = default_step; 306 } else { 307 const int estimated_gain_change = 308 SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())), 309 -kClippingPredictorMaxGainChange, 0); 310 step = 311 std::max(level - ComputeVolumeUpdate(estimated_gain_change, level, 312 min_mic_level, max_mic_level), 313 default_step); 314 } 315 const int new_level = 316 SafeClamp(level - step, min_mic_level, max_mic_level); 317 if (level > new_level) { 318 return level - new_level; 319 } 320 } 321 return std::nullopt; 322 } 323 324 private: 325 int GetMinFramesProcessed() { 326 return reference_window_delay_ + reference_window_length_; 327 } 328 329 // Predicts clipping sample peaks based on the processed audio frames. 330 // Returns the estimated peak value if clipping is predicted. Otherwise 331 // returns std::nullopt. 332 std::optional<float> EstimatePeakValue(int channel) const { 333 const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics( 334 reference_window_delay_, reference_window_length_); 335 if (!reference_metrics.has_value()) { 336 return std::nullopt; 337 } 338 const auto metrics = 339 ch_buffers_[channel]->ComputePartialMetrics(0, window_length_); 340 if (!metrics.has_value() || 341 !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) { 342 return std::nullopt; 343 } 344 const float reference_crest_factor = 345 ComputeCrestFactor(reference_metrics.value()); 346 const float& mean_squares = metrics.value().average; 347 const float projected_peak = 348 reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares)); 349 return projected_peak; 350 } 351 352 std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_; 353 const int window_length_; 354 const int reference_window_length_; 355 const int reference_window_delay_; 356 const int clipping_threshold_; 357 const bool adaptive_step_estimation_; 358 }; 359 360 } // namespace 361 362 std::unique_ptr<ClippingPredictor> CreateClippingPredictor( 363 int num_channels, 364 const AudioProcessing::Config::GainController1::AnalogGainController:: 365 ClippingPredictor& config) { 366 if (!config.enabled) { 367 RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction disabled."; 368 return nullptr; 369 } 370 RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction enabled."; 371 using ClippingPredictorMode = AudioProcessing::Config::GainController1:: 372 AnalogGainController::ClippingPredictor::Mode; 373 switch (config.mode) { 374 case ClippingPredictorMode::kClippingEventPrediction: 375 return std::make_unique<ClippingEventPredictor>( 376 num_channels, config.window_length, config.reference_window_length, 377 config.reference_window_delay, config.clipping_threshold, 378 config.crest_factor_margin); 379 case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction: 380 return std::make_unique<ClippingPeakPredictor>( 381 num_channels, config.window_length, config.reference_window_length, 382 config.reference_window_delay, config.clipping_threshold, 383 /*adaptive_step_estimation=*/true); 384 case ClippingPredictorMode::kFixedStepClippingPeakPrediction: 385 return std::make_unique<ClippingPeakPredictor>( 386 num_channels, config.window_length, config.reference_window_length, 387 config.reference_window_delay, config.clipping_threshold, 388 /*adaptive_step_estimation=*/false); 389 } 390 RTC_DCHECK_NOTREACHED(); 391 } 392 393 } // namespace webrtc