tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

clipping_predictor.cc (15690B)


      1 /*
      2 *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/agc2/clipping_predictor.h"
     12 
     13 #include <algorithm>
     14 #include <cmath>
     15 #include <memory>
     16 #include <optional>
     17 #include <vector>
     18 
     19 #include "api/audio/audio_processing.h"
     20 #include "common_audio/include/audio_util.h"
     21 #include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
     22 #include "modules/audio_processing/agc2/gain_map_internal.h"
     23 #include "modules/audio_processing/include/audio_frame_view.h"
     24 #include "rtc_base/checks.h"
     25 #include "rtc_base/logging.h"
     26 #include "rtc_base/numerics/safe_minmax.h"
     27 
     28 namespace webrtc {
     29 namespace {
     30 
     // Largest magnitude of the estimated gain change used by the adaptive step
     // estimation: the rounded-up peak estimate is negated and clamped to
     // [-kClippingPredictorMaxGainChange, 0] before it is mapped to a volume
     // update. NOTE(review): presumably expressed in dB, like the peak estimate
     // it limits - confirm.
     31 constexpr int kClippingPredictorMaxGainChange = 15;
     32 
     33 // Returns an input volume in the [`min_input_volume`, `max_input_volume`] range
     34 // that reduces `gain_error_db`, which is a gain error estimated when
     35 // `input_volume` was applied, according to a fixed gain map.
     36 int ComputeVolumeUpdate(int gain_error_db,
     37                        int input_volume,
     38                        int min_input_volume,
     39                        int max_input_volume) {
     40  RTC_DCHECK_GE(input_volume, 0);
     41  RTC_DCHECK_LE(input_volume, max_input_volume);
     42  if (gain_error_db == 0) {
     43    return input_volume;
     44  }
     45  int new_volume = input_volume;
     46  if (gain_error_db > 0) {
     47    while (kGainMap[new_volume] - kGainMap[input_volume] < gain_error_db &&
     48           new_volume < max_input_volume) {
     49      ++new_volume;
     50    }
     51  } else {
     52    while (kGainMap[new_volume] - kGainMap[input_volume] > gain_error_db &&
     53           new_volume > min_input_volume) {
     54      --new_volume;
     55    }
     56  }
     57  return new_volume;
     58 }
     59 
     60 float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
     61  const float crest_factor =
     62      FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average));
     63  return crest_factor;
     64 }
     65 
     66 // Crest factor-based clipping prediction and clipped level step estimation.
     67 class ClippingEventPredictor : public ClippingPredictor {
     68 public:
     69  // ClippingEventPredictor with `num_channels` channels (limited to values
     70  // higher than zero); window size `window_length` and reference window size
     71  // `reference_window_length` (both referring to the number of frames in the
     72  // respective sliding windows and limited to values higher than zero);
     73  // reference window delay `reference_window_delay` (delay in frames, limited
     74  // to values zero and higher with an additional requirement of
     75  // `window_length` < `reference_window_length` + reference_window_delay`);
     76  // and an estimation peak threshold `clipping_threshold` and a crest factor
     77  // drop threshold `crest_factor_margin` (both in dB).
     78  ClippingEventPredictor(int num_channels,
     79                         int window_length,
     80                         int reference_window_length,
     81                         int reference_window_delay,
     82                         float clipping_threshold,
     83                         float crest_factor_margin)
     84      : window_length_(window_length),
     85        reference_window_length_(reference_window_length),
     86        reference_window_delay_(reference_window_delay),
     87        clipping_threshold_(clipping_threshold),
     88        crest_factor_margin_(crest_factor_margin) {
     89    RTC_DCHECK_GT(num_channels, 0);
     90    RTC_DCHECK_GT(window_length, 0);
     91    RTC_DCHECK_GT(reference_window_length, 0);
     92    RTC_DCHECK_GE(reference_window_delay, 0);
     93    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
     94                  window_length);
     95    const int buffer_length = GetMinFramesProcessed();
     96    RTC_DCHECK_GT(buffer_length, 0);
     97    for (int i = 0; i < num_channels; ++i) {
     98      ch_buffers_.push_back(
     99          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
    100    }
    101  }
    102 
    103  ClippingEventPredictor(const ClippingEventPredictor&) = delete;
    104  ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
    105  ~ClippingEventPredictor() override {}
    106 
    107  void Reset() override {
    108    const int num_channels = ch_buffers_.size();
    109    for (int i = 0; i < num_channels; ++i) {
    110      ch_buffers_[i]->Reset();
    111    }
    112  }
    113 
    114  // Analyzes a frame of audio and stores the framewise metrics in
    115  // `ch_buffers_`.
    116  void Analyze(const AudioFrameView<const float>& frame) override {
    117    const int num_channels = frame.num_channels();
    118    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
    119    const int samples_per_channel = frame.samples_per_channel();
    120    RTC_DCHECK_GT(samples_per_channel, 0);
    121    for (int channel = 0; channel < num_channels; ++channel) {
    122      float sum_squares = 0.0f;
    123      float peak = 0.0f;
    124      for (const auto& sample : frame.channel(channel)) {
    125        sum_squares += sample * sample;
    126        peak = std::max(std::fabs(sample), peak);
    127      }
    128      ch_buffers_[channel]->Push(
    129          {.average = sum_squares / static_cast<float>(samples_per_channel),
    130           .max = peak});
    131    }
    132  }
    133 
    134  // Estimates the analog gain adjustment for channel `channel` using a
    135  // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
    136  // estimate for the clipped level step equal to `default_clipped_level_step_`
    137  // if at least `GetMinFramesProcessed()` frames have been processed since the
    138  // last reset and a clipping event is predicted. `level`, `min_mic_level`, and
    139  // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
    140  std::optional<int> EstimateClippedLevelStep(
    141      int channel,
    142      int level,
    143      int default_step,
    144      int min_mic_level,
    145      int max_mic_level) const override {
    146    RTC_CHECK_GE(channel, 0);
    147    RTC_CHECK_LT(channel, ch_buffers_.size());
    148    RTC_DCHECK_GE(level, 0);
    149    RTC_DCHECK_LE(level, 255);
    150    RTC_DCHECK_GT(default_step, 0);
    151    RTC_DCHECK_LE(default_step, 255);
    152    RTC_DCHECK_GE(min_mic_level, 0);
    153    RTC_DCHECK_LE(min_mic_level, 255);
    154    RTC_DCHECK_GE(max_mic_level, 0);
    155    RTC_DCHECK_LE(max_mic_level, 255);
    156    if (level <= min_mic_level) {
    157      return std::nullopt;
    158    }
    159    if (PredictClippingEvent(channel)) {
    160      const int new_level =
    161          SafeClamp(level - default_step, min_mic_level, max_mic_level);
    162      const int step = level - new_level;
    163      if (step > 0) {
    164        return step;
    165      }
    166    }
    167    return std::nullopt;
    168  }
    169 
    170 private:
    171  int GetMinFramesProcessed() const {
    172    return reference_window_delay_ + reference_window_length_;
    173  }
    174 
    175  // Predicts clipping events based on the processed audio frames. Returns
    176  // true if a clipping event is likely.
    177  bool PredictClippingEvent(int channel) const {
    178    const auto metrics =
    179        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
    180    if (!metrics.has_value() ||
    181        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
    182      return false;
    183    }
    184    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
    185        reference_window_delay_, reference_window_length_);
    186    if (!reference_metrics.has_value()) {
    187      return false;
    188    }
    189    const float crest_factor = ComputeCrestFactor(metrics.value());
    190    const float reference_crest_factor =
    191        ComputeCrestFactor(reference_metrics.value());
    192    if (crest_factor < reference_crest_factor - crest_factor_margin_) {
    193      return true;
    194    }
    195    return false;
    196  }
    197 
    198  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
    199  const int window_length_;
    200  const int reference_window_length_;
    201  const int reference_window_delay_;
    202  const float clipping_threshold_;
    203  const float crest_factor_margin_;
    204 };
    205 
    206 // Performs crest factor-based clipping peak prediction.
    207 class ClippingPeakPredictor : public ClippingPredictor {
    208 public:
    209  // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values
    210  // higher than zero); window size `window_length` and reference window size
    211  // `reference_window_length` (both referring to the number of frames in the
    212  // respective sliding windows and limited to values higher than zero);
    213  // reference window delay `reference_window_delay` (delay in frames, limited
    214  // to values zero and higher with an additional requirement of
    215  // `window_length` < `reference_window_length` + reference_window_delay`);
    216  // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
    217  // clipped level step estimation is used if `adaptive_step_estimation` is
    218  // true.
    219  explicit ClippingPeakPredictor(int num_channels,
    220                                 int window_length,
    221                                 int reference_window_length,
    222                                 int reference_window_delay,
    223                                 int clipping_threshold,
    224                                 bool adaptive_step_estimation)
    225      : window_length_(window_length),
    226        reference_window_length_(reference_window_length),
    227        reference_window_delay_(reference_window_delay),
    228        clipping_threshold_(clipping_threshold),
    229        adaptive_step_estimation_(adaptive_step_estimation) {
    230    RTC_DCHECK_GT(num_channels, 0);
    231    RTC_DCHECK_GT(window_length, 0);
    232    RTC_DCHECK_GT(reference_window_length, 0);
    233    RTC_DCHECK_GE(reference_window_delay, 0);
    234    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
    235                  window_length);
    236    const int buffer_length = GetMinFramesProcessed();
    237    RTC_DCHECK_GT(buffer_length, 0);
    238    for (int i = 0; i < num_channels; ++i) {
    239      ch_buffers_.push_back(
    240          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
    241    }
    242  }
    243 
    244  ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
    245  ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
    246  ~ClippingPeakPredictor() override {}
    247 
    248  void Reset() override {
    249    const int num_channels = ch_buffers_.size();
    250    for (int i = 0; i < num_channels; ++i) {
    251      ch_buffers_[i]->Reset();
    252    }
    253  }
    254 
    255  // Analyzes a frame of audio and stores the framewise metrics in
    256  // `ch_buffers_`.
    257  void Analyze(const AudioFrameView<const float>& frame) override {
    258    const int num_channels = frame.num_channels();
    259    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
    260    const int samples_per_channel = frame.samples_per_channel();
    261    RTC_DCHECK_GT(samples_per_channel, 0);
    262    for (int channel = 0; channel < num_channels; ++channel) {
    263      float sum_squares = 0.0f;
    264      float peak = 0.0f;
    265      for (const auto& sample : frame.channel(channel)) {
    266        sum_squares += sample * sample;
    267        peak = std::max(std::fabs(sample), peak);
    268      }
    269      ch_buffers_[channel]->Push(
    270          {.average = sum_squares / static_cast<float>(samples_per_channel),
    271           .max = peak});
    272    }
    273  }
    274 
    275  // Estimates the analog gain adjustment for channel `channel` using a
    276  // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
    277  // estimate for the clipped level step (equal to
    278  // `default_clipped_level_step_` if `adaptive_estimation_` is false) if at
    279  // least `GetMinFramesProcessed()` frames have been processed since the last
    280  // reset and a clipping event is predicted. `level`, `min_mic_level`, and
    281  // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
    282  std::optional<int> EstimateClippedLevelStep(
    283      int channel,
    284      int level,
    285      int default_step,
    286      int min_mic_level,
    287      int max_mic_level) const override {
    288    RTC_DCHECK_GE(channel, 0);
    289    RTC_DCHECK_LT(channel, ch_buffers_.size());
    290    RTC_DCHECK_GE(level, 0);
    291    RTC_DCHECK_LE(level, 255);
    292    RTC_DCHECK_GT(default_step, 0);
    293    RTC_DCHECK_LE(default_step, 255);
    294    RTC_DCHECK_GE(min_mic_level, 0);
    295    RTC_DCHECK_LE(min_mic_level, 255);
    296    RTC_DCHECK_GE(max_mic_level, 0);
    297    RTC_DCHECK_LE(max_mic_level, 255);
    298    if (level <= min_mic_level) {
    299      return std::nullopt;
    300    }
    301    std::optional<float> estimate_db = EstimatePeakValue(channel);
    302    if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
    303      int step = 0;
    304      if (!adaptive_step_estimation_) {
    305        step = default_step;
    306      } else {
    307        const int estimated_gain_change =
    308            SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
    309                      -kClippingPredictorMaxGainChange, 0);
    310        step =
    311            std::max(level - ComputeVolumeUpdate(estimated_gain_change, level,
    312                                                 min_mic_level, max_mic_level),
    313                     default_step);
    314      }
    315      const int new_level =
    316          SafeClamp(level - step, min_mic_level, max_mic_level);
    317      if (level > new_level) {
    318        return level - new_level;
    319      }
    320    }
    321    return std::nullopt;
    322  }
    323 
    324 private:
    325  int GetMinFramesProcessed() {
    326    return reference_window_delay_ + reference_window_length_;
    327  }
    328 
    329  // Predicts clipping sample peaks based on the processed audio frames.
    330  // Returns the estimated peak value if clipping is predicted. Otherwise
    331  // returns std::nullopt.
    332  std::optional<float> EstimatePeakValue(int channel) const {
    333    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
    334        reference_window_delay_, reference_window_length_);
    335    if (!reference_metrics.has_value()) {
    336      return std::nullopt;
    337    }
    338    const auto metrics =
    339        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
    340    if (!metrics.has_value() ||
    341        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
    342      return std::nullopt;
    343    }
    344    const float reference_crest_factor =
    345        ComputeCrestFactor(reference_metrics.value());
    346    const float& mean_squares = metrics.value().average;
    347    const float projected_peak =
    348        reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
    349    return projected_peak;
    350  }
    351 
    352  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
    353  const int window_length_;
    354  const int reference_window_length_;
    355  const int reference_window_delay_;
    356  const int clipping_threshold_;
    357  const bool adaptive_step_estimation_;
    358 };
    359 
    360 }  // namespace
    361 
    362 std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
    363    int num_channels,
    364    const AudioProcessing::Config::GainController1::AnalogGainController::
    365        ClippingPredictor& config) {
    366  if (!config.enabled) {
    367    RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction disabled.";
    368    return nullptr;
    369  }
    370  RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction enabled.";
    371  using ClippingPredictorMode = AudioProcessing::Config::GainController1::
    372      AnalogGainController::ClippingPredictor::Mode;
    373  switch (config.mode) {
    374    case ClippingPredictorMode::kClippingEventPrediction:
    375      return std::make_unique<ClippingEventPredictor>(
    376          num_channels, config.window_length, config.reference_window_length,
    377          config.reference_window_delay, config.clipping_threshold,
    378          config.crest_factor_margin);
    379    case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction:
    380      return std::make_unique<ClippingPeakPredictor>(
    381          num_channels, config.window_length, config.reference_window_length,
    382          config.reference_window_delay, config.clipping_threshold,
    383          /*adaptive_step_estimation=*/true);
    384    case ClippingPredictorMode::kFixedStepClippingPeakPrediction:
    385      return std::make_unique<ClippingPeakPredictor>(
    386          num_channels, config.window_length, config.reference_window_length,
    387          config.reference_window_delay, config.clipping_threshold,
    388          /*adaptive_step_estimation=*/false);
    389  }
    390  RTC_DCHECK_NOTREACHED();
    391 }
    392 
    393 }  // namespace webrtc