[ tor-browser ].git.dasho

adaptive_digital_gain_controller.cc (9536B)
      1 /*
      2 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
     12 
     13 #include <algorithm>
     14 
     15 #include "api/audio/audio_processing.h"
     16 #include "api/audio/audio_view.h"
     17 #include "common_audio/include/audio_util.h"
     18 #include "modules/audio_processing/agc2/agc2_common.h"
     19 #include "modules/audio_processing/logging/apm_data_dumper.h"
     20 #include "rtc_base/checks.h"
     21 #include "rtc_base/logging.h"
     22 #include "rtc_base/numerics/safe_minmax.h"
     23 #include "system_wrappers/include/metrics.h"
     24 
     25 namespace webrtc {
     26 namespace {
     27 
     28 using AdaptiveDigitalConfig =
     29    AudioProcessing::Config::GainController2::AdaptiveDigital;
     30 
     31 constexpr int kHeadroomHistogramMin = 0;
     32 constexpr int kHeadroomHistogramMax = 50;
     33 constexpr int kGainDbHistogramMax = 30;
     34 
     35 // Computes the gain for `input_level_dbfs` to reach `-config.headroom_db`.
     36 // Clamps the gain in [0, `config.max_gain_db`]. `config.headroom_db` is a
     37 // safety margin to allow transient peaks to exceed the target peak level
     38 // without clipping.
     39 float ComputeGainDb(float input_level_dbfs,
     40                    const AdaptiveDigitalConfig& config) {
     41  // If the level is very low, apply the maximum gain.
     42  if (input_level_dbfs < -(config.headroom_db + config.max_gain_db)) {
     43    return config.max_gain_db;
     44  }
     45  // We expect to end up here most of the time: the level is below
     46  // -headroom, but we can boost it to -headroom.
     47  if (input_level_dbfs < -config.headroom_db) {
     48    return -config.headroom_db - input_level_dbfs;
     49  }
     50  // The level is too high and we can't boost.
     51  RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db);
     52  return 0.0f;
     53 }
     54 
     55 // Returns `target_gain_db` if applying such a gain to `input_noise_level_dbfs`
     56 // does not exceed `max_output_noise_level_dbfs`. Otherwise lowers and returns
     57 // `target_gain_db` so that the output noise level equals
     58 // `max_output_noise_level_dbfs`.
     59 float LimitGainByNoise(float target_gain_db,
     60                       float input_noise_level_dbfs,
     61                       float max_output_noise_level_dbfs,
     62                       ApmDataDumper& apm_data_dumper) {
     63  const float max_allowed_gain_db =
     64      max_output_noise_level_dbfs - input_noise_level_dbfs;
     65  apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
     66                          max_allowed_gain_db);
     67  return std::min(target_gain_db, std::max(max_allowed_gain_db, 0.0f));
     68 }
     69 
     70 float LimitGainByLowConfidence(float target_gain_db,
     71                               float last_gain_db,
     72                               float limiter_audio_level_dbfs,
     73                               bool estimate_is_confident) {
     74  if (estimate_is_confident ||
     75      limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) {
     76    return target_gain_db;
     77  }
     78  const float limiter_level_dbfs_before_gain =
     79      limiter_audio_level_dbfs - last_gain_db;
     80 
     81  // Compute a new gain so that `limiter_level_dbfs_before_gain` +
     82  // `new_target_gain_db` is not great than `kLimiterThresholdForAgcGainDbfs`.
     83  const float new_target_gain_db = std::max(
     84      kLimiterThresholdForAgcGainDbfs - limiter_level_dbfs_before_gain, 0.0f);
     85  return std::min(new_target_gain_db, target_gain_db);
     86 }
     87 
     88 // Computes how the gain should change during this frame.
     89 // Return the gain difference in db to 'last_gain_db'.
     90 float ComputeGainChangeThisFrameDb(float target_gain_db,
     91                                   float last_gain_db,
     92                                   bool gain_increase_allowed,
     93                                   float max_gain_decrease_db,
     94                                   float max_gain_increase_db) {
     95  RTC_DCHECK_GT(max_gain_decrease_db, 0);
     96  RTC_DCHECK_GT(max_gain_increase_db, 0);
     97  float target_gain_difference_db = target_gain_db - last_gain_db;
     98  if (!gain_increase_allowed) {
     99    target_gain_difference_db = std::min(target_gain_difference_db, 0.0f);
    100  }
    101  return SafeClamp(target_gain_difference_db, -max_gain_decrease_db,
    102                   max_gain_increase_db);
    103 }
    104 
    105 }  // namespace
    106 
    107 AdaptiveDigitalGainController::AdaptiveDigitalGainController(
    108    ApmDataDumper* apm_data_dumper,
    109    const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
    110    int adjacent_speech_frames_threshold)
    111    : apm_data_dumper_(apm_data_dumper),
    112      gain_applier_(
    113          /*hard_clip_samples=*/false,
    114          /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
    115      config_(config),
    116      adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
    117      max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
    118                                   kFrameDurationMs / 1000.0f),
    119      calls_since_last_gain_log_(0),
    120      frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold),
    121      last_gain_db_(config_.initial_gain_db) {
    122  RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
    123  RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
    124  RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
    125  RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
    126 }
    127 
    128 void AdaptiveDigitalGainController::Process(const FrameInfo& info,
    129                                            DeinterleavedView<float> frame) {
    130  RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f);
    131  RTC_DCHECK_GE(frame.num_channels(), 1);
    132  RTC_DCHECK(
    133      frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
    134      frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
    135      << "`frame` does not look like a 10 ms frame for an APM supported sample "
    136         "rate";
    137 
    138  // Compute the input level used to select the desired gain.
    139  RTC_DCHECK_GT(info.headroom_db, 0.0f);
    140  const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db;
    141 
    142  const float target_gain_db = LimitGainByLowConfidence(
    143      LimitGainByNoise(ComputeGainDb(input_level_dbfs, config_),
    144                       info.noise_rms_dbfs, config_.max_output_noise_level_dbfs,
    145                       *apm_data_dumper_),
    146      last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable);
    147 
    148  // Forbid increasing the gain until enough adjacent speech frames are
    149  // observed.
    150  bool first_confident_speech_frame = false;
    151  if (info.speech_probability < kVadConfidenceThreshold) {
    152    frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
    153  } else if (frames_to_gain_increase_allowed_ > 0) {
    154    frames_to_gain_increase_allowed_--;
    155    first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
    156  }
    157  apm_data_dumper_->DumpRaw(
    158      "agc2_adaptive_gain_applier_frames_to_gain_increase_allowed",
    159      frames_to_gain_increase_allowed_);
    160 
    161  const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0;
    162 
    163  float max_gain_increase_db = max_gain_change_db_per_10ms_;
    164  if (first_confident_speech_frame) {
    165    // No gain increase happened while waiting for a long enough speech
    166    // sequence. Therefore, temporarily allow a faster gain increase.
    167    RTC_DCHECK(gain_increase_allowed);
    168    max_gain_increase_db *= adjacent_speech_frames_threshold_;
    169  }
    170 
    171  const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
    172      target_gain_db, last_gain_db_, gain_increase_allowed,
    173      /*max_gain_decrease_db=*/max_gain_change_db_per_10ms_,
    174      max_gain_increase_db);
    175 
    176  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db",
    177                            target_gain_db - last_gain_db_);
    178  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db",
    179                            gain_change_this_frame_db);
    180 
    181  // Optimization: avoid calling math functions if gain does not
    182  // change.
    183  if (gain_change_this_frame_db != 0.f) {
    184    gain_applier_.SetGainFactor(
    185        DbToRatio(last_gain_db_ + gain_change_this_frame_db));
    186  }
    187 
    188  gain_applier_.ApplyGain(frame);
    189 
    190  // Remember that the gain has changed for the next iteration.
    191  last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
    192  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db",
    193                            last_gain_db_);
    194 
    195  // Log every 10 seconds.
    196  calls_since_last_gain_log_++;
    197  if (calls_since_last_gain_log_ == 1000) {
    198    calls_since_last_gain_log_ = 0;
    199    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel",
    200                                -info.speech_level_dbfs, 0, 100, 101);
    201    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
    202                                -info.noise_rms_dbfs, 0, 100, 101);
    203    RTC_HISTOGRAM_COUNTS_LINEAR(
    204        "WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin,
    205        kHeadroomHistogramMax,
    206        kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
    207    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
    208                                last_gain_db_, 0, kGainDbHistogramMax,
    209                                kGainDbHistogramMax + 1);
    210    RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
    211                     << " | speech_dbfs: " << info.speech_level_dbfs
    212                     << " | noise_dbfs: " << info.noise_rms_dbfs
    213                     << " | headroom_db: " << info.headroom_db
    214                     << " | gain_db: " << last_gain_db_;
    215  }
    216 }
    217 
    218 }  // namespace webrtc
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE