tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

gain_controller2.cc (11330B)


      1 /*
      2 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/gain_controller2.h"
     12 
     13 #include <algorithm>
     14 #include <atomic>
     15 #include <cmath>
     16 #include <memory>
     17 #include <optional>
     18 
     19 #include "api/audio/audio_frame.h"
     20 #include "api/audio/audio_processing.h"
     21 #include "api/audio/audio_view.h"
     22 #include "api/environment/environment.h"
     23 #include "api/field_trials_view.h"
     24 #include "common_audio/include/audio_util.h"
     25 #include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
     26 #include "modules/audio_processing/agc2/agc2_common.h"
     27 #include "modules/audio_processing/agc2/cpu_features.h"
     28 #include "modules/audio_processing/agc2/input_volume_controller.h"
     29 #include "modules/audio_processing/agc2/interpolated_gain_curve.h"
     30 #include "modules/audio_processing/agc2/noise_level_estimator.h"
     31 #include "modules/audio_processing/agc2/saturation_protector.h"
     32 #include "modules/audio_processing/agc2/speech_level_estimator.h"
     33 #include "modules/audio_processing/agc2/vad_wrapper.h"
     34 #include "modules/audio_processing/audio_buffer.h"
     35 #include "modules/audio_processing/logging/apm_data_dumper.h"
     36 #include "rtc_base/checks.h"
     37 #include "rtc_base/logging.h"
     38 
     39 namespace webrtc {
     40 namespace {
     41 
// Short aliases for the nested config types used throughout this file.
using Agc2Config = AudioProcessing::Config::GainController2;
using InputVolumeControllerConfig = InputVolumeController::Config;

// Limiter statistics are logged once every `kLogLimiterStatsPeriodMs`;
// expressed below as a number of 10 ms `Process()` calls.
constexpr int kLogLimiterStatsPeriodMs = 30'000;
constexpr int kFrameLengthMs = 10;
constexpr int kLogLimiterStatsPeriodNumFrames =
    kLogLimiterStatsPeriodMs / kFrameLengthMs;
     49 
     50 // Detects the available CPU features and applies any kill-switches.
     51 AvailableCpuFeatures GetAllowedCpuFeatures(
     52    const FieldTrialsView& field_trials) {
     53  AvailableCpuFeatures features = GetAvailableCpuFeatures();
     54  if (field_trials.IsEnabled("WebRTC-Agc2SimdSse2KillSwitch")) {
     55    features.sse2 = false;
     56  }
     57  if (field_trials.IsEnabled("WebRTC-Agc2SimdAvx2KillSwitch")) {
     58    features.avx2 = false;
     59  }
     60  if (field_trials.IsEnabled("WebRTC-Agc2SimdNeonKillSwitch")) {
     61    features.neon = false;
     62  }
     63  return features;
     64 }
     65 
// Peak and RMS audio levels in dBFS.
struct AudioLevels {
  float peak_dbfs;  // Peak absolute sample level, dBFS.
  float rms_dbfs;   // Root-mean-square level, dBFS.
};

// Speech level info.
struct SpeechLevel {
  bool is_confident;  // True when the estimator deems the estimate reliable.
  float rms_dbfs;     // Estimated speech RMS level, dBFS.
};
     77 
     78 // Computes the audio levels for the first channel in `frame`.
     79 AudioLevels ComputeAudioLevels(DeinterleavedView<float> frame,
     80                               ApmDataDumper& data_dumper) {
     81  float peak = 0.0f;
     82  float rms = 0.0f;
     83  for (const auto& x : frame[0]) {
     84    peak = std::max(std::fabs(x), peak);
     85    rms += x * x;
     86  }
     87  AudioLevels levels{
     88      .peak_dbfs = FloatS16ToDbfs(peak),
     89      .rms_dbfs = FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))};
     90  data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs);
     91  data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs);
     92  return levels;
     93 }
     94 
     95 }  // namespace
     96 
// Process-wide instance counter; each constructed GainController2 takes the
// next value to give its ApmDataDumper a unique id.
std::atomic<int> GainController2::instance_count_(0);
     98 
// Constructs the controller and, depending on `config`, its optional
// sub-components: the speech level estimator and VAD (shared by the input
// volume controller and the adaptive digital controller), the input volume
// controller, and the adaptive digital gain chain. The fixed gain applier and
// the limiter are always created.
GainController2::GainController2(
    const Environment& env,
    const Agc2Config& config,
    const InputVolumeControllerConfig& input_volume_controller_config,
    int sample_rate_hz,
    int num_channels,
    bool use_internal_vad)
    : cpu_features_(GetAllowedCpuFeatures(env.field_trials())),
      // fetch_add(1) + 1 yields a unique, 1-based id for the data dumper.
      data_dumper_(instance_count_.fetch_add(1) + 1),
      fixed_gain_applier_(
          /*hard_clip_samples=*/false,
          /*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)),
      limiter_(&data_dumper_,
               SampleRateToDefaultChannelSize(sample_rate_hz),
               /*histogram_name_prefix=*/"Agc2"),
      calls_since_last_limiter_log_(0) {
  RTC_DCHECK(Validate(config));
  data_dumper_.InitiateNewSetOfRecordings();

  // The speech level estimator (and optionally the internal VAD) is needed by
  // both the input volume controller and the adaptive digital controller.
  if (config.input_volume_controller.enabled ||
      config.adaptive_digital.enabled) {
    // Create dependencies.
    speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
        &data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold);
    if (use_internal_vad)
      vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
          kVadResetPeriodMs, cpu_features_, sample_rate_hz);
  }

  if (config.input_volume_controller.enabled) {
    // Create controller.
    input_volume_controller_ = std::make_unique<InputVolumeController>(
        num_channels, input_volume_controller_config);
    // TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method.
    input_volume_controller_->Initialize();
  }

  if (config.adaptive_digital.enabled) {
    // Create dependencies.
    noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
    saturation_protector_ = CreateSaturationProtector(
        kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold,
        &data_dumper_);
    // Create controller.
    adaptive_digital_controller_ =
        std::make_unique<AdaptiveDigitalGainController>(
            &data_dumper_, config.adaptive_digital,
            kAdjacentSpeechFramesThreshold);
  }
}
    149 
// Defaulted out of line; presumably so that the unique_ptr members' deleters
// are instantiated here rather than in the header — TODO(review): confirm.
GainController2::~GainController2() = default;
    151 
    152 // TODO(webrtc:7494): Pass the flag also to the other components.
    153 void GainController2::SetCaptureOutputUsed(bool capture_output_used) {
    154  if (input_volume_controller_) {
    155    input_volume_controller_->HandleCaptureOutputUsedChange(
    156        capture_output_used);
    157  }
    158 }
    159 
    160 void GainController2::SetFixedGainDb(float gain_db) {
    161  const float gain_factor = DbToRatio(gain_db);
    162  if (fixed_gain_applier_.GetGainFactor() != gain_factor) {
    163    // Reset the limiter to quickly react on abrupt level changes caused by
    164    // large changes of the fixed gain.
    165    limiter_.Reset();
    166  }
    167  fixed_gain_applier_.SetGainFactor(gain_factor);
    168 }
    169 
    170 void GainController2::Analyze(int applied_input_volume,
    171                              const AudioBuffer& audio_buffer) {
    172  recommended_input_volume_ = std::nullopt;
    173 
    174  RTC_DCHECK_GE(applied_input_volume, 0);
    175  RTC_DCHECK_LE(applied_input_volume, 255);
    176 
    177  if (input_volume_controller_) {
    178    input_volume_controller_->AnalyzeInputAudio(applied_input_volume,
    179                                                audio_buffer);
    180  }
    181 }
    182 
// Runs the full AGC2 pipeline on `audio` in place: (1) reset level estimators
// if the input volume changed, (2) obtain a speech probability (internal VAD
// or caller-provided), (3) update audio/noise/speech level estimates,
// (4) recommend an input volume, (5) apply the adaptive digital gain, then
// (6) apply the fixed gain and the limiter. `speech_probability` must be
// empty when the internal VAD is enabled.
void GainController2::Process(std::optional<float> speech_probability,
                              bool input_volume_changed,
                              AudioBuffer* audio) {
  recommended_input_volume_ = std::nullopt;

  data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
                       input_volume_changed);
  if (input_volume_changed) {
    // Handle input volume changes: the estimators must re-converge since the
    // capture level they were tracking is no longer valid.
    if (speech_level_estimator_)
      speech_level_estimator_->Reset();
    if (saturation_protector_)
      saturation_protector_->Reset();
  }

  DeinterleavedView<float> float_frame = audio->view();

  // Compute speech probability.
  if (vad_) {
    // When the VAD component runs, `speech_probability` should not be specified
    // because APM should not run the same VAD twice (as an APM sub-module and
    // internally in AGC2).
    RTC_DCHECK(!speech_probability.has_value());
    speech_probability = vad_->Analyze(float_frame);
  }
  if (speech_probability.has_value()) {
    RTC_DCHECK_GE(*speech_probability, 0.0f);
    RTC_DCHECK_LE(*speech_probability, 1.0f);
  }
  // The speech probability may not be defined at this step (e.g., when the
  // fixed digital controller alone is enabled).
  if (speech_probability.has_value())
    data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability);

  // Compute audio, noise and speech levels.
  AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_);
  std::optional<float> noise_rms_dbfs;
  if (noise_level_estimator_) {
    // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
    // computation in `noise_level_estimator_`.
    noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame);
  }
  std::optional<SpeechLevel> speech_level;
  if (speech_level_estimator_) {
    // The constructor guarantees a speech probability source (internal VAD or
    // caller) whenever the speech level estimator exists.
    RTC_DCHECK(speech_probability.has_value());
    speech_level_estimator_->Update(
        audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability);
    speech_level =
        SpeechLevel{.is_confident = speech_level_estimator_->is_confident(),
                    .rms_dbfs = speech_level_estimator_->level_dbfs()};
  }

  // Update the recommended input volume.
  if (input_volume_controller_) {
    RTC_DCHECK(speech_level.has_value());
    RTC_DCHECK(speech_probability.has_value());
    if (speech_probability.has_value()) {
      recommended_input_volume_ =
          input_volume_controller_->RecommendInputVolume(
              *speech_probability,
              // Only pass the speech level when it is deemed reliable.
              speech_level->is_confident
                  ? std::optional<float>(speech_level->rms_dbfs)
                  : std::nullopt);
    }
  }

  if (adaptive_digital_controller_) {
    RTC_DCHECK(saturation_protector_);
    RTC_DCHECK(speech_probability.has_value());
    RTC_DCHECK(speech_level.has_value());
    saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs,
                                   speech_level->rms_dbfs);
    float headroom_db = saturation_protector_->HeadroomDb();
    data_dumper_.DumpRaw("agc2_headroom_db", headroom_db);
    float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel());
    data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs);
    RTC_DCHECK(noise_rms_dbfs.has_value());
    adaptive_digital_controller_->Process(
        /*info=*/{.speech_probability = *speech_probability,
                  .speech_level_dbfs = speech_level->rms_dbfs,
                  .speech_level_reliable = speech_level->is_confident,
                  .noise_rms_dbfs = *noise_rms_dbfs,
                  .headroom_db = headroom_db,
                  .limiter_envelope_dbfs = limiter_envelope_dbfs},
        float_frame);
  }

  // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
  // computation in `limiter_`.
  // Fixed gain and limiter always run, regardless of which optional
  // sub-components are enabled.
  fixed_gain_applier_.ApplyGain(float_frame);

  limiter_.Process(float_frame);

  // Periodically log limiter stats.
  if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) {
    calls_since_last_limiter_log_ = 0;
    InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats();
    RTC_LOG(LS_INFO) << "[AGC2] limiter stats"
                     << " | identity: " << stats.look_ups_identity_region
                     << " | knee: " << stats.look_ups_knee_region
                     << " | limiter: " << stats.look_ups_limiter_region
                     << " | saturation: " << stats.look_ups_saturation_region;
  }
}
    287 
    288 bool GainController2::Validate(
    289    const AudioProcessing::Config::GainController2& config) {
    290  const auto& fixed = config.fixed_digital;
    291  const auto& adaptive = config.adaptive_digital;
    292  return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f &&
    293         adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f &&
    294         adaptive.initial_gain_db >= 0.0f &&
    295         adaptive.max_gain_change_db_per_second > 0.0f &&
    296         adaptive.max_output_noise_level_dbfs <= 0.0f;
    297 }
    298 
    299 }  // namespace webrtc