[ tor-browser ].git.dasho

input_volume_controller.h (11866B)
      1 /*
      2 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_
     12 #define MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_
     13 
     14 #include <memory>
     15 #include <optional>
     16 #include <vector>
     17 
     18 #include "api/audio/audio_processing.h"
     19 #include "modules/audio_processing/agc2/clipping_predictor.h"
     20 #include "modules/audio_processing/audio_buffer.h"
     21 #include "rtc_base/gtest_prod_util.h"
     22 
     23 namespace webrtc {
     24 
     25 class MonoInputVolumeController;  // Forward declaration; defined later in this header.
     26 
     27 // The input volume controller recommends what volume to use, handles volume
     28 // changes and clipping detection and prediction. In particular, it handles
     29 // changes triggered by the user (e.g., volume set to zero by a HW mute button).
     30 // This class is not thread-safe.
        // Call order, per the method comments below: `AnalyzeInputAudio()` first,
        // then `RecommendInputVolume()`.
     31 // TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
     32 // convention.
     33 class InputVolumeController final {
     34 public:
     35  // Config for the constructor.
     36  struct Config {
     37    // Minimum input volume that can be recommended. Not enforced when the
     38    // applied input volume is zero outside startup.
     39    int min_input_volume = 20;
     40    // Lowest input volume level that will be applied in response to clipping.
     41    int clipped_level_min = 70;
     42    // Amount input volume level is lowered with every clipping event. Limited
     43    // to (0, 255].
     44    int clipped_level_step = 15;
     45    // Proportion of clipped samples required to declare a clipping event.
     46    // Limited to (0.0f, 1.0f).
     47    float clipped_ratio_threshold = 0.1f;
     48    // Time in frames to wait after a clipping event before checking again.
     49    // Limited to values higher than 0.
     50    int clipped_wait_frames = 300;
     51    // Enables clipping prediction functionality.
     52    bool enable_clipping_predictor = true;
     53    // Speech level target range (dBFS). If the speech level is in the range
     54    // [`target_range_min_dbfs`, `target_range_max_dbfs`], no input volume
     55    // adjustments are done based on the speech level. For speech levels below
     56    // and above the range, the targets `target_range_min_dbfs` and
     57    // `target_range_max_dbfs` are used, respectively.
     58    int target_range_max_dbfs = -30;
     59    int target_range_min_dbfs = -50;
     60    // Number of wait frames between the recommended input volume updates.
     61    int update_input_volume_wait_frames = 100;
     62    // Speech probability threshold: speech probabilities below the threshold
     63    // are considered silence. Limited to [0.0f, 1.0f].
     64    float speech_probability_threshold = 0.7f;
     65    // Minimum speech frame ratio for volume updates to be allowed. Limited to
     66    // [0.0f, 1.0f].
     67    float speech_ratio_threshold = 0.6f;
     68  };
     69 
     70  // Ctor. `num_capture_channels` specifies the number of channels for the audio
     71  // passed to `AnalyzeInputAudio()`. `config` supplies the parameters declared
     72  // in `Config` above.
         // NOTE(review): `Config` declares no `startup_min_level` field, so the
         // clamping to [12, 255] that this comment used to mention cannot be matched
         // to a parameter - confirm in the .cc file whether `min_input_volume` is
         // clamped and document it here.
     73  InputVolumeController(int num_capture_channels, const Config& config);
     74 
     75  ~InputVolumeController();
         // Not copyable.
     76  InputVolumeController(const InputVolumeController&) = delete;
     77  InputVolumeController& operator=(const InputVolumeController&) = delete;
     78 
     79  // TODO(webrtc:7494): Integrate initialization into ctor and remove.
     80  void Initialize();
     81 
     82  // Analyzes `audio_buffer` before `RecommendInputVolume()` is called so that
     83  // the analysis can be performed before digital processing operations take
     84  // place (e.g., echo cancellation). The analysis consists of input clipping
     85  // detection and prediction (if enabled).
     86  void AnalyzeInputAudio(int applied_input_volume,
     87                         const AudioBuffer& audio_buffer);
     88 
     89  // Adjusts the recommended input volume upwards/downwards based on the result
     90  // of `AnalyzeInputAudio()` and on `speech_level_dbfs` (if specified). Must
     91  // be called after `AnalyzeInputAudio()`.  The value of `speech_probability`
     92  // is expected to be in the range [0, 1] and `speech_level_dbfs` in the range
     93  // [-90, 30] and both should be estimated after echo cancellation and noise
     94  // suppression are applied. Returns a non-empty input volume recommendation if
     95  // available. If `capture_output_used_` is true, returns the applied input
     96  // volume.
         // NOTE(review): `HandleCaptureOutputUsedChange()` below documents that
         // incoming audio is disregarded when the capture output is *unused*, so
         // "true" above may be meant to read "false" - confirm against the
         // implementation.
     97  std::optional<int> RecommendInputVolume(
     98      float speech_probability,
     99      std::optional<float> speech_level_dbfs);
    100 
    101  // Stores whether the capture output will be used or not. Call when the
    102  // capture stream output has been flagged to be used/not-used. If unused, the
    103  // controller disregards all incoming audio.
    104  void HandleCaptureOutputUsedChange(bool capture_output_used);
    105 
    106  // Returns true if clipping prediction is enabled, i.e., if
         // `clipping_predictor_` is non-null.
    107  // TODO(bugs.webrtc.org/7494): Deprecate this method.
    108  bool clipping_predictor_enabled() const { return !!clipping_predictor_; }
    109 
    110  // Returns true if clipping prediction is used to adjust the input volume.
    111  // TODO(bugs.webrtc.org/7494): Deprecate this method.
    112  bool use_clipping_predictor_step() const {
    113    return use_clipping_predictor_step_;
    114  }
    115 
    116  // Only use for testing: Use `RecommendInputVolume()` elsewhere.
    117  // Returns the value of a member variable, needed for testing
    118  // `AnalyzeInputAudio()`.
    119  int recommended_input_volume() const { return recommended_input_volume_; }
    120 
    121  // Only use for testing.
    122  bool capture_output_used() const { return capture_output_used_; }
    123 
    124 private:
         // Test-only access to the private members.
    125  friend class InputVolumeControllerTestHelper;
    126 
    127  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeDefault);
    128  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeDisabled);
    129  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest,
    130                           MinInputVolumeOutOfRangeAbove);
    131  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest,
    132                           MinInputVolumeOutOfRangeBelow);
    133  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeEnabled50);
    134  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerParametrizedTest,
    135                           ClippingParametersVerified);
    136 
    137  // Sets the applied input volume and resets the recommended input volume.
    138  void SetAppliedInputVolume(int level);
    139 
         // NOTE(review): undocumented in this header; presumably combines the
         // recommendations of `channel_controllers_` into
         // `recommended_input_volume_` - confirm in the .cc file.
    140  void AggregateChannelLevels();
    141 
    142  const int num_capture_channels_;
    143 
    144  // Minimum input volume that can be recommended.
    145  const int min_input_volume_;
    146 
    147  // TODO(bugs.webrtc.org/7494): Once
    148  // `AudioProcessingImpl::recommended_stream_analog_level()` becomes a trivial
    149  // getter, leave uninitialized.
    150  // Recommended input volume. After `SetAppliedInputVolume()` is called it
    151  // holds the observed input volume. Possibly updated by
    152  // `AnalyzeInputAudio()` and `RecommendInputVolume()`; after these calls,
    153  // holds the recommended input volume.
    154  int recommended_input_volume_ = 0;
    155  // Applied input volume. After `SetAppliedInputVolume()` is called it holds
    156  // the current applied volume.
    157  std::optional<int> applied_input_volume_;
    158 
         // Value exposed by `capture_output_used()`. No in-class initializer here;
         // presumably set in the constructor - TODO confirm.
    159  bool capture_output_used_;
    160 
    161  // Clipping detection and prediction.
    162  const int clipped_level_step_;
    163  const float clipped_ratio_threshold_;
    164  const int clipped_wait_frames_;
         // Null when clipping prediction is disabled (see
         // `clipping_predictor_enabled()`).
    165  const std::unique_ptr<ClippingPredictor> clipping_predictor_;
    166  const bool use_clipping_predictor_step_;
    167  int frames_since_clipped_;
    168  int clipping_rate_log_counter_;
    169  float clipping_rate_log_;
    170 
    171  // Target range minimum and maximum. If the speech level is in the range
    172  // [`target_range_min_dbfs`, `target_range_max_dbfs`], no volume adjustments
    173  // take place. Instead, the digital gain controller is assumed to adapt to
    174  // compensate for the speech level RMS error.
    175  const int target_range_max_dbfs_;
    176  const int target_range_min_dbfs_;
    177 
    178  // Channel controllers updating the gain upwards/downwards.
    179  std::vector<std::unique_ptr<MonoInputVolumeController>> channel_controllers_;
         // NOTE(review): presumably the index into `channel_controllers_` of the
         // channel whose recommendation is currently used - confirm in the .cc file.
    180  int channel_controlling_gain_ = 0;
    181 };
    182 
        // Input volume controller for a single channel. `InputVolumeController` owns
        // a vector of these in `channel_controllers_`.
        // Call order, per the method comments below: `set_stream_analog_level()`,
        // then `HandleClipping()`, then `Process()`, then
        // `recommended_analog_level()`.
    183 // TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
    184 // convention.
    185 class MonoInputVolumeController {
    186 public:
         // Ctor. The parameter names match the `const` members declared below
         // (`min_input_volume_after_clipping_`, `min_input_volume_`,
         // `update_input_volume_wait_frames_`, `speech_probability_threshold_`,
         // `speech_ratio_threshold_`); presumably each parameter initializes the
         // same-named member - confirm in the .cc file.
    187  MonoInputVolumeController(int min_input_volume_after_clipping,
    188                            int min_input_volume,
    189                            int update_input_volume_wait_frames,
    190                            float speech_probability_threshold,
    191                            float speech_ratio_threshold);
    192  ~MonoInputVolumeController();
         // Not copyable.
    193  MonoInputVolumeController(const MonoInputVolumeController&) = delete;
    194  MonoInputVolumeController& operator=(const MonoInputVolumeController&) =
    195      delete;
    196 
         // NOTE(review): undocumented in this header; presumably resets the
         // per-stream state - confirm in the .cc file.
    197  void Initialize();
         // NOTE(review): undocumented in this header; presumably records
         // `capture_output_used` in `capture_output_used_` - confirm in the .cc file.
    198  void HandleCaptureOutputUsedChange(bool capture_output_used);
    199 
    200  // Sets the current input volume. The value is stored in
         // `recommended_input_volume_`, which doubles as the observed applied volume
         // until `HandleClipping()`/`Process()` update it.
    201  void set_stream_analog_level(int input_volume) {
    202    recommended_input_volume_ = input_volume;
    203  }
    204 
    205  // Lowers the recommended input volume in response to clipping based on the
    206  // suggested reduction `clipped_level_step`. Must be called after
    207  // `set_stream_analog_level()`.
    208  void HandleClipping(int clipped_level_step);
    209 
    210  // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
    211  // Adjusts the recommended input volume upwards/downwards depending on the
    212  // result of `HandleClipping()` and on `rms_error_dbfs`. Updates are only
    213  // allowed for active speech segments and when `rms_error_dbfs` is not empty.
    214  // Must be called after `HandleClipping()`.
    215  void Process(std::optional<int> rms_error_dbfs, float speech_probability);
    216 
    217  // Returns the recommended input volume. Must be called after `Process()`.
    218  int recommended_analog_level() const { return recommended_input_volume_; }
    219 
         // Sets `log_to_histograms_` to true.
    220  void ActivateLogging() { log_to_histograms_ = true; }
    221 
         // Returns `min_input_volume_after_clipping_`.
    222  int min_input_volume_after_clipping() const {
    223    return min_input_volume_after_clipping_;
    224  }
    225 
    226  // Only used for testing.
    227  int min_input_volume() const { return min_input_volume_; }
    228 
    229 private:
    230  // Sets a new input volume, after first checking that it hasn't been updated
    231  // by the user, in which case no action is taken.
    232  void SetInputVolume(int new_volume);
    233 
    234  // Sets the maximum input volume that the input volume controller is allowed
    235  // to apply. The volume must be at least `kClippedLevelMin`.
         // NOTE(review): `kClippedLevelMin` is not declared in this header;
         // presumably the bound meant here is `min_input_volume_after_clipping_` -
         // confirm in the .cc file.
    236  void SetMaxLevel(int level);
    237 
         // NOTE(review): undocumented in this header; the meaning of the returned
         // `int` is defined by the implementation - document it here once confirmed.
    238  int CheckVolumeAndReset();
    239 
    240  // Updates the recommended input volume. If the volume slider needs to be
    241  // moved, we check first if the user has adjusted it, in which case we take no
    242  // action and cache the updated level.
    243  void UpdateInputVolume(int rms_error_dbfs);
    244 
    245  const int min_input_volume_;
    246  const int min_input_volume_after_clipping_;
         // Presumably the limit managed by `SetMaxLevel()` - TODO confirm. No in-class
         // initializer here.
    247  int max_input_volume_;
    248 
    249  int last_recommended_input_volume_ = 0;
    250 
    251  bool capture_output_used_ = true;
    252  bool check_volume_on_next_process_ = true;
    253  bool startup_ = true;
    254 
    255  // TODO(bugs.webrtc.org/7494): Create a separate member for the applied
    256  // input volume.
    257  // Recommended input volume. After `set_stream_analog_level()` is
    258  // called, it holds the observed applied input volume. Possibly updated by
    259  // `HandleClipping()` and `Process()`; after these calls, holds the
    260  // recommended input volume.
    261  int recommended_input_volume_ = 0;
    262 
         // Set to true by `ActivateLogging()`.
    263  bool log_to_histograms_ = false;
    264 
    265  // Counters for frames and speech frames since the last update in the
    266  // recommended input volume.
    267  const int update_input_volume_wait_frames_;
    268  int frames_since_update_input_volume_ = 0;
    269  int speech_frames_since_update_input_volume_ = 0;
    270  bool is_first_frame_ = true;
    271 
    272  // Speech probability threshold for a frame to be considered speech (instead
    273  // of silence). Limited to [0.0f, 1.0f].
    274  const float speech_probability_threshold_;
    275  // Minimum ratio of speech frames. Limited to [0.0f, 1.0f].
    276  const float speech_ratio_threshold_;
    277 };
    278 
    279 }  // namespace webrtc
    280 
    281 #endif  // MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE