saturation_protector.cc (7071B)
1 /* 2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/agc2/saturation_protector.h" 12 13 #include <algorithm> 14 #include <memory> 15 16 #include "modules/audio_processing/agc2/agc2_common.h" 17 #include "modules/audio_processing/agc2/saturation_protector_buffer.h" 18 #include "modules/audio_processing/logging/apm_data_dumper.h" 19 #include "rtc_base/numerics/safe_compare.h" 20 #include "rtc_base/numerics/safe_minmax.h" 21 22 namespace webrtc { 23 namespace { 24 25 constexpr int kPeakEnveloperSuperFrameLengthMs = 400; 26 constexpr float kMinMarginDb = 12.0f; 27 constexpr float kMaxMarginDb = 25.0f; 28 constexpr float kAttack = 0.9988493699365052f; 29 constexpr float kDecay = 0.9997697679981565f; 30 31 // Saturation protector state. Defined outside of `SaturationProtectorImpl` to 32 // implement check-point and restore ops. 33 struct SaturationProtectorState { 34 bool operator==(const SaturationProtectorState& s) const { 35 return headroom_db == s.headroom_db && 36 peak_delay_buffer == s.peak_delay_buffer && 37 max_peaks_dbfs == s.max_peaks_dbfs && 38 time_since_push_ms == s.time_since_push_ms; 39 } 40 inline bool operator!=(const SaturationProtectorState& s) const { 41 return !(*this == s); 42 } 43 44 float headroom_db; 45 SaturationProtectorBuffer peak_delay_buffer; 46 float max_peaks_dbfs; 47 int time_since_push_ms; // Time since the last ring buffer push operation. 48 }; 49 50 // Resets the saturation protector state. 51 void ResetSaturationProtectorState(float initial_headroom_db, 52 SaturationProtectorState& state) { 53 state.headroom_db = initial_headroom_db; 54 state.peak_delay_buffer.Reset(); 55 state.max_peaks_dbfs = kMinLevelDbfs; 56 state.time_since_push_ms = 0; 57 } 58 59 // Updates `state` by analyzing the estimated speech level `speech_level_dbfs` 60 // and the peak level `peak_dbfs` for an observed frame. `state` must not be 61 // modified without calling this function. 62 void UpdateSaturationProtectorState(float peak_dbfs, 63 float speech_level_dbfs, 64 SaturationProtectorState& state) { 65 // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms. 66 state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs); 67 state.time_since_push_ms += kFrameDurationMs; 68 if (SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) { 69 // Push `max_peaks_dbfs` back into the ring buffer. 70 state.peak_delay_buffer.PushBack(state.max_peaks_dbfs); 71 // Reset. 72 state.max_peaks_dbfs = kMinLevelDbfs; 73 state.time_since_push_ms = 0; 74 } 75 76 // Update the headroom by comparing the estimated speech level and the delayed 77 // max speech peak. 78 const float delayed_peak_dbfs = 79 state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs); 80 const float difference_db = delayed_peak_dbfs - speech_level_dbfs; 81 if (difference_db > state.headroom_db) { 82 // Attack. 83 state.headroom_db = 84 state.headroom_db * kAttack + difference_db * (1.0f - kAttack); 85 } else { 86 // Decay. 87 state.headroom_db = 88 state.headroom_db * kDecay + difference_db * (1.0f - kDecay); 89 } 90 91 state.headroom_db = 92 SafeClamp<float>(state.headroom_db, kMinMarginDb, kMaxMarginDb); 93 } 94 95 // Saturation protector which recommends a headroom based on the recent peaks. 96 class SaturationProtectorImpl : public SaturationProtector { 97 public: 98 explicit SaturationProtectorImpl(float initial_headroom_db, 99 int adjacent_speech_frames_threshold, 100 ApmDataDumper* apm_data_dumper) 101 : apm_data_dumper_(apm_data_dumper), 102 initial_headroom_db_(initial_headroom_db), 103 adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) { 104 Reset(); 105 } 106 SaturationProtectorImpl(const SaturationProtectorImpl&) = delete; 107 SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete; 108 ~SaturationProtectorImpl() override = default; 109 110 float HeadroomDb() override { return headroom_db_; } 111 112 void Analyze(float speech_probability, 113 float peak_dbfs, 114 float speech_level_dbfs) override { 115 if (speech_probability < kVadConfidenceThreshold) { 116 // Not a speech frame. 117 if (adjacent_speech_frames_threshold_ > 1) { 118 // When two or more adjacent speech frames are required in order to 119 // update the state, we need to decide whether to discard or confirm the 120 // updates based on the speech sequence length. 121 if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { 122 // First non-speech frame after a long enough sequence of speech 123 // frames. Update the reliable state. 124 reliable_state_ = preliminary_state_; 125 } else if (num_adjacent_speech_frames_ > 0) { 126 // First non-speech frame after a too short sequence of speech frames. 127 // Reset to the last reliable state. 128 preliminary_state_ = reliable_state_; 129 } 130 } 131 num_adjacent_speech_frames_ = 0; 132 } else { 133 // Speech frame observed. 134 num_adjacent_speech_frames_++; 135 136 // Update preliminary level estimate. 137 UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs, 138 preliminary_state_); 139 140 if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { 141 // `preliminary_state_` is now reliable. Update the headroom. 142 headroom_db_ = preliminary_state_.headroom_db; 143 } 144 } 145 DumpDebugData(); 146 } 147 148 void Reset() override { 149 num_adjacent_speech_frames_ = 0; 150 headroom_db_ = initial_headroom_db_; 151 ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_); 152 ResetSaturationProtectorState(initial_headroom_db_, reliable_state_); 153 } 154 155 private: 156 void DumpDebugData() { 157 apm_data_dumper_->DumpRaw( 158 "agc2_saturation_protector_preliminary_max_peak_dbfs", 159 preliminary_state_.max_peaks_dbfs); 160 apm_data_dumper_->DumpRaw( 161 "agc2_saturation_protector_reliable_max_peak_dbfs", 162 reliable_state_.max_peaks_dbfs); 163 } 164 165 ApmDataDumper* const apm_data_dumper_; 166 const float initial_headroom_db_; 167 const int adjacent_speech_frames_threshold_; 168 int num_adjacent_speech_frames_; 169 float headroom_db_; 170 SaturationProtectorState preliminary_state_; 171 SaturationProtectorState reliable_state_; 172 }; 173 174 } // namespace 175 176 std::unique_ptr<SaturationProtector> CreateSaturationProtector( 177 float initial_headroom_db, 178 int adjacent_speech_frames_threshold, 179 ApmDataDumper* apm_data_dumper) { 180 return std::make_unique<SaturationProtectorImpl>( 181 initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper); 182 } 183 184 } // namespace webrtc