input_volume_controller_unittest.cc (81225B)
1 /* 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/agc2/input_volume_controller.h" 12 13 #include <algorithm> 14 #include <cmath> 15 #include <cstdint> 16 #include <fstream> 17 #include <ios> 18 #include <limits> 19 #include <memory> 20 #include <optional> 21 #include <tuple> 22 #include <vector> 23 24 #include "api/audio/audio_processing.h" 25 #include "modules/audio_processing/audio_buffer.h" 26 #include "rtc_base/checks.h" 27 #include "rtc_base/numerics/safe_minmax.h" 28 #include "system_wrappers/include/metrics.h" 29 #include "test/gmock.h" 30 #include "test/gtest.h" 31 #include "test/testsupport/file_utils.h" 32 33 using ::testing::_; 34 using ::testing::AtLeast; 35 using ::testing::DoAll; 36 using ::testing::Return; 37 using ::testing::SetArgPointee; 38 39 namespace webrtc { 40 namespace { 41 42 constexpr int kSampleRateHz = 32000; 43 constexpr int kNumChannels = 1; 44 constexpr int kDefaultInitialInputVolume = 128; 45 constexpr int kClippedMin = 165; // Arbitrary, but different from the default. 
46 constexpr float kAboveClippedThreshold = 0.2f; 47 constexpr int kClippedLevelStep = 15; 48 constexpr float kClippedRatioThreshold = 0.1f; 49 constexpr int kClippedWaitFrames = 300; 50 constexpr float kHighSpeechProbability = 0.7f; 51 constexpr float kLowSpeechProbability = 0.1f; 52 constexpr float kSpeechLevel = -25.0f; 53 constexpr float kSpeechRatioThreshold = 0.8f; 54 55 constexpr float kMinSample = std::numeric_limits<int16_t>::min(); 56 constexpr float kMaxSample = std::numeric_limits<int16_t>::max(); 57 58 using ClippingPredictorConfig = AudioProcessing::Config::GainController1:: 59 AnalogGainController::ClippingPredictor; 60 61 using InputVolumeControllerConfig = InputVolumeController::Config; 62 63 constexpr ClippingPredictorConfig kDefaultClippingPredictorConfig{}; 64 65 std::unique_ptr<InputVolumeController> CreateInputVolumeController( 66 int clipped_level_step = kClippedLevelStep, 67 float clipped_ratio_threshold = kClippedRatioThreshold, 68 int clipped_wait_frames = kClippedWaitFrames, 69 bool enable_clipping_predictor = false, 70 int update_input_volume_wait_frames = 0) { 71 InputVolumeControllerConfig config{ 72 .min_input_volume = 20, 73 .clipped_level_min = kClippedMin, 74 .clipped_level_step = clipped_level_step, 75 .clipped_ratio_threshold = clipped_ratio_threshold, 76 .clipped_wait_frames = clipped_wait_frames, 77 .enable_clipping_predictor = enable_clipping_predictor, 78 .target_range_max_dbfs = -18, 79 .target_range_min_dbfs = -30, 80 .update_input_volume_wait_frames = update_input_volume_wait_frames, 81 .speech_probability_threshold = 0.5f, 82 .speech_ratio_threshold = kSpeechRatioThreshold, 83 }; 84 85 return std::make_unique<InputVolumeController>(/*num_capture_channels=*/1, 86 config); 87 } 88 89 // (Over)writes `samples_value` for the samples in `audio_buffer`. 90 // When `clipped_ratio`, a value in [0, 1], is greater than 0, the corresponding 91 // fraction of the frame is set to a full scale value to simulate clipping. 
92 void WriteAudioBufferSamples(float samples_value, 93 float clipped_ratio, 94 AudioBuffer& audio_buffer) { 95 RTC_DCHECK_GE(samples_value, kMinSample); 96 RTC_DCHECK_LE(samples_value, kMaxSample); 97 RTC_DCHECK_GE(clipped_ratio, 0.0f); 98 RTC_DCHECK_LE(clipped_ratio, 1.0f); 99 int num_channels = audio_buffer.num_channels(); 100 int num_samples = audio_buffer.num_frames(); 101 int num_clipping_samples = clipped_ratio * num_samples; 102 for (int ch = 0; ch < num_channels; ++ch) { 103 int i = 0; 104 for (; i < num_clipping_samples; ++i) { 105 audio_buffer.channels()[ch][i] = 32767.0f; 106 } 107 for (; i < num_samples; ++i) { 108 audio_buffer.channels()[ch][i] = samples_value; 109 } 110 } 111 } 112 113 // (Over)writes samples in `audio_buffer`. Alternates samples `samples_value` 114 // and zero. 115 void WriteAlternatingAudioBufferSamples(float samples_value, 116 AudioBuffer& audio_buffer) { 117 RTC_DCHECK_GE(samples_value, kMinSample); 118 RTC_DCHECK_LE(samples_value, kMaxSample); 119 const int num_channels = audio_buffer.num_channels(); 120 const int num_frames = audio_buffer.num_frames(); 121 for (int ch = 0; ch < num_channels; ++ch) { 122 for (int i = 0; i < num_frames; i += 2) { 123 audio_buffer.channels()[ch][i] = samples_value; 124 audio_buffer.channels()[ch][i + 1] = 0.0f; 125 } 126 } 127 } 128 129 // Reads a given number of 10 ms chunks from a PCM file and feeds them to 130 // `InputVolumeController`. 131 class SpeechSamplesReader { 132 private: 133 // Recording properties. 
134 static constexpr int kPcmSampleRateHz = 16000; 135 static constexpr int kPcmNumChannels = 1; 136 static constexpr int kPcmBytesPerSamples = sizeof(int16_t); 137 138 public: 139 SpeechSamplesReader() 140 : is_(test::ResourcePath("audio_processing/agc/agc_audio", "pcm"), 141 std::ios::binary | std::ios::ate), 142 audio_buffer_(kPcmSampleRateHz, 143 kPcmNumChannels, 144 kPcmSampleRateHz, 145 kPcmNumChannels, 146 kPcmSampleRateHz, 147 kPcmNumChannels), 148 buffer_(audio_buffer_.num_frames()), 149 buffer_num_bytes_(buffer_.size() * kPcmBytesPerSamples) { 150 RTC_CHECK(is_); 151 } 152 153 // Reads `num_frames` 10 ms frames from the beginning of the PCM file, applies 154 // `gain_db` and feeds the frames into `controller` by calling 155 // `AnalyzeInputAudio()` and `RecommendInputVolume()` for each frame. Reads 156 // the number of 10 ms frames available in the PCM file if `num_frames` is too 157 // large - i.e., does not loop. `speech_probability` and `speech_level_dbfs` 158 // are passed to `RecommendInputVolume()`. 159 int Feed(int num_frames, 160 int applied_input_volume, 161 int gain_db, 162 float speech_probability, 163 std::optional<float> speech_level_dbfs, 164 InputVolumeController& controller) { 165 RTC_DCHECK(controller.capture_output_used()); 166 167 float gain = std::pow(10.0f, gain_db / 20.0f); // From dB to linear gain. 168 is_.seekg(0, 169 std::ifstream::beg); // Start from the beginning of the PCM file. 170 171 // Read and feed frames. 172 for (int i = 0; i < num_frames; ++i) { 173 is_.read(reinterpret_cast<char*>(buffer_.data()), buffer_num_bytes_); 174 if (is_.gcount() < buffer_num_bytes_) { 175 // EOF reached. Stop. 176 break; 177 } 178 // Apply gain and copy samples into `audio_buffer_`. 
179 std::transform(buffer_.begin(), buffer_.end(), 180 audio_buffer_.channels()[0], [gain](int16_t v) -> float { 181 return SafeClamp(static_cast<float>(v) * gain, 182 kMinSample, kMaxSample); 183 }); 184 controller.AnalyzeInputAudio(applied_input_volume, audio_buffer_); 185 const auto recommended_input_volume = controller.RecommendInputVolume( 186 speech_probability, speech_level_dbfs); 187 188 // Expect no errors: Applied volume set for every frame; 189 // `RecommendInputVolume()` returns a non-empty value. 190 EXPECT_TRUE(recommended_input_volume.has_value()); 191 192 applied_input_volume = *recommended_input_volume; 193 } 194 return applied_input_volume; 195 } 196 197 private: 198 std::ifstream is_; 199 AudioBuffer audio_buffer_; 200 std::vector<int16_t> buffer_; 201 const std::streamsize buffer_num_bytes_; 202 }; 203 204 // Runs the MonoInputVolumeControl processing sequence following the API 205 // contract. Returns the updated recommended input volume. 206 float UpdateRecommendedInputVolume(MonoInputVolumeController& mono_controller, 207 int applied_input_volume, 208 float speech_probability, 209 std::optional<float> rms_error_dbfs) { 210 mono_controller.set_stream_analog_level(applied_input_volume); 211 EXPECT_EQ(mono_controller.recommended_analog_level(), applied_input_volume); 212 mono_controller.Process(rms_error_dbfs, speech_probability); 213 return mono_controller.recommended_analog_level(); 214 } 215 216 } // namespace 217 218 // TODO(bugs.webrtc.org/12874): Use constexpr struct with designated 219 // initializers once fixed. 
220 constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() { 221 InputVolumeControllerConfig config{ 222 .clipped_level_min = kClippedMin, 223 .clipped_level_step = kClippedLevelStep, 224 .clipped_ratio_threshold = kClippedRatioThreshold, 225 .clipped_wait_frames = kClippedWaitFrames, 226 .enable_clipping_predictor = kDefaultClippingPredictorConfig.enabled, 227 .target_range_max_dbfs = -18, 228 .target_range_min_dbfs = -30, 229 .update_input_volume_wait_frames = 0, 230 .speech_probability_threshold = 0.5f, 231 .speech_ratio_threshold = 1.0f, 232 }; 233 return config; 234 } 235 236 // Helper class that provides an `InputVolumeController` instance with an 237 // `AudioBuffer` instance and `CallAgcSequence()`, a helper method that runs the 238 // `InputVolumeController` instance on the `AudioBuffer` one by sticking to the 239 // API contract. 240 class InputVolumeControllerTestHelper { 241 public: 242 // Ctor. Initializes `audio_buffer` with zeros. 243 // TODO(bugs.webrtc.org/7494): Remove the default argument. 244 InputVolumeControllerTestHelper(const InputVolumeController::Config& config = 245 GetInputVolumeControllerTestConfig()) 246 : audio_buffer(kSampleRateHz, 247 kNumChannels, 248 kSampleRateHz, 249 kNumChannels, 250 kSampleRateHz, 251 kNumChannels), 252 controller(/*num_capture_channels=*/1, config) { 253 controller.Initialize(); 254 WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f, 255 audio_buffer); 256 } 257 258 // Calls the sequence of `InputVolumeController` methods according to the API 259 // contract, namely: 260 // - Sets the applied input volume; 261 // - Uses `audio_buffer` to call `AnalyzeInputAudio()` and 262 // `RecommendInputVolume()`; 263 // Returns the recommended input volume. 
264 std::optional<int> CallAgcSequence(int applied_input_volume, 265 float speech_probability, 266 std::optional<float> speech_level_dbfs, 267 int num_calls = 1) { 268 RTC_DCHECK_GE(num_calls, 1); 269 std::optional<int> volume = applied_input_volume; 270 for (int i = 0; i < num_calls; ++i) { 271 // Repeat the initial volume if `RecommendInputVolume()` doesn't return a 272 // value. 273 controller.AnalyzeInputAudio(volume.value_or(applied_input_volume), 274 audio_buffer); 275 volume = controller.RecommendInputVolume(speech_probability, 276 speech_level_dbfs); 277 278 // Allow deviation from the API contract: `RecommendInputVolume()` doesn't 279 // return a recommended input volume. 280 if (volume.has_value()) { 281 EXPECT_EQ(*volume, controller.recommended_input_volume()); 282 } 283 } 284 return volume; 285 } 286 287 // Deprecated. 288 // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use 289 // `CallAgcSequence()`. 290 int CallRecommendInputVolume(int num_calls, 291 int initial_volume, 292 float speech_probability, 293 std::optional<float> speech_level_dbfs) { 294 RTC_DCHECK(controller.capture_output_used()); 295 296 // Create non-clipping audio for `AnalyzeInputAudio()`. 297 WriteAlternatingAudioBufferSamples(0.1f * kMaxSample, audio_buffer); 298 int volume = initial_volume; 299 for (int i = 0; i < num_calls; ++i) { 300 controller.AnalyzeInputAudio(volume, audio_buffer); 301 const auto recommended_input_volume = controller.RecommendInputVolume( 302 speech_probability, speech_level_dbfs); 303 304 // Expect no errors: Applied volume set for every frame; 305 // `RecommendInputVolume()` returns a non-empty value. 306 EXPECT_TRUE(recommended_input_volume.has_value()); 307 308 volume = *recommended_input_volume; 309 } 310 return volume; 311 } 312 313 // Deprecated. 314 // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use 315 // `CallAgcSequence()`. 
316 void CallAnalyzeInputAudio(int num_calls, float clipped_ratio) { 317 RTC_DCHECK(controller.capture_output_used()); 318 319 RTC_DCHECK_GE(clipped_ratio, 0.0f); 320 RTC_DCHECK_LE(clipped_ratio, 1.0f); 321 WriteAudioBufferSamples(/*samples_value=*/0.0f, clipped_ratio, 322 audio_buffer); 323 for (int i = 0; i < num_calls; ++i) { 324 controller.AnalyzeInputAudio(controller.recommended_input_volume(), 325 audio_buffer); 326 } 327 } 328 329 AudioBuffer audio_buffer; 330 InputVolumeController controller; 331 }; 332 333 class InputVolumeControllerChannelSampleRateTest 334 : public ::testing::TestWithParam<std::tuple<int, int>> { 335 protected: 336 int GetNumChannels() const { return std::get<0>(GetParam()); } 337 int GetSampleRateHz() const { return std::get<1>(GetParam()); } 338 }; 339 340 TEST_P(InputVolumeControllerChannelSampleRateTest, CheckIsAlive) { 341 const int num_channels = GetNumChannels(); 342 const int sample_rate_hz = GetSampleRateHz(); 343 344 constexpr InputVolumeController::Config kConfig{.enable_clipping_predictor = 345 true}; 346 InputVolumeController controller(num_channels, kConfig); 347 controller.Initialize(); 348 AudioBuffer buffer(sample_rate_hz, num_channels, sample_rate_hz, num_channels, 349 sample_rate_hz, num_channels); 350 351 constexpr int kStartupVolume = 100; 352 int applied_initial_volume = kStartupVolume; 353 354 // Trigger a downward adaptation with clipping. 
355 constexpr int kLevelWithinTargetDbfs = 356 (kConfig.target_range_min_dbfs + kConfig.target_range_max_dbfs) / 2; 357 WriteAlternatingAudioBufferSamples(/*samples_value=*/kMaxSample, buffer); 358 const int initial_volume1 = applied_initial_volume; 359 for (int i = 0; i < 400; ++i) { 360 controller.AnalyzeInputAudio(applied_initial_volume, buffer); 361 auto recommended_input_volume = controller.RecommendInputVolume( 362 kLowSpeechProbability, 363 /*speech_level_dbfs=*/kLevelWithinTargetDbfs); 364 ASSERT_TRUE(recommended_input_volume.has_value()); 365 applied_initial_volume = *recommended_input_volume; 366 } 367 ASSERT_LT(controller.recommended_input_volume(), initial_volume1); 368 369 // Fill in audio that does not clip. 370 WriteAlternatingAudioBufferSamples(/*samples_value=*/1234.5f, buffer); 371 372 // Trigger an upward adaptation. 373 const int initial_volume2 = controller.recommended_input_volume(); 374 for (int i = 0; i < kConfig.clipped_wait_frames; ++i) { 375 controller.AnalyzeInputAudio(applied_initial_volume, buffer); 376 auto recommended_input_volume = controller.RecommendInputVolume( 377 kHighSpeechProbability, 378 /*speech_level_dbfs=*/kConfig.target_range_min_dbfs - 5); 379 ASSERT_TRUE(recommended_input_volume.has_value()); 380 applied_initial_volume = *recommended_input_volume; 381 } 382 EXPECT_GT(controller.recommended_input_volume(), initial_volume2); 383 384 // Trigger a downward adaptation. 
385 const int initial_volume = controller.recommended_input_volume(); 386 for (int i = 0; i < kConfig.update_input_volume_wait_frames; ++i) { 387 controller.AnalyzeInputAudio(applied_initial_volume, buffer); 388 auto recommended_input_volume = controller.RecommendInputVolume( 389 kHighSpeechProbability, 390 /*speech_level_dbfs=*/kConfig.target_range_max_dbfs + 5); 391 ASSERT_TRUE(recommended_input_volume.has_value()); 392 applied_initial_volume = *recommended_input_volume; 393 } 394 EXPECT_LT(controller.recommended_input_volume(), initial_volume); 395 } 396 397 INSTANTIATE_TEST_SUITE_P( 398 , 399 InputVolumeControllerChannelSampleRateTest, 400 ::testing::Combine(::testing::Values(1, 2, 3, 6), 401 ::testing::Values(8000, 16000, 32000, 48000))); 402 403 class InputVolumeControllerParametrizedTest 404 : public ::testing::TestWithParam<int> {}; 405 406 TEST_P(InputVolumeControllerParametrizedTest, 407 StartupMinVolumeConfigurationRespectedWhenAppliedInputVolumeAboveMin) { 408 InputVolumeControllerTestHelper helper( 409 /*config=*/{.min_input_volume = GetParam()}); 410 411 EXPECT_EQ(*helper.CallAgcSequence(/*applied_input_volume=*/128, 412 /*speech_probability=*/0.9f, 413 /*speech_level_dbfs=*/-80), 414 128); 415 } 416 417 TEST_P( 418 InputVolumeControllerParametrizedTest, 419 StartupMinVolumeConfigurationRespectedWhenAppliedInputVolumeMaybeBelowMin) { 420 InputVolumeControllerTestHelper helper( 421 /*config=*/{.min_input_volume = GetParam()}); 422 423 EXPECT_GE(*helper.CallAgcSequence(/*applied_input_volume=*/10, 424 /*speech_probability=*/0.9f, 425 /*speech_level_dbfs=*/-80), 426 10); 427 } 428 429 TEST_P(InputVolumeControllerParametrizedTest, 430 StartupMinVolumeRespectedWhenAppliedVolumeNonZero) { 431 const int kMinInputVolume = GetParam(); 432 InputVolumeControllerTestHelper helper( 433 /*config=*/{.min_input_volume = kMinInputVolume, 434 .target_range_min_dbfs = -30, 435 .update_input_volume_wait_frames = 1, 436 .speech_probability_threshold = 0.5f, 437 
.speech_ratio_threshold = 0.5f}); 438 439 // Volume change possible; speech level below the digital gain window. 440 int volume = *helper.CallAgcSequence(/*applied_input_volume=*/1, 441 /*speech_probability=*/0.9f, 442 /*speech_level_dbfs=*/-80); 443 444 EXPECT_EQ(volume, kMinInputVolume); 445 } 446 447 TEST_P(InputVolumeControllerParametrizedTest, 448 MinVolumeRepeatedlyRespectedWhenAppliedVolumeNonZero) { 449 const int kMinInputVolume = GetParam(); 450 InputVolumeControllerTestHelper helper( 451 /*config=*/{.min_input_volume = kMinInputVolume, 452 .target_range_min_dbfs = -30, 453 .update_input_volume_wait_frames = 1, 454 .speech_probability_threshold = 0.5f, 455 .speech_ratio_threshold = 0.5f}); 456 457 // Volume change possible; speech level below the digital gain window. 458 for (int i = 0; i < 100; ++i) { 459 const int volume = *helper.CallAgcSequence(/*applied_input_volume=*/1, 460 /*speech_probability=*/0.9f, 461 /*speech_level_dbfs=*/-80); 462 EXPECT_GE(volume, kMinInputVolume); 463 } 464 } 465 466 TEST_P(InputVolumeControllerParametrizedTest, 467 StartupMinVolumeRespectedOnceWhenAppliedVolumeZero) { 468 const int kMinInputVolume = GetParam(); 469 InputVolumeControllerTestHelper helper( 470 /*config=*/{.min_input_volume = kMinInputVolume, 471 .target_range_min_dbfs = -30, 472 .update_input_volume_wait_frames = 1, 473 .speech_probability_threshold = 0.5f, 474 .speech_ratio_threshold = 0.5f}); 475 476 int volume = *helper.CallAgcSequence(/*applied_input_volume=*/0, 477 /*speech_probability=*/0.9f, 478 /*speech_level_dbfs=*/-80); 479 480 EXPECT_EQ(volume, kMinInputVolume); 481 482 // No change of volume regardless of a speech level below the digital gain 483 // window; applied volume is zero. 
484 volume = *helper.CallAgcSequence(/*applied_input_volume=*/0, 485 /*speech_probability=*/0.9f, 486 /*speech_level_dbfs=*/-80); 487 488 EXPECT_EQ(volume, 0); 489 } 490 491 TEST_P(InputVolumeControllerParametrizedTest, MicVolumeResponseToRmsError) { 492 InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); 493 config.min_input_volume = GetParam(); 494 InputVolumeControllerTestHelper helper(config); 495 int volume = *helper.CallAgcSequence(kDefaultInitialInputVolume, 496 kHighSpeechProbability, kSpeechLevel); 497 498 // Inside the digital gain's window; no change of volume. 499 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 500 kHighSpeechProbability, -23.0f); 501 502 // Inside the digital gain's window; no change of volume. 503 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 504 kHighSpeechProbability, -28.0f); 505 506 // Above the digital gain's window; volume should be increased. 507 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 508 kHighSpeechProbability, -29.0f); 509 EXPECT_EQ(volume, 128); 510 511 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 512 kHighSpeechProbability, -38.0f); 513 EXPECT_EQ(volume, 156); 514 515 // Inside the digital gain's window; no change of volume. 516 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 517 kHighSpeechProbability, -23.0f); 518 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 519 kHighSpeechProbability, -18.0f); 520 521 // Below the digial gain's window; volume should be decreased. 
522 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 523 kHighSpeechProbability, -17.0f); 524 EXPECT_EQ(volume, 155); 525 526 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 527 kHighSpeechProbability, -17.0f); 528 EXPECT_EQ(volume, 151); 529 530 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 531 kHighSpeechProbability, -9.0f); 532 EXPECT_EQ(volume, 119); 533 } 534 535 TEST_P(InputVolumeControllerParametrizedTest, MicVolumeIsLimited) { 536 InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); 537 const int min_input_volume = GetParam(); 538 config.min_input_volume = min_input_volume; 539 InputVolumeControllerTestHelper helper(config); 540 int volume = *helper.CallAgcSequence(kDefaultInitialInputVolume, 541 kHighSpeechProbability, kSpeechLevel); 542 543 // Maximum upwards change is limited. 544 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 545 kHighSpeechProbability, -48.0f); 546 EXPECT_EQ(volume, 183); 547 548 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 549 kHighSpeechProbability, -48.0f); 550 EXPECT_EQ(volume, 243); 551 552 // Won't go higher than the maximum. 553 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 554 kHighSpeechProbability, -48.0f); 555 EXPECT_EQ(volume, 255); 556 557 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 558 kHighSpeechProbability, -17.0f); 559 EXPECT_EQ(volume, 254); 560 561 // Maximum downwards change is limited. 
562 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 563 kHighSpeechProbability, 22.0f); 564 EXPECT_EQ(volume, 194); 565 566 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 567 kHighSpeechProbability, 22.0f); 568 EXPECT_EQ(volume, 137); 569 570 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 571 kHighSpeechProbability, 22.0f); 572 EXPECT_EQ(volume, 88); 573 574 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 575 kHighSpeechProbability, 22.0f); 576 EXPECT_EQ(volume, 54); 577 578 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 579 kHighSpeechProbability, 22.0f); 580 EXPECT_EQ(volume, 33); 581 582 // Won't go lower than the minimum. 583 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 584 kHighSpeechProbability, 22.0f); 585 EXPECT_EQ(volume, std::max(18, min_input_volume)); 586 587 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 588 kHighSpeechProbability, 22.0f); 589 EXPECT_EQ(volume, std::max(12, min_input_volume)); 590 } 591 592 TEST_P(InputVolumeControllerParametrizedTest, NoActionWhileMuted) { 593 InputVolumeControllerTestHelper helper_1( 594 /*config=*/{.min_input_volume = GetParam()}); 595 InputVolumeControllerTestHelper helper_2( 596 /*config=*/{.min_input_volume = GetParam()}); 597 598 int volume_1 = *helper_1.CallAgcSequence(/*applied_input_volume=*/255, 599 kHighSpeechProbability, kSpeechLevel, 600 /*num_calls=*/1); 601 int volume_2 = *helper_2.CallAgcSequence(/*applied_input_volume=*/255, 602 kHighSpeechProbability, kSpeechLevel, 603 /*num_calls=*/1); 604 605 EXPECT_EQ(volume_1, 255); 606 EXPECT_EQ(volume_2, 255); 607 608 helper_2.controller.HandleCaptureOutputUsedChange(false); 609 610 WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer); 611 WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer); 612 613 volume_1 = 614 *helper_1.CallAgcSequence(volume_1, kHighSpeechProbability, kSpeechLevel, 615 
/*num_calls=*/1); 616 volume_2 = 617 *helper_2.CallAgcSequence(volume_2, kHighSpeechProbability, kSpeechLevel, 618 /*num_calls=*/1); 619 620 EXPECT_LT(volume_1, 255); 621 EXPECT_EQ(volume_2, 255); 622 } 623 624 TEST_P(InputVolumeControllerParametrizedTest, 625 UnmutingChecksVolumeWithoutRaising) { 626 InputVolumeControllerTestHelper helper( 627 /*config=*/{.min_input_volume = GetParam()}); 628 helper.CallAgcSequence(kDefaultInitialInputVolume, kHighSpeechProbability, 629 kSpeechLevel); 630 631 helper.controller.HandleCaptureOutputUsedChange(false); 632 helper.controller.HandleCaptureOutputUsedChange(true); 633 634 constexpr int kInputVolume = 127; 635 636 // SetMicVolume should not be called. 637 EXPECT_EQ( 638 helper.CallRecommendInputVolume(/*num_calls=*/1, kInputVolume, 639 kHighSpeechProbability, kSpeechLevel), 640 kInputVolume); 641 } 642 643 TEST_P(InputVolumeControllerParametrizedTest, UnmutingRaisesTooLowVolume) { 644 const int min_input_volume = GetParam(); 645 InputVolumeControllerTestHelper helper( 646 /*config=*/{.min_input_volume = min_input_volume}); 647 helper.CallAgcSequence(kDefaultInitialInputVolume, kHighSpeechProbability, 648 kSpeechLevel); 649 650 helper.controller.HandleCaptureOutputUsedChange(false); 651 helper.controller.HandleCaptureOutputUsedChange(true); 652 653 constexpr int kInputVolume = 11; 654 655 EXPECT_EQ( 656 helper.CallRecommendInputVolume(/*num_calls=*/1, kInputVolume, 657 kHighSpeechProbability, kSpeechLevel), 658 min_input_volume); 659 } 660 661 TEST_P(InputVolumeControllerParametrizedTest, 662 ManualLevelChangeResultsInNoSetMicCall) { 663 InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); 664 config.min_input_volume = GetParam(); 665 InputVolumeControllerTestHelper helper(config); 666 int volume = *helper.CallAgcSequence(kDefaultInitialInputVolume, 667 kHighSpeechProbability, kSpeechLevel); 668 669 // GetMicVolume returns a value outside of the quantization slack, indicating 670 // a manual volume 
change. 671 ASSERT_NE(volume, 154); 672 volume = helper.CallRecommendInputVolume( 673 /*num_calls=*/1, /*initial_volume=*/154, kHighSpeechProbability, -29.0f); 674 EXPECT_EQ(volume, 154); 675 676 // Do the same thing, except downwards now. 677 volume = helper.CallRecommendInputVolume( 678 /*num_calls=*/1, /*initial_volume=*/100, kHighSpeechProbability, -17.0f); 679 EXPECT_EQ(volume, 100); 680 681 // And finally verify the AGC continues working without a manual change. 682 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 683 kHighSpeechProbability, -17.0f); 684 EXPECT_EQ(volume, 99); 685 } 686 687 TEST_P(InputVolumeControllerParametrizedTest, 688 RecoveryAfterManualLevelChangeFromMax) { 689 InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); 690 config.min_input_volume = GetParam(); 691 InputVolumeControllerTestHelper helper(config); 692 int volume = *helper.CallAgcSequence(kDefaultInitialInputVolume, 693 kHighSpeechProbability, kSpeechLevel); 694 695 // Force the mic up to max volume. Takes a few steps due to the residual 696 // gain limitation. 697 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 698 kHighSpeechProbability, -48.0f); 699 EXPECT_EQ(volume, 183); 700 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 701 kHighSpeechProbability, -48.0f); 702 EXPECT_EQ(volume, 243); 703 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 704 kHighSpeechProbability, -48.0f); 705 EXPECT_EQ(volume, 255); 706 707 // Manual change does not result in SetMicVolume call. 708 volume = helper.CallRecommendInputVolume( 709 /*num_calls=*/1, /*initial_volume=*/50, kHighSpeechProbability, -17.0f); 710 EXPECT_EQ(helper.controller.recommended_input_volume(), 50); 711 712 // Continues working as usual afterwards. 
713 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 714 kHighSpeechProbability, -38.0f); 715 716 EXPECT_EQ(volume, 65); 717 } 718 719 // Checks that the minimum input volume is enforced during the upward adjustment 720 // of the input volume. 721 TEST_P(InputVolumeControllerParametrizedTest, 722 EnforceMinInputVolumeDuringUpwardsAdjustment) { 723 const int min_input_volume = GetParam(); 724 InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); 725 config.min_input_volume = min_input_volume; 726 InputVolumeControllerTestHelper helper(config); 727 int volume = *helper.CallAgcSequence(kDefaultInitialInputVolume, 728 kHighSpeechProbability, kSpeechLevel); 729 730 // Manual change below min, but strictly positive, otherwise no action will be 731 // taken. 732 volume = helper.CallRecommendInputVolume( 733 /*num_calls=*/1, /*initial_volume=*/1, kHighSpeechProbability, -17.0f); 734 735 // Trigger an upward adjustment of the input volume. 736 EXPECT_EQ(volume, min_input_volume); 737 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 738 kHighSpeechProbability, -29.0f); 739 EXPECT_EQ(volume, min_input_volume); 740 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 741 kHighSpeechProbability, -30.0f); 742 EXPECT_EQ(volume, min_input_volume); 743 744 // After a number of consistently low speech level observations, the input 745 // volume is eventually raised above the minimum. 746 volume = helper.CallRecommendInputVolume(/*num_calls=*/10, volume, 747 kHighSpeechProbability, -38.0f); 748 EXPECT_GT(volume, min_input_volume); 749 } 750 751 // Checks that, when the min mic level override is specified, AGC immediately 752 // applies the minimum mic level after the mic level is manually set below the 753 // minimum gain to enforce. 
754 TEST_P(InputVolumeControllerParametrizedTest, 755 RecoveryAfterManualLevelChangeBelowMin) { 756 const int min_input_volume = GetParam(); 757 InputVolumeControllerTestHelper helper( 758 /*config=*/{.min_input_volume = min_input_volume}); 759 int volume = *helper.CallAgcSequence(kDefaultInitialInputVolume, 760 kHighSpeechProbability, kSpeechLevel); 761 762 // Manual change below min, but strictly positive, otherwise 763 // AGC won't take any action. 764 volume = helper.CallRecommendInputVolume( 765 /*num_calls=*/1, /*initial_volume=*/1, kHighSpeechProbability, -17.0f); 766 EXPECT_EQ(volume, min_input_volume); 767 } 768 769 TEST_P(InputVolumeControllerParametrizedTest, NoClippingHasNoImpact) { 770 InputVolumeControllerTestHelper helper( 771 /*config=*/{.min_input_volume = GetParam()}); 772 helper.CallAgcSequence(kDefaultInitialInputVolume, kHighSpeechProbability, 773 kSpeechLevel); 774 775 helper.CallAnalyzeInputAudio(/*num_calls=*/100, /*clipped_ratio=*/0); 776 EXPECT_EQ(helper.controller.recommended_input_volume(), 128); 777 } 778 779 TEST_P(InputVolumeControllerParametrizedTest, 780 ClippingUnderThresholdHasNoImpact) { 781 InputVolumeControllerTestHelper helper( 782 /*config=*/{.min_input_volume = GetParam()}); 783 helper.CallAgcSequence(kDefaultInitialInputVolume, kHighSpeechProbability, 784 kSpeechLevel); 785 786 helper.CallAnalyzeInputAudio(/*num_calls=*/1, /*clipped_ratio=*/0.099); 787 EXPECT_EQ(helper.controller.recommended_input_volume(), 128); 788 } 789 790 TEST_P(InputVolumeControllerParametrizedTest, ClippingLowersVolume) { 791 InputVolumeControllerTestHelper helper( 792 /*config=*/{.min_input_volume = GetParam()}); 793 helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, 794 kSpeechLevel); 795 796 helper.CallAnalyzeInputAudio(/*num_calls=*/1, /*clipped_ratio=*/0.2); 797 EXPECT_EQ(helper.controller.recommended_input_volume(), 240); 798 } 799 800 TEST_P(InputVolumeControllerParametrizedTest, 801 
WaitingPeriodBetweenClippingChecks) { 802 InputVolumeControllerTestHelper helper( 803 /*config=*/{.min_input_volume = GetParam()}); 804 helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, 805 kSpeechLevel); 806 807 helper.CallAnalyzeInputAudio(/*num_calls=*/1, 808 /*clipped_ratio=*/kAboveClippedThreshold); 809 EXPECT_EQ(helper.controller.recommended_input_volume(), 240); 810 811 helper.CallAnalyzeInputAudio(/*num_calls=*/300, 812 /*clipped_ratio=*/kAboveClippedThreshold); 813 EXPECT_EQ(helper.controller.recommended_input_volume(), 240); 814 815 helper.CallAnalyzeInputAudio(/*num_calls=*/1, 816 /*clipped_ratio=*/kAboveClippedThreshold); 817 EXPECT_EQ(helper.controller.recommended_input_volume(), 225); 818 } 819 820 TEST_P(InputVolumeControllerParametrizedTest, ClippingLoweringIsLimited) { 821 InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); 822 config.min_input_volume = GetParam(); 823 InputVolumeControllerTestHelper helper(config); 824 helper.CallAgcSequence(/*applied_input_volume=*/180, kHighSpeechProbability, 825 kSpeechLevel); 826 827 helper.CallAnalyzeInputAudio(/*num_calls=*/1, 828 /*clipped_ratio=*/kAboveClippedThreshold); 829 EXPECT_EQ(helper.controller.recommended_input_volume(), kClippedMin); 830 831 helper.CallAnalyzeInputAudio(/*num_calls=*/1000, 832 /*clipped_ratio=*/kAboveClippedThreshold); 833 EXPECT_EQ(helper.controller.recommended_input_volume(), kClippedMin); 834 } 835 836 TEST_P(InputVolumeControllerParametrizedTest, 837 ClippingMaxIsRespectedWhenEqualToLevel) { 838 InputVolumeControllerTestHelper helper( 839 /*config=*/{.min_input_volume = GetParam()}); 840 helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, 841 kSpeechLevel); 842 843 helper.CallAnalyzeInputAudio(/*num_calls=*/1, 844 /*clipped_ratio=*/kAboveClippedThreshold); 845 EXPECT_EQ(helper.controller.recommended_input_volume(), 240); 846 847 helper.CallRecommendInputVolume(/*num_calls=*/10, /*initial_volume=*/240, 
848 kHighSpeechProbability, -48.0f); 849 EXPECT_EQ(helper.controller.recommended_input_volume(), 240); 850 } 851 852 TEST_P(InputVolumeControllerParametrizedTest, 853 ClippingMaxIsRespectedWhenHigherThanLevel) { 854 InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); 855 config.min_input_volume = GetParam(); 856 InputVolumeControllerTestHelper helper(config); 857 helper.CallAgcSequence(/*applied_input_volume=*/200, kHighSpeechProbability, 858 kSpeechLevel); 859 860 helper.CallAnalyzeInputAudio(/*num_calls=*/1, 861 /*clipped_ratio=*/kAboveClippedThreshold); 862 int volume = helper.controller.recommended_input_volume(); 863 EXPECT_EQ(volume, 185); 864 865 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 866 kHighSpeechProbability, -58.0f); 867 EXPECT_EQ(volume, 240); 868 volume = helper.CallRecommendInputVolume(/*num_calls=*/10, volume, 869 kHighSpeechProbability, -58.0f); 870 EXPECT_EQ(volume, 240); 871 } 872 873 TEST_P(InputVolumeControllerParametrizedTest, UserCanRaiseVolumeAfterClipping) { 874 InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); 875 config.min_input_volume = GetParam(); 876 InputVolumeControllerTestHelper helper(config); 877 helper.CallAgcSequence(/*applied_input_volume=*/225, kHighSpeechProbability, 878 kSpeechLevel); 879 880 helper.CallAnalyzeInputAudio(/*num_calls=*/1, 881 /*clipped_ratio=*/kAboveClippedThreshold); 882 EXPECT_EQ(helper.controller.recommended_input_volume(), 210); 883 884 // User changed the volume. 885 int volume = helper.CallRecommendInputVolume( 886 /*num_calls=*/1, /*initial_volume-*/ 250, kHighSpeechProbability, -32.0f); 887 EXPECT_EQ(volume, 250); 888 889 // Move down... 890 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 891 kHighSpeechProbability, -8.0f); 892 EXPECT_EQ(volume, 210); 893 // And back up to the new max established by the user. 
894 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 895 kHighSpeechProbability, -58.0f); 896 EXPECT_EQ(volume, 250); 897 // Will not move above new maximum. 898 volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, 899 kHighSpeechProbability, -48.0f); 900 EXPECT_EQ(volume, 250); 901 } 902 903 TEST_P(InputVolumeControllerParametrizedTest, 904 ClippingDoesNotPullLowVolumeBackUp) { 905 InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); 906 config.min_input_volume = GetParam(); 907 InputVolumeControllerTestHelper helper(config); 908 helper.CallAgcSequence(/*applied_input_volume=*/80, kHighSpeechProbability, 909 kSpeechLevel); 910 911 int initial_volume = helper.controller.recommended_input_volume(); 912 helper.CallAnalyzeInputAudio(/*num_calls=*/1, 913 /*clipped_ratio=*/kAboveClippedThreshold); 914 EXPECT_EQ(helper.controller.recommended_input_volume(), initial_volume); 915 } 916 917 TEST_P(InputVolumeControllerParametrizedTest, TakesNoActionOnZeroMicVolume) { 918 InputVolumeControllerTestHelper helper( 919 /*config=*/{.min_input_volume = GetParam()}); 920 helper.CallAgcSequence(kDefaultInitialInputVolume, kHighSpeechProbability, 921 kSpeechLevel); 922 923 EXPECT_EQ( 924 helper.CallRecommendInputVolume(/*num_calls=*/10, /*initial_volume=*/0, 925 kHighSpeechProbability, -48.0f), 926 0); 927 } 928 929 TEST_P(InputVolumeControllerParametrizedTest, ClippingDetectionLowersVolume) { 930 InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); 931 config.min_input_volume = GetParam(); 932 InputVolumeControllerTestHelper helper(config); 933 int volume = *helper.CallAgcSequence(/*applied_input_volume=*/255, 934 kHighSpeechProbability, kSpeechLevel, 935 /*num_calls=*/1); 936 937 EXPECT_EQ(volume, 255); 938 939 WriteAlternatingAudioBufferSamples(0.99f * kMaxSample, helper.audio_buffer); 940 volume = *helper.CallAgcSequence(volume, kHighSpeechProbability, kSpeechLevel, 941 /*num_calls=*/100); 942 943 
EXPECT_EQ(volume, 255); 944 945 WriteAlternatingAudioBufferSamples(kMaxSample, helper.audio_buffer); 946 volume = *helper.CallAgcSequence(volume, kHighSpeechProbability, kSpeechLevel, 947 /*num_calls=*/100); 948 949 EXPECT_EQ(volume, 240); 950 } 951 952 // TODO(bugs.webrtc.org/12774): Test the bahavior of `clipped_level_step`. 953 // TODO(bugs.webrtc.org/12774): Test the bahavior of `clipped_ratio_threshold`. 954 // TODO(bugs.webrtc.org/12774): Test the bahavior of `clipped_wait_frames`. 955 // Verifies that configurable clipping parameters are initialized as intended. 956 TEST_P(InputVolumeControllerParametrizedTest, ClippingParametersVerified) { 957 std::unique_ptr<InputVolumeController> controller = 958 CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, 959 kClippedWaitFrames); 960 controller->Initialize(); 961 EXPECT_EQ(controller->clipped_level_step_, kClippedLevelStep); 962 EXPECT_EQ(controller->clipped_ratio_threshold_, kClippedRatioThreshold); 963 EXPECT_EQ(controller->clipped_wait_frames_, kClippedWaitFrames); 964 std::unique_ptr<InputVolumeController> controller_custom = 965 CreateInputVolumeController(/*clipped_level_step=*/10, 966 /*clipped_ratio_threshold=*/0.2f, 967 /*clipped_wait_frames=*/50); 968 controller_custom->Initialize(); 969 EXPECT_EQ(controller_custom->clipped_level_step_, 10); 970 EXPECT_EQ(controller_custom->clipped_ratio_threshold_, 0.2f); 971 EXPECT_EQ(controller_custom->clipped_wait_frames_, 50); 972 } 973 974 TEST_P(InputVolumeControllerParametrizedTest, 975 DisableClippingPredictorDisablesClippingPredictor) { 976 std::unique_ptr<InputVolumeController> controller = 977 CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, 978 kClippedWaitFrames, 979 /*enable_clipping_predictor=*/false); 980 controller->Initialize(); 981 982 EXPECT_FALSE(controller->clipping_predictor_enabled()); 983 EXPECT_FALSE(controller->use_clipping_predictor_step()); 984 } 985 986 TEST_P(InputVolumeControllerParametrizedTest, 
987 EnableClippingPredictorEnablesClippingPredictor) { 988 std::unique_ptr<InputVolumeController> controller = 989 CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, 990 kClippedWaitFrames, 991 /*enable_clipping_predictor=*/true); 992 controller->Initialize(); 993 994 EXPECT_TRUE(controller->clipping_predictor_enabled()); 995 EXPECT_TRUE(controller->use_clipping_predictor_step()); 996 } 997 998 TEST_P(InputVolumeControllerParametrizedTest, 999 DisableClippingPredictorDoesNotLowerVolume) { 1000 int volume = 255; 1001 InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); 1002 config.enable_clipping_predictor = false; 1003 auto helper = InputVolumeControllerTestHelper(config); 1004 helper.controller.Initialize(); 1005 1006 EXPECT_FALSE(helper.controller.clipping_predictor_enabled()); 1007 EXPECT_FALSE(helper.controller.use_clipping_predictor_step()); 1008 1009 // Expect no change if clipping prediction is enabled. 1010 for (int j = 0; j < 31; ++j) { 1011 WriteAlternatingAudioBufferSamples(0.99f * kMaxSample, helper.audio_buffer); 1012 volume = 1013 *helper.CallAgcSequence(volume, kLowSpeechProbability, kSpeechLevel, 1014 /*num_calls=*/5); 1015 1016 WriteAudioBufferSamples(0.99f * kMaxSample, /*clipped_ratio=*/0.0f, 1017 helper.audio_buffer); 1018 volume = 1019 *helper.CallAgcSequence(volume, kLowSpeechProbability, kSpeechLevel, 1020 /*num_calls=*/5); 1021 1022 EXPECT_EQ(volume, 255); 1023 } 1024 } 1025 1026 // TODO(bugs.webrtc.org/7494): Split into several smaller tests. 1027 TEST_P(InputVolumeControllerParametrizedTest, 1028 UsedClippingPredictionsProduceLowerAnalogLevels) { 1029 constexpr int kInitialLevel = 255; 1030 constexpr float kCloseToClippingPeakRatio = 0.99f; 1031 int volume_1 = kInitialLevel; 1032 int volume_2 = kInitialLevel; 1033 1034 // Create two helpers, one with clipping prediction and one without. 
1035 auto config_1 = GetInputVolumeControllerTestConfig(); 1036 auto config_2 = GetInputVolumeControllerTestConfig(); 1037 config_1.enable_clipping_predictor = true; 1038 config_2.enable_clipping_predictor = false; 1039 auto helper_1 = InputVolumeControllerTestHelper(config_1); 1040 auto helper_2 = InputVolumeControllerTestHelper(config_2); 1041 helper_1.controller.Initialize(); 1042 helper_2.controller.Initialize(); 1043 1044 EXPECT_TRUE(helper_1.controller.clipping_predictor_enabled()); 1045 EXPECT_FALSE(helper_2.controller.clipping_predictor_enabled()); 1046 EXPECT_TRUE(helper_1.controller.use_clipping_predictor_step()); 1047 1048 // Expect a change if clipping prediction is enabled. 1049 WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1050 helper_1.audio_buffer); 1051 WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1052 helper_2.audio_buffer); 1053 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1054 kSpeechLevel, 5); 1055 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1056 kSpeechLevel, 5); 1057 1058 WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1059 /*clipped_ratio=*/0.0f, helper_1.audio_buffer); 1060 WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1061 /*clipped_ratio=*/0.0f, helper_2.audio_buffer); 1062 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1063 kSpeechLevel, 5); 1064 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1065 kSpeechLevel, 5); 1066 1067 EXPECT_EQ(volume_1, kInitialLevel - kClippedLevelStep); 1068 EXPECT_EQ(volume_2, kInitialLevel); 1069 1070 // Expect no change during waiting. 
1071 for (int i = 0; i < kClippedWaitFrames / 10; ++i) { 1072 WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1073 helper_1.audio_buffer); 1074 WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1075 helper_2.audio_buffer); 1076 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1077 kSpeechLevel, 5); 1078 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1079 kSpeechLevel, 5); 1080 1081 WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1082 /*clipped_ratio=*/0.0f, helper_1.audio_buffer); 1083 WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1084 /*clipped_ratio=*/0.0f, helper_2.audio_buffer); 1085 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1086 kSpeechLevel, 5); 1087 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1088 kSpeechLevel, 5); 1089 1090 EXPECT_EQ(volume_1, kInitialLevel - kClippedLevelStep); 1091 EXPECT_EQ(volume_2, kInitialLevel); 1092 } 1093 1094 // Expect a change when the prediction step is used. 
1095 WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1096 helper_1.audio_buffer); 1097 WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1098 helper_2.audio_buffer); 1099 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1100 kSpeechLevel, 5); 1101 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1102 kSpeechLevel, 5); 1103 1104 WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1105 /*clipped_ratio=*/0.0f, helper_1.audio_buffer); 1106 WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, 1107 /*clipped_ratio=*/0.0f, helper_2.audio_buffer); 1108 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1109 kSpeechLevel, 5); 1110 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1111 kSpeechLevel, 5); 1112 1113 EXPECT_EQ(volume_1, kInitialLevel - 2 * kClippedLevelStep); 1114 EXPECT_EQ(volume_2, kInitialLevel); 1115 1116 // Expect no change when clipping is not detected or predicted. 
1117 for (int i = 0; i < 2 * kClippedWaitFrames / 10; ++i) { 1118 WriteAlternatingAudioBufferSamples(/*samples_value=*/0.0f, 1119 helper_1.audio_buffer); 1120 WriteAlternatingAudioBufferSamples(/*samples_value=*/0.0f, 1121 helper_2.audio_buffer); 1122 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1123 kSpeechLevel, 5); 1124 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1125 kSpeechLevel, 5); 1126 1127 WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f, 1128 helper_1.audio_buffer); 1129 WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f, 1130 helper_2.audio_buffer); 1131 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1132 kSpeechLevel, 5); 1133 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1134 kSpeechLevel, 5); 1135 } 1136 1137 EXPECT_EQ(volume_1, kInitialLevel - 2 * kClippedLevelStep); 1138 EXPECT_EQ(volume_2, kInitialLevel); 1139 1140 // Expect a change for clipping frames. 1141 WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer); 1142 WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer); 1143 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1144 kSpeechLevel, 1); 1145 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1146 kSpeechLevel, 1); 1147 1148 EXPECT_EQ(volume_1, kInitialLevel - 3 * kClippedLevelStep); 1149 EXPECT_EQ(volume_2, kInitialLevel - kClippedLevelStep); 1150 1151 // Expect no change during waiting. 
1152 for (int i = 0; i < kClippedWaitFrames / 10; ++i) { 1153 WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer); 1154 WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer); 1155 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1156 kSpeechLevel, 5); 1157 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1158 kSpeechLevel, 5); 1159 1160 WriteAudioBufferSamples(kMaxSample, /*clipped_ratio=*/1.0f, 1161 helper_1.audio_buffer); 1162 WriteAudioBufferSamples(kMaxSample, /*clipped_ratio=*/1.0f, 1163 helper_2.audio_buffer); 1164 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1165 kSpeechLevel, 5); 1166 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1167 kSpeechLevel, 5); 1168 } 1169 1170 EXPECT_EQ(volume_1, kInitialLevel - 3 * kClippedLevelStep); 1171 EXPECT_EQ(volume_2, kInitialLevel - kClippedLevelStep); 1172 1173 // Expect a change for clipping frames. 1174 WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer); 1175 WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer); 1176 volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, 1177 kSpeechLevel, 1); 1178 volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, 1179 kSpeechLevel, 1); 1180 1181 EXPECT_EQ(volume_1, kInitialLevel - 4 * kClippedLevelStep); 1182 EXPECT_EQ(volume_2, kInitialLevel - 2 * kClippedLevelStep); 1183 } 1184 1185 // Checks that passing an empty speech level has no effect on the input volume. 1186 TEST_P(InputVolumeControllerParametrizedTest, EmptyRmsErrorHasNoEffect) { 1187 InputVolumeController controller(kNumChannels, 1188 GetInputVolumeControllerTestConfig()); 1189 controller.Initialize(); 1190 1191 // Feed speech with low energy that would trigger an upward adapation of 1192 // the analog level if an speech level was not low and the RMS level empty. 
1193 constexpr int kNumFrames = 125; 1194 constexpr int kGainDb = -20; 1195 SpeechSamplesReader reader; 1196 int volume = reader.Feed(kNumFrames, kDefaultInitialInputVolume, kGainDb, 1197 kLowSpeechProbability, std::nullopt, controller); 1198 1199 // Check that no adaptation occurs. 1200 ASSERT_EQ(volume, kDefaultInitialInputVolume); 1201 } 1202 1203 // Checks that the recommended input volume is not updated unless enough 1204 // frames have been processed after the previous update. 1205 TEST(InputVolumeControllerTest, UpdateInputVolumeWaitFramesIsEffective) { 1206 constexpr int kInputVolume = kDefaultInitialInputVolume; 1207 std::unique_ptr<InputVolumeController> controller_wait_0 = 1208 CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, 1209 kClippedWaitFrames, 1210 /*enable_clipping_predictor=*/false, 1211 /*update_input_volume_wait_frames=*/0); 1212 std::unique_ptr<InputVolumeController> controller_wait_100 = 1213 CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, 1214 kClippedWaitFrames, 1215 /*enable_clipping_predictor=*/false, 1216 /*update_input_volume_wait_frames=*/100); 1217 controller_wait_0->Initialize(); 1218 controller_wait_100->Initialize(); 1219 1220 SpeechSamplesReader reader_1; 1221 SpeechSamplesReader reader_2; 1222 int volume_wait_0 = reader_1.Feed( 1223 /*num_frames=*/99, kInputVolume, /*gain_db=*/0, kHighSpeechProbability, 1224 /*speech_level_dbfs=*/-42.0f, *controller_wait_0); 1225 int volume_wait_100 = reader_2.Feed( 1226 /*num_frames=*/99, kInputVolume, /*gain_db=*/0, kHighSpeechProbability, 1227 /*speech_level_dbfs=*/-42.0f, *controller_wait_100); 1228 1229 // Check that adaptation only occurs if enough frames have been processed. 
1230 ASSERT_GT(volume_wait_0, kInputVolume); 1231 ASSERT_EQ(volume_wait_100, kInputVolume); 1232 1233 volume_wait_0 = 1234 reader_1.Feed(/*num_frames=*/1, volume_wait_0, 1235 /*gain_db=*/0, kHighSpeechProbability, 1236 /*speech_level_dbfs=*/-42.0f, *controller_wait_0); 1237 volume_wait_100 = 1238 reader_2.Feed(/*num_frames=*/1, volume_wait_100, 1239 /*gain_db=*/0, kHighSpeechProbability, 1240 /*speech_level_dbfs=*/-42.0f, *controller_wait_100); 1241 1242 // Check that adaptation only occurs when enough frames have been processed. 1243 ASSERT_GT(volume_wait_0, kInputVolume); 1244 ASSERT_GT(volume_wait_100, kInputVolume); 1245 } 1246 1247 INSTANTIATE_TEST_SUITE_P(, 1248 InputVolumeControllerParametrizedTest, 1249 ::testing::Values(12, 20)); 1250 1251 TEST(InputVolumeControllerTest, 1252 MinInputVolumeEnforcedWithClippingWhenAboveClippedLevelMin) { 1253 InputVolumeControllerTestHelper helper( 1254 /*config=*/{.min_input_volume = 80, .clipped_level_min = 70}); 1255 1256 // Trigger a downward adjustment caused by clipping input. Use a low speech 1257 // probability to limit the volume changes to clipping handling. 1258 WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f, 1259 helper.audio_buffer); 1260 constexpr int kNumCalls = 800; 1261 helper.CallAgcSequence(/*applied_input_volume=*/100, kLowSpeechProbability, 1262 /*speech_level_dbfs=*/-18.0f, kNumCalls); 1263 1264 EXPECT_EQ(helper.controller.recommended_input_volume(), 80); 1265 } 1266 1267 TEST(InputVolumeControllerTest, 1268 ClippedlevelMinEnforcedWithClippingWhenAboveMinInputVolume) { 1269 InputVolumeControllerTestHelper helper( 1270 /*config=*/{.min_input_volume = 70, .clipped_level_min = 80}); 1271 1272 // Trigger a downward adjustment caused by clipping input. Use a low speech 1273 // probability to limit the volume changes to clipping handling. 
1274 WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f, 1275 helper.audio_buffer); 1276 constexpr int kNumCalls = 800; 1277 helper.CallAgcSequence(/*applied_input_volume=*/100, kLowSpeechProbability, 1278 /*speech_level_dbfs=*/-18.0f, kNumCalls); 1279 1280 EXPECT_EQ(helper.controller.recommended_input_volume(), 80); 1281 } 1282 1283 TEST(InputVolumeControllerTest, SpeechRatioThresholdIsEffective) { 1284 constexpr int kInputVolume = kDefaultInitialInputVolume; 1285 // Create two input volume controllers with 10 frames between volume updates 1286 // and the minimum speech ratio of 0.8 and speech probability threshold 0.5. 1287 std::unique_ptr<InputVolumeController> controller_1 = 1288 CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, 1289 kClippedWaitFrames, 1290 /*enable_clipping_predictor=*/false, 1291 /*update_input_volume_wait_frames=*/10); 1292 std::unique_ptr<InputVolumeController> controller_2 = 1293 CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, 1294 kClippedWaitFrames, 1295 /*enable_clipping_predictor=*/false, 1296 /*update_input_volume_wait_frames=*/10); 1297 controller_1->Initialize(); 1298 controller_2->Initialize(); 1299 1300 SpeechSamplesReader reader_1; 1301 SpeechSamplesReader reader_2; 1302 1303 int volume_1 = reader_1.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0, 1304 /*speech_probability=*/0.7f, 1305 /*speech_level_dbfs=*/-42.0f, *controller_1); 1306 int volume_2 = reader_2.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0, 1307 /*speech_probability=*/0.4f, 1308 /*speech_level_dbfs=*/-42.0f, *controller_2); 1309 1310 ASSERT_EQ(volume_1, kInputVolume); 1311 ASSERT_EQ(volume_2, kInputVolume); 1312 1313 volume_1 = reader_1.Feed(/*num_frames=*/2, volume_1, /*gain_db=*/0, 1314 /*speech_probability=*/0.4f, 1315 /*speech_level_dbfs=*/-42.0f, *controller_1); 1316 volume_2 = reader_2.Feed(/*num_frames=*/2, volume_2, /*gain_db=*/0, 1317 /*speech_probability=*/0.4f, 1318 
/*speech_level_dbfs=*/-42.0f, *controller_2); 1319 1320 ASSERT_EQ(volume_1, kInputVolume); 1321 ASSERT_EQ(volume_2, kInputVolume); 1322 1323 volume_1 = reader_1.Feed( 1324 /*num_frames=*/7, volume_1, /*gain_db=*/0, 1325 /*speech_probability=*/0.7f, /*speech_level_dbfs=*/-42.0f, *controller_1); 1326 volume_2 = reader_2.Feed( 1327 /*num_frames=*/7, volume_2, /*gain_db=*/0, 1328 /*speech_probability=*/0.7f, /*speech_level_dbfs=*/-42.0f, *controller_2); 1329 1330 ASSERT_GT(volume_1, kInputVolume); 1331 ASSERT_EQ(volume_2, kInputVolume); 1332 } 1333 1334 TEST(InputVolumeControllerTest, SpeechProbabilityThresholdIsEffective) { 1335 constexpr int kInputVolume = kDefaultInitialInputVolume; 1336 // Create two input volume controllers with the exact same settings and 1337 // 10 frames between volume updates. 1338 std::unique_ptr<InputVolumeController> controller_1 = 1339 CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, 1340 kClippedWaitFrames, 1341 /*enable_clipping_predictor=*/false, 1342 /*update_input_volume_wait_frames=*/10); 1343 std::unique_ptr<InputVolumeController> controller_2 = 1344 CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, 1345 kClippedWaitFrames, 1346 /*enable_clipping_predictor=*/false, 1347 /*update_input_volume_wait_frames=*/10); 1348 controller_1->Initialize(); 1349 controller_2->Initialize(); 1350 1351 SpeechSamplesReader reader_1; 1352 SpeechSamplesReader reader_2; 1353 1354 // Process with two sets of inputs: Use `reader_1` to process inputs 1355 // that make the volume to be adjusted after enough frames have been 1356 // processsed and `reader_2` to process inputs that won't make the volume 1357 // to be adjusted. 
1358 int volume_1 = reader_1.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0, 1359 /*speech_probability=*/0.5f, 1360 /*speech_level_dbfs=*/-42.0f, *controller_1); 1361 int volume_2 = reader_2.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0, 1362 /*speech_probability=*/0.49f, 1363 /*speech_level_dbfs=*/-42.0f, *controller_2); 1364 1365 ASSERT_EQ(volume_1, kInputVolume); 1366 ASSERT_EQ(volume_2, kInputVolume); 1367 1368 reader_1.Feed(/*num_frames=*/2, volume_1, /*gain_db=*/0, 1369 /*speech_probability=*/0.49f, /*speech_level_dbfs=*/-42.0f, 1370 *controller_1); 1371 reader_2.Feed(/*num_frames=*/2, volume_2, /*gain_db=*/0, 1372 /*speech_probability=*/0.49f, /*speech_level_dbfs=*/-42.0f, 1373 *controller_2); 1374 1375 ASSERT_EQ(volume_1, kInputVolume); 1376 ASSERT_EQ(volume_2, kInputVolume); 1377 1378 volume_1 = reader_1.Feed( 1379 /*num_frames=*/7, volume_1, /*gain_db=*/0, 1380 /*speech_probability=*/0.5f, /*speech_level_dbfs=*/-42.0f, *controller_1); 1381 volume_2 = reader_2.Feed( 1382 /*num_frames=*/7, volume_2, /*gain_db=*/0, 1383 /*speech_probability=*/0.5f, /*speech_level_dbfs=*/-42.0f, *controller_2); 1384 1385 ASSERT_GT(volume_1, kInputVolume); 1386 ASSERT_EQ(volume_2, kInputVolume); 1387 } 1388 1389 TEST(InputVolumeControllerTest, 1390 DoNotLogRecommendedInputVolumeOnChangeToMatchTarget) { 1391 metrics::Reset(); 1392 1393 SpeechSamplesReader reader; 1394 auto controller = CreateInputVolumeController(); 1395 controller->Initialize(); 1396 // Trigger a downward volume change by inputting audio that clips. Pass a 1397 // speech level that falls in the target range to make sure that the 1398 // adaptation is not made to match the target range. 
1399 constexpr int kStartupVolume = 255; 1400 const int volume = reader.Feed(/*num_frames=*/14, kStartupVolume, 1401 /*gain_db=*/50, kHighSpeechProbability, 1402 /*speech_level_dbfs=*/-20.0f, *controller); 1403 ASSERT_LT(volume, kStartupVolume); 1404 EXPECT_METRIC_THAT( 1405 metrics::Samples( 1406 "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"), 1407 ::testing::IsEmpty()); 1408 } 1409 1410 TEST(InputVolumeControllerTest, 1411 LogRecommendedInputVolumeOnUpwardChangeToMatchTarget) { 1412 metrics::Reset(); 1413 1414 SpeechSamplesReader reader; 1415 auto controller = CreateInputVolumeController(); 1416 controller->Initialize(); 1417 constexpr int kStartupVolume = 100; 1418 // Trigger an upward volume change by inputting audio that does not clip and 1419 // by passing a speech level below the target range. 1420 const int volume = reader.Feed(/*num_frames=*/14, kStartupVolume, 1421 /*gain_db=*/-6, kHighSpeechProbability, 1422 /*speech_level_dbfs=*/-50.0f, *controller); 1423 ASSERT_GT(volume, kStartupVolume); 1424 EXPECT_METRIC_THAT( 1425 metrics::Samples( 1426 "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"), 1427 ::testing::Not(::testing::IsEmpty())); 1428 } 1429 1430 TEST(InputVolumeControllerTest, 1431 LogRecommendedInputVolumeOnDownwardChangeToMatchTarget) { 1432 metrics::Reset(); 1433 1434 SpeechSamplesReader reader; 1435 auto controller = CreateInputVolumeController(); 1436 controller->Initialize(); 1437 constexpr int kStartupVolume = 100; 1438 // Trigger a downward volume change by inputting audio that does not clip and 1439 // by passing a speech level above the target range. 
1440 const int volume = reader.Feed(/*num_frames=*/14, kStartupVolume, 1441 /*gain_db=*/-6, kHighSpeechProbability, 1442 /*speech_level_dbfs=*/-5.0f, *controller); 1443 ASSERT_LT(volume, kStartupVolume); 1444 EXPECT_METRIC_THAT( 1445 metrics::Samples( 1446 "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"), 1447 ::testing::Not(::testing::IsEmpty())); 1448 } 1449 1450 TEST(MonoInputVolumeControllerTest, CheckHandleClippingLowersVolume) { 1451 constexpr int kInitialInputVolume = 100; 1452 constexpr int kInputVolumeStep = 29; 1453 MonoInputVolumeController mono_controller( 1454 /*clipped_level_min=*/70, 1455 /*min_mic_level=*/32, 1456 /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, 1457 kSpeechRatioThreshold); 1458 mono_controller.Initialize(); 1459 1460 UpdateRecommendedInputVolume(mono_controller, kInitialInputVolume, 1461 kLowSpeechProbability, 1462 /*rms_error_dbfs*/ -10.0f); 1463 1464 mono_controller.HandleClipping(kInputVolumeStep); 1465 1466 EXPECT_EQ(mono_controller.recommended_analog_level(), 1467 kInitialInputVolume - kInputVolumeStep); 1468 } 1469 1470 TEST(MonoInputVolumeControllerTest, 1471 CheckProcessNegativeRmsErrorDecreasesInputVolume) { 1472 constexpr int kInitialInputVolume = 100; 1473 MonoInputVolumeController mono_controller( 1474 /*clipped_level_min=*/64, 1475 /*min_mic_level=*/32, 1476 /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, 1477 kSpeechRatioThreshold); 1478 mono_controller.Initialize(); 1479 1480 int volume = UpdateRecommendedInputVolume( 1481 mono_controller, kInitialInputVolume, kHighSpeechProbability, -10.0f); 1482 volume = UpdateRecommendedInputVolume(mono_controller, volume, 1483 kHighSpeechProbability, -10.0f); 1484 volume = UpdateRecommendedInputVolume(mono_controller, volume, 1485 kHighSpeechProbability, -10.0f); 1486 1487 EXPECT_LT(volume, kInitialInputVolume); 1488 } 1489 1490 TEST(MonoInputVolumeControllerTest, 1491 CheckProcessPositiveRmsErrorIncreasesInputVolume) { 1492 constexpr 
int kInitialInputVolume = 100; 1493 MonoInputVolumeController mono_controller( 1494 /*clipped_level_min=*/64, 1495 /*min_mic_level=*/32, 1496 /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, 1497 kSpeechRatioThreshold); 1498 mono_controller.Initialize(); 1499 1500 int volume = UpdateRecommendedInputVolume( 1501 mono_controller, kInitialInputVolume, kHighSpeechProbability, 10.0f); 1502 volume = UpdateRecommendedInputVolume(mono_controller, volume, 1503 kHighSpeechProbability, 10.0f); 1504 volume = UpdateRecommendedInputVolume(mono_controller, volume, 1505 kHighSpeechProbability, 10.0f); 1506 1507 EXPECT_GT(volume, kInitialInputVolume); 1508 } 1509 1510 TEST(MonoInputVolumeControllerTest, 1511 CheckProcessNegativeRmsErrorDecreasesInputVolumeWithLimit) { 1512 constexpr int kInitialInputVolume = 100; 1513 MonoInputVolumeController mono_controller_1( 1514 /*clipped_level_min=*/64, 1515 /*min_mic_level=*/32, 1516 /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, 1517 kSpeechRatioThreshold); 1518 MonoInputVolumeController mono_controller_2( 1519 /*clipped_level_min=*/64, 1520 /*min_mic_level=*/32, 1521 /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, 1522 kSpeechRatioThreshold); 1523 MonoInputVolumeController mono_controller_3( 1524 /*clipped_level_min=*/64, 1525 /*min_mic_level=*/32, 1526 /*update_input_volume_wait_frames=*/2, 1527 /*speech_probability_threshold=*/0.7, 1528 /*speech_ratio_threshold=*/0.8); 1529 mono_controller_1.Initialize(); 1530 mono_controller_2.Initialize(); 1531 mono_controller_3.Initialize(); 1532 1533 // Process RMS errors in the range 1534 // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. 
1535 int volume_1 = UpdateRecommendedInputVolume( 1536 mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -14.0f); 1537 volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, 1538 kHighSpeechProbability, -14.0f); 1539 // Process RMS errors outside the range 1540 // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. 1541 int volume_2 = UpdateRecommendedInputVolume( 1542 mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -15.0f); 1543 int volume_3 = UpdateRecommendedInputVolume( 1544 mono_controller_3, kInitialInputVolume, kHighSpeechProbability, -30.0f); 1545 volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, 1546 kHighSpeechProbability, -15.0f); 1547 volume_3 = UpdateRecommendedInputVolume(mono_controller_3, volume_3, 1548 kHighSpeechProbability, -30.0f); 1549 1550 EXPECT_LT(volume_1, kInitialInputVolume); 1551 EXPECT_LT(volume_2, volume_1); 1552 EXPECT_EQ(volume_2, volume_3); 1553 } 1554 1555 TEST(MonoInputVolumeControllerTest, 1556 CheckProcessPositiveRmsErrorIncreasesInputVolumeWithLimit) { 1557 constexpr int kInitialInputVolume = 100; 1558 MonoInputVolumeController mono_controller_1( 1559 /*clipped_level_min=*/64, 1560 /*min_mic_level=*/32, 1561 /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, 1562 kSpeechRatioThreshold); 1563 MonoInputVolumeController mono_controller_2( 1564 /*clipped_level_min=*/64, 1565 /*min_mic_level=*/32, 1566 /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, 1567 kSpeechRatioThreshold); 1568 MonoInputVolumeController mono_controller_3( 1569 /*clipped_level_min=*/64, 1570 /*min_mic_level=*/32, 1571 /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, 1572 kSpeechRatioThreshold); 1573 mono_controller_1.Initialize(); 1574 mono_controller_2.Initialize(); 1575 mono_controller_3.Initialize(); 1576 1577 // Process RMS errors in the range 1578 // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. 
1579 int volume_1 = UpdateRecommendedInputVolume( 1580 mono_controller_1, kInitialInputVolume, kHighSpeechProbability, 14.0f); 1581 volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, 1582 kHighSpeechProbability, 14.0f); 1583 // Process RMS errors outside the range 1584 // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. 1585 int volume_2 = UpdateRecommendedInputVolume( 1586 mono_controller_2, kInitialInputVolume, kHighSpeechProbability, 15.0f); 1587 int volume_3 = UpdateRecommendedInputVolume( 1588 mono_controller_3, kInitialInputVolume, kHighSpeechProbability, 30.0f); 1589 volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, 1590 kHighSpeechProbability, 15.0f); 1591 volume_3 = UpdateRecommendedInputVolume(mono_controller_3, volume_3, 1592 kHighSpeechProbability, 30.0f); 1593 1594 EXPECT_GT(volume_1, kInitialInputVolume); 1595 EXPECT_GT(volume_2, volume_1); 1596 EXPECT_EQ(volume_2, volume_3); 1597 } 1598 1599 TEST(MonoInputVolumeControllerTest, 1600 CheckProcessRmsErrorDecreasesInputVolumeRepeatedly) { 1601 constexpr int kInitialInputVolume = 100; 1602 MonoInputVolumeController mono_controller( 1603 /*clipped_level_min=*/64, 1604 /*min_mic_level=*/32, 1605 /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, 1606 kSpeechRatioThreshold); 1607 mono_controller.Initialize(); 1608 1609 int volume_before = UpdateRecommendedInputVolume( 1610 mono_controller, kInitialInputVolume, kHighSpeechProbability, -10.0f); 1611 volume_before = UpdateRecommendedInputVolume(mono_controller, volume_before, 1612 kHighSpeechProbability, -10.0f); 1613 1614 EXPECT_LT(volume_before, kInitialInputVolume); 1615 1616 int volume_after = UpdateRecommendedInputVolume( 1617 mono_controller, volume_before, kHighSpeechProbability, -10.0f); 1618 volume_after = UpdateRecommendedInputVolume(mono_controller, volume_after, 1619 kHighSpeechProbability, -10.0f); 1620 1621 EXPECT_LT(volume_after, volume_before); 1622 } 1623 1624 
// Checks that the recommended input volume keeps increasing when a positive
// RMS error is processed over two consecutive pairs of speech frames.
TEST(MonoInputVolumeControllerTest,
     CheckProcessPositiveRmsErrorIncreasesInputVolumeRepeatedly) {
  constexpr int kStartVolume = 100;
  MonoInputVolumeController controller(
      /*clipped_level_min=*/64,
      /*min_mic_level=*/32,
      /*update_input_volume_wait_frames=*/2, kHighSpeechProbability,
      /*speech_ratio_threshold=*/kSpeechRatioThreshold);
  controller.Initialize();

  // Processes two speech frames, chaining the recommended volume, and returns
  // the volume recommended after the second frame.
  const auto process_two_frames = [&controller](int applied_volume) {
    for (int frame = 0; frame < 2; ++frame) {
      applied_volume = UpdateRecommendedInputVolume(
          controller, applied_volume, kHighSpeechProbability, 10.0f);
    }
    return applied_volume;
  };

  const int volume_before = process_two_frames(kStartVolume);
  EXPECT_GT(volume_before, kStartVolume);

  const int volume_after = process_two_frames(volume_before);
  EXPECT_GT(volume_after, volume_before);
}

// Checks that `HandleClipping()` does not recommend a volume below the
// configured clipped level minimum.
TEST(MonoInputVolumeControllerTest, CheckClippedLevelMinIsEffective) {
  constexpr int kStartVolume = 100;
  constexpr int kClippedLevelMin = 70;
  MonoInputVolumeController small_step_controller(
      kClippedLevelMin,
      /*min_mic_level=*/84,
      /*update_input_volume_wait_frames=*/2, kHighSpeechProbability,
      /*speech_ratio_threshold=*/kSpeechRatioThreshold);
  MonoInputVolumeController large_step_controller(
      kClippedLevelMin,
      /*min_mic_level=*/84,
      /*update_input_volume_wait_frames=*/2, kHighSpeechProbability,
      /*speech_ratio_threshold=*/kSpeechRatioThreshold);
  small_step_controller.Initialize();
  large_step_controller.Initialize();

  // Process one frame to reset the state for `HandleClipping()`.
  const int small_step_volume =
      UpdateRecommendedInputVolume(small_step_controller, kStartVolume,
                                   kLowSpeechProbability, -10.0f);
  EXPECT_EQ(small_step_volume, kStartVolume);
  const int large_step_volume =
      UpdateRecommendedInputVolume(large_step_controller, kStartVolume,
                                   kLowSpeechProbability, -10.0f);
  EXPECT_EQ(large_step_volume, kStartVolume);

  // A step of 29 stays above `kClippedLevelMin`, whereas a step of 31 would go
  // below it.
  small_step_controller.HandleClipping(29);
  large_step_controller.HandleClipping(31);

  EXPECT_EQ(large_step_controller.recommended_analog_level(),
            kClippedLevelMin);
  EXPECT_LT(large_step_controller.recommended_analog_level(),
            small_step_controller.recommended_analog_level());
}

// Checks that a large negative RMS error lowers the recommended input volume
// down to the configured minimum mic level.
TEST(MonoInputVolumeControllerTest, CheckMinMicLevelIsEffective) {
  constexpr int kStartVolume = 100;
  constexpr int kMinMicLevel = 64;
  MonoInputVolumeController moderate_error_controller(
      /*clipped_level_min=*/64, kMinMicLevel,
      /*update_input_volume_wait_frames=*/2, kHighSpeechProbability,
      /*speech_ratio_threshold=*/kSpeechRatioThreshold);
  MonoInputVolumeController large_error_controller(
      /*clipped_level_min=*/64, kMinMicLevel,
      /*update_input_volume_wait_frames=*/2, kHighSpeechProbability,
      /*speech_ratio_threshold=*/kSpeechRatioThreshold);
  moderate_error_controller.Initialize();
  large_error_controller.Initialize();

  // First frame: the recommended volume is expected to stay unchanged.
  int moderate_error_volume = UpdateRecommendedInputVolume(
      moderate_error_controller, kStartVolume, kHighSpeechProbability, -10.0f);
  int large_error_volume = UpdateRecommendedInputVolume(
      large_error_controller, kStartVolume, kHighSpeechProbability, -10.0f);

  EXPECT_EQ(moderate_error_volume, kStartVolume);
  EXPECT_EQ(large_error_volume, kStartVolume);

  // Second frame: the two controllers receive different RMS errors.
  moderate_error_volume = UpdateRecommendedInputVolume(
      moderate_error_controller, moderate_error_volume, kHighSpeechProbability,
      -10.0f);
  large_error_volume = UpdateRecommendedInputVolume(
      large_error_controller, large_error_volume, kHighSpeechProbability,
      -30.0f);

  EXPECT_LT(moderate_error_volume, kStartVolume);
  EXPECT_LT(large_error_volume, moderate_error_volume);
  EXPECT_EQ(large_error_volume, kMinMicLevel);
}

// Checks that a controller configured with 3 wait frames needs one more frame
// than a controller configured with 1 wait frame before lowering the volume.
TEST(MonoInputVolumeControllerTest,
     CheckUpdateInputVolumeWaitFramesIsEffective) {
  constexpr int kStartVolume = 100;
  MonoInputVolumeController short_wait_controller(
      /*clipped_level_min=*/64,
      /*min_mic_level=*/84,
      /*update_input_volume_wait_frames=*/1, kHighSpeechProbability,
      /*speech_ratio_threshold=*/kSpeechRatioThreshold);
  MonoInputVolumeController long_wait_controller(
      /*clipped_level_min=*/64,
      /*min_mic_level=*/84,
      /*update_input_volume_wait_frames=*/3, kHighSpeechProbability,
      /*speech_ratio_threshold=*/kSpeechRatioThreshold);
  short_wait_controller.Initialize();
  long_wait_controller.Initialize();

  // Frame 1: no controller changes the volume.
  int short_wait_volume = UpdateRecommendedInputVolume(
      short_wait_controller, kStartVolume, kHighSpeechProbability, -10.0f);
  int long_wait_volume = UpdateRecommendedInputVolume(
      long_wait_controller, kStartVolume, kHighSpeechProbability, -10.0f);

  EXPECT_EQ(short_wait_volume, kStartVolume);
  EXPECT_EQ(long_wait_volume, kStartVolume);

  // Frame 2: only the controller with the shorter wait lowers the volume.
  short_wait_volume = UpdateRecommendedInputVolume(
      short_wait_controller, short_wait_volume, kHighSpeechProbability, -10.0f);
  long_wait_volume = UpdateRecommendedInputVolume(
      long_wait_controller, long_wait_volume, kHighSpeechProbability, -10.0f);

  EXPECT_LT(short_wait_volume, kStartVolume);
  EXPECT_EQ(long_wait_volume, kStartVolume);

  // Frame 3: the controller with the longer wait lowers the volume as well.
  long_wait_volume = UpdateRecommendedInputVolume(
      long_wait_controller, long_wait_volume, kHighSpeechProbability, -10.0f);

  EXPECT_LT(long_wait_volume, kStartVolume);
}

// Checks that a frame with speech probability below the configured threshold
// prevents the volume update that an at-threshold frame triggers.
TEST(MonoInputVolumeControllerTest,
     CheckSpeechProbabilityThresholdIsEffective) {
  constexpr int kStartVolume = 100;
  constexpr float kSpeechProbabilityThreshold = 0.8f;
  MonoInputVolumeController below_threshold_controller(
      /*clipped_level_min=*/64,
      /*min_mic_level=*/84,
      /*update_input_volume_wait_frames=*/2, kSpeechProbabilityThreshold,
      /*speech_ratio_threshold=*/kSpeechRatioThreshold);
  MonoInputVolumeController at_threshold_controller(
      /*clipped_level_min=*/64,
      /*min_mic_level=*/84,
      /*update_input_volume_wait_frames=*/2, kSpeechProbabilityThreshold,
      /*speech_ratio_threshold=*/kSpeechRatioThreshold);
  below_threshold_controller.Initialize();
  at_threshold_controller.Initialize();

  // Frame 1: both controllers observe a speech probability equal to the
  // threshold.
  int below_threshold_volume = UpdateRecommendedInputVolume(
      below_threshold_controller, kStartVolume, kSpeechProbabilityThreshold,
      -10.0f);
  int at_threshold_volume = UpdateRecommendedInputVolume(
      at_threshold_controller, kStartVolume, kSpeechProbabilityThreshold,
      -10.0f);

  EXPECT_EQ(below_threshold_volume, kStartVolume);
  EXPECT_EQ(at_threshold_volume, kStartVolume);

  // Frame 2: only the first controller observes a probability below the
  // threshold.
  below_threshold_volume = UpdateRecommendedInputVolume(
      below_threshold_controller, below_threshold_volume,
      kSpeechProbabilityThreshold - 0.1f, -10.0f);
  at_threshold_volume = UpdateRecommendedInputVolume(
      at_threshold_controller, at_threshold_volume, kSpeechProbabilityThreshold,
      -10.0f);

  EXPECT_EQ(below_threshold_volume, kStartVolume);
  EXPECT_LT(at_threshold_volume, below_threshold_volume);
}

// Checks that, with a speech ratio threshold of 0.75 and 4 wait frames, the
// volume is lowered when 3 out of 4 frames have high speech probability but
// not when only 2 out of 4 do.
TEST(MonoInputVolumeControllerTest, CheckSpeechRatioThresholdIsEffective) {
  constexpr int kStartVolume = 100;
  MonoInputVolumeController low_ratio_controller(
      /*clipped_level_min=*/64,
      /*min_mic_level=*/84,
      /*update_input_volume_wait_frames=*/4, kHighSpeechProbability,
      /*speech_ratio_threshold=*/0.75f);
  MonoInputVolumeController high_ratio_controller(
      /*clipped_level_min=*/64,
      /*min_mic_level=*/84,
      /*update_input_volume_wait_frames=*/4, kHighSpeechProbability,
      /*speech_ratio_threshold=*/0.75f);
  low_ratio_controller.Initialize();
  high_ratio_controller.Initialize();

  // Frames 1-3: both controllers observe the same speech probabilities.
  constexpr float kSharedSpeechProbabilities[] = {
      kHighSpeechProbability, kHighSpeechProbability, kLowSpeechProbability};
  int low_ratio_volume = kStartVolume;
  int high_ratio_volume = kStartVolume;
  for (const float speech_probability : kSharedSpeechProbabilities) {
    low_ratio_volume = UpdateRecommendedInputVolume(
        low_ratio_controller, low_ratio_volume, speech_probability, -10.0f);
    high_ratio_volume = UpdateRecommendedInputVolume(
        high_ratio_controller, high_ratio_volume, speech_probability, -10.0f);
  }

  EXPECT_EQ(low_ratio_volume, kStartVolume);
  EXPECT_EQ(high_ratio_volume, kStartVolume);

  // Frame 4: only the second controller observes a high speech probability.
  low_ratio_volume = UpdateRecommendedInputVolume(
      low_ratio_controller, low_ratio_volume, kLowSpeechProbability, -10.0f);
  high_ratio_volume = UpdateRecommendedInputVolume(
      high_ratio_controller, high_ratio_volume, kHighSpeechProbability, -10.0f);

  EXPECT_EQ(low_ratio_volume, kStartVolume);
  EXPECT_LT(high_ratio_volume, low_ratio_volume);
}

// Checks that processing an empty RMS error leaves the recommended input
// volume unchanged, whereas a negative RMS error lowers it.
TEST(MonoInputVolumeControllerTest,
     CheckProcessEmptyRmsErrorDoesNotLowerVolume) {
  constexpr int kStartVolume = 100;
  MonoInputVolumeController empty_error_controller(
      /*clipped_level_min=*/64,
      /*min_mic_level=*/84,
      /*update_input_volume_wait_frames=*/2, kHighSpeechProbability,
      /*speech_ratio_threshold=*/kSpeechRatioThreshold);
  MonoInputVolumeController negative_error_controller(
      /*clipped_level_min=*/64,
      /*min_mic_level=*/84,
      /*update_input_volume_wait_frames=*/2, kHighSpeechProbability,
      /*speech_ratio_threshold=*/kSpeechRatioThreshold);
  empty_error_controller.Initialize();
  negative_error_controller.Initialize();

  // Frame 1: both controllers process the same negative RMS error.
  int empty_error_volume = UpdateRecommendedInputVolume(
      empty_error_controller, kStartVolume, kHighSpeechProbability, -10.0f);
  int negative_error_volume = UpdateRecommendedInputVolume(
      negative_error_controller, kStartVolume, kHighSpeechProbability, -10.0f);

  EXPECT_EQ(empty_error_volume, kStartVolume);
  EXPECT_EQ(negative_error_volume, kStartVolume);

  // Frame 2: the first controller gets no RMS error at all.
  empty_error_volume = UpdateRecommendedInputVolume(
      empty_error_controller, empty_error_volume, kHighSpeechProbability,
      std::nullopt);
  negative_error_volume = UpdateRecommendedInputVolume(
      negative_error_controller, negative_error_volume, kHighSpeechProbability,
      -10.0f);

  EXPECT_EQ(empty_error_volume, kStartVolume);
  EXPECT_LT(negative_error_volume, empty_error_volume);
}
1861 1862 } // namespace webrtc