audio_mixer_impl_unittest.cc (18379B)
1 /* 2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_mixer/audio_mixer_impl.h" 12 13 #include <algorithm> 14 #include <cstdint> 15 #include <cstring> 16 #include <memory> 17 #include <optional> 18 #include <string> 19 #include <vector> 20 21 #include "api/array_view.h" 22 #include "api/audio/audio_frame.h" 23 #include "api/audio/audio_mixer.h" 24 #include "api/rtp_packet_info.h" 25 #include "api/rtp_packet_infos.h" 26 #include "api/scoped_refptr.h" 27 #include "api/units/timestamp.h" 28 #include "modules/audio_mixer/default_output_rate_calculator.h" 29 #include "modules/audio_mixer/output_rate_calculator.h" 30 #include "rtc_base/checks.h" 31 #include "rtc_base/strings/string_builder.h" 32 #include "rtc_base/task_queue_for_test.h" 33 #include "system_wrappers/include/metrics.h" 34 #include "test/gmock.h" 35 #include "test/gtest.h" 36 37 using ::testing::_; 38 using ::testing::Exactly; 39 using ::testing::Invoke; 40 using ::testing::Return; 41 using ::testing::UnorderedElementsAre; 42 43 namespace webrtc { 44 45 namespace { 46 47 constexpr int kDefaultSampleRateHz = 48000; 48 constexpr char kSourceCountHistogramName[] = 49 "WebRTC.Audio.AudioMixer.NewHighestSourceCount"; 50 51 // Utility function that resets the frame member variables with 52 // sensible defaults. 53 void ResetFrame(AudioFrame* frame) { 54 frame->sample_rate_hz_ = kDefaultSampleRateHz; 55 frame->num_channels_ = 1; 56 57 // Frame duration 10ms. 58 frame->samples_per_channel_ = kDefaultSampleRateHz / 100; 59 frame->vad_activity_ = AudioFrame::kVadActive; 60 frame->speech_type_ = AudioFrame::kNormalSpeech; 61 } 62 63 std::string ProduceDebugText(int sample_rate_hz, 64 int number_of_channels, 65 int number_of_sources) { 66 StringBuilder ss; 67 ss << "Sample rate: " << sample_rate_hz << " "; 68 ss << "Number of channels: " << number_of_channels << " "; 69 ss << "Number of sources: " << number_of_sources; 70 return ss.Release(); 71 } 72 73 AudioFrame frame_for_mixing; 74 75 } // namespace 76 77 class MockMixerAudioSource : public ::testing::NiceMock<AudioMixer::Source> { 78 public: 79 MockMixerAudioSource() 80 : fake_audio_frame_info_(AudioMixer::Source::AudioFrameInfo::kNormal) { 81 ON_CALL(*this, GetAudioFrameWithInfo(_, _)) 82 .WillByDefault( 83 Invoke(this, &MockMixerAudioSource::FakeAudioFrameWithInfo)); 84 ON_CALL(*this, PreferredSampleRate()) 85 .WillByDefault(Return(kDefaultSampleRateHz)); 86 } 87 88 MOCK_METHOD(AudioFrameInfo, 89 GetAudioFrameWithInfo, 90 (int sample_rate_hz, AudioFrame* audio_frame), 91 (override)); 92 93 MOCK_METHOD(int, PreferredSampleRate, (), (const, override)); 94 MOCK_METHOD(int, Ssrc, (), (const, override)); 95 96 AudioFrame* fake_frame() { return &fake_frame_; } 97 AudioFrameInfo fake_info() { return fake_audio_frame_info_; } 98 void set_fake_info(const AudioFrameInfo audio_frame_info) { 99 fake_audio_frame_info_ = audio_frame_info; 100 } 101 102 void set_packet_infos(const RtpPacketInfos& packet_infos) { 103 packet_infos_ = packet_infos; 104 } 105 106 private: 107 AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz, 108 AudioFrame* audio_frame) { 109 audio_frame->CopyFrom(fake_frame_); 110 audio_frame->sample_rate_hz_ = sample_rate_hz; 111 audio_frame->samples_per_channel_ = CheckedDivExact(sample_rate_hz, 100); 112 audio_frame->packet_infos_ = packet_infos_; 113 return fake_info(); 114 } 115 116 AudioFrame fake_frame_; 117 AudioFrameInfo fake_audio_frame_info_; 118 RtpPacketInfos packet_infos_; 119 }; 120 121 class CustomRateCalculator : public OutputRateCalculator { 122 public: 123 explicit CustomRateCalculator(int rate) : rate_(rate) {} 124 int CalculateOutputRateFromRange( 125 ArrayView<const int> /* preferred_rates */) override { 126 return rate_; 127 } 128 129 private: 130 const int rate_; 131 }; 132 133 void MixMonoAtGivenNativeRate(int native_sample_rate, 134 AudioFrame* mix_frame, 135 scoped_refptr<AudioMixer> mixer, 136 MockMixerAudioSource* audio_source) { 137 ON_CALL(*audio_source, PreferredSampleRate()) 138 .WillByDefault(Return(native_sample_rate)); 139 audio_source->fake_frame()->sample_rate_hz_ = native_sample_rate; 140 audio_source->fake_frame()->samples_per_channel_ = native_sample_rate / 100; 141 142 mixer->Mix(1, mix_frame); 143 } 144 145 TEST(AudioMixer, UpdatesSourceCountHistogram) { 146 metrics::Reset(); 147 constexpr int kAudioSourcesGroup1 = 5; 148 constexpr int kAudioSourcesGroup2 = 3; 149 150 const auto mixer = AudioMixerImpl::Create(); 151 152 MockMixerAudioSource participants[kAudioSourcesGroup1 + kAudioSourcesGroup2]; 153 154 // Add the sources in group 1. 155 for (int i = 0; i < kAudioSourcesGroup1; ++i) { 156 EXPECT_TRUE(mixer->AddSource(&participants[i])); 157 EXPECT_EQ(i + 1, metrics::NumSamples(kSourceCountHistogramName)); 158 EXPECT_EQ(1, metrics::NumEvents(kSourceCountHistogramName, i + 1)); 159 } 160 // Remove the sources again. 161 for (int i = 0; i < kAudioSourcesGroup1; ++i) { 162 mixer->RemoveSource(&participants[i]); 163 } 164 // Add the first group again. This should not add anything new to the 165 // histogram. 166 for (int i = 0; i < kAudioSourcesGroup1; ++i) { 167 EXPECT_TRUE(mixer->AddSource(&participants[i])); 168 EXPECT_EQ(kAudioSourcesGroup1, 169 metrics::NumSamples(kSourceCountHistogramName)); 170 EXPECT_EQ(1, metrics::NumEvents(kSourceCountHistogramName, i + 1)); 171 } 172 // Add the second group. This adds to the histogram again. 173 for (int i = kAudioSourcesGroup1; 174 i < kAudioSourcesGroup1 + kAudioSourcesGroup2; ++i) { 175 EXPECT_TRUE(mixer->AddSource(&participants[i])); 176 EXPECT_EQ(i + 1, metrics::NumSamples(kSourceCountHistogramName)); 177 EXPECT_EQ(1, metrics::NumEvents(kSourceCountHistogramName, i + 1)); 178 } 179 } 180 181 TEST(AudioMixer, FrameNotModifiedForSingleParticipant) { 182 const auto mixer = AudioMixerImpl::Create(); 183 184 MockMixerAudioSource participant; 185 186 ResetFrame(participant.fake_frame()); 187 const size_t n_samples = participant.fake_frame()->samples_per_channel_; 188 189 // Modify the frame so that it's not zero. 190 int16_t* fake_frame_data = participant.fake_frame()->mutable_data(); 191 for (size_t j = 0; j < n_samples; ++j) { 192 fake_frame_data[j] = static_cast<int16_t>(j); 193 } 194 195 EXPECT_TRUE(mixer->AddSource(&participant)); 196 EXPECT_CALL(participant, GetAudioFrameWithInfo(_, _)).Times(Exactly(2)); 197 198 AudioFrame audio_frame; 199 // Two mix iteration to compare after the ramp-up step. 200 for (int i = 0; i < 2; ++i) { 201 mixer->Mix(1, // number of channels 202 &audio_frame); 203 } 204 205 EXPECT_EQ(0, memcmp(participant.fake_frame()->data(), audio_frame.data(), 206 n_samples)); 207 } 208 209 TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) { 210 const auto mixer = AudioMixerImpl::Create(); 211 212 MockMixerAudioSource audio_source; 213 ResetFrame(audio_source.fake_frame()); 214 215 mixer->AddSource(&audio_source); 216 217 for (auto frequency : {8000, 16000, 32000, 48000}) { 218 EXPECT_CALL(audio_source, GetAudioFrameWithInfo(frequency, _)) 219 .Times(Exactly(1)); 220 221 MixMonoAtGivenNativeRate(frequency, &frame_for_mixing, mixer, 222 &audio_source); 223 } 224 } 225 226 TEST(AudioMixer, MixerShouldMixAtNativeSourceRate) { 227 const auto mixer = AudioMixerImpl::Create(); 228 229 MockMixerAudioSource audio_source; 230 ResetFrame(audio_source.fake_frame()); 231 232 mixer->AddSource(&audio_source); 233 234 for (auto frequency : {8000, 16000, 32000, 48000}) { 235 MixMonoAtGivenNativeRate(frequency, &frame_for_mixing, mixer, 236 &audio_source); 237 238 EXPECT_EQ(frequency, frame_for_mixing.sample_rate_hz_); 239 } 240 } 241 242 TEST(AudioMixer, MixerShouldAlwaysMixAtNativeRate) { 243 const auto mixer = AudioMixerImpl::Create(); 244 245 MockMixerAudioSource participant; 246 ResetFrame(participant.fake_frame()); 247 mixer->AddSource(&participant); 248 249 const int needed_frequency = 44100; 250 ON_CALL(participant, PreferredSampleRate()) 251 .WillByDefault(Return(needed_frequency)); 252 253 // We expect mixing frequency to be native and >= needed_frequency. 254 const int expected_mix_frequency = 48000; 255 EXPECT_CALL(participant, GetAudioFrameWithInfo(expected_mix_frequency, _)) 256 .Times(Exactly(1)); 257 participant.fake_frame()->sample_rate_hz_ = expected_mix_frequency; 258 participant.fake_frame()->samples_per_channel_ = expected_mix_frequency / 100; 259 260 mixer->Mix(1, &frame_for_mixing); 261 262 EXPECT_EQ(48000, frame_for_mixing.sample_rate_hz_); 263 } 264 265 // Check that the mixing rate is always >= participants preferred rate. 266 TEST(AudioMixer, ShouldNotCauseQualityLossForMultipleSources) { 267 const auto mixer = AudioMixerImpl::Create(); 268 269 std::vector<MockMixerAudioSource> audio_sources(2); 270 const std::vector<int> source_sample_rates = {8000, 16000}; 271 for (int i = 0; i < 2; ++i) { 272 auto& source = audio_sources[i]; 273 ResetFrame(source.fake_frame()); 274 mixer->AddSource(&source); 275 const auto sample_rate = source_sample_rates[i]; 276 EXPECT_CALL(source, PreferredSampleRate()).WillOnce(Return(sample_rate)); 277 278 EXPECT_CALL(source, GetAudioFrameWithInfo(::testing::Ge(sample_rate), _)); 279 } 280 mixer->Mix(1, &frame_for_mixing); 281 } 282 283 TEST(AudioMixer, ParticipantNumberOfChannels) { 284 const auto mixer = AudioMixerImpl::Create(); 285 286 MockMixerAudioSource participant; 287 ResetFrame(participant.fake_frame()); 288 289 EXPECT_TRUE(mixer->AddSource(&participant)); 290 for (size_t number_of_channels : {1, 2}) { 291 EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) 292 .Times(Exactly(1)); 293 mixer->Mix(number_of_channels, &frame_for_mixing); 294 EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_); 295 } 296 } 297 298 // This test checks that the initialization and participant addition 299 // can be done on a different thread. 300 TEST(AudioMixer, ConstructFromOtherThread) { 301 TaskQueueForTest init_queue("init"); 302 scoped_refptr<AudioMixer> mixer; 303 init_queue.SendTask([&mixer]() { mixer = AudioMixerImpl::Create(); }); 304 305 MockMixerAudioSource participant; 306 EXPECT_CALL(participant, PreferredSampleRate()) 307 .WillRepeatedly(Return(kDefaultSampleRateHz)); 308 309 ResetFrame(participant.fake_frame()); 310 311 TaskQueueForTest participant_queue("participant"); 312 participant_queue.SendTask( 313 [&mixer, &participant]() { mixer->AddSource(&participant); }); 314 315 EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) 316 .Times(Exactly(1)); 317 318 // Do one mixer iteration 319 mixer->Mix(1, &frame_for_mixing); 320 } 321 322 TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) { 323 constexpr int kOutputRate = 22000; 324 const auto mixer = 325 AudioMixerImpl::Create(std::unique_ptr<OutputRateCalculator>( 326 new CustomRateCalculator(kOutputRate)), 327 true); 328 MockMixerAudioSource audio_source; 329 mixer->AddSource(&audio_source); 330 ResetFrame(audio_source.fake_frame()); 331 332 EXPECT_CALL(audio_source, GetAudioFrameWithInfo(kOutputRate, _)) 333 .Times(Exactly(1)); 334 335 mixer->Mix(1, &frame_for_mixing); 336 } 337 338 TEST(AudioMixer, ZeroSourceRateShouldBeDecidedByRateCalculator) { 339 constexpr int kOutputRate = 8000; 340 const auto mixer = 341 AudioMixerImpl::Create(std::unique_ptr<OutputRateCalculator>( 342 new CustomRateCalculator(kOutputRate)), 343 true); 344 345 mixer->Mix(1, &frame_for_mixing); 346 347 EXPECT_EQ(kOutputRate, frame_for_mixing.sample_rate_hz_); 348 } 349 350 TEST(AudioMixer, NoLimiterBasicApiCalls) { 351 const auto mixer = AudioMixerImpl::Create( 352 std::unique_ptr<OutputRateCalculator>(new DefaultOutputRateCalculator()), 353 false); 354 mixer->Mix(1, &frame_for_mixing); 355 } 356 357 TEST(AudioMixer, AnyRateIsPossibleWithNoLimiter) { 358 // No APM limiter means no AudioProcessing::NativeRate restriction 359 // on mixing rate. The rate has to be divisible by 100 since we use 360 // 10 ms frames, though. 361 for (const auto rate : {8000, 20000, 24000, 32000, 44100}) { 362 for (const size_t number_of_channels : {1, 2}) { 363 for (const auto number_of_sources : {0, 1, 2, 3, 4}) { 364 SCOPED_TRACE( 365 ProduceDebugText(rate, number_of_sources, number_of_sources)); 366 const auto mixer = 367 AudioMixerImpl::Create(std::unique_ptr<OutputRateCalculator>( 368 new CustomRateCalculator(rate)), 369 false); 370 371 std::vector<MockMixerAudioSource> sources(number_of_sources); 372 for (auto& source : sources) { 373 ResetFrame(source.fake_frame()); 374 mixer->AddSource(&source); 375 } 376 377 mixer->Mix(number_of_channels, &frame_for_mixing); 378 EXPECT_EQ(rate, frame_for_mixing.sample_rate_hz_); 379 EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_); 380 } 381 } 382 } 383 } 384 385 TEST(AudioMixer, MultipleChannelsOneParticipant) { 386 // Set up a participant with a 6-channel frame, and make sure a 6-channel 387 // frame with the right sample values comes out from the mixer. There are 2 388 // Mix calls because of ramp-up. 389 constexpr size_t kNumberOfChannels = 6; 390 MockMixerAudioSource source; 391 ResetFrame(source.fake_frame()); 392 const auto mixer = AudioMixerImpl::Create(); 393 mixer->AddSource(&source); 394 mixer->Mix(1, &frame_for_mixing); 395 auto* frame = source.fake_frame(); 396 frame->num_channels_ = kNumberOfChannels; 397 std::fill(frame->mutable_data(), 398 frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0); 399 for (size_t i = 0; i < kNumberOfChannels; ++i) { 400 frame->mutable_data()[100 * frame->num_channels_ + i] = 1000 * i; 401 } 402 403 mixer->Mix(kNumberOfChannels, &frame_for_mixing); 404 405 EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels); 406 for (size_t i = 0; i < kNumberOfChannels; ++i) { 407 EXPECT_EQ(frame_for_mixing.data()[100 * frame_for_mixing.num_channels_ + i], 408 static_cast<int16_t>(1000 * i)); 409 } 410 } 411 412 TEST(AudioMixer, MultipleChannelsManyParticipants) { 413 // Sets up 2 participants. One has a 6-channel frame. Make sure a 6-channel 414 // frame with the right sample values comes out from the mixer. There are 2 415 // Mix calls because of ramp-up. 416 constexpr size_t kNumberOfChannels = 6; 417 MockMixerAudioSource source; 418 const auto mixer = AudioMixerImpl::Create(); 419 mixer->AddSource(&source); 420 ResetFrame(source.fake_frame()); 421 mixer->Mix(1, &frame_for_mixing); 422 auto* frame = source.fake_frame(); 423 frame->num_channels_ = kNumberOfChannels; 424 std::fill(frame->mutable_data(), 425 frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0); 426 for (size_t i = 0; i < kNumberOfChannels; ++i) { 427 frame->mutable_data()[100 * frame->num_channels_ + i] = 1000 * i; 428 } 429 MockMixerAudioSource other_source; 430 ResetFrame(other_source.fake_frame()); 431 mixer->AddSource(&other_source); 432 433 mixer->Mix(kNumberOfChannels, &frame_for_mixing); 434 435 EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels); 436 for (size_t i = 0; i < kNumberOfChannels; ++i) { 437 EXPECT_EQ(frame_for_mixing.data()[100 * frame_for_mixing.num_channels_ + i], 438 static_cast<int16_t>(1000 * i)); 439 } 440 } 441 442 TEST(AudioMixer, ShouldIncludeRtpPacketInfoFromAllMixedSources) { 443 const uint32_t kSsrc0 = 10; 444 const uint32_t kSsrc1 = 11; 445 const uint32_t kSsrc2 = 12; 446 const uint32_t kCsrc0 = 20; 447 const uint32_t kCsrc1 = 21; 448 const uint32_t kCsrc2 = 22; 449 const uint32_t kCsrc3 = 23; 450 const int kAudioLevel0 = 10; 451 const int kAudioLevel1 = 40; 452 const std::optional<uint32_t> kAudioLevel2 = std::nullopt; 453 const uint32_t kRtpTimestamp0 = 300; 454 const uint32_t kRtpTimestamp1 = 400; 455 const Timestamp kReceiveTime0 = Timestamp::Millis(10); 456 const Timestamp kReceiveTime1 = Timestamp::Millis(20); 457 458 RtpPacketInfo p0(kSsrc0, {kCsrc0, kCsrc1}, kRtpTimestamp0, kReceiveTime0); 459 p0.set_audio_level(kAudioLevel0); 460 RtpPacketInfo p1(kSsrc1, {kCsrc2}, kRtpTimestamp1, kReceiveTime1); 461 p1.set_audio_level(kAudioLevel1); 462 RtpPacketInfo p2(kSsrc2, {kCsrc3}, kRtpTimestamp1, kReceiveTime1); 463 p2.set_audio_level(kAudioLevel2); 464 465 const auto mixer = AudioMixerImpl::Create(); 466 467 MockMixerAudioSource source; 468 source.set_packet_infos(RtpPacketInfos({p0})); 469 mixer->AddSource(&source); 470 ResetFrame(source.fake_frame()); 471 mixer->Mix(1, &frame_for_mixing); 472 473 MockMixerAudioSource other_source; 474 other_source.set_packet_infos(RtpPacketInfos({p1, p2})); 475 ResetFrame(other_source.fake_frame()); 476 mixer->AddSource(&other_source); 477 478 mixer->Mix(/*number_of_channels=*/1, &frame_for_mixing); 479 480 EXPECT_THAT(frame_for_mixing.packet_infos_, UnorderedElementsAre(p0, p1, p2)); 481 } 482 483 class HighOutputRateCalculator : public OutputRateCalculator { 484 public: 485 static const int kDefaultFrequency = 76000; 486 int CalculateOutputRateFromRange( 487 ArrayView<const int> /* preferred_sample_rates */) override { 488 return kDefaultFrequency; 489 } 490 ~HighOutputRateCalculator() override {} 491 }; 492 const int HighOutputRateCalculator::kDefaultFrequency; 493 494 TEST(AudioMixerDeathTest, MultipleChannelsAndHighRate) { 495 constexpr size_t kSamplesPerChannel = 496 HighOutputRateCalculator::kDefaultFrequency / 100; 497 // As many channels as an AudioFrame can fit: 498 constexpr size_t kNumberOfChannels = 499 AudioFrame::kMaxDataSizeSamples / kSamplesPerChannel; 500 MockMixerAudioSource source; 501 const auto mixer = AudioMixerImpl::Create( 502 std::make_unique<HighOutputRateCalculator>(), true); 503 mixer->AddSource(&source); 504 ResetFrame(source.fake_frame()); 505 mixer->Mix(1, &frame_for_mixing); 506 auto* frame = source.fake_frame(); 507 frame->num_channels_ = kNumberOfChannels; 508 frame->sample_rate_hz_ = HighOutputRateCalculator::kDefaultFrequency; 509 frame->samples_per_channel_ = kSamplesPerChannel; 510 511 std::fill(frame->mutable_data(), 512 frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0); 513 MockMixerAudioSource other_source; 514 ResetFrame(other_source.fake_frame()); 515 auto* other_frame = other_source.fake_frame(); 516 other_frame->num_channels_ = kNumberOfChannels; 517 other_frame->sample_rate_hz_ = HighOutputRateCalculator::kDefaultFrequency; 518 other_frame->samples_per_channel_ = kSamplesPerChannel; 519 mixer->AddSource(&other_source); 520 521 #if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) 522 EXPECT_DEATH(mixer->Mix(kNumberOfChannels, &frame_for_mixing), ""); 523 #endif 524 } 525 526 } // namespace webrtc