audio_decoder_unittest.cc (19314B)
1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "api/audio_codecs/audio_decoder.h" 12 13 #include <array> 14 #include <cstdint> 15 #include <cstdlib> 16 #include <memory> 17 #include <optional> 18 #include <tuple> 19 #include <utility> 20 #include <vector> 21 22 #include "api/array_view.h" 23 #include "api/audio_codecs/audio_encoder.h" 24 #include "api/audio_codecs/g722/audio_encoder_g722_config.h" 25 #include "api/audio_codecs/opus/audio_encoder_opus.h" 26 #include "api/audio_codecs/opus/audio_encoder_opus_config.h" 27 #include "api/environment/environment_factory.h" 28 #include "modules/audio_coding/codecs/g711/audio_decoder_pcm.h" 29 #include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h" 30 #include "modules/audio_coding/codecs/g722/audio_decoder_g722.h" 31 #include "modules/audio_coding/codecs/g722/audio_encoder_g722.h" 32 #include "modules/audio_coding/codecs/opus/audio_decoder_opus.h" 33 #include "modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h" 34 #include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h" 35 #include "modules/audio_coding/neteq/tools/input_audio_file.h" 36 #include "modules/audio_coding/neteq/tools/resample_input_audio_file.h" 37 #include "rtc_base/buffer.h" 38 #include "rtc_base/checks.h" 39 #include "test/create_test_field_trials.h" 40 #include "test/gtest.h" 41 #include "test/testsupport/file_utils.h" 42 43 namespace webrtc { 44 45 namespace { 46 47 constexpr int kOverheadBytesPerPacket = 50; 48 49 // The absolute difference between the input and output (the first channel) is 50 // compared vs `tolerance`. The parameter `delay` is used to correct for codec 51 // delays. 52 void CompareInputOutput(const std::vector<int16_t>& input, 53 const std::vector<int16_t>& output, 54 size_t num_samples, 55 size_t channels, 56 int tolerance, 57 int delay) { 58 ASSERT_LE(num_samples, input.size()); 59 ASSERT_LE(num_samples * channels, output.size()); 60 for (unsigned int n = 0; n < num_samples - delay; ++n) { 61 ASSERT_NEAR(input[n], output[channels * n + delay], tolerance) 62 << "Exit test on first diff; n = " << n; 63 } 64 } 65 66 // The absolute difference between the first two channels in `output` is 67 // compared vs `tolerance`. 68 void CompareTwoChannels(const std::vector<int16_t>& output, 69 size_t samples_per_channel, 70 size_t channels, 71 int tolerance) { 72 ASSERT_GE(channels, 2u); 73 ASSERT_LE(samples_per_channel * channels, output.size()); 74 for (unsigned int n = 0; n < samples_per_channel; ++n) 75 ASSERT_NEAR(output[channels * n], output[channels * n + 1], tolerance) 76 << "Stereo samples differ."; 77 } 78 79 // Calculates mean-squared error between input and output (the first channel). 80 // The parameter `delay` is used to correct for codec delays. 81 double MseInputOutput(const std::vector<int16_t>& input, 82 const std::vector<int16_t>& output, 83 size_t num_samples, 84 size_t channels, 85 int delay) { 86 RTC_DCHECK_LT(delay, static_cast<int>(num_samples)); 87 RTC_DCHECK_LE(num_samples, input.size()); 88 RTC_DCHECK_LE(num_samples * channels, output.size()); 89 if (num_samples == 0) 90 return 0.0; 91 double squared_sum = 0.0; 92 for (unsigned int n = 0; n < num_samples - delay; ++n) { 93 squared_sum += (input[n] - output[channels * n + delay]) * 94 (input[n] - output[channels * n + delay]); 95 } 96 return squared_sum / (num_samples - delay); 97 } 98 } // namespace 99 100 class AudioDecoderTest : public ::testing::Test { 101 protected: 102 AudioDecoderTest() 103 : input_audio_(test::ResourcePath("audio_coding/testfile32kHz", "pcm"), 104 32000), 105 codec_input_rate_hz_(32000), // Legacy default value. 106 frame_size_(0), 107 data_length_(0), 108 channels_(1), 109 payload_type_(17), 110 decoder_(nullptr) {} 111 112 ~AudioDecoderTest() override {} 113 114 void SetUp() override { 115 if (audio_encoder_) 116 codec_input_rate_hz_ = audio_encoder_->SampleRateHz(); 117 // Create arrays. 118 ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0"; 119 } 120 121 void TearDown() override { 122 delete decoder_; 123 decoder_ = nullptr; 124 } 125 126 virtual void InitEncoder() {} 127 128 // TODO(henrik.lundin) Change return type to size_t once most/all overriding 129 // implementations are gone. 130 virtual int EncodeFrame(const int16_t* input, 131 size_t input_len_samples, 132 Buffer* output) { 133 AudioEncoder::EncodedInfo encoded_info; 134 const size_t samples_per_10ms = audio_encoder_->SampleRateHz() / 100; 135 RTC_CHECK_EQ(samples_per_10ms * audio_encoder_->Num10MsFramesInNextPacket(), 136 input_len_samples); 137 std::unique_ptr<int16_t[]> interleaved_input( 138 new int16_t[channels_ * samples_per_10ms]); 139 for (size_t i = 0; i < audio_encoder_->Num10MsFramesInNextPacket(); ++i) { 140 EXPECT_EQ(0u, encoded_info.encoded_bytes); 141 142 // Duplicate the mono input signal to however many channels the test 143 // wants. 144 test::InputAudioFile::DuplicateInterleaved(input + i * samples_per_10ms, 145 samples_per_10ms, channels_, 146 interleaved_input.get()); 147 148 encoded_info = audio_encoder_->Encode( 149 0, 150 ArrayView<const int16_t>(interleaved_input.get(), 151 audio_encoder_->NumChannels() * 152 audio_encoder_->SampleRateHz() / 100), 153 output); 154 } 155 EXPECT_EQ(payload_type_, encoded_info.payload_type); 156 return static_cast<int>(encoded_info.encoded_bytes); 157 } 158 159 // Encodes and decodes audio. The absolute difference between the input and 160 // output is compared vs `tolerance`, and the mean-squared error is compared 161 // with `mse`. The encoded stream should contain `expected_bytes`. For stereo 162 // audio, the absolute difference between the two channels is compared vs 163 // `channel_diff_tolerance`. 164 void EncodeDecodeTest(size_t expected_bytes, 165 int tolerance, 166 double mse, 167 int delay = 0, 168 int channel_diff_tolerance = 0) { 169 ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0"; 170 ASSERT_GE(channel_diff_tolerance, 0) 171 << "Test must define a channel_diff_tolerance >= 0"; 172 size_t processed_samples = 0u; 173 size_t encoded_bytes = 0u; 174 InitEncoder(); 175 std::vector<int16_t> input; 176 std::vector<int16_t> decoded; 177 while (processed_samples + frame_size_ <= data_length_) { 178 // Extend input vector with `frame_size_`. 179 input.resize(input.size() + frame_size_, 0); 180 // Read from input file. 181 ASSERT_GE(input.size() - processed_samples, frame_size_); 182 ASSERT_TRUE(input_audio_.Read(frame_size_, codec_input_rate_hz_, 183 &input[processed_samples])); 184 Buffer encoded; 185 size_t enc_len = 186 EncodeFrame(&input[processed_samples], frame_size_, &encoded); 187 // Make sure that frame_size_ * channels_ samples are allocated and free. 188 decoded.resize((processed_samples + frame_size_) * channels_, 0); 189 190 const std::vector<AudioDecoder::ParseResult> parse_result = 191 decoder_->ParsePayload(std::move(encoded), /*timestamp=*/0); 192 RTC_CHECK_EQ(parse_result.size(), size_t{1}); 193 auto decode_result = parse_result[0].frame->Decode( 194 ArrayView<int16_t>(&decoded[processed_samples * channels_], 195 frame_size_ * channels_ * sizeof(int16_t))); 196 RTC_CHECK(decode_result.has_value()); 197 EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples); 198 encoded_bytes += enc_len; 199 processed_samples += frame_size_; 200 } 201 // For some codecs it doesn't make sense to check expected number of bytes, 202 // since the number can vary for different platforms. Opus is such a codec. 203 // In this case expected_bytes is set to 0. 204 if (expected_bytes) { 205 EXPECT_EQ(expected_bytes, encoded_bytes); 206 } 207 CompareInputOutput(input, decoded, processed_samples, channels_, tolerance, 208 delay); 209 if (channels_ == 2) 210 CompareTwoChannels(decoded, processed_samples, channels_, 211 channel_diff_tolerance); 212 EXPECT_LE( 213 MseInputOutput(input, decoded, processed_samples, channels_, delay), 214 mse); 215 } 216 217 // Encodes a payload and decodes it twice with decoder re-init before each 218 // decode. Verifies that the decoded result is the same. 219 void ReInitTest() { 220 InitEncoder(); 221 std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]); 222 ASSERT_TRUE( 223 input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get())); 224 std::array<Buffer, 2> encoded; 225 EncodeFrame(input.get(), frame_size_, &encoded[0]); 226 // Make a copy. 227 encoded[1].SetData(encoded[0].data(), encoded[0].size()); 228 229 std::array<std::vector<int16_t>, 2> outputs; 230 for (size_t i = 0; i < outputs.size(); ++i) { 231 outputs[i].resize(frame_size_ * channels_); 232 decoder_->Reset(); 233 const std::vector<AudioDecoder::ParseResult> parse_result = 234 decoder_->ParsePayload(std::move(encoded[i]), /*timestamp=*/0); 235 RTC_CHECK_EQ(parse_result.size(), size_t{1}); 236 auto decode_result = parse_result[0].frame->Decode(outputs[i]); 237 RTC_CHECK(decode_result.has_value()); 238 EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples); 239 } 240 EXPECT_EQ(outputs[0], outputs[1]); 241 } 242 243 // Call DecodePlc and verify that the correct number of samples is produced. 244 void DecodePlcTest() { 245 InitEncoder(); 246 std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]); 247 ASSERT_TRUE( 248 input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get())); 249 Buffer encoded; 250 EncodeFrame(input.get(), frame_size_, &encoded); 251 decoder_->Reset(); 252 std::vector<int16_t> output(frame_size_ * channels_); 253 const std::vector<AudioDecoder::ParseResult> parse_result = 254 decoder_->ParsePayload(std::move(encoded), /*timestamp=*/0); 255 RTC_CHECK_EQ(parse_result.size(), size_t{1}); 256 auto decode_result = parse_result[0].frame->Decode(output); 257 RTC_CHECK(decode_result.has_value()); 258 EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples); 259 // Call DecodePlc and verify that we get one frame of data. 260 // (Overwrite the output from the above Decode call, but that does not 261 // matter.) 262 size_t dec_len = 263 decoder_->DecodePlc(/*num_frames=*/1, /*decoded=*/output.data()); 264 EXPECT_EQ(frame_size_ * channels_, dec_len); 265 } 266 267 test::ResampleInputAudioFile input_audio_; 268 int codec_input_rate_hz_; 269 size_t frame_size_; 270 size_t data_length_; 271 size_t channels_; 272 const int payload_type_; 273 AudioDecoder* decoder_; 274 std::unique_ptr<AudioEncoder> audio_encoder_; 275 }; 276 277 class AudioDecoderPcmUTest : public AudioDecoderTest { 278 protected: 279 AudioDecoderPcmUTest() : AudioDecoderTest() { 280 frame_size_ = 160; 281 data_length_ = 10 * frame_size_; 282 decoder_ = new AudioDecoderPcmU(1); 283 AudioEncoderPcmU::Config config; 284 config.frame_size_ms = static_cast<int>(frame_size_ / 8); 285 config.payload_type = payload_type_; 286 audio_encoder_.reset(new AudioEncoderPcmU(config)); 287 } 288 }; 289 290 class AudioDecoderPcmATest : public AudioDecoderTest { 291 protected: 292 AudioDecoderPcmATest() : AudioDecoderTest() { 293 frame_size_ = 160; 294 data_length_ = 10 * frame_size_; 295 decoder_ = new AudioDecoderPcmA(1); 296 AudioEncoderPcmA::Config config; 297 config.frame_size_ms = static_cast<int>(frame_size_ / 8); 298 config.payload_type = payload_type_; 299 audio_encoder_.reset(new AudioEncoderPcmA(config)); 300 } 301 }; 302 303 class AudioDecoderPcm16BTest : public AudioDecoderTest { 304 protected: 305 AudioDecoderPcm16BTest() : AudioDecoderTest() { 306 codec_input_rate_hz_ = 16000; 307 frame_size_ = 20 * codec_input_rate_hz_ / 1000; 308 data_length_ = 10 * frame_size_; 309 decoder_ = new AudioDecoderPcm16B(codec_input_rate_hz_, 1); 310 RTC_DCHECK(decoder_); 311 AudioEncoderPcm16B::Config config; 312 config.sample_rate_hz = codec_input_rate_hz_; 313 config.frame_size_ms = 314 static_cast<int>(frame_size_ / (config.sample_rate_hz / 1000)); 315 config.payload_type = payload_type_; 316 audio_encoder_.reset(new AudioEncoderPcm16B(config)); 317 } 318 }; 319 320 class AudioDecoderG722Test : public AudioDecoderTest { 321 protected: 322 AudioDecoderG722Test() : AudioDecoderTest() { 323 codec_input_rate_hz_ = 16000; 324 frame_size_ = 160; 325 data_length_ = 10 * frame_size_; 326 decoder_ = new AudioDecoderG722Impl; 327 RTC_DCHECK(decoder_); 328 AudioEncoderG722Config config; 329 config.frame_size_ms = 10; 330 config.num_channels = 1; 331 audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_)); 332 } 333 }; 334 335 class AudioDecoderG722StereoTest : public AudioDecoderTest { 336 protected: 337 AudioDecoderG722StereoTest() : AudioDecoderTest() { 338 channels_ = 2; 339 codec_input_rate_hz_ = 16000; 340 frame_size_ = 160; 341 data_length_ = 10 * frame_size_; 342 decoder_ = new AudioDecoderG722StereoImpl; 343 RTC_DCHECK(decoder_); 344 AudioEncoderG722Config config; 345 config.frame_size_ms = 10; 346 config.num_channels = 2; 347 audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_)); 348 } 349 }; 350 351 class AudioDecoderOpusTest 352 : public AudioDecoderTest, 353 public testing::WithParamInterface<std::tuple<int, int>> { 354 protected: 355 AudioDecoderOpusTest() : AudioDecoderTest() { 356 channels_ = opus_num_channels_; 357 codec_input_rate_hz_ = opus_sample_rate_hz_; 358 frame_size_ = CheckedDivExact(opus_sample_rate_hz_, 100); 359 data_length_ = 10 * frame_size_; 360 decoder_ = new AudioDecoderOpusImpl( 361 CreateTestFieldTrials(), opus_num_channels_, opus_sample_rate_hz_); 362 AudioEncoderOpusConfig config; 363 config.frame_size_ms = 10; 364 config.sample_rate_hz = opus_sample_rate_hz_; 365 config.num_channels = opus_num_channels_; 366 config.application = opus_num_channels_ == 1 367 ? AudioEncoderOpusConfig::ApplicationMode::kVoip 368 : AudioEncoderOpusConfig::ApplicationMode::kAudio; 369 audio_encoder_ = AudioEncoderOpus::MakeAudioEncoder( 370 CreateEnvironment(), config, {.payload_type = payload_type_}); 371 audio_encoder_->OnReceivedOverhead(kOverheadBytesPerPacket); 372 } 373 const int opus_sample_rate_hz_{std::get<0>(GetParam())}; 374 const int opus_num_channels_{std::get<1>(GetParam())}; 375 }; 376 377 INSTANTIATE_TEST_SUITE_P(Param, 378 AudioDecoderOpusTest, 379 testing::Combine(testing::Values(16000, 48000), 380 testing::Values(1, 2))); 381 382 TEST_F(AudioDecoderPcmUTest, EncodeDecode) { 383 int tolerance = 251; 384 double mse = 1734.0; 385 EncodeDecodeTest(data_length_, tolerance, mse); 386 ReInitTest(); 387 EXPECT_FALSE(decoder_->HasDecodePlc()); 388 } 389 390 namespace { 391 int SetAndGetTargetBitrate(AudioEncoder* audio_encoder, int rate) { 392 audio_encoder->OnReceivedUplinkBandwidth(rate, std::nullopt); 393 return audio_encoder->GetTargetBitrate(); 394 } 395 void TestSetAndGetTargetBitratesWithFixedCodec(AudioEncoder* audio_encoder, 396 int fixed_rate) { 397 EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, 32000)); 398 EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate - 1)); 399 EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate)); 400 EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate + 1)); 401 } 402 } // namespace 403 404 TEST_F(AudioDecoderPcmUTest, SetTargetBitrate) { 405 TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000); 406 } 407 408 TEST_F(AudioDecoderPcmATest, EncodeDecode) { 409 int tolerance = 308; 410 double mse = 1931.0; 411 EncodeDecodeTest(data_length_, tolerance, mse); 412 ReInitTest(); 413 EXPECT_FALSE(decoder_->HasDecodePlc()); 414 } 415 416 TEST_F(AudioDecoderPcmATest, SetTargetBitrate) { 417 TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000); 418 } 419 420 TEST_F(AudioDecoderPcm16BTest, EncodeDecode) { 421 int tolerance = 0; 422 double mse = 0.0; 423 EncodeDecodeTest(2 * data_length_, tolerance, mse); 424 ReInitTest(); 425 EXPECT_FALSE(decoder_->HasDecodePlc()); 426 } 427 428 TEST_F(AudioDecoderPcm16BTest, SetTargetBitrate) { 429 TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 430 codec_input_rate_hz_ * 16); 431 } 432 433 // TODO(bugs.webrtc.org/345525069): Either fix/enable or remove G722. 434 #if defined(__has_feature) && __has_feature(undefined_behavior_sanitizer) 435 TEST_F(AudioDecoderG722Test, DISABLED_EncodeDecode) { 436 #else 437 TEST_F(AudioDecoderG722Test, EncodeDecode) { 438 #endif 439 int tolerance = 6176; 440 double mse = 238630.0; 441 int delay = 22; // Delay from input to output. 442 EncodeDecodeTest(data_length_ / 2, tolerance, mse, delay); 443 ReInitTest(); 444 EXPECT_FALSE(decoder_->HasDecodePlc()); 445 } 446 447 TEST_F(AudioDecoderG722Test, SetTargetBitrate) { 448 TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000); 449 } 450 451 // TODO(bugs.webrtc.org/345525069): Either fix/enable or remove G722. 452 #if defined(__has_feature) && __has_feature(undefined_behavior_sanitizer) 453 TEST_F(AudioDecoderG722StereoTest, DISABLED_EncodeDecode) { 454 #else 455 TEST_F(AudioDecoderG722StereoTest, EncodeDecode) { 456 #endif 457 int tolerance = 6176; 458 int channel_diff_tolerance = 0; 459 double mse = 238630.0; 460 int delay = 22; // Delay from input to output. 461 EncodeDecodeTest(data_length_, tolerance, mse, delay, channel_diff_tolerance); 462 ReInitTest(); 463 EXPECT_FALSE(decoder_->HasDecodePlc()); 464 } 465 466 TEST_F(AudioDecoderG722StereoTest, SetTargetBitrate) { 467 TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 128000); 468 } 469 470 // TODO(http://bugs.webrtc.org/12518): Enable the test after Opus has been 471 // updated. 472 TEST_P(AudioDecoderOpusTest, DISABLED_EncodeDecode) { 473 constexpr int tolerance = 6176; 474 constexpr int channel_diff_tolerance = 6; 475 constexpr double mse = 238630.0; 476 constexpr int delay = 22; // Delay from input to output. 477 EncodeDecodeTest(0, tolerance, mse, delay, channel_diff_tolerance); 478 ReInitTest(); 479 EXPECT_FALSE(decoder_->HasDecodePlc()); 480 } 481 482 TEST_P(AudioDecoderOpusTest, SetTargetBitrate) { 483 const int overhead_rate = 484 8 * kOverheadBytesPerPacket * codec_input_rate_hz_ / frame_size_; 485 EXPECT_EQ(6000, 486 SetAndGetTargetBitrate(audio_encoder_.get(), 5999 + overhead_rate)); 487 EXPECT_EQ(6000, 488 SetAndGetTargetBitrate(audio_encoder_.get(), 6000 + overhead_rate)); 489 EXPECT_EQ(32000, SetAndGetTargetBitrate(audio_encoder_.get(), 490 32000 + overhead_rate)); 491 EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder_.get(), 492 510000 + overhead_rate)); 493 EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder_.get(), 494 511000 + overhead_rate)); 495 } 496 497 } // namespace webrtc