tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

audio_decoder_unittest.cc (19314B)


      1 /*
      2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "api/audio_codecs/audio_decoder.h"
     12 
     13 #include <array>
     14 #include <cstdint>
     15 #include <cstdlib>
     16 #include <memory>
     17 #include <optional>
     18 #include <tuple>
     19 #include <utility>
     20 #include <vector>
     21 
     22 #include "api/array_view.h"
     23 #include "api/audio_codecs/audio_encoder.h"
     24 #include "api/audio_codecs/g722/audio_encoder_g722_config.h"
     25 #include "api/audio_codecs/opus/audio_encoder_opus.h"
     26 #include "api/audio_codecs/opus/audio_encoder_opus_config.h"
     27 #include "api/environment/environment_factory.h"
     28 #include "modules/audio_coding/codecs/g711/audio_decoder_pcm.h"
     29 #include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h"
     30 #include "modules/audio_coding/codecs/g722/audio_decoder_g722.h"
     31 #include "modules/audio_coding/codecs/g722/audio_encoder_g722.h"
     32 #include "modules/audio_coding/codecs/opus/audio_decoder_opus.h"
     33 #include "modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h"
     34 #include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h"
     35 #include "modules/audio_coding/neteq/tools/input_audio_file.h"
     36 #include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
     37 #include "rtc_base/buffer.h"
     38 #include "rtc_base/checks.h"
     39 #include "test/create_test_field_trials.h"
     40 #include "test/gtest.h"
     41 #include "test/testsupport/file_utils.h"
     42 
     43 namespace webrtc {
     44 
     45 namespace {
     46 
     47 constexpr int kOverheadBytesPerPacket = 50;
     48 
     49 // The absolute difference between the input and output (the first channel) is
     50 // compared vs `tolerance`. The parameter `delay` is used to correct for codec
     51 // delays.
     52 void CompareInputOutput(const std::vector<int16_t>& input,
     53                        const std::vector<int16_t>& output,
     54                        size_t num_samples,
     55                        size_t channels,
     56                        int tolerance,
     57                        int delay) {
     58  ASSERT_LE(num_samples, input.size());
     59  ASSERT_LE(num_samples * channels, output.size());
     60  for (unsigned int n = 0; n < num_samples - delay; ++n) {
     61    ASSERT_NEAR(input[n], output[channels * n + delay], tolerance)
     62        << "Exit test on first diff; n = " << n;
     63  }
     64 }
     65 
     66 // The absolute difference between the first two channels in `output` is
     67 // compared vs `tolerance`.
     68 void CompareTwoChannels(const std::vector<int16_t>& output,
     69                        size_t samples_per_channel,
     70                        size_t channels,
     71                        int tolerance) {
     72  ASSERT_GE(channels, 2u);
     73  ASSERT_LE(samples_per_channel * channels, output.size());
     74  for (unsigned int n = 0; n < samples_per_channel; ++n)
     75    ASSERT_NEAR(output[channels * n], output[channels * n + 1], tolerance)
     76        << "Stereo samples differ.";
     77 }
     78 
     79 // Calculates mean-squared error between input and output (the first channel).
     80 // The parameter `delay` is used to correct for codec delays.
     81 double MseInputOutput(const std::vector<int16_t>& input,
     82                      const std::vector<int16_t>& output,
     83                      size_t num_samples,
     84                      size_t channels,
     85                      int delay) {
     86  RTC_DCHECK_LT(delay, static_cast<int>(num_samples));
     87  RTC_DCHECK_LE(num_samples, input.size());
     88  RTC_DCHECK_LE(num_samples * channels, output.size());
     89  if (num_samples == 0)
     90    return 0.0;
     91  double squared_sum = 0.0;
     92  for (unsigned int n = 0; n < num_samples - delay; ++n) {
     93    squared_sum += (input[n] - output[channels * n + delay]) *
     94                   (input[n] - output[channels * n + delay]);
     95  }
     96  return squared_sum / (num_samples - delay);
     97 }
     98 }  // namespace
     99 
    100 class AudioDecoderTest : public ::testing::Test {
    101 protected:
    102  AudioDecoderTest()
    103      : input_audio_(test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
    104                     32000),
    105        codec_input_rate_hz_(32000),  // Legacy default value.
    106        frame_size_(0),
    107        data_length_(0),
    108        channels_(1),
    109        payload_type_(17),
    110        decoder_(nullptr) {}
    111 
    112  ~AudioDecoderTest() override {}
    113 
    114  void SetUp() override {
    115    if (audio_encoder_)
    116      codec_input_rate_hz_ = audio_encoder_->SampleRateHz();
    117    // Create arrays.
    118    ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0";
    119  }
    120 
    121  void TearDown() override {
    122    delete decoder_;
    123    decoder_ = nullptr;
    124  }
    125 
    126  virtual void InitEncoder() {}
    127 
    128  // TODO(henrik.lundin) Change return type to size_t once most/all overriding
    129  // implementations are gone.
    130  virtual int EncodeFrame(const int16_t* input,
    131                          size_t input_len_samples,
    132                          Buffer* output) {
    133    AudioEncoder::EncodedInfo encoded_info;
    134    const size_t samples_per_10ms = audio_encoder_->SampleRateHz() / 100;
    135    RTC_CHECK_EQ(samples_per_10ms * audio_encoder_->Num10MsFramesInNextPacket(),
    136                 input_len_samples);
    137    std::unique_ptr<int16_t[]> interleaved_input(
    138        new int16_t[channels_ * samples_per_10ms]);
    139    for (size_t i = 0; i < audio_encoder_->Num10MsFramesInNextPacket(); ++i) {
    140      EXPECT_EQ(0u, encoded_info.encoded_bytes);
    141 
    142      // Duplicate the mono input signal to however many channels the test
    143      // wants.
    144      test::InputAudioFile::DuplicateInterleaved(input + i * samples_per_10ms,
    145                                                 samples_per_10ms, channels_,
    146                                                 interleaved_input.get());
    147 
    148      encoded_info = audio_encoder_->Encode(
    149          0,
    150          ArrayView<const int16_t>(interleaved_input.get(),
    151                                   audio_encoder_->NumChannels() *
    152                                       audio_encoder_->SampleRateHz() / 100),
    153          output);
    154    }
    155    EXPECT_EQ(payload_type_, encoded_info.payload_type);
    156    return static_cast<int>(encoded_info.encoded_bytes);
    157  }
    158 
    159  // Encodes and decodes audio. The absolute difference between the input and
    160  // output is compared vs `tolerance`, and the mean-squared error is compared
    161  // with `mse`. The encoded stream should contain `expected_bytes`. For stereo
    162  // audio, the absolute difference between the two channels is compared vs
    163  // `channel_diff_tolerance`.
    164  void EncodeDecodeTest(size_t expected_bytes,
    165                        int tolerance,
    166                        double mse,
    167                        int delay = 0,
    168                        int channel_diff_tolerance = 0) {
    169    ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0";
    170    ASSERT_GE(channel_diff_tolerance, 0)
    171        << "Test must define a channel_diff_tolerance >= 0";
    172    size_t processed_samples = 0u;
    173    size_t encoded_bytes = 0u;
    174    InitEncoder();
    175    std::vector<int16_t> input;
    176    std::vector<int16_t> decoded;
    177    while (processed_samples + frame_size_ <= data_length_) {
    178      // Extend input vector with `frame_size_`.
    179      input.resize(input.size() + frame_size_, 0);
    180      // Read from input file.
    181      ASSERT_GE(input.size() - processed_samples, frame_size_);
    182      ASSERT_TRUE(input_audio_.Read(frame_size_, codec_input_rate_hz_,
    183                                    &input[processed_samples]));
    184      Buffer encoded;
    185      size_t enc_len =
    186          EncodeFrame(&input[processed_samples], frame_size_, &encoded);
    187      // Make sure that frame_size_ * channels_ samples are allocated and free.
    188      decoded.resize((processed_samples + frame_size_) * channels_, 0);
    189 
    190      const std::vector<AudioDecoder::ParseResult> parse_result =
    191          decoder_->ParsePayload(std::move(encoded), /*timestamp=*/0);
    192      RTC_CHECK_EQ(parse_result.size(), size_t{1});
    193      auto decode_result = parse_result[0].frame->Decode(
    194          ArrayView<int16_t>(&decoded[processed_samples * channels_],
    195                             frame_size_ * channels_ * sizeof(int16_t)));
    196      RTC_CHECK(decode_result.has_value());
    197      EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples);
    198      encoded_bytes += enc_len;
    199      processed_samples += frame_size_;
    200    }
    201    // For some codecs it doesn't make sense to check expected number of bytes,
    202    // since the number can vary for different platforms. Opus is such a codec.
    203    // In this case expected_bytes is set to 0.
    204    if (expected_bytes) {
    205      EXPECT_EQ(expected_bytes, encoded_bytes);
    206    }
    207    CompareInputOutput(input, decoded, processed_samples, channels_, tolerance,
    208                       delay);
    209    if (channels_ == 2)
    210      CompareTwoChannels(decoded, processed_samples, channels_,
    211                         channel_diff_tolerance);
    212    EXPECT_LE(
    213        MseInputOutput(input, decoded, processed_samples, channels_, delay),
    214        mse);
    215  }
    216 
    217  // Encodes a payload and decodes it twice with decoder re-init before each
    218  // decode. Verifies that the decoded result is the same.
    219  void ReInitTest() {
    220    InitEncoder();
    221    std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]);
    222    ASSERT_TRUE(
    223        input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
    224    std::array<Buffer, 2> encoded;
    225    EncodeFrame(input.get(), frame_size_, &encoded[0]);
    226    // Make a copy.
    227    encoded[1].SetData(encoded[0].data(), encoded[0].size());
    228 
    229    std::array<std::vector<int16_t>, 2> outputs;
    230    for (size_t i = 0; i < outputs.size(); ++i) {
    231      outputs[i].resize(frame_size_ * channels_);
    232      decoder_->Reset();
    233      const std::vector<AudioDecoder::ParseResult> parse_result =
    234          decoder_->ParsePayload(std::move(encoded[i]), /*timestamp=*/0);
    235      RTC_CHECK_EQ(parse_result.size(), size_t{1});
    236      auto decode_result = parse_result[0].frame->Decode(outputs[i]);
    237      RTC_CHECK(decode_result.has_value());
    238      EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples);
    239    }
    240    EXPECT_EQ(outputs[0], outputs[1]);
    241  }
    242 
    243  // Call DecodePlc and verify that the correct number of samples is produced.
    244  void DecodePlcTest() {
    245    InitEncoder();
    246    std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]);
    247    ASSERT_TRUE(
    248        input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
    249    Buffer encoded;
    250    EncodeFrame(input.get(), frame_size_, &encoded);
    251    decoder_->Reset();
    252    std::vector<int16_t> output(frame_size_ * channels_);
    253    const std::vector<AudioDecoder::ParseResult> parse_result =
    254        decoder_->ParsePayload(std::move(encoded), /*timestamp=*/0);
    255    RTC_CHECK_EQ(parse_result.size(), size_t{1});
    256    auto decode_result = parse_result[0].frame->Decode(output);
    257    RTC_CHECK(decode_result.has_value());
    258    EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples);
    259    // Call DecodePlc and verify that we get one frame of data.
    260    // (Overwrite the output from the above Decode call, but that does not
    261    // matter.)
    262    size_t dec_len =
    263        decoder_->DecodePlc(/*num_frames=*/1, /*decoded=*/output.data());
    264    EXPECT_EQ(frame_size_ * channels_, dec_len);
    265  }
    266 
    267  test::ResampleInputAudioFile input_audio_;
    268  int codec_input_rate_hz_;
    269  size_t frame_size_;
    270  size_t data_length_;
    271  size_t channels_;
    272  const int payload_type_;
    273  AudioDecoder* decoder_;
    274  std::unique_ptr<AudioEncoder> audio_encoder_;
    275 };
    276 
    277 class AudioDecoderPcmUTest : public AudioDecoderTest {
    278 protected:
    279  AudioDecoderPcmUTest() : AudioDecoderTest() {
    280    frame_size_ = 160;
    281    data_length_ = 10 * frame_size_;
    282    decoder_ = new AudioDecoderPcmU(1);
    283    AudioEncoderPcmU::Config config;
    284    config.frame_size_ms = static_cast<int>(frame_size_ / 8);
    285    config.payload_type = payload_type_;
    286    audio_encoder_.reset(new AudioEncoderPcmU(config));
    287  }
    288 };
    289 
    290 class AudioDecoderPcmATest : public AudioDecoderTest {
    291 protected:
    292  AudioDecoderPcmATest() : AudioDecoderTest() {
    293    frame_size_ = 160;
    294    data_length_ = 10 * frame_size_;
    295    decoder_ = new AudioDecoderPcmA(1);
    296    AudioEncoderPcmA::Config config;
    297    config.frame_size_ms = static_cast<int>(frame_size_ / 8);
    298    config.payload_type = payload_type_;
    299    audio_encoder_.reset(new AudioEncoderPcmA(config));
    300  }
    301 };
    302 
    303 class AudioDecoderPcm16BTest : public AudioDecoderTest {
    304 protected:
    305  AudioDecoderPcm16BTest() : AudioDecoderTest() {
    306    codec_input_rate_hz_ = 16000;
    307    frame_size_ = 20 * codec_input_rate_hz_ / 1000;
    308    data_length_ = 10 * frame_size_;
    309    decoder_ = new AudioDecoderPcm16B(codec_input_rate_hz_, 1);
    310    RTC_DCHECK(decoder_);
    311    AudioEncoderPcm16B::Config config;
    312    config.sample_rate_hz = codec_input_rate_hz_;
    313    config.frame_size_ms =
    314        static_cast<int>(frame_size_ / (config.sample_rate_hz / 1000));
    315    config.payload_type = payload_type_;
    316    audio_encoder_.reset(new AudioEncoderPcm16B(config));
    317  }
    318 };
    319 
    320 class AudioDecoderG722Test : public AudioDecoderTest {
    321 protected:
    322  AudioDecoderG722Test() : AudioDecoderTest() {
    323    codec_input_rate_hz_ = 16000;
    324    frame_size_ = 160;
    325    data_length_ = 10 * frame_size_;
    326    decoder_ = new AudioDecoderG722Impl;
    327    RTC_DCHECK(decoder_);
    328    AudioEncoderG722Config config;
    329    config.frame_size_ms = 10;
    330    config.num_channels = 1;
    331    audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_));
    332  }
    333 };
    334 
    335 class AudioDecoderG722StereoTest : public AudioDecoderTest {
    336 protected:
    337  AudioDecoderG722StereoTest() : AudioDecoderTest() {
    338    channels_ = 2;
    339    codec_input_rate_hz_ = 16000;
    340    frame_size_ = 160;
    341    data_length_ = 10 * frame_size_;
    342    decoder_ = new AudioDecoderG722StereoImpl;
    343    RTC_DCHECK(decoder_);
    344    AudioEncoderG722Config config;
    345    config.frame_size_ms = 10;
    346    config.num_channels = 2;
    347    audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_));
    348  }
    349 };
    350 
    351 class AudioDecoderOpusTest
    352    : public AudioDecoderTest,
    353      public testing::WithParamInterface<std::tuple<int, int>> {
    354 protected:
    355  AudioDecoderOpusTest() : AudioDecoderTest() {
    356    channels_ = opus_num_channels_;
    357    codec_input_rate_hz_ = opus_sample_rate_hz_;
    358    frame_size_ = CheckedDivExact(opus_sample_rate_hz_, 100);
    359    data_length_ = 10 * frame_size_;
    360    decoder_ = new AudioDecoderOpusImpl(
    361        CreateTestFieldTrials(), opus_num_channels_, opus_sample_rate_hz_);
    362    AudioEncoderOpusConfig config;
    363    config.frame_size_ms = 10;
    364    config.sample_rate_hz = opus_sample_rate_hz_;
    365    config.num_channels = opus_num_channels_;
    366    config.application = opus_num_channels_ == 1
    367                             ? AudioEncoderOpusConfig::ApplicationMode::kVoip
    368                             : AudioEncoderOpusConfig::ApplicationMode::kAudio;
    369    audio_encoder_ = AudioEncoderOpus::MakeAudioEncoder(
    370        CreateEnvironment(), config, {.payload_type = payload_type_});
    371    audio_encoder_->OnReceivedOverhead(kOverheadBytesPerPacket);
    372  }
    373  const int opus_sample_rate_hz_{std::get<0>(GetParam())};
    374  const int opus_num_channels_{std::get<1>(GetParam())};
    375 };
    376 
// Instantiates the AudioDecoderOpusTest cases for every combination of the
// values listed below. The fixture reads tuple element 0 as the Opus sample
// rate in Hz and tuple element 1 as the number of channels.
INSTANTIATE_TEST_SUITE_P(Param,
                         AudioDecoderOpusTest,
                         testing::Combine(testing::Values(16000, 48000),
                                          testing::Values(1, 2)));
    381 
    382 TEST_F(AudioDecoderPcmUTest, EncodeDecode) {
    383  int tolerance = 251;
    384  double mse = 1734.0;
    385  EncodeDecodeTest(data_length_, tolerance, mse);
    386  ReInitTest();
    387  EXPECT_FALSE(decoder_->HasDecodePlc());
    388 }
    389 
    390 namespace {
    391 int SetAndGetTargetBitrate(AudioEncoder* audio_encoder, int rate) {
    392  audio_encoder->OnReceivedUplinkBandwidth(rate, std::nullopt);
    393  return audio_encoder->GetTargetBitrate();
    394 }
    395 void TestSetAndGetTargetBitratesWithFixedCodec(AudioEncoder* audio_encoder,
    396                                               int fixed_rate) {
    397  EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, 32000));
    398  EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate - 1));
    399  EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate));
    400  EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate + 1));
    401 }
    402 }  // namespace
    403 
    404 TEST_F(AudioDecoderPcmUTest, SetTargetBitrate) {
    405  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000);
    406 }
    407 
    408 TEST_F(AudioDecoderPcmATest, EncodeDecode) {
    409  int tolerance = 308;
    410  double mse = 1931.0;
    411  EncodeDecodeTest(data_length_, tolerance, mse);
    412  ReInitTest();
    413  EXPECT_FALSE(decoder_->HasDecodePlc());
    414 }
    415 
    416 TEST_F(AudioDecoderPcmATest, SetTargetBitrate) {
    417  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000);
    418 }
    419 
    420 TEST_F(AudioDecoderPcm16BTest, EncodeDecode) {
    421  int tolerance = 0;
    422  double mse = 0.0;
    423  EncodeDecodeTest(2 * data_length_, tolerance, mse);
    424  ReInitTest();
    425  EXPECT_FALSE(decoder_->HasDecodePlc());
    426 }
    427 
    428 TEST_F(AudioDecoderPcm16BTest, SetTargetBitrate) {
    429  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(),
    430                                            codec_input_rate_hz_ * 16);
    431 }
    432 
    433 // TODO(bugs.webrtc.org/345525069): Either fix/enable or remove G722.
    434 #if defined(__has_feature) && __has_feature(undefined_behavior_sanitizer)
    435 TEST_F(AudioDecoderG722Test, DISABLED_EncodeDecode) {
    436 #else
    437 TEST_F(AudioDecoderG722Test, EncodeDecode) {
    438 #endif
    439  int tolerance = 6176;
    440  double mse = 238630.0;
    441  int delay = 22;  // Delay from input to output.
    442  EncodeDecodeTest(data_length_ / 2, tolerance, mse, delay);
    443  ReInitTest();
    444  EXPECT_FALSE(decoder_->HasDecodePlc());
    445 }
    446 
    447 TEST_F(AudioDecoderG722Test, SetTargetBitrate) {
    448  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000);
    449 }
    450 
    451 // TODO(bugs.webrtc.org/345525069): Either fix/enable or remove G722.
    452 #if defined(__has_feature) && __has_feature(undefined_behavior_sanitizer)
    453 TEST_F(AudioDecoderG722StereoTest, DISABLED_EncodeDecode) {
    454 #else
    455 TEST_F(AudioDecoderG722StereoTest, EncodeDecode) {
    456 #endif
    457  int tolerance = 6176;
    458  int channel_diff_tolerance = 0;
    459  double mse = 238630.0;
    460  int delay = 22;  // Delay from input to output.
    461  EncodeDecodeTest(data_length_, tolerance, mse, delay, channel_diff_tolerance);
    462  ReInitTest();
    463  EXPECT_FALSE(decoder_->HasDecodePlc());
    464 }
    465 
    466 TEST_F(AudioDecoderG722StereoTest, SetTargetBitrate) {
    467  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 128000);
    468 }
    469 
    470 // TODO(http://bugs.webrtc.org/12518): Enable the test after Opus has been
    471 // updated.
    472 TEST_P(AudioDecoderOpusTest, DISABLED_EncodeDecode) {
    473  constexpr int tolerance = 6176;
    474  constexpr int channel_diff_tolerance = 6;
    475  constexpr double mse = 238630.0;
    476  constexpr int delay = 22;  // Delay from input to output.
    477  EncodeDecodeTest(0, tolerance, mse, delay, channel_diff_tolerance);
    478  ReInitTest();
    479  EXPECT_FALSE(decoder_->HasDecodePlc());
    480 }
    481 
    482 TEST_P(AudioDecoderOpusTest, SetTargetBitrate) {
    483  const int overhead_rate =
    484      8 * kOverheadBytesPerPacket * codec_input_rate_hz_ / frame_size_;
    485  EXPECT_EQ(6000,
    486            SetAndGetTargetBitrate(audio_encoder_.get(), 5999 + overhead_rate));
    487  EXPECT_EQ(6000,
    488            SetAndGetTargetBitrate(audio_encoder_.get(), 6000 + overhead_rate));
    489  EXPECT_EQ(32000, SetAndGetTargetBitrate(audio_encoder_.get(),
    490                                          32000 + overhead_rate));
    491  EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder_.get(),
    492                                           510000 + overhead_rate));
    493  EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder_.get(),
    494                                           511000 + overhead_rate));
    495 }
    496 
    497 }  // namespace webrtc