neteq_decoding_test.cc (16126B)
1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_coding/neteq/test/neteq_decoding_test.h" 12 13 #include <cstddef> 14 #include <cstdint> 15 #include <optional> 16 #include <set> 17 #include <string> 18 19 #include "absl/strings/string_view.h" 20 #include "api/array_view.h" 21 #include "api/audio/audio_frame.h" 22 #include "api/audio_codecs/audio_format.h" 23 #include "api/audio_codecs/builtin_audio_decoder_factory.h" 24 #include "api/environment/environment_factory.h" 25 #include "api/neteq/default_neteq_factory.h" 26 #include "api/neteq/neteq.h" 27 #include "api/rtp_headers.h" 28 #include "api/units/timestamp.h" 29 #include "modules/audio_coding/neteq/test/result_sink.h" 30 #include "modules/audio_coding/neteq/tools/rtp_file_source.h" 31 #include "rtc_base/strings/string_builder.h" 32 #include "test/gtest.h" 33 #include "test/testsupport/file_utils.h" 34 35 #ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT 36 37 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD 38 #include "external/webrtc/webrtc/modules/audio_coding/neteq/neteq_unittest.pb.h" 39 #else 40 #include "modules/audio_coding/neteq/neteq_unittest.pb.h" 41 #endif 42 43 #endif 44 45 namespace webrtc { 46 47 namespace { 48 49 void LoadDecoders(NetEq* neteq) { 50 ASSERT_EQ(true, 51 neteq->RegisterPayloadType(0, SdpAudioFormat("pcmu", 8000, 1))); 52 ASSERT_EQ(true, 53 neteq->RegisterPayloadType(8, SdpAudioFormat("pcma", 8000, 1))); 54 #ifdef WEBRTC_CODEC_OPUS 55 ASSERT_EQ(true, 56 neteq->RegisterPayloadType( 57 111, SdpAudioFormat("opus", 48000, 2, {{"stereo", "0"}}))); 58 #endif 59 ASSERT_EQ(true, 60 neteq->RegisterPayloadType(93, SdpAudioFormat("L16", 8000, 1))); 61 ASSERT_EQ(true, 62 neteq->RegisterPayloadType(94, SdpAudioFormat("L16", 16000, 1))); 63 ASSERT_EQ(true, 64 neteq->RegisterPayloadType(95, SdpAudioFormat("L16", 32000, 1))); 65 ASSERT_EQ(true, 66 neteq->RegisterPayloadType(13, SdpAudioFormat("cn", 8000, 1))); 67 ASSERT_EQ(true, 68 neteq->RegisterPayloadType(98, SdpAudioFormat("cn", 16000, 1))); 69 } 70 71 } // namespace 72 73 NetEqDecodingTest::NetEqDecodingTest() 74 : clock_(0), 75 env_(CreateEnvironment(&clock_)), 76 config_(), 77 output_sample_rate_(kInitSampleRateHz), 78 algorithmic_delay_ms_(0) { 79 config_.sample_rate_hz = kInitSampleRateHz; 80 } 81 82 void NetEqDecodingTest::SetUp() { 83 neteq_ = DefaultNetEqFactory().Create(env_, config_, 84 CreateBuiltinAudioDecoderFactory()); 85 NetEqNetworkStatistics stat; 86 ASSERT_EQ(0, neteq_->NetworkStatistics(&stat)); 87 algorithmic_delay_ms_ = stat.current_buffer_size_ms; 88 ASSERT_TRUE(neteq_); 89 LoadDecoders(neteq_.get()); 90 } 91 92 void NetEqDecodingTest::TearDown() {} 93 94 void NetEqDecodingTest::OpenInputFile(absl::string_view rtp_file) { 95 rtp_source_.reset(test::RtpFileSource::Create(rtp_file)); 96 } 97 98 void NetEqDecodingTest::Process() { 99 // Check if time to receive. 100 while (packet_ && clock_.CurrentTime() >= packet_->arrival_time()) { 101 if (packet_->payload_size() > 0) { 102 RTPHeader rtp_header; 103 packet_->GetHeader(&rtp_header); 104 ASSERT_EQ(0, neteq_->InsertPacket(rtp_header, packet_->payload(), 105 clock_.CurrentTime())); 106 } 107 // Get next packet. 108 packet_ = rtp_source_->NextPacket(); 109 } 110 111 // Get audio from NetEq. 112 bool muted; 113 ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); 114 ASSERT_FALSE(muted); 115 ASSERT_TRUE((out_frame_.samples_per_channel_ == kBlockSize8kHz) || 116 (out_frame_.samples_per_channel_ == kBlockSize16kHz) || 117 (out_frame_.samples_per_channel_ == kBlockSize32kHz) || 118 (out_frame_.samples_per_channel_ == kBlockSize48kHz)); 119 output_sample_rate_ = out_frame_.sample_rate_hz_; 120 EXPECT_EQ(output_sample_rate_, neteq_->last_output_sample_rate_hz()); 121 122 // Increase time. 123 clock_.AdvanceTimeMilliseconds(kTimeStepMs); 124 } 125 126 void NetEqDecodingTest::DecodeAndCompare( 127 absl::string_view rtp_file, 128 absl::string_view output_checksum, 129 absl::string_view network_stats_checksum, 130 bool gen_ref) { 131 OpenInputFile(rtp_file); 132 133 std::string ref_out_file = 134 gen_ref ? test::OutputPath() + "neteq_universal_ref.pcm" : ""; 135 ResultSink output(ref_out_file); 136 137 std::string stat_out_file = 138 gen_ref ? test::OutputPath() + "neteq_network_stats.dat" : ""; 139 ResultSink network_stats(stat_out_file); 140 141 packet_ = rtp_source_->NextPacket(); 142 int i = 0; 143 uint64_t last_concealed_samples = 0; 144 uint64_t last_total_samples_received = 0; 145 while (packet_) { 146 StringBuilder ss; 147 ss << "Lap number " << i++ << " in DecodeAndCompare while loop"; 148 SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. 149 ASSERT_NO_FATAL_FAILURE(Process()); 150 ASSERT_NO_FATAL_FAILURE( 151 output.AddResult(out_frame_.data(), out_frame_.samples_per_channel_)); 152 153 // Query the network statistics API once per second 154 if (clock_.TimeInMilliseconds() % 1000 == 0) { 155 // Process NetworkStatistics. 156 NetEqNetworkStatistics current_network_stats; 157 ASSERT_EQ(0, neteq_->NetworkStatistics(¤t_network_stats)); 158 ASSERT_NO_FATAL_FAILURE(network_stats.AddResult(current_network_stats)); 159 160 // Verify that liftime stats and network stats report similar loss 161 // concealment rates. 162 auto lifetime_stats = neteq_->GetLifetimeStatistics(); 163 const uint64_t delta_concealed_samples = 164 lifetime_stats.concealed_samples - last_concealed_samples; 165 last_concealed_samples = lifetime_stats.concealed_samples; 166 const uint64_t delta_total_samples_received = 167 lifetime_stats.total_samples_received - last_total_samples_received; 168 last_total_samples_received = lifetime_stats.total_samples_received; 169 // The tolerance is 1% but expressed in Q14. 170 EXPECT_NEAR( 171 (delta_concealed_samples << 14) / delta_total_samples_received, 172 current_network_stats.expand_rate, (2 << 14) / 100.0); 173 } 174 } 175 176 SCOPED_TRACE("Check output audio."); 177 output.VerifyChecksum(output_checksum); 178 SCOPED_TRACE("Check network stats."); 179 network_stats.VerifyChecksum(network_stats_checksum); 180 } 181 182 void NetEqDecodingTest::PopulateRtpInfo(int frame_index, 183 int timestamp, 184 RTPHeader* rtp_info) { 185 rtp_info->sequenceNumber = frame_index; 186 rtp_info->timestamp = timestamp; 187 rtp_info->ssrc = 0x1234; // Just an arbitrary SSRC. 188 rtp_info->payloadType = 94; // PCM16b WB codec. 189 rtp_info->markerBit = false; 190 } 191 192 void NetEqDecodingTest::PopulateCng(int frame_index, 193 int timestamp, 194 RTPHeader* rtp_info, 195 uint8_t* payload, 196 size_t* payload_len) { 197 rtp_info->sequenceNumber = frame_index; 198 rtp_info->timestamp = timestamp; 199 rtp_info->ssrc = 0x1234; // Just an arbitrary SSRC. 200 rtp_info->payloadType = 98; // WB CNG. 201 rtp_info->markerBit = false; 202 payload[0] = 64; // Noise level -64 dBov, quite arbitrarily chosen. 203 *payload_len = 1; // Only noise level, no spectral parameters. 204 } 205 206 void NetEqDecodingTest::WrapTest(uint16_t start_seq_no, 207 uint32_t start_timestamp, 208 const std::set<uint16_t>& drop_seq_numbers, 209 bool expect_seq_no_wrap, 210 bool expect_timestamp_wrap) { 211 uint16_t seq_no = start_seq_no; 212 uint32_t timestamp = start_timestamp; 213 const int kBlocksPerFrame = 3; // Number of 10 ms blocks per frame. 214 const int kFrameSizeMs = kBlocksPerFrame * kTimeStepMs; 215 const int kSamples = kBlockSize16kHz * kBlocksPerFrame; 216 const size_t kPayloadBytes = kSamples * sizeof(int16_t); 217 double next_input_time_ms = 0.0; 218 219 // Insert speech for 2 seconds. 220 const int kSpeechDurationMs = 2000; 221 uint16_t last_seq_no; 222 uint32_t last_timestamp; 223 bool timestamp_wrapped = false; 224 bool seq_no_wrapped = false; 225 for (double t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) { 226 // Each turn in this for loop is 10 ms. 227 while (next_input_time_ms <= t_ms) { 228 // Insert one 30 ms speech frame. 229 uint8_t payload[kPayloadBytes] = {0}; 230 RTPHeader rtp_info; 231 PopulateRtpInfo(seq_no, timestamp, &rtp_info); 232 if (drop_seq_numbers.find(seq_no) == drop_seq_numbers.end()) { 233 // This sequence number was not in the set to drop. Insert it. 234 ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 235 Timestamp::Millis(t_ms))); 236 } 237 NetEqNetworkStatistics network_stats; 238 ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats)); 239 240 EXPECT_LE(network_stats.preferred_buffer_size_ms, 80); 241 EXPECT_LE(network_stats.current_buffer_size_ms, 242 80 + algorithmic_delay_ms_); 243 last_seq_no = seq_no; 244 last_timestamp = timestamp; 245 246 ++seq_no; 247 timestamp += kSamples; 248 next_input_time_ms += static_cast<double>(kFrameSizeMs); 249 250 seq_no_wrapped |= seq_no < last_seq_no; 251 timestamp_wrapped |= timestamp < last_timestamp; 252 } 253 // Pull out data once. 254 AudioFrame output; 255 bool muted; 256 ASSERT_EQ(0, neteq_->GetAudio(&output, &muted)); 257 ASSERT_EQ(kBlockSize16kHz, output.samples_per_channel_); 258 ASSERT_EQ(1u, output.num_channels_); 259 260 // Expect delay (in samples) to be less than 2 packets. 261 std::optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp(); 262 ASSERT_TRUE(playout_timestamp); 263 EXPECT_LE(timestamp - *playout_timestamp, 264 static_cast<uint32_t>(kSamples * 2)); 265 } 266 // Make sure we have actually tested wrap-around. 267 ASSERT_EQ(expect_seq_no_wrap, seq_no_wrapped); 268 ASSERT_EQ(expect_timestamp_wrap, timestamp_wrapped); 269 } 270 271 void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor, 272 double network_freeze_ms, 273 bool pull_audio_during_freeze, 274 int delay_tolerance_ms, 275 int max_time_to_speech_ms) { 276 uint16_t seq_no = 0; 277 uint32_t timestamp = 0; 278 const int kFrameSizeMs = 30; 279 const size_t kSamples = kFrameSizeMs * 16; 280 const size_t kPayloadBytes = kSamples * 2; 281 double next_input_time_ms = 0.0; 282 double t_ms; 283 bool muted; 284 285 // Insert speech for 5 seconds. 286 const int kSpeechDurationMs = 5000; 287 for (t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) { 288 // Each turn in this for loop is 10 ms. 289 while (next_input_time_ms <= t_ms) { 290 // Insert one 30 ms speech frame. 291 uint8_t payload[kPayloadBytes] = {0}; 292 RTPHeader rtp_info; 293 PopulateRtpInfo(seq_no, timestamp, &rtp_info); 294 ASSERT_EQ( 295 0, neteq_->InsertPacket(rtp_info, payload, Timestamp::Millis(t_ms))); 296 ++seq_no; 297 timestamp += kSamples; 298 next_input_time_ms += static_cast<double>(kFrameSizeMs) * drift_factor; 299 } 300 // Pull out data once. 301 ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); 302 ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); 303 } 304 305 EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_); 306 std::optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp(); 307 ASSERT_TRUE(playout_timestamp); 308 int32_t delay_before = timestamp - *playout_timestamp; 309 310 // Insert CNG for 1 minute (= 60000 ms). 311 const int kCngPeriodMs = 100; 312 const int kCngPeriodSamples = kCngPeriodMs * 16; // Period in 16 kHz samples. 313 const int kCngDurationMs = 60000; 314 for (; t_ms < kSpeechDurationMs + kCngDurationMs; t_ms += 10) { 315 // Each turn in this for loop is 10 ms. 316 while (next_input_time_ms <= t_ms) { 317 // Insert one CNG frame each 100 ms. 318 uint8_t payload[kPayloadBytes]; 319 size_t payload_len; 320 RTPHeader rtp_info; 321 PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len); 322 ASSERT_EQ(0, neteq_->InsertPacket( 323 rtp_info, ArrayView<const uint8_t>(payload, payload_len), 324 Timestamp::Millis(t_ms))); 325 ++seq_no; 326 timestamp += kCngPeriodSamples; 327 next_input_time_ms += static_cast<double>(kCngPeriodMs) * drift_factor; 328 } 329 // Pull out data once. 330 ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); 331 ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); 332 } 333 334 EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_); 335 336 if (network_freeze_ms > 0) { 337 // First keep pulling audio for `network_freeze_ms` without inserting 338 // any data, then insert CNG data corresponding to `network_freeze_ms` 339 // without pulling any output audio. 340 const double loop_end_time = t_ms + network_freeze_ms; 341 for (; t_ms < loop_end_time; t_ms += 10) { 342 // Pull out data once. 343 ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); 344 ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); 345 EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_); 346 } 347 bool pull_once = pull_audio_during_freeze; 348 // If `pull_once` is true, GetAudio will be called once half-way through 349 // the network recovery period. 350 double pull_time_ms = (t_ms + next_input_time_ms) / 2; 351 while (next_input_time_ms <= t_ms) { 352 if (pull_once && next_input_time_ms >= pull_time_ms) { 353 pull_once = false; 354 // Pull out data once. 355 ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); 356 ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); 357 EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_); 358 t_ms += 10; 359 } 360 // Insert one CNG frame each 100 ms. 361 uint8_t payload[kPayloadBytes]; 362 size_t payload_len; 363 RTPHeader rtp_info; 364 PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len); 365 ASSERT_EQ(0, neteq_->InsertPacket( 366 rtp_info, ArrayView<const uint8_t>(payload, payload_len), 367 Timestamp::Millis(t_ms))); 368 ++seq_no; 369 timestamp += kCngPeriodSamples; 370 next_input_time_ms += kCngPeriodMs * drift_factor; 371 } 372 } 373 374 // Insert speech again until output type is speech. 375 double speech_restart_time_ms = t_ms; 376 while (out_frame_.speech_type_ != AudioFrame::kNormalSpeech) { 377 // Each turn in this for loop is 10 ms. 378 while (next_input_time_ms <= t_ms) { 379 // Insert one 30 ms speech frame. 380 uint8_t payload[kPayloadBytes] = {0}; 381 RTPHeader rtp_info; 382 PopulateRtpInfo(seq_no, timestamp, &rtp_info); 383 ASSERT_EQ( 384 0, neteq_->InsertPacket(rtp_info, payload, Timestamp::Millis(t_ms))); 385 ++seq_no; 386 timestamp += kSamples; 387 next_input_time_ms += kFrameSizeMs * drift_factor; 388 } 389 // Pull out data once. 390 ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); 391 ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); 392 // Increase clock. 393 t_ms += 10; 394 } 395 396 // Check that the speech starts again within reasonable time. 397 double time_until_speech_returns_ms = t_ms - speech_restart_time_ms; 398 EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms); 399 playout_timestamp = neteq_->GetPlayoutTimestamp(); 400 ASSERT_TRUE(playout_timestamp); 401 int32_t delay_after = timestamp - *playout_timestamp; 402 // Compare delay before and after, and make sure it differs less than 20 ms. 403 EXPECT_LE(delay_after, delay_before + delay_tolerance_ms * 16); 404 EXPECT_GE(delay_after, delay_before - delay_tolerance_ms * 16); 405 } 406 407 void NetEqDecodingTestTwoInstances::SetUp() { 408 NetEqDecodingTest::SetUp(); 409 config2_ = config_; 410 } 411 412 void NetEqDecodingTestTwoInstances::CreateSecondInstance() { 413 neteq2_ = DefaultNetEqFactory().Create(env_, config2_, 414 CreateBuiltinAudioDecoderFactory()); 415 ASSERT_TRUE(neteq2_); 416 LoadDecoders(neteq2_.get()); 417 } 418 419 } // namespace webrtc