neteq_impl.cc (80319B)
1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_coding/neteq/neteq_impl.h" 12 13 #include <algorithm> 14 #include <cstdint> 15 #include <cstring> 16 #include <list> 17 #include <map> 18 #include <memory> 19 #include <optional> 20 #include <utility> 21 #include <vector> 22 23 #include "absl/strings/str_cat.h" 24 #include "api/array_view.h" 25 #include "api/audio/audio_frame.h" 26 #include "api/audio/audio_view.h" 27 #include "api/audio_codecs/audio_decoder.h" 28 #include "api/audio_codecs/audio_decoder_factory.h" 29 #include "api/audio_codecs/audio_format.h" 30 #include "api/environment/environment.h" 31 #include "api/neteq/neteq.h" 32 #include "api/neteq/neteq_controller.h" 33 #include "api/neteq/neteq_controller_factory.h" 34 #include "api/neteq/tick_timer.h" 35 #include "api/rtp_headers.h" 36 #include "api/rtp_packet_info.h" 37 #include "api/rtp_packet_infos.h" 38 #include "api/scoped_refptr.h" 39 #include "api/units/time_delta.h" 40 #include "modules/audio_coding/codecs/cng/webrtc_cng.h" 41 #include "modules/audio_coding/neteq/accelerate.h" 42 #include "modules/audio_coding/neteq/background_noise.h" 43 #include "modules/audio_coding/neteq/comfort_noise.h" 44 #include "modules/audio_coding/neteq/decoder_database.h" 45 #include "modules/audio_coding/neteq/dtmf_buffer.h" 46 #include "modules/audio_coding/neteq/dtmf_tone_generator.h" 47 #include "modules/audio_coding/neteq/expand.h" 48 #include "modules/audio_coding/neteq/merge.h" 49 #include "modules/audio_coding/neteq/nack_tracker.h" 50 #include "modules/audio_coding/neteq/normal.h" 51 #include 
"modules/audio_coding/neteq/packet.h" 52 #include "modules/audio_coding/neteq/packet_buffer.h" 53 #include "modules/audio_coding/neteq/preemptive_expand.h" 54 #include "modules/audio_coding/neteq/red_payload_splitter.h" 55 #include "modules/audio_coding/neteq/statistics_calculator.h" 56 #include "modules/audio_coding/neteq/sync_buffer.h" 57 #include "modules/audio_coding/neteq/time_stretch.h" 58 #include "modules/audio_coding/neteq/timestamp_scaler.h" 59 #include "rtc_base/checks.h" 60 #include "rtc_base/logging.h" 61 #include "rtc_base/numerics/safe_conversions.h" 62 #include "rtc_base/sanitizer.h" 63 #include "rtc_base/synchronization/mutex.h" 64 #include "rtc_base/trace_event.h" 65 #include "system_wrappers/include/clock.h" 66 67 namespace webrtc { 68 namespace { 69 70 AudioFrame::SpeechType ToSpeechType(NetEqImpl::OutputType type) { 71 switch (type) { 72 case NetEqImpl::OutputType::kNormalSpeech: { 73 return AudioFrame::kNormalSpeech; 74 } 75 case NetEqImpl::OutputType::kCNG: { 76 return AudioFrame::kCNG; 77 } 78 case NetEqImpl::OutputType::kPLC: { 79 return AudioFrame::kPLC; 80 } 81 case NetEqImpl::OutputType::kPLCCNG: { 82 return AudioFrame::kPLCCNG; 83 } 84 case NetEqImpl::OutputType::kCodecPLC: { 85 return AudioFrame::kCodecPLC; 86 } 87 default: 88 RTC_DCHECK_NOTREACHED(); 89 return AudioFrame::kUndefined; 90 } 91 } 92 93 // Returns true if both payload types are known to the decoder database, and 94 // have the same sample rate. 
95 bool EqualSampleRates(uint8_t pt1, 96 uint8_t pt2, 97 const DecoderDatabase& decoder_database) { 98 auto* di1 = decoder_database.GetDecoderInfo(pt1); 99 auto* di2 = decoder_database.GetDecoderInfo(pt2); 100 return di1 && di2 && di1->SampleRateHz() == di2->SampleRateHz(); 101 } 102 103 } // namespace 104 105 NetEqImpl::Dependencies::Dependencies( 106 const Environment& env, 107 const NetEq::Config& config, 108 scoped_refptr<AudioDecoderFactory> decoder_factory, 109 const NetEqControllerFactory& controller_factory) 110 : env(env), 111 tick_timer(new TickTimer), 112 stats(std::make_unique<StatisticsCalculator>(tick_timer.get())), 113 decoder_database( 114 std::make_unique<DecoderDatabase>(env, 115 std::move(decoder_factory), 116 config.codec_pair_id)), 117 dtmf_buffer(new DtmfBuffer(config.sample_rate_hz)), 118 dtmf_tone_generator(new DtmfToneGenerator), 119 packet_buffer(new PacketBuffer(config.max_packets_in_buffer, 120 tick_timer.get(), 121 stats.get())), 122 neteq_controller(controller_factory.Create( 123 env, 124 {.allow_time_stretching = !config.for_test_no_time_stretching, 125 .max_packets_in_buffer = 126 static_cast<int>(config.max_packets_in_buffer), 127 .base_min_delay_ms = config.min_delay_ms, 128 .tick_timer = tick_timer.get()})), 129 red_payload_splitter(new RedPayloadSplitter), 130 timestamp_scaler(new TimestampScaler(*decoder_database)), 131 accelerate_factory(new AccelerateFactory), 132 expand_factory(new ExpandFactory), 133 preemptive_expand_factory(new PreemptiveExpandFactory) {} 134 135 NetEqImpl::Dependencies::~Dependencies() = default; 136 137 NetEqImpl::NetEqImpl(const NetEq::Config& config, 138 Dependencies&& deps, 139 bool create_components) 140 : env_(deps.env), 141 tick_timer_(std::move(deps.tick_timer)), 142 decoder_database_(std::move(deps.decoder_database)), 143 dtmf_buffer_(std::move(deps.dtmf_buffer)), 144 dtmf_tone_generator_(std::move(deps.dtmf_tone_generator)), 145 packet_buffer_(std::move(deps.packet_buffer)), 146 
red_payload_splitter_(std::move(deps.red_payload_splitter)), 147 timestamp_scaler_(std::move(deps.timestamp_scaler)), 148 expand_factory_(std::move(deps.expand_factory)), 149 accelerate_factory_(std::move(deps.accelerate_factory)), 150 preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)), 151 stats_(std::move(deps.stats)), 152 controller_(std::move(deps.neteq_controller)), 153 last_mode_(Mode::kNormal), 154 decoded_buffer_length_(kMaxFrameSize), 155 decoded_buffer_(new int16_t[decoded_buffer_length_]), 156 playout_timestamp_(0), 157 new_codec_(false), 158 timestamp_(0), 159 reset_decoder_(false), 160 first_packet_(true), 161 enable_fast_accelerate_(config.enable_fast_accelerate), 162 nack_enabled_(false), 163 enable_muted_state_(config.enable_muted_state), 164 no_time_stretching_(config.for_test_no_time_stretching) { 165 RTC_LOG(LS_INFO) << "NetEq config: " << config.ToString(); 166 int fs = config.sample_rate_hz; 167 if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) { 168 RTC_LOG(LS_ERROR) << "Sample rate " << fs 169 << " Hz not supported. " 170 "Changing to 8000 Hz."; 171 fs = 8000; 172 } 173 controller_->SetMaximumDelay(config.max_delay_ms); 174 fs_hz_ = fs; 175 fs_mult_ = fs / 8000; 176 last_output_sample_rate_hz_ = fs; 177 output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_); 178 controller_->SetSampleRate(fs_hz_, output_size_samples_); 179 decoder_frame_length_ = 2 * output_size_samples_; // 20 ms. 180 if (create_components) { 181 SetSampleRateAndChannels(fs, 1); // Default is 1 channel. 
182 } 183 } 184 185 NetEqImpl::~NetEqImpl() = default; 186 187 int NetEqImpl::InsertPacket(const RTPHeader& rtp_header, 188 ArrayView<const uint8_t> payload, 189 const RtpPacketInfo& packet_info) { 190 MsanCheckInitialized(payload); 191 TRACE_EVENT0("webrtc", "NetEqImpl::InsertPacket"); 192 MutexLock lock(&mutex_); 193 if (InsertPacketInternal(rtp_header, payload, packet_info) != kNoError) { 194 return kFail; 195 } 196 return kOK; 197 } 198 199 void NetEqImpl::InsertEmptyPacket(const RTPHeader& rtp_header) { 200 MutexLock lock(&mutex_); 201 if (nack_enabled_) { 202 nack_->UpdateLastReceivedPacket(rtp_header.sequenceNumber, 203 rtp_header.timestamp); 204 } 205 controller_->RegisterEmptyPacket(); 206 } 207 208 int NetEqImpl::GetAudio(AudioFrame* audio_frame, 209 bool* muted, 210 int* current_sample_rate_hz, 211 std::optional<Operation> action_override) { 212 TRACE_EVENT0("webrtc", "NetEqImpl::GetAudio"); 213 MutexLock lock(&mutex_); 214 if (GetAudioInternal(audio_frame, action_override) != kNoError) { 215 return kFail; 216 } 217 stats_->IncreaseCounter(output_size_samples_, fs_hz_); 218 RTC_DCHECK_EQ(audio_frame->sample_rate_hz_, 219 dchecked_cast<int>(audio_frame->samples_per_channel_ * 100)); 220 if (muted != nullptr) { 221 *muted = audio_frame->muted(); 222 } 223 audio_frame->speech_type_ = ToSpeechType(LastOutputType()); 224 last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_; 225 RTC_DCHECK(last_output_sample_rate_hz_ == 8000 || 226 last_output_sample_rate_hz_ == 16000 || 227 last_output_sample_rate_hz_ == 32000 || 228 last_output_sample_rate_hz_ == 48000) 229 << "Unexpected sample rate " << last_output_sample_rate_hz_; 230 231 if (current_sample_rate_hz) { 232 *current_sample_rate_hz = last_output_sample_rate_hz_; 233 } 234 235 return kOK; 236 } 237 238 void NetEqImpl::SetCodecs(const std::map<int, SdpAudioFormat>& codecs) { 239 MutexLock lock(&mutex_); 240 const std::vector<int> changed_payload_types = 241 decoder_database_->SetCodecs(codecs); 242 for 
(const int pt : changed_payload_types) { 243 packet_buffer_->DiscardPacketsWithPayloadType(pt); 244 } 245 } 246 247 bool NetEqImpl::RegisterPayloadType(int rtp_payload_type, 248 const SdpAudioFormat& audio_format) { 249 RTC_LOG(LS_VERBOSE) << "NetEqImpl::RegisterPayloadType: payload type " 250 << rtp_payload_type << ", codec " 251 << absl::StrCat(audio_format); 252 MutexLock lock(&mutex_); 253 return decoder_database_->RegisterPayload(rtp_payload_type, audio_format) == 254 DecoderDatabase::kOK; 255 } 256 257 bool NetEqImpl::CreateDecoder(int rtp_payload_type) { 258 MutexLock lock(&mutex_); 259 return decoder_database_->GetDecoder(rtp_payload_type) != nullptr; 260 } 261 262 int NetEqImpl::RemovePayloadType(uint8_t rtp_payload_type) { 263 MutexLock lock(&mutex_); 264 int ret = decoder_database_->Remove(rtp_payload_type); 265 if (ret == DecoderDatabase::kOK || ret == DecoderDatabase::kDecoderNotFound) { 266 packet_buffer_->DiscardPacketsWithPayloadType(rtp_payload_type); 267 return kOK; 268 } 269 return kFail; 270 } 271 272 void NetEqImpl::RemoveAllPayloadTypes() { 273 MutexLock lock(&mutex_); 274 decoder_database_->RemoveAll(); 275 } 276 277 bool NetEqImpl::SetMinimumDelay(int delay_ms) { 278 MutexLock lock(&mutex_); 279 if (delay_ms >= 0 && delay_ms <= 10000) { 280 RTC_DCHECK(controller_.get()); 281 return controller_->SetMinimumDelay(delay_ms); 282 } 283 return false; 284 } 285 286 bool NetEqImpl::SetMaximumDelay(int delay_ms) { 287 MutexLock lock(&mutex_); 288 if (delay_ms >= 0 && delay_ms <= 10000) { 289 RTC_DCHECK(controller_.get()); 290 return controller_->SetMaximumDelay(delay_ms); 291 } 292 return false; 293 } 294 295 bool NetEqImpl::SetBaseMinimumDelayMs(int delay_ms) { 296 MutexLock lock(&mutex_); 297 if (delay_ms >= 0 && delay_ms <= 10000) { 298 return controller_->SetBaseMinimumDelay(delay_ms); 299 } 300 return false; 301 } 302 303 int NetEqImpl::GetBaseMinimumDelayMs() const { 304 MutexLock lock(&mutex_); 305 return controller_->GetBaseMinimumDelay(); 
306 } 307 308 int NetEqImpl::TargetDelayMs() const { 309 MutexLock lock(&mutex_); 310 RTC_DCHECK(controller_.get()); 311 return controller_->TargetLevelMs(); 312 } 313 314 int NetEqImpl::FilteredCurrentDelayMs() const { 315 MutexLock lock(&mutex_); 316 // Sum up the filtered packet buffer level with the future length of the sync 317 // buffer. 318 const int delay_samples = 319 controller_->GetFilteredBufferLevel() + sync_buffer_->FutureLength(); 320 // The division below will truncate. The return value is in ms. 321 return delay_samples / CheckedDivExact(fs_hz_, 1000); 322 } 323 324 int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) { 325 MutexLock lock(&mutex_); 326 RTC_DCHECK(decoder_database_.get()); 327 *stats = CurrentNetworkStatisticsInternal(); 328 stats_->GetNetworkStatistics(decoder_frame_length_, stats); 329 return 0; 330 } 331 332 NetEqNetworkStatistics NetEqImpl::CurrentNetworkStatistics() const { 333 MutexLock lock(&mutex_); 334 return CurrentNetworkStatisticsInternal(); 335 } 336 337 NetEqNetworkStatistics NetEqImpl::CurrentNetworkStatisticsInternal() const { 338 RTC_DCHECK(decoder_database_.get()); 339 NetEqNetworkStatistics stats; 340 const size_t total_samples_in_buffers = 341 packet_buffer_->NumSamplesInBuffer(decoder_frame_length_) + 342 sync_buffer_->FutureLength(); 343 344 RTC_DCHECK(controller_.get()); 345 stats.preferred_buffer_size_ms = controller_->TargetLevelMs(); 346 stats.jitter_peaks_found = controller_->PeakFound(); 347 RTC_DCHECK_GT(fs_hz_, 0); 348 stats.current_buffer_size_ms = 349 static_cast<uint16_t>(total_samples_in_buffers * 1000 / fs_hz_); 350 return stats; 351 } 352 353 NetEqLifetimeStatistics NetEqImpl::GetLifetimeStatistics() const { 354 MutexLock lock(&mutex_); 355 return stats_->GetLifetimeStatistics(); 356 } 357 358 NetEqOperationsAndState NetEqImpl::GetOperationsAndState() const { 359 MutexLock lock(&mutex_); 360 auto result = stats_->GetOperationsAndState(); 361 result.current_buffer_size_ms = 362 
(packet_buffer_->NumSamplesInBuffer(decoder_frame_length_) + 363 sync_buffer_->FutureLength()) * 364 1000 / fs_hz_; 365 result.current_frame_size_ms = decoder_frame_length_ * 1000 / fs_hz_; 366 result.next_packet_available = packet_buffer_->PeekNextPacket() && 367 packet_buffer_->PeekNextPacket()->timestamp == 368 sync_buffer_->end_timestamp(); 369 return result; 370 } 371 372 std::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const { 373 MutexLock lock(&mutex_); 374 if (first_packet_ || last_mode_ == Mode::kRfc3389Cng || 375 last_mode_ == Mode::kCodecInternalCng) { 376 // We don't have a valid RTP timestamp until we have decoded our first 377 // RTP packet. Also, the RTP timestamp is not accurate while playing CNG, 378 // which is indicated by returning an empty value. 379 return std::nullopt; 380 } 381 return timestamp_scaler_->ToExternal(playout_timestamp_); 382 } 383 384 int NetEqImpl::last_output_sample_rate_hz() const { 385 MutexLock lock(&mutex_); 386 return last_output_sample_rate_hz_; 387 } 388 389 std::optional<NetEq::DecoderFormat> NetEqImpl::GetCurrentDecoderFormat() const { 390 MutexLock lock(&mutex_); 391 if (!current_rtp_payload_type_.has_value()) { 392 return std::nullopt; 393 } 394 const DecoderDatabase::DecoderInfo* di = 395 decoder_database_->GetDecoderInfo(*current_rtp_payload_type_); 396 if (di == nullptr) { 397 return std::nullopt; 398 } 399 return DecoderFormat{ 400 .payload_type = *current_rtp_payload_type_, 401 .sample_rate_hz = di->SampleRateHz(), 402 .num_channels = dchecked_cast<int>(di->GetDecoder()->Channels()), 403 .sdp_format = di->GetFormat()}; 404 } 405 406 void NetEqImpl::FlushBuffers() { 407 MutexLock lock(&mutex_); 408 RTC_LOG(LS_VERBOSE) << "FlushBuffers"; 409 packet_buffer_->Flush(); 410 RTC_DCHECK(sync_buffer_.get()); 411 RTC_DCHECK(expand_.get()); 412 sync_buffer_->Flush(); 413 sync_buffer_->set_next_index(sync_buffer_->next_index() - 414 expand_->overlap_length()); 415 // Set to wait for new codec. 
416 first_packet_ = true; 417 } 418 419 void NetEqImpl::EnableNack(size_t max_nack_list_size) { 420 MutexLock lock(&mutex_); 421 if (!nack_enabled_) { 422 nack_ = std::make_unique<NackTracker>(env_.field_trials()); 423 nack_enabled_ = true; 424 nack_->UpdateSampleRate(fs_hz_); 425 } 426 nack_->SetMaxNackListSize(max_nack_list_size); 427 } 428 429 void NetEqImpl::DisableNack() { 430 MutexLock lock(&mutex_); 431 nack_.reset(); 432 nack_enabled_ = false; 433 } 434 435 std::vector<uint16_t> NetEqImpl::GetNackList(int64_t round_trip_time_ms) const { 436 MutexLock lock(&mutex_); 437 if (!nack_enabled_) { 438 return std::vector<uint16_t>(); 439 } 440 RTC_DCHECK(nack_.get()); 441 return nack_->GetNackList(round_trip_time_ms); 442 } 443 444 int NetEqImpl::SyncBufferSizeMs() const { 445 MutexLock lock(&mutex_); 446 return dchecked_cast<int>(sync_buffer_->FutureLength() / 447 CheckedDivExact(fs_hz_, 1000)); 448 } 449 450 const SyncBuffer* NetEqImpl::sync_buffer_for_test() const { 451 MutexLock lock(&mutex_); 452 return sync_buffer_.get(); 453 } 454 455 NetEq::Operation NetEqImpl::last_operation_for_test() const { 456 MutexLock lock(&mutex_); 457 return last_operation_; 458 } 459 460 // Methods below this line are private. 461 462 NetEqImpl::Error NetEqImpl::InsertPacketInternal( 463 const RTPHeader& rtp_header, 464 ArrayView<const uint8_t> payload, 465 const RtpPacketInfo& packet_info) { 466 if (payload.empty()) { 467 RTC_LOG_F(LS_ERROR) << "payload is empty"; 468 return kInvalidPointer; 469 } 470 stats_->ReceivedPacket(); 471 472 PacketList packet_list; 473 // Insert packet in a packet list. 474 packet_list.push_back([&rtp_header, &payload] { 475 // Convert to Packet. 476 Packet packet; 477 packet.payload_type = rtp_header.payloadType; 478 packet.sequence_number = rtp_header.sequenceNumber; 479 packet.timestamp = rtp_header.timestamp; 480 packet.payload.SetData(payload.data(), payload.size()); 481 // Waiting time will be set upon inserting the packet in the buffer. 
482 RTC_DCHECK(!packet.waiting_time); 483 return packet; 484 }()); 485 486 bool update_sample_rate_and_channels = first_packet_; 487 488 if (update_sample_rate_and_channels) { 489 // Reset timestamp scaling. 490 timestamp_scaler_->Reset(); 491 } 492 493 if (!decoder_database_->IsRed(rtp_header.payloadType)) { 494 // Scale timestamp to internal domain (only for some codecs). 495 timestamp_scaler_->ToInternal(&packet_list); 496 } 497 498 // Store these for later use, since the first packet may very well disappear 499 // before we need these values. 500 uint32_t main_timestamp = packet_list.front().timestamp; 501 uint16_t main_sequence_number = packet_list.front().sequence_number; 502 503 // Reinitialize NetEq if it's needed (changed SSRC or first call). 504 if (update_sample_rate_and_channels) { 505 // Note: `first_packet_` will be cleared further down in this method, once 506 // the packet has been successfully inserted into the packet buffer. 507 508 // Flush the packet buffer and DTMF buffer. 509 packet_buffer_->Flush(); 510 dtmf_buffer_->Flush(); 511 512 // Update audio buffer timestamp. 513 sync_buffer_->IncreaseEndTimestamp(main_timestamp - timestamp_); 514 515 // Update codecs. 516 timestamp_ = main_timestamp; 517 } 518 519 if (nack_enabled_) { 520 RTC_DCHECK(nack_); 521 if (update_sample_rate_and_channels) { 522 nack_->Reset(); 523 } 524 nack_->UpdateLastReceivedPacket(main_sequence_number, main_timestamp); 525 } 526 527 // Check for RED payload type, and separate payloads into several packets. 528 if (decoder_database_->IsRed(rtp_header.payloadType)) { 529 if (!red_payload_splitter_->SplitRed(&packet_list)) { 530 return kRedundancySplitError; 531 } 532 // Only accept a few RED payloads of the same type as the main data, 533 // DTMF events and CNG. 534 red_payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_); 535 if (packet_list.empty()) { 536 return kRedundancySplitError; 537 } 538 } 539 540 // Check payload types. 
541 if (decoder_database_->CheckPayloadTypes(packet_list) == 542 DecoderDatabase::kDecoderNotFound) { 543 return kUnknownRtpPayloadType; 544 } 545 546 RTC_DCHECK(!packet_list.empty()); 547 548 // Update main_timestamp, if new packets appear in the list 549 // after RED splitting. 550 if (decoder_database_->IsRed(rtp_header.payloadType)) { 551 timestamp_scaler_->ToInternal(&packet_list); 552 main_timestamp = packet_list.front().timestamp; 553 main_sequence_number = packet_list.front().sequence_number; 554 } 555 556 // Process DTMF payloads. Cycle through the list of packets, and pick out any 557 // DTMF payloads found. 558 PacketList::iterator it = packet_list.begin(); 559 while (it != packet_list.end()) { 560 const Packet& current_packet = (*it); 561 RTC_DCHECK(!current_packet.payload.empty()); 562 if (decoder_database_->IsDtmf(current_packet.payload_type)) { 563 DtmfEvent event; 564 int ret = DtmfBuffer::ParseEvent(current_packet.timestamp, 565 current_packet.payload.data(), 566 current_packet.payload.size(), &event); 567 if (ret != DtmfBuffer::kOK) { 568 return kDtmfParsingError; 569 } 570 if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) { 571 return kDtmfInsertError; 572 } 573 it = packet_list.erase(it); 574 } else { 575 ++it; 576 } 577 } 578 579 PacketList parsed_packet_list; 580 bool is_dtx = false; 581 while (!packet_list.empty()) { 582 Packet& packet = packet_list.front(); 583 const DecoderDatabase::DecoderInfo* info = 584 decoder_database_->GetDecoderInfo(packet.payload_type); 585 if (!info) { 586 RTC_LOG(LS_WARNING) << "SplitAudio unknown payload type"; 587 return kUnknownRtpPayloadType; 588 } 589 590 if (info->IsComfortNoise()) { 591 // Carry comfort noise packets along. 
592 parsed_packet_list.splice(parsed_packet_list.end(), packet_list, 593 packet_list.begin()); 594 } else { 595 const uint16_t sequence_number = packet.sequence_number; 596 const uint8_t payload_type = packet.payload_type; 597 const Packet::Priority original_priority = packet.priority; 598 auto packet_from_result = [&](AudioDecoder::ParseResult& result) { 599 Packet new_packet; 600 new_packet.sequence_number = sequence_number; 601 new_packet.payload_type = payload_type; 602 new_packet.timestamp = result.timestamp; 603 new_packet.priority.codec_level = result.priority; 604 new_packet.priority.red_level = original_priority.red_level; 605 // Only associate the header information with the primary packet. 606 if (new_packet.timestamp == packet_info.rtp_timestamp()) { 607 new_packet.packet_info = packet_info; 608 } 609 new_packet.frame = std::move(result.frame); 610 return new_packet; 611 }; 612 613 std::vector<AudioDecoder::ParseResult> results = 614 info->GetDecoder()->ParsePayload(std::move(packet.payload), 615 packet.timestamp); 616 if (results.empty()) { 617 packet_list.pop_front(); 618 } else { 619 bool first = true; 620 for (auto& result : results) { 621 RTC_DCHECK(result.frame); 622 RTC_DCHECK_GE(result.priority, 0); 623 is_dtx = is_dtx || result.frame->IsDtxPacket(); 624 if (first) { 625 // Re-use the node and move it to parsed_packet_list. 626 packet_list.front() = packet_from_result(result); 627 parsed_packet_list.splice(parsed_packet_list.end(), packet_list, 628 packet_list.begin()); 629 first = false; 630 } else { 631 parsed_packet_list.push_back(packet_from_result(result)); 632 } 633 } 634 } 635 } 636 } 637 638 // Calculate the number of primary (non-FEC/RED) packets. 
639 const size_t number_of_primary_packets = std::count_if( 640 parsed_packet_list.begin(), parsed_packet_list.end(), 641 [](const Packet& in) { return in.priority.codec_level == 0; }); 642 if (number_of_primary_packets < parsed_packet_list.size()) { 643 stats_->SecondaryPacketsReceived(parsed_packet_list.size() - 644 number_of_primary_packets); 645 } 646 647 bool buffer_flush_occured = false; 648 for (Packet& packet : parsed_packet_list) { 649 if (MaybeChangePayloadType(packet.payload_type)) { 650 packet_buffer_->Flush(); 651 buffer_flush_occured = true; 652 } 653 NetEqController::PacketArrivedInfo info = ToPacketArrivedInfo(packet); 654 int return_val = packet_buffer_->InsertPacket(std::move(packet)); 655 if (return_val == PacketBuffer::kFlushed) { 656 buffer_flush_occured = true; 657 } else if (return_val != PacketBuffer::kOK) { 658 // An error occurred. 659 return kOtherError; 660 } 661 662 info.buffer_flush = buffer_flush_occured; 663 const bool should_update_stats = !new_codec_ && !buffer_flush_occured; 664 auto relative_delay = 665 controller_->PacketArrived(fs_hz_, should_update_stats, info); 666 if (relative_delay) { 667 stats_->RelativePacketArrivalDelay(relative_delay.value()); 668 } 669 } 670 671 if (buffer_flush_occured) { 672 // Reset DSP timestamp etc. if packet buffer flushed. 673 new_codec_ = true; 674 update_sample_rate_and_channels = true; 675 } 676 677 if (first_packet_) { 678 first_packet_ = false; 679 // Update the codec on the next GetAudio call. 680 new_codec_ = true; 681 } 682 683 if (current_rtp_payload_type_) { 684 RTC_DCHECK(decoder_database_->GetDecoderInfo(*current_rtp_payload_type_)) 685 << "Payload type " << static_cast<int>(*current_rtp_payload_type_) 686 << " is unknown where it shouldn't be"; 687 } 688 689 if (update_sample_rate_and_channels && !packet_buffer_->Empty()) { 690 // We do not use `current_rtp_payload_type_` to |set payload_type|, but 691 // get the next RTP header from `packet_buffer_` to obtain the payload type. 
692 // The reason for it is the following corner case. If NetEq receives a 693 // CNG packet with a sample rate different than the current CNG then it 694 // flushes its buffer, assuming send codec must have been changed. However, 695 // payload type of the hypothetically new send codec is not known. 696 const Packet* next_packet = packet_buffer_->PeekNextPacket(); 697 RTC_DCHECK(next_packet); 698 const int payload_type = next_packet->payload_type; 699 size_t channels = 1; 700 if (!decoder_database_->IsComfortNoise(payload_type)) { 701 AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type); 702 RTC_DCHECK(decoder); // Payloads are already checked to be valid. 703 channels = decoder->Channels(); 704 RTC_DCHECK_LE(channels, kMaxNumberOfAudioChannels); 705 } 706 const DecoderDatabase::DecoderInfo* decoder_info = 707 decoder_database_->GetDecoderInfo(payload_type); 708 RTC_DCHECK(decoder_info); 709 if (decoder_info->SampleRateHz() != fs_hz_ || 710 channels != algorithm_buffer_->Channels()) { 711 RTC_DCHECK_LE(channels, kMaxNumberOfAudioChannels); 712 SetSampleRateAndChannels(decoder_info->SampleRateHz(), channels); 713 } 714 if (nack_enabled_) { 715 RTC_DCHECK(nack_); 716 // Update the sample rate even if the rate is not new, because of Reset(). 717 nack_->UpdateSampleRate(fs_hz_); 718 } 719 } 720 721 return kNoError; 722 } 723 724 bool NetEqImpl::MaybeChangePayloadType(uint8_t payload_type) { 725 bool changed = false; 726 if (decoder_database_->IsComfortNoise(payload_type)) { 727 if (current_cng_rtp_payload_type_ && 728 *current_cng_rtp_payload_type_ != payload_type) { 729 // New CNG payload type implies new codec type. 730 current_rtp_payload_type_ = std::nullopt; 731 changed = true; 732 } 733 current_cng_rtp_payload_type_ = payload_type; 734 } else if (!decoder_database_->IsDtmf(payload_type)) { 735 // This must be speech. 
736 if ((current_rtp_payload_type_ && 737 *current_rtp_payload_type_ != payload_type) || 738 (current_cng_rtp_payload_type_ && 739 !EqualSampleRates(payload_type, *current_cng_rtp_payload_type_, 740 *decoder_database_))) { 741 current_cng_rtp_payload_type_ = std::nullopt; 742 changed = true; 743 } 744 current_rtp_payload_type_ = payload_type; 745 } 746 return changed; 747 } 748 749 int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, 750 std::optional<Operation> action_override) { 751 PacketList packet_list; 752 DtmfEvent dtmf_event; 753 Operation operation; 754 bool play_dtmf; 755 last_decoded_packet_infos_.clear(); 756 tick_timer_->Increment(); 757 758 // Sanity check - should already be taken care of when setting 759 // output_size_samples_. 760 RTC_DCHECK_LE(output_size_samples_ * sync_buffer_->Channels(), 761 AudioFrame::kMaxDataSizeSamples); 762 763 // Check for muted state. 764 if (enable_muted_state_ && expand_->Muted() && packet_buffer_->Empty()) { 765 RTC_DCHECK_EQ(last_mode_, Mode::kExpand); 766 audio_frame->Reset(); 767 RTC_DCHECK(audio_frame->muted()); // Reset() should mute the frame. 768 playout_timestamp_ += static_cast<uint32_t>(output_size_samples_); 769 audio_frame->sample_rate_hz_ = fs_hz_; 770 audio_frame->samples_per_channel_ = output_size_samples_; 771 audio_frame->timestamp_ = 772 first_packet_ 773 ? 
0 774 : timestamp_scaler_->ToExternal(playout_timestamp_) - 775 static_cast<uint32_t>(audio_frame->samples_per_channel_); 776 audio_frame->num_channels_ = sync_buffer_->Channels(); 777 stats_->ExpandedNoiseSamples(output_size_samples_, false); 778 controller_->NotifyMutedState(); 779 return 0; 780 } 781 int return_value = GetDecision(&operation, &packet_list, &dtmf_event, 782 &play_dtmf, action_override); 783 if (return_value != 0) { 784 last_mode_ = Mode::kError; 785 return return_value; 786 } 787 788 AudioDecoder::SpeechType speech_type; 789 int length = 0; 790 const size_t start_num_packets = packet_list.size(); 791 int decode_return_value = 792 Decode(&packet_list, &operation, &length, &speech_type); 793 if (length > 0) { 794 last_decoded_type_ = speech_type; 795 } 796 797 bool sid_frame_available = 798 (operation == Operation::kRfc3389Cng && !packet_list.empty()); 799 800 // This is the criterion that we did decode some data through the speech 801 // decoder, and the operation resulted in comfort noise. 802 const bool codec_internal_sid_frame = 803 (speech_type == AudioDecoder::kComfortNoise && 804 start_num_packets > packet_list.size()); 805 806 if (sid_frame_available || codec_internal_sid_frame) { 807 // Start a new stopwatch since we are decoding a new CNG packet. 
808 generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch(); 809 } 810 811 algorithm_buffer_->Clear(); 812 switch (operation) { 813 case Operation::kNormal: { 814 DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf); 815 if (length > 0) { 816 stats_->DecodedOutputPlayed(); 817 } 818 break; 819 } 820 case Operation::kMerge: { 821 DoMerge(decoded_buffer_.get(), length, speech_type, play_dtmf); 822 break; 823 } 824 case Operation::kExpand: { 825 RTC_DCHECK_EQ(return_value, 0); 826 if (!current_rtp_payload_type_ || !DoCodecPlc()) { 827 return_value = DoExpand(play_dtmf); 828 } 829 RTC_DCHECK_GE(sync_buffer_->FutureLength() - expand_->overlap_length(), 830 output_size_samples_); 831 break; 832 } 833 case Operation::kAccelerate: 834 case Operation::kFastAccelerate: { 835 const bool fast_accelerate = 836 enable_fast_accelerate_ && (operation == Operation::kFastAccelerate); 837 return_value = DoAccelerate(decoded_buffer_.get(), length, speech_type, 838 play_dtmf, fast_accelerate); 839 break; 840 } 841 case Operation::kPreemptiveExpand: { 842 return_value = DoPreemptiveExpand(decoded_buffer_.get(), length, 843 speech_type, play_dtmf); 844 break; 845 } 846 case Operation::kRfc3389Cng: 847 case Operation::kRfc3389CngNoPacket: { 848 return_value = DoRfc3389Cng(&packet_list, play_dtmf); 849 break; 850 } 851 case Operation::kCodecInternalCng: { 852 // This handles the case when there is no transmission and the decoder 853 // should produce internal comfort noise. 854 // TODO(hlundin): Write test for codec-internal CNG. 855 DoCodecInternalCng(decoded_buffer_.get(), length); 856 break; 857 } 858 case Operation::kDtmf: { 859 // TODO(hlundin): Write test for this. 860 return_value = DoDtmf(dtmf_event, &play_dtmf); 861 break; 862 } 863 case Operation::kUndefined: { 864 RTC_LOG(LS_ERROR) << "Invalid operation kUndefined."; 865 RTC_DCHECK_NOTREACHED(); // This should not happen. 866 last_mode_ = Mode::kError; 867 return kInvalidOperation; 868 } 869 } // End of switch. 
870 last_operation_ = operation; 871 if (return_value < 0) { 872 return return_value; 873 } 874 875 if (last_mode_ != Mode::kRfc3389Cng) { 876 comfort_noise_->Reset(); 877 } 878 879 // We treat it as if all packets referenced to by `last_decoded_packet_infos_` 880 // were mashed together when creating the samples in `algorithm_buffer_`. 881 RtpPacketInfos packet_infos(last_decoded_packet_infos_); 882 883 // Copy samples from `algorithm_buffer_` to `sync_buffer_`. 884 // 885 // TODO(bugs.webrtc.org/10757): 886 // We would in the future also like to pass `packet_infos` so that we can do 887 // sample-perfect tracking of that information across `sync_buffer_`. 888 sync_buffer_->PushBack(*algorithm_buffer_); 889 890 // Extract data from `sync_buffer_` to `output`. 891 audio_frame->ResetWithoutMuting(); 892 audio_frame->SetSampleRateAndChannelSize(fs_hz_); 893 InterleavedView<int16_t> view = 894 audio_frame->mutable_data(output_size_samples_, sync_buffer_->Channels()); 895 bool got_audio = sync_buffer_->GetNextAudioInterleaved(view); 896 897 // TODO(bugs.webrtc.org/10757): 898 // We don't have the ability to properly track individual packets once their 899 // audio samples have entered `sync_buffer_`. So for now, treat it as if 900 // `packet_infos` from packets decoded by the current `GetAudioInternal()` 901 // call were all consumed assembling the current audio frame and the current 902 // audio frame only. 903 audio_frame->packet_infos_ = std::move(packet_infos); 904 if (sync_buffer_->FutureLength() < expand_->overlap_length()) { 905 // The sync buffer should always contain `overlap_length` samples, but now 906 // too many samples have been extracted. Reinstall the `overlap_length` 907 // lookahead by moving the index. 
908 const size_t missing_lookahead_samples = 909 expand_->overlap_length() - sync_buffer_->FutureLength(); 910 RTC_DCHECK_GE(sync_buffer_->next_index(), missing_lookahead_samples); 911 sync_buffer_->set_next_index(sync_buffer_->next_index() - 912 missing_lookahead_samples); 913 } 914 915 if (!got_audio) { 916 RTC_LOG(LS_ERROR) << "audio_frame->samples_per_channel_ (" 917 << audio_frame->samples_per_channel_ 918 << ") != output_size_samples_ (" << output_size_samples_ 919 << ")"; 920 // TODO(minyue): treatment of under-run, filling zeros 921 audio_frame->Mute(); 922 return kSampleUnderrun; 923 } 924 925 // Should always have overlap samples left in the `sync_buffer_`. 926 RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length()); 927 928 // TODO(yujo): For muted frames, this can be a copy rather than an addition. 929 if (play_dtmf) { 930 return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(), 931 audio_frame->mutable_data()); 932 } 933 934 // Update the background noise parameters if last operation wrote data 935 // straight from the decoder to the `sync_buffer_`. That is, none of the 936 // operations that modify the signal can be followed by a parameter update. 937 if ((last_mode_ == Mode::kNormal) || (last_mode_ == Mode::kAccelerateFail) || 938 (last_mode_ == Mode::kPreemptiveExpandFail) || 939 (last_mode_ == Mode::kRfc3389Cng) || 940 (last_mode_ == Mode::kCodecInternalCng)) { 941 background_noise_->Update(*sync_buffer_); 942 } 943 944 if (operation == Operation::kDtmf) { 945 // DTMF data was written the end of `sync_buffer_`. 946 // Update index to end of DTMF data in `sync_buffer_`. 947 sync_buffer_->set_dtmf_index(sync_buffer_->Size()); 948 } 949 950 if (last_mode_ != Mode::kExpand && last_mode_ != Mode::kCodecPlc) { 951 // If last operation was not expand, calculate the `playout_timestamp_` from 952 // the `sync_buffer_`. However, do not update the `playout_timestamp_` if it 953 // would be moved "backwards". 
uint32_t temp_timestamp =
        sync_buffer_->end_timestamp() -
        static_cast<uint32_t>(sync_buffer_->FutureLength());
    if (static_cast<int32_t>(temp_timestamp - playout_timestamp_) > 0) {
      playout_timestamp_ = temp_timestamp;
    }
  } else {
    // Use dead reckoning to estimate the `playout_timestamp_`.
    playout_timestamp_ += static_cast<uint32_t>(output_size_samples_);
  }
  // Set the timestamp in the audio frame to zero before the first packet has
  // been inserted. Otherwise, subtract the frame size in samples to get the
  // timestamp of the first sample in the frame (playout_timestamp_ is the
  // last + 1).
  audio_frame->timestamp_ =
      first_packet_
          ? 0
          : timestamp_scaler_->ToExternal(playout_timestamp_) -
                static_cast<uint32_t>(audio_frame->samples_per_channel_);

  if (!(last_mode_ == Mode::kRfc3389Cng ||
        last_mode_ == Mode::kCodecInternalCng || last_mode_ == Mode::kExpand ||
        last_mode_ == Mode::kCodecPlc)) {
    generated_noise_stopwatch_.reset();
  }

  if (decode_return_value)
    return decode_return_value;
  return return_value;
}

// Decides which operation to perform for the current output block and, when
// packets are needed, extracts them from the packet buffer.
//
// Outputs:
//   `operation`   - the chosen operation (reset to kUndefined on entry).
//   `packet_list` - receives the packets extracted by ExtractPackets().
//   `dtmf_event`  - filled in by `dtmf_buffer_->GetEvent()`.
//   `play_dtmf`   - set to true when GetEvent() reports an event to play.
// `action_override`, when set, replaces the decision returned by the
// controller.
//
// Returns 0 on success, -1 if a reset is required but no packet is available,
// or kPacketBufferCorruption if packet extraction fails.
int NetEqImpl::GetDecision(Operation* operation,
                           PacketList* packet_list,
                           DtmfEvent* dtmf_event,
                           bool* play_dtmf,
                           std::optional<Operation> action_override) {
  // Initialize output variables.
  *play_dtmf = false;
  *operation = Operation::kUndefined;

  RTC_DCHECK(sync_buffer_.get());
  uint32_t end_timestamp = sync_buffer_->end_timestamp();
  if (!new_codec_) {
    const uint32_t five_seconds_samples = 5 * fs_hz_;
    packet_buffer_->DiscardOldPackets(end_timestamp, five_seconds_samples);
  }
  const Packet* packet = packet_buffer_->PeekNextPacket();

  RTC_DCHECK(!generated_noise_stopwatch_ ||
             generated_noise_stopwatch_->ElapsedTicks() >= 1);
  uint64_t generated_noise_samples =
      generated_noise_stopwatch_
          ? (generated_noise_stopwatch_->ElapsedTicks() - 1) *
                    output_size_samples_ +
                controller_->noise_fast_forward()
          : 0;

  if (last_mode_ == Mode::kRfc3389Cng) {
    // Because of timestamp peculiarities, we have to "manually" disallow using
    // a CNG packet with the same timestamp as the one that was last played.
    // This can happen when using redundancy and will cause the timing to shift.
    while (packet && decoder_database_->IsComfortNoise(packet->payload_type) &&
           (end_timestamp >= packet->timestamp ||
            end_timestamp + generated_noise_samples > packet->timestamp)) {
      // Don't use this packet, discard it.
      if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) {
        RTC_DCHECK_NOTREACHED();  // Must be ok by design.
      }
      // Check buffer again.
      if (!new_codec_) {
        packet_buffer_->DiscardOldPackets(end_timestamp, 5 * fs_hz_);
      }
      packet = packet_buffer_->PeekNextPacket();
    }
  }

  RTC_DCHECK(expand_.get());
  // Samples available for playout beyond the overlap region that is kept in
  // the `sync_buffer_`.
  const int samples_left = static_cast<int>(sync_buffer_->FutureLength() -
                                            expand_->overlap_length());
  if (last_mode_ == Mode::kAccelerateSuccess ||
      last_mode_ == Mode::kAccelerateLowEnergy ||
      last_mode_ == Mode::kPreemptiveExpandSuccess ||
      last_mode_ == Mode::kPreemptiveExpandLowEnergy) {
    // Subtract (samples_left + output_size_samples_) from sampleMemory.
    controller_->AddSampleMemory(
        -(samples_left + dchecked_cast<int>(output_size_samples_)));
  }

  // Check if it is time to play a DTMF event.
  if (dtmf_buffer_->GetEvent(
          static_cast<uint32_t>(end_timestamp + generated_noise_samples),
          dtmf_event)) {
    *play_dtmf = true;
  }

  // Get instruction.
  RTC_DCHECK(sync_buffer_.get());
  RTC_DCHECK(expand_.get());
  // Note: unlike the estimate above, this one uses the full elapsed tick count
  // (no "- 1").
  generated_noise_samples =
      generated_noise_stopwatch_
          ? generated_noise_stopwatch_->ElapsedTicks() * output_size_samples_ +
                controller_->noise_fast_forward()
          : 0;
  NetEqController::NetEqStatus status;
  status.packet_buffer_info.dtx_or_cng =
      packet_buffer_->ContainsDtxOrCngPacket(decoder_database_.get());
  status.packet_buffer_info.num_samples =
      packet_buffer_->NumSamplesInBuffer(decoder_frame_length_);
  status.packet_buffer_info.span_samples = packet_buffer_->GetSpanSamples(
      decoder_frame_length_, last_output_sample_rate_hz_, false);
  status.packet_buffer_info.span_samples_wait_time =
      packet_buffer_->GetSpanSamples(decoder_frame_length_,
                                     last_output_sample_rate_hz_, true);
  status.packet_buffer_info.num_packets = packet_buffer_->NumPacketsInBuffer();
  status.target_timestamp = sync_buffer_->end_timestamp();
  status.expand_mutefactor = expand_->MuteFactor(0);
  status.last_packet_samples = decoder_frame_length_;
  status.last_mode = last_mode_;
  status.play_dtmf = *play_dtmf;
  status.generated_noise_samples = generated_noise_samples;
  status.sync_buffer_samples = sync_buffer_->FutureLength();
  if (packet) {
    status.next_packet = {
        .timestamp = packet->timestamp,
        .is_dtx = packet->frame && packet->frame->IsDtxPacket(),
        .is_cng = decoder_database_->IsComfortNoise(packet->payload_type)};
  }
  *operation = controller_->GetDecision(status, &reset_decoder_);

  // Disallow time stretching if this packet is DTX, because such a decision may
  // be based on earlier buffer level estimate, as we do not update buffer level
  // during DTX. When we have a better way to update buffer level during DTX,
  // this can be discarded.
  if (packet && packet->frame && packet->frame->IsDtxPacket() &&
      (*operation == Operation::kMerge ||
       *operation == Operation::kAccelerate ||
       *operation == Operation::kFastAccelerate ||
       *operation == Operation::kPreemptiveExpand)) {
    *operation = Operation::kNormal;
  }

  if (action_override) {
    // Use the provided action instead of the decision NetEq decided on.
    *operation = *action_override;
  }
  // Check if we already have enough samples in the `sync_buffer_`. If so,
  // change decision to normal, unless the decision was merge, accelerate, or
  // preemptive expand.
  if (samples_left >= dchecked_cast<int>(output_size_samples_) &&
      *operation != Operation::kMerge && *operation != Operation::kAccelerate &&
      *operation != Operation::kFastAccelerate &&
      *operation != Operation::kPreemptiveExpand) {
    *operation = Operation::kNormal;
    return 0;
  }

  controller_->ExpandDecision(*operation);
  if ((last_mode_ == Mode::kCodecPlc) && (*operation != Operation::kExpand)) {
    // Getting out of the PLC expand mode, reporting interruptions.
    // NetEq PLC reports this metrics in expand.cc
    stats_->EndExpandEvent(fs_hz_);
  }

  // Check conditions for reset.
  if (new_codec_ || *operation == Operation::kUndefined) {
    // The only valid reason to get kUndefined is that new_codec_ is set.
    RTC_DCHECK(new_codec_);
    if (*play_dtmf && !packet) {
      timestamp_ = dtmf_event->timestamp;
    } else {
      if (!packet) {
        RTC_LOG(LS_ERROR) << "Packet missing where it shouldn't.";
        return -1;
      }
      timestamp_ = packet->timestamp;
      if (*operation == Operation::kRfc3389CngNoPacket &&
          decoder_database_->IsComfortNoise(packet->payload_type)) {
        // Change decision to CNG packet, since we do have a CNG packet, but it
        // was considered too early to use. Now, use it anyway.
        *operation = Operation::kRfc3389Cng;
      } else if (*operation != Operation::kRfc3389Cng) {
        *operation = Operation::kNormal;
      }
    }
    // Adjust `sync_buffer_` timestamp before setting `end_timestamp` to the
    // new value.
    sync_buffer_->IncreaseEndTimestamp(timestamp_ - end_timestamp);
    end_timestamp = timestamp_;
    new_codec_ = false;
    controller_->SoftReset();
    stats_->ResetMcu();
  }

  // Number of samples to request from the packet buffer; some operations
  // below raise it above one output block.
  size_t required_samples = output_size_samples_;
  const size_t samples_10_ms = static_cast<size_t>(80 * fs_mult_);
  const size_t samples_20_ms = 2 * samples_10_ms;
  const size_t samples_30_ms = 3 * samples_10_ms;

  switch (*operation) {
    case Operation::kExpand: {
      timestamp_ = end_timestamp;
      return 0;
    }
    case Operation::kRfc3389CngNoPacket:
    case Operation::kCodecInternalCng: {
      return 0;
    }
    case Operation::kDtmf: {
      // TODO(hlundin): Write test for this.
      // Update timestamp.
      timestamp_ = end_timestamp;
      generated_noise_samples =
          generated_noise_stopwatch_
              ? generated_noise_stopwatch_->ElapsedTicks() *
                        output_size_samples_ +
                    controller_->noise_fast_forward()
              : 0;
      if (generated_noise_samples > 0 && last_mode_ != Mode::kDtmf) {
        // Make a jump in timestamp due to the recently played comfort noise.
        uint32_t timestamp_jump =
            static_cast<uint32_t>(generated_noise_samples);
        sync_buffer_->IncreaseEndTimestamp(timestamp_jump);
        timestamp_ += timestamp_jump;
      }
      return 0;
    }
    case Operation::kAccelerate:
    case Operation::kFastAccelerate: {
      // In order to do an accelerate we need at least 30 ms of audio data.
      if (samples_left >= static_cast<int>(samples_30_ms)) {
        // Already have enough data, so we do not need to extract any more.
        controller_->set_sample_memory(samples_left);
        controller_->set_prev_time_scale(true);
        return 0;
      } else if (samples_left >= static_cast<int>(samples_10_ms) &&
                 decoder_frame_length_ >= samples_30_ms) {
        // Avoid decoding more data as it might overflow the playout buffer.
        *operation = Operation::kNormal;
        return 0;
      } else if (samples_left < static_cast<int>(samples_20_ms) &&
                 decoder_frame_length_ < samples_30_ms) {
        // Build up decoded data by decoding at least 20 ms of audio data. Do
        // not perform accelerate yet, but wait until we only need to do one
        // decoding.
        required_samples = 2 * output_size_samples_;
        *operation = Operation::kNormal;
      }
      // If none of the above is true, we have one of two possible situations:
      // (1) 20 ms <= samples_left < 30 ms and decoder_frame_length_ < 30 ms; or
      // (2) samples_left < 10 ms and decoder_frame_length_ >= 30 ms.
      // In either case, we move on with the accelerate decision, and decode one
      // frame now.
      break;
    }
    case Operation::kPreemptiveExpand: {
      // In order to do a preemptive expand we need at least 30 ms of decoded
      // audio data.
      if ((samples_left >= static_cast<int>(samples_30_ms)) ||
          (samples_left >= static_cast<int>(samples_10_ms) &&
           decoder_frame_length_ >= samples_30_ms)) {
        // Already have enough data, so we do not need to extract any more.
        // Or, avoid decoding more data as it might overflow the playout buffer.
        // Still try preemptive expand, though.
        controller_->set_sample_memory(samples_left);
        controller_->set_prev_time_scale(true);
        return 0;
      }
      if (samples_left < static_cast<int>(samples_20_ms) &&
          decoder_frame_length_ < samples_30_ms) {
        // Build up decoded data by decoding at least 20 ms of audio data.
        // Still try to perform preemptive expand.
        required_samples = 2 * output_size_samples_;
      }
      // Move on with the preemptive expand decision.
      break;
    }
    case Operation::kMerge: {
      required_samples =
          std::max(merge_->RequiredFutureSamples(), required_samples);
      break;
    }
    default: {
      // Do nothing.
    }
  }

  // Get packets from buffer.
  int extracted_samples = 0;
  if (packet) {
    sync_buffer_->IncreaseEndTimestamp(packet->timestamp - end_timestamp);
    extracted_samples = ExtractPackets(required_samples, packet_list);
    if (extracted_samples < 0) {
      return kPacketBufferCorruption;
    }
  }

  if (*operation == Operation::kAccelerate ||
      *operation == Operation::kFastAccelerate ||
      *operation == Operation::kPreemptiveExpand) {
    controller_->set_sample_memory(samples_left + extracted_samples);
    controller_->set_prev_time_scale(true);
  }

  if (*operation == Operation::kAccelerate ||
      *operation == Operation::kFastAccelerate) {
    // Check that we have enough data (30ms) to do accelerate.
    if (extracted_samples + samples_left < static_cast<int>(samples_30_ms)) {
      // TODO(hlundin): Write test for this.
      // Not enough, do normal operation instead.
      *operation = Operation::kNormal;
    }
  }

  timestamp_ = sync_buffer_->end_timestamp();
  return 0;
}

// Decodes the packets in `packet_list` into `decoded_buffer_`.
//
// If the first packet is not comfort noise, its payload type selects the
// active decoder; a decoder change may trigger SetSampleRateAndChannels() and
// resets the end/playout timestamps to `timestamp_`. `decoded_length` receives
// the number of decoded samples and `speech_type` the type reported by the
// decoder (kSpeech unless a decoder call overwrites it).
//
// On a decoder error `*decoded_length` is set to 0, `*operation` is changed to
// kExpand, and kDecoderErrorCode or kOtherDecoderError is returned. Returns
// kDecoderNotFound when the payload type has no decoder; otherwise the value
// returned by DecodeCng() / DecodeLoop().
int NetEqImpl::Decode(PacketList* packet_list,
                      Operation* operation,
                      int* decoded_length,
                      AudioDecoder::SpeechType* speech_type) {
  *speech_type = AudioDecoder::kSpeech;

  // When packet_list is empty, we may be in kCodecInternalCng mode, and for
  // that we use current active decoder.
  AudioDecoder* decoder = decoder_database_->GetActiveDecoder();

  if (!packet_list->empty()) {
    const Packet& packet = packet_list->front();
    uint8_t payload_type = packet.payload_type;
    if (!decoder_database_->IsComfortNoise(payload_type)) {
      decoder = decoder_database_->GetDecoder(payload_type);
      RTC_DCHECK(decoder);
      if (!decoder) {
        RTC_LOG(LS_WARNING)
            << "Unknown payload type " << static_cast<int>(payload_type);
        packet_list->clear();
        return kDecoderNotFound;
      }
      bool decoder_changed;
      decoder_database_->SetActiveDecoder(payload_type, &decoder_changed);
      if (decoder_changed) {
        // We have a new decoder. Re-init some values.
        const DecoderDatabase::DecoderInfo* decoder_info =
            decoder_database_->GetDecoderInfo(payload_type);
        RTC_DCHECK(decoder_info);
        if (!decoder_info) {
          RTC_LOG(LS_WARNING)
              << "Unknown payload type " << static_cast<int>(payload_type);
          packet_list->clear();
          return kDecoderNotFound;
        }
        // If sampling rate or number of channels has changed, we need to make
        // a reset.
        if (decoder_info->SampleRateHz() != fs_hz_ ||
            decoder->Channels() != algorithm_buffer_->Channels()) {
          SetSampleRateAndChannels(decoder_info->SampleRateHz(),
                                   decoder->Channels());
        }
        sync_buffer_->set_end_timestamp(timestamp_);
        playout_timestamp_ = timestamp_;
      }
    }
  }

  if (reset_decoder_) {
    // TODO(hlundin): Write test for this.
    if (decoder)
      decoder->Reset();

    // Reset comfort noise decoder.
    ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
    if (cng_decoder)
      cng_decoder->Reset();

    reset_decoder_ = false;
  }

  *decoded_length = 0;
  // Update codec-internal PLC state.
  if ((*operation == Operation::kMerge) && decoder && decoder->HasDecodePlc()) {
    decoder->DecodePlc(1, &decoded_buffer_[*decoded_length]);
  }

  int return_value;
  if (*operation == Operation::kCodecInternalCng) {
    RTC_DCHECK(packet_list->empty());
    return_value = DecodeCng(decoder, decoded_length, speech_type);
  } else {
    return_value = DecodeLoop(packet_list, *operation, decoder, decoded_length,
                              speech_type);
  }

  if (*decoded_length < 0) {
    // Error returned from the decoder.
    *decoded_length = 0;
    sync_buffer_->IncreaseEndTimestamp(
        static_cast<uint32_t>(decoder_frame_length_));
    int error_code = 0;
    if (decoder)
      error_code = decoder->ErrorCode();
    if (error_code != 0) {
      // Got some error code from the decoder.
      return_value = kDecoderErrorCode;
      RTC_LOG(LS_WARNING) << "Decoder returned error code: " << error_code;
    } else {
      // Decoder does not implement error codes. Return generic error.
      return_value = kOtherDecoderError;
      RTC_LOG(LS_WARNING) << "Decoder error (no error code)";
    }
    *operation = Operation::kExpand;  // Do expansion to get data instead.
  }
  if (*speech_type != AudioDecoder::kComfortNoise) {
    // Don't increment timestamp if codec returned CNG speech type
    // since in this case, we will increment the CNGplayedTS counter.
    // Increase with number of samples per channel.
    RTC_DCHECK(*decoded_length == 0 ||
               (decoder && decoder->Channels() == sync_buffer_->Channels()));
    sync_buffer_->IncreaseEndTimestamp(
        *decoded_length / static_cast<int>(sync_buffer_->Channels()));
  }
  return return_value;
}

// Produces codec-internal comfort noise by calling `decoder->Decode()` with an
// empty payload until at least `output_size_samples_` samples have been
// written to `decoded_buffer_`.
//
// `*decoded_length` is advanced by the number of samples produced, or set to
// -1 if `decoder` is null or a Decode() call returns a non-positive length (in
// both cases the function itself returns 0 and the caller inspects
// `*decoded_length`). Returns kDecodedTooMuch if the accumulated length
// exceeds `decoded_buffer_length_`.
int NetEqImpl::DecodeCng(AudioDecoder* decoder,
                         int* decoded_length,
                         AudioDecoder::SpeechType* speech_type) {
  if (!decoder) {
    // This happens when active decoder is not defined.
    *decoded_length = -1;
    return 0;
  }

  while (*decoded_length < dchecked_cast<int>(output_size_samples_)) {
    const int length = decoder->Decode(
        nullptr, 0, fs_hz_,
        (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t),
        &decoded_buffer_[*decoded_length], speech_type);
    if (length > 0) {
      *decoded_length += length;
    } else {
      // Error.
      RTC_LOG(LS_WARNING) << "Failed to decode CNG";
      *decoded_length = -1;
      break;
    }
    if (*decoded_length > static_cast<int>(decoded_buffer_length_)) {
      // Guard against overflow.
      RTC_LOG(LS_WARNING) << "Decoded too much CNG.";
      return kDecodedTooMuch;
    }
  }
  // NOTE(review): on the error path above `*decoded_length` is -1 when it is
  // passed here - confirm GeneratedNoiseSamples() tolerates that value.
  stats_->GeneratedNoiseSamples(*decoded_length);
  return 0;
}

// Decodes packets from the front of `packet_list` into `decoded_buffer_`
// until the list is empty or a comfort-noise packet is at the front.
//
// Each decoded packet is popped from the list and its `packet_info`, if any,
// is appended to `last_decoded_packet_infos_`. `*decoded_length` accumulates
// the decoded sample count and `decoder_frame_length_` is updated to the
// per-channel size of the most recent frame. On a decode failure
// `*decoded_length` is set to -1 and both `packet_list` and
// `last_decoded_packet_infos_` are cleared (the function still returns 0).
// Returns kDecodedTooMuch if the accumulated length exceeds
// `decoded_buffer_length_`.
int NetEqImpl::DecodeLoop(PacketList* packet_list,
                          const Operation& operation,
                          AudioDecoder* decoder,
                          int* decoded_length,
                          AudioDecoder::SpeechType* speech_type) {
  RTC_DCHECK(last_decoded_packet_infos_.empty());

  // Do decoding.
  while (!packet_list->empty() && !decoder_database_->IsComfortNoise(
                                      packet_list->front().payload_type)) {
    RTC_DCHECK(decoder);  // At this point, we must have a decoder object.
    // The number of channels in the `sync_buffer_` should be the same as the
    // number decoder channels.
    RTC_DCHECK_EQ(sync_buffer_->Channels(), decoder->Channels());
    RTC_DCHECK_GE(decoded_buffer_length_, kMaxFrameSize * decoder->Channels());
    RTC_DCHECK(operation == Operation::kNormal ||
               operation == Operation::kAccelerate ||
               operation == Operation::kFastAccelerate ||
               operation == Operation::kMerge ||
               operation == Operation::kPreemptiveExpand);

    auto opt_result = packet_list->front().frame->Decode(
        ArrayView<int16_t>(&decoded_buffer_[*decoded_length],
                           decoded_buffer_length_ - *decoded_length));
    if (packet_list->front().packet_info) {
      last_decoded_packet_infos_.push_back(*packet_list->front().packet_info);
    }
    packet_list->pop_front();
    if (opt_result) {
      const auto& result = *opt_result;
      *speech_type = result.speech_type;
      if (result.num_decoded_samples > 0) {
        *decoded_length += dchecked_cast<int>(result.num_decoded_samples);
        // Update `decoder_frame_length_` with number of samples per channel.
        decoder_frame_length_ =
            result.num_decoded_samples / decoder->Channels();
      }
    } else {
      // Error.
      // TODO(ossu): What to put here?
      RTC_LOG(LS_WARNING) << "Decode error";
      *decoded_length = -1;
      last_decoded_packet_infos_.clear();
      packet_list->clear();
      break;
    }
    if (*decoded_length > dchecked_cast<int>(decoded_buffer_length_)) {
      // Guard against overflow.
      RTC_LOG(LS_WARNING) << "Decoded too much.";
      packet_list->clear();
      return kDecodedTooMuch;
    }
  }  // End of decode loop.

  // If the list is not empty at this point, either a decoding error terminated
  // the while-loop, or list must hold exactly one CNG packet.
  RTC_DCHECK(
      packet_list->empty() || *decoded_length < 0 ||
      (packet_list->size() == 1 &&
       decoder_database_->IsComfortNoise(packet_list->front().payload_type)));
  return 0;
}

// Runs the Normal operation on `decoded_length` samples from `decoded_buffer`,
// writing the result to `algorithm_buffer_`, and updates `last_mode_`:
// kNormal when any samples were decoded, overridden by kCodecInternalCng when
// `speech_type` is comfort noise (or when the previous mode was
// kCodecInternalCng and nothing was decoded). Resets the DTMF tone generator
// unless `play_dtmf` is set.
void NetEqImpl::DoNormal(const int16_t* decoded_buffer,
                         size_t decoded_length,
                         AudioDecoder::SpeechType speech_type,
                         bool play_dtmf) {
  RTC_DCHECK(normal_.get());
  normal_->Process(decoded_buffer, decoded_length, last_mode_,
                   algorithm_buffer_.get());
  if (decoded_length != 0) {
    last_mode_ = Mode::kNormal;
  }

  // If last packet was decoded as an inband CNG, set mode to CNG instead.
  if ((speech_type == AudioDecoder::kComfortNoise) ||
      ((last_mode_ == Mode::kCodecInternalCng) && (decoded_length == 0))) {
    // TODO(hlundin): Remove second part of || statement above.
    last_mode_ = Mode::kCodecInternalCng;
  }

  if (!play_dtmf) {
    dtmf_tone_generator_->Reset();
  }
}

// Runs the Merge operation on the decoded data, writing the result to
// `algorithm_buffer_`, and reports the difference between the merged length
// and the decoded per-channel length to the statistics as an expand-length
// correction. Sets `last_mode_` to kMerge (or kCodecInternalCng when
// `speech_type` is comfort noise), resets `expand_`, and resets the DTMF tone
// generator unless `play_dtmf` is set.
void NetEqImpl::DoMerge(int16_t* decoded_buffer,
                        size_t decoded_length,
                        AudioDecoder::SpeechType speech_type,
                        bool play_dtmf) {
  RTC_DCHECK(merge_.get());
  size_t new_length =
      merge_->Process(decoded_buffer, decoded_length, algorithm_buffer_.get());
  // Correction can be negative.
  int expand_length_correction =
      dchecked_cast<int>(new_length) -
      dchecked_cast<int>(decoded_length / algorithm_buffer_->Channels());

  // Update in-call and post-call statistics.
  if (expand_->Muted() || last_decoded_type_ == AudioDecoder::kComfortNoise) {
    // Expand generates only noise.
    stats_->ExpandedNoiseSamplesCorrection(expand_length_correction);
  } else {
    // Expansion generates more than only noise.
    stats_->ExpandedVoiceSamplesCorrection(expand_length_correction);
  }

  last_mode_ = Mode::kMerge;
  // If last packet was decoded as an inband CNG, set mode to CNG instead.
  if (speech_type == AudioDecoder::kComfortNoise) {
    last_mode_ = Mode::kCodecInternalCng;
  }
  expand_->Reset();
  if (!play_dtmf) {
    dtmf_tone_generator_->Reset();
  }
}

// Asks the active decoder to generate packet-loss concealment audio for the
// samples missing from the next output block and appends it to the
// `sync_buffer_`.
//
// Returns false, leaving the buffers untouched, if there is no active decoder
// or the decoder produced no audio; the caller then falls back to the regular
// expand. Returns true after pushing the audio, updating the statistics, and
// setting `last_mode_` to kCodecPlc.
bool NetEqImpl::DoCodecPlc() {
  AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
  if (!decoder) {
    return false;
  }
  const size_t channels = algorithm_buffer_->Channels();
  // Samples still needed to fill one output block beyond the overlap region.
  const size_t requested_samples_per_channel =
      output_size_samples_ -
      (sync_buffer_->FutureLength() - expand_->overlap_length());
  concealment_audio_.Clear();
  decoder->GeneratePlc(requested_samples_per_channel, &concealment_audio_);
  if (concealment_audio_.empty()) {
    // Nothing produced. Resort to regular expand.
    return false;
  }
  RTC_CHECK_GE(concealment_audio_.size(),
               requested_samples_per_channel * channels);
  sync_buffer_->PushBackInterleaved(concealment_audio_);
  RTC_DCHECK_NE(algorithm_buffer_->Channels(), 0);
  const size_t concealed_samples_per_channel =
      concealment_audio_.size() / channels;

  // Update in-call and post-call statistics.
  const bool is_new_concealment_event = (last_mode_ != Mode::kCodecPlc);
  // All-zero concealment audio is accounted as noise, anything else as voice.
  if (std::all_of(concealment_audio_.cbegin(), concealment_audio_.cend(),
                  [](int16_t i) { return i == 0; })) {
    // Expand operation generates only noise.
    stats_->ExpandedNoiseSamples(concealed_samples_per_channel,
                                 is_new_concealment_event);
  } else {
    // Expand operation generates more than only noise.
    stats_->ExpandedVoiceSamples(concealed_samples_per_channel,
                                 is_new_concealment_event);
  }
  last_mode_ = Mode::kCodecPlc;
  if (!generated_noise_stopwatch_) {
    // Start a new stopwatch since we may be covering for a lost CNG packet.
    generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
  }
  return true;
}

// Runs the Expand operation repeatedly, appending each result to the
// `sync_buffer_`, until the buffer holds at least `output_size_samples_`
// samples beyond the overlap region. Sets `last_mode_` to kExpand and updates
// the concealment statistics for every iteration.
//
// Returns the negative value from `expand_->Process()` on failure (the failing
// iteration's output is not pushed), otherwise 0. On success the DTMF tone
// generator is reset unless `play_dtmf` is set, and the generated-noise
// stopwatch is started if it is not already running.
int NetEqImpl::DoExpand(bool play_dtmf) {
  while ((sync_buffer_->FutureLength() - expand_->overlap_length()) <
         output_size_samples_) {
    algorithm_buffer_->Clear();
    int return_value = expand_->Process(algorithm_buffer_.get());
    size_t length = algorithm_buffer_->Size();
    bool is_new_concealment_event = (last_mode_ != Mode::kExpand);

    // Update in-call and post-call statistics.
    if (expand_->Muted() || last_decoded_type_ == AudioDecoder::kComfortNoise) {
      // Expand operation generates only noise.
      stats_->ExpandedNoiseSamples(length, is_new_concealment_event);
    } else {
      // Expand operation generates more than only noise.
      stats_->ExpandedVoiceSamples(length, is_new_concealment_event);
    }

    last_mode_ = Mode::kExpand;

    if (return_value < 0) {
      return return_value;
    }

    sync_buffer_->PushBack(*algorithm_buffer_);
    algorithm_buffer_->Clear();
  }
  if (!play_dtmf) {
    dtmf_tone_generator_->Reset();
  }

  if (!generated_noise_stopwatch_) {
    // Start a new stopwatch since we may be covering for a lost CNG packet.
    generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
  }

  return 0;
}

// Runs the Accelerate operation on the decoded data, writing the result to
// `algorithm_buffer_`.
//
// The operation needs 30 ms of audio per channel; if fewer samples were
// decoded, the shortfall is borrowed from the end of the `sync_buffer_`,
// prepended to `decoded_buffer`, and written back to the `sync_buffer_` after
// processing. `last_mode_` is set from the Accelerate return code, and
// overridden with kCodecInternalCng when `speech_type` is comfort noise.
//
// Returns kAccelerateError if Accelerate reports an error, otherwise 0.
int NetEqImpl::DoAccelerate(int16_t* decoded_buffer,
                            size_t decoded_length,
                            AudioDecoder::SpeechType speech_type,
                            bool play_dtmf,
                            bool fast_accelerate) {
  const size_t required_samples =
      static_cast<size_t>(240 * fs_mult_);  // Must have 30 ms.
  size_t borrowed_samples_per_channel = 0;
  size_t num_channels = algorithm_buffer_->Channels();
  size_t decoded_length_per_channel = decoded_length / num_channels;
  if (decoded_length_per_channel < required_samples) {
    // Must move data from the `sync_buffer_` in order to get 30 ms.
    borrowed_samples_per_channel =
        static_cast<int>(required_samples - decoded_length_per_channel);
    // Shift the decoded data back to make room for the borrowed samples at
    // the front of `decoded_buffer`.
    memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
            decoded_buffer, sizeof(int16_t) * decoded_length);
    sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel,
                                         decoded_buffer);
    decoded_length = required_samples * num_channels;
  }

  size_t samples_removed = 0;
  Accelerate::ReturnCodes return_code =
      accelerate_->Process(decoded_buffer, decoded_length, fast_accelerate,
                           algorithm_buffer_.get(), &samples_removed);
  stats_->AcceleratedSamples(samples_removed);
  switch (return_code) {
    case Accelerate::kSuccess:
      last_mode_ = Mode::kAccelerateSuccess;
      break;
    case Accelerate::kSuccessLowEnergy:
      last_mode_ = Mode::kAccelerateLowEnergy;
      break;
    case Accelerate::kNoStretch:
      last_mode_ = Mode::kAccelerateFail;
      break;
    case Accelerate::kError:
      // TODO(hlundin): Map to Modes::kError instead?
      last_mode_ = Mode::kAccelerateFail;
      return kAccelerateError;
  }

  if (borrowed_samples_per_channel > 0) {
    // Copy borrowed samples back to the `sync_buffer_`.
    size_t length = algorithm_buffer_->Size();
    if (length < borrowed_samples_per_channel) {
      // This destroys the beginning of the buffer, but will not cause any
      // problems.
      sync_buffer_->ReplaceAtIndex(
          *algorithm_buffer_,
          sync_buffer_->Size() - borrowed_samples_per_channel);
      sync_buffer_->PushFrontZeros(borrowed_samples_per_channel - length);
      algorithm_buffer_->PopFront(length);
      RTC_DCHECK(algorithm_buffer_->Empty());
    } else {
      sync_buffer_->ReplaceAtIndex(
          *algorithm_buffer_, borrowed_samples_per_channel,
          sync_buffer_->Size() - borrowed_samples_per_channel);
      algorithm_buffer_->PopFront(borrowed_samples_per_channel);
    }
  }

  // If last packet was decoded as an inband CNG, set mode to CNG instead.
  if (speech_type == AudioDecoder::kComfortNoise) {
    last_mode_ = Mode::kCodecInternalCng;
  }
  if (!play_dtmf) {
    dtmf_tone_generator_->Reset();
  }
  expand_->Reset();
  return 0;
}

// Runs the PreemptiveExpand operation on the decoded data, writing the result
// to `algorithm_buffer_`.
//
// The operation needs 30 ms of audio per channel; if fewer samples were
// decoded, the shortfall is borrowed from the end of the `sync_buffer_`,
// prepended to `decoded_buffer`, and written back to the `sync_buffer_` after
// processing. The number of borrowed samples that were already played out is
// passed on to PreemptiveExpand. `last_mode_` is set from the return code, and
// overridden with kCodecInternalCng when `speech_type` is comfort noise.
//
// Returns kPreemptiveExpandError if PreemptiveExpand reports an error,
// otherwise 0.
int NetEqImpl::DoPreemptiveExpand(int16_t* decoded_buffer,
                                  size_t decoded_length,
                                  AudioDecoder::SpeechType speech_type,
                                  bool play_dtmf) {
  const size_t required_samples =
      static_cast<size_t>(240 * fs_mult_);  // Must have 30 ms.
  size_t num_channels = algorithm_buffer_->Channels();
  size_t borrowed_samples_per_channel = 0;
  size_t old_borrowed_samples_per_channel = 0;
  size_t decoded_length_per_channel = decoded_length / num_channels;
  if (decoded_length_per_channel < required_samples) {
    // Must move data from the `sync_buffer_` in order to get 30 ms.
    borrowed_samples_per_channel =
        required_samples - decoded_length_per_channel;
    // Calculate how many of these were already played out.
    old_borrowed_samples_per_channel =
        (borrowed_samples_per_channel > sync_buffer_->FutureLength())
            ? (borrowed_samples_per_channel - sync_buffer_->FutureLength())
            : 0;
    // Shift the decoded data back to make room for the borrowed samples at
    // the front of `decoded_buffer`.
    memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
            decoded_buffer, sizeof(int16_t) * decoded_length);
    sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel,
                                         decoded_buffer);
    decoded_length = required_samples * num_channels;
  }

  size_t samples_added = 0;
  PreemptiveExpand::ReturnCodes return_code = preemptive_expand_->Process(
      decoded_buffer, decoded_length, old_borrowed_samples_per_channel,
      algorithm_buffer_.get(), &samples_added);
  stats_->PreemptiveExpandedSamples(samples_added);
  switch (return_code) {
    case PreemptiveExpand::kSuccess:
      last_mode_ = Mode::kPreemptiveExpandSuccess;
      break;
    case PreemptiveExpand::kSuccessLowEnergy:
      last_mode_ = Mode::kPreemptiveExpandLowEnergy;
      break;
    case PreemptiveExpand::kNoStretch:
      last_mode_ = Mode::kPreemptiveExpandFail;
      break;
    case PreemptiveExpand::kError:
      // TODO(hlundin): Map to Modes::kError instead?
      last_mode_ = Mode::kPreemptiveExpandFail;
      return kPreemptiveExpandError;
  }

  if (borrowed_samples_per_channel > 0) {
    // Copy borrowed samples back to the `sync_buffer_`.
    sync_buffer_->ReplaceAtIndex(
        *algorithm_buffer_, borrowed_samples_per_channel,
        sync_buffer_->Size() - borrowed_samples_per_channel);
    algorithm_buffer_->PopFront(borrowed_samples_per_channel);
  }

  // If last packet was decoded as an inband CNG, set mode to CNG instead.
  if (speech_type == AudioDecoder::kComfortNoise) {
    last_mode_ = Mode::kCodecInternalCng;
  }
  if (!play_dtmf) {
    dtmf_tone_generator_->Reset();
  }
  expand_->Reset();
  return 0;
}

// Generates one output block of RFC 3389 comfort noise into
// `algorithm_buffer_`. If `packet_list` is non-empty, its first packet must be
// a comfort-noise packet and is used to update the noise parameters first.
//
// Returns 0 on success; kOtherError if the packet is not comfort noise; the
// negated internal error code if the parameter update fails (the output is
// then zero-filled); kComfortNoiseErrorCode or kUnknownRtpPayloadType if
// generation fails. Once generation has been attempted, `last_mode_` is
// kRfc3389Cng and `expand_` has been reset, regardless of the outcome.
int NetEqImpl::DoRfc3389Cng(PacketList* packet_list, bool play_dtmf) {
  if (!packet_list->empty()) {
    // Must have exactly one SID frame at this point.
    RTC_DCHECK_EQ(packet_list->size(), 1);
    const Packet& packet = packet_list->front();
    if (!decoder_database_->IsComfortNoise(packet.payload_type)) {
      RTC_LOG(LS_ERROR) << "Trying to decode non-CNG payload as CNG.";
      return kOtherError;
    }
    if (comfort_noise_->UpdateParameters(packet) ==
        ComfortNoise::kInternalError) {
      algorithm_buffer_->Zeros(output_size_samples_);
      return -comfort_noise_->internal_error_code();
    }
  }
  int cn_return =
      comfort_noise_->Generate(output_size_samples_, algorithm_buffer_.get());
  expand_->Reset();
  last_mode_ = Mode::kRfc3389Cng;
  if (!play_dtmf) {
    dtmf_tone_generator_->Reset();
  }
  if (cn_return == ComfortNoise::kInternalError) {
    RTC_LOG(LS_WARNING) << "Comfort noise generator returned error code: "
                        << comfort_noise_->internal_error_code();
    return kComfortNoiseErrorCode;
  } else if (cn_return == ComfortNoise::kUnknownPayloadType) {
    return kUnknownRtpPayloadType;
  }
  return 0;
}

// Passes codec-generated comfort noise through the Normal operation into
// `algorithm_buffer_`, sets `last_mode_` to kCodecInternalCng, and resets
// `expand_`.
void NetEqImpl::DoCodecInternalCng(const int16_t* decoded_buffer,
                                   size_t decoded_length) {
  RTC_DCHECK(normal_.get());
  normal_->Process(decoded_buffer, decoded_length, last_mode_,
                   algorithm_buffer_.get());
  last_mode_ = Mode::kCodecInternalCng;
  expand_->Reset();
}

int NetEqImpl::DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) {
  // This block of the code and the block further down, handling `dtmf_switch`
  // are commented out. Otherwise playing out-of-band DTMF would fail in VoE
  // test, DtmfTest.ManualSuccessfullySendsOutOfBandTelephoneEvents. This is
  // equivalent to `dtmf_switch` always be false.
  //
  // See http://webrtc-codereview.appspot.com/1195004/ for discussion
  // On this issue. This change might cause some glitches at the point of
  // switch from audio to DTMF. Issue 1545 is filed to track this.
1796 // 1797 // bool dtmf_switch = false; 1798 // if ((last_mode_ != Modes::kDtmf) && 1799 // dtmf_tone_generator_->initialized()) { 1800 // // Special case; see below. 1801 // // We must catch this before calling Generate, since `initialized` is 1802 // // modified in that call. 1803 // dtmf_switch = true; 1804 // } 1805 1806 int dtmf_return_value = 0; 1807 if (!dtmf_tone_generator_->initialized()) { 1808 // Initialize if not already done. 1809 dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no, 1810 dtmf_event.volume); 1811 } 1812 1813 if (dtmf_return_value == 0) { 1814 // Generate DTMF signal. 1815 dtmf_return_value = dtmf_tone_generator_->Generate(output_size_samples_, 1816 algorithm_buffer_.get()); 1817 } 1818 1819 if (dtmf_return_value < 0) { 1820 algorithm_buffer_->Zeros(output_size_samples_); 1821 return dtmf_return_value; 1822 } 1823 1824 // if (dtmf_switch) { 1825 // // This is the special case where the previous operation was DTMF 1826 // // overdub, but the current instruction is "regular" DTMF. We must make 1827 // // sure that the DTMF does not have any discontinuities. The first DTMF 1828 // // sample that we generate now must be played out immediately, therefore 1829 // // it must be copied to the speech buffer. 1830 // // TODO(hlundin): This code seems incorrect. (Legacy.) Write test and 1831 // // verify correct operation. 1832 // RTC_DCHECK_NOTREACHED(); 1833 // // Must generate enough data to replace all of the `sync_buffer_` 1834 // // "future". 
1835 // int required_length = sync_buffer_->FutureLength(); 1836 // RTC_DCHECK(dtmf_tone_generator_->initialized()); 1837 // dtmf_return_value = dtmf_tone_generator_->Generate(required_length, 1838 // algorithm_buffer_); 1839 // RTC_DCHECK((size_t) required_length == algorithm_buffer_->Size()); 1840 // if (dtmf_return_value < 0) { 1841 // algorithm_buffer_->Zeros(output_size_samples_); 1842 // return dtmf_return_value; 1843 // } 1844 // 1845 // // Overwrite the "future" part of the speech buffer with the new DTMF 1846 // // data. 1847 // // TODO(hlundin): It seems that this overwriting has gone lost. 1848 // // Not adapted for multi-channel yet. 1849 // RTC_DCHECK(algorithm_buffer_->Channels() == 1); 1850 // if (algorithm_buffer_->Channels() != 1) { 1851 // RTC_LOG(LS_WARNING) << "DTMF not supported for more than one channel"; 1852 // return kStereoNotSupported; 1853 // } 1854 // // Shuffle the remaining data to the beginning of algorithm buffer. 1855 // algorithm_buffer_->PopFront(sync_buffer_->FutureLength()); 1856 // } 1857 1858 sync_buffer_->IncreaseEndTimestamp( 1859 static_cast<uint32_t>(output_size_samples_)); 1860 expand_->Reset(); 1861 last_mode_ = Mode::kDtmf; 1862 1863 // Set to false because the DTMF is already in the algorithm buffer. 1864 *play_dtmf = false; 1865 return 0; 1866 } 1867 1868 int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, 1869 size_t num_channels, 1870 int16_t* output) const { 1871 size_t out_index = 0; 1872 size_t overdub_length = output_size_samples_; // Default value. 1873 1874 if (sync_buffer_->dtmf_index() > sync_buffer_->next_index()) { 1875 // Special operation for transition from "DTMF only" to "DTMF overdub". 
1876 out_index = 1877 std::min(sync_buffer_->dtmf_index() - sync_buffer_->next_index(), 1878 output_size_samples_); 1879 overdub_length = output_size_samples_ - out_index; 1880 } 1881 1882 AudioMultiVector dtmf_output(num_channels); 1883 int dtmf_return_value = 0; 1884 if (!dtmf_tone_generator_->initialized()) { 1885 dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no, 1886 dtmf_event.volume); 1887 } 1888 if (dtmf_return_value == 0) { 1889 dtmf_return_value = 1890 dtmf_tone_generator_->Generate(overdub_length, &dtmf_output); 1891 RTC_DCHECK_EQ(overdub_length, dtmf_output.Size()); 1892 } 1893 dtmf_output.ReadInterleaved(overdub_length, &output[out_index]); 1894 return dtmf_return_value < 0 ? dtmf_return_value : 0; 1895 } 1896 1897 int NetEqImpl::ExtractPackets(size_t required_samples, 1898 PacketList* packet_list) { 1899 bool first_packet = true; 1900 bool next_packet_available = false; 1901 1902 const Packet* next_packet = packet_buffer_->PeekNextPacket(); 1903 RTC_DCHECK(next_packet); 1904 if (!next_packet) { 1905 RTC_LOG(LS_ERROR) << "Packet buffer unexpectedly empty."; 1906 return -1; 1907 } 1908 uint32_t first_timestamp = next_packet->timestamp; 1909 size_t extracted_samples = 0; 1910 1911 // Packet extraction loop. 1912 do { 1913 timestamp_ = next_packet->timestamp; 1914 std::optional<Packet> packet = packet_buffer_->GetNextPacket(); 1915 // `next_packet` may be invalid after the `packet_buffer_` operation. 1916 next_packet = nullptr; 1917 if (!packet) { 1918 RTC_LOG(LS_ERROR) << "Should always be able to extract a packet here"; 1919 RTC_DCHECK_NOTREACHED(); // Should always be able to extract a packet 1920 // here. 
1921 return -1; 1922 } 1923 const uint64_t waiting_time_ms = packet->waiting_time->ElapsedMs(); 1924 stats_->StoreWaitingTime(waiting_time_ms); 1925 RTC_DCHECK(!packet->empty()); 1926 1927 if (first_packet) { 1928 first_packet = false; 1929 if (nack_enabled_) { 1930 RTC_DCHECK(nack_); 1931 // TODO(henrik.lundin): Should we update this for all decoded packets? 1932 nack_->UpdateLastDecodedPacket(packet->sequence_number, 1933 packet->timestamp); 1934 } 1935 } 1936 1937 const bool has_cng_packet = 1938 decoder_database_->IsComfortNoise(packet->payload_type); 1939 // Store number of extracted samples. 1940 size_t packet_duration = 0; 1941 if (packet->frame) { 1942 packet_duration = packet->frame->Duration(); 1943 // TODO(ossu): Is this the correct way to track Opus FEC packets? 1944 if (packet->priority.codec_level > 0) { 1945 stats_->SecondaryDecodedSamples(dchecked_cast<int>(packet_duration)); 1946 } 1947 } else if (!has_cng_packet) { 1948 RTC_LOG(LS_WARNING) << "Unknown payload type " 1949 << static_cast<int>(packet->payload_type); 1950 RTC_DCHECK_NOTREACHED(); 1951 } 1952 1953 if (packet_duration == 0) { 1954 // Decoder did not return a packet duration. Assume that the packet 1955 // contains the same number of samples as the previous one. 1956 packet_duration = decoder_frame_length_; 1957 } 1958 extracted_samples = packet->timestamp - first_timestamp + packet_duration; 1959 1960 RTC_DCHECK(controller_); 1961 TimeDelta processing_time = TimeDelta::Zero(); 1962 1963 if (packet->packet_info.has_value() && 1964 !packet->packet_info->receive_time().IsMinusInfinity()) { 1965 processing_time = 1966 env_.clock().CurrentTime() - packet->packet_info->receive_time(); 1967 } 1968 1969 stats_->JitterBufferDelay( 1970 packet_duration, waiting_time_ms, controller_->TargetLevelMs(), 1971 controller_->UnlimitedTargetLevelMs(), processing_time.us()); 1972 1973 // Check what packet is available next. 
1974 next_packet = packet_buffer_->PeekNextPacket(); 1975 next_packet_available = 1976 next_packet && next_packet->payload_type == packet->payload_type && 1977 next_packet->timestamp == packet->timestamp + packet_duration && 1978 !has_cng_packet; 1979 1980 packet_list->push_back(std::move(*packet)); // Store packet in list. 1981 packet = std::nullopt; // Ensure it's never used after the move. 1982 } while (extracted_samples < required_samples && next_packet_available); 1983 1984 if (extracted_samples > 0) { 1985 // Delete old packets only when we are going to decode something. Otherwise, 1986 // we could end up in the situation where we never decode anything, since 1987 // all incoming packets are considered too old but the buffer will also 1988 // never be flooded and flushed. 1989 packet_buffer_->DiscardAllOldPackets(timestamp_); 1990 } 1991 1992 return dchecked_cast<int>(extracted_samples); 1993 } 1994 1995 void NetEqImpl::UpdatePlcComponents(int fs_hz, size_t channels) { 1996 // Delete objects and create new ones. 1997 expand_.reset(expand_factory_->Create(background_noise_.get(), 1998 sync_buffer_.get(), &random_vector_, 1999 stats_.get(), fs_hz, channels)); 2000 merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get())); 2001 } 2002 2003 void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) { 2004 RTC_LOG(LS_VERBOSE) << "SetSampleRateAndChannels " << fs_hz << " " 2005 << channels; 2006 RTC_CHECK(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || 2007 fs_hz == 48000); 2008 RTC_CHECK_GT(channels, 0); 2009 RTC_CHECK_LE(channels, kMaxNumberOfAudioChannels); 2010 2011 // The format must fit in an AudioFrame. Situations where this could 2012 // theoratically happen but aren't supported is e.g. if receiving 24 channels 2013 // of 10ms 48 kHz buffers. 
2014 output_size_samples_ = SampleRateToDefaultChannelSize(fs_hz); 2015 RTC_CHECK_LE(channels * output_size_samples_, 2016 AudioFrame::kMaxDataSizeSamples); 2017 2018 // Before changing the sample rate, end and report any ongoing expand event. 2019 stats_->EndExpandEvent(fs_hz_); 2020 fs_hz_ = fs_hz; 2021 fs_mult_ = fs_hz / 8000; 2022 RTC_DCHECK_EQ(output_size_samples_, 2023 static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_)); 2024 decoder_frame_length_ = 3 * output_size_samples_; // Initialize to 30ms. 2025 2026 last_mode_ = Mode::kNormal; 2027 2028 ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); 2029 if (cng_decoder) 2030 cng_decoder->Reset(); 2031 2032 // Delete algorithm buffer and create a new one. 2033 algorithm_buffer_.reset(new AudioMultiVector(channels)); 2034 2035 // Delete sync buffer and create a new one. 2036 sync_buffer_.reset(new SyncBuffer(channels, kSyncBufferSize * fs_mult_)); 2037 2038 // Delete BackgroundNoise object and create a new one. 2039 background_noise_.reset(new BackgroundNoise(channels)); 2040 2041 // Reset random vector. 2042 random_vector_.Reset(); 2043 2044 UpdatePlcComponents(fs_hz, channels); 2045 2046 // Move index so that we create a small set of future samples (all 0). 2047 sync_buffer_->set_next_index(sync_buffer_->next_index() - 2048 expand_->overlap_length()); 2049 2050 normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_, 2051 expand_.get(), stats_.get())); 2052 accelerate_.reset( 2053 accelerate_factory_->Create(fs_hz, channels, *background_noise_)); 2054 preemptive_expand_.reset(preemptive_expand_factory_->Create( 2055 fs_hz, channels, *background_noise_, expand_->overlap_length())); 2056 2057 // Delete ComfortNoise object and create a new one. 2058 comfort_noise_.reset( 2059 new ComfortNoise(fs_hz, decoder_database_.get(), sync_buffer_.get())); 2060 2061 // Verify that `decoded_buffer_` is long enough. 
2062 if (decoded_buffer_length_ < kMaxFrameSize * channels) { 2063 // Reallocate to larger size. 2064 decoded_buffer_length_ = kMaxFrameSize * channels; 2065 decoded_buffer_.reset(new int16_t[decoded_buffer_length_]); 2066 } 2067 RTC_CHECK(controller_) << "Unexpectedly found no NetEqController"; 2068 controller_->SetSampleRate(fs_hz_, output_size_samples_); 2069 } 2070 2071 NetEqImpl::OutputType NetEqImpl::LastOutputType() { 2072 RTC_DCHECK(expand_.get()); 2073 if (last_mode_ == Mode::kCodecInternalCng || 2074 last_mode_ == Mode::kRfc3389Cng) { 2075 return OutputType::kCNG; 2076 } else if (last_mode_ == Mode::kExpand && expand_->MuteFactor(0) == 0) { 2077 // Expand mode has faded down to background noise only (very long expand). 2078 return OutputType::kPLCCNG; 2079 } else if (last_mode_ == Mode::kExpand) { 2080 return OutputType::kPLC; 2081 } else if (last_mode_ == Mode::kCodecPlc) { 2082 return OutputType::kCodecPLC; 2083 } else { 2084 return OutputType::kNormalSpeech; 2085 } 2086 } 2087 2088 NetEqController::PacketArrivedInfo NetEqImpl::ToPacketArrivedInfo( 2089 const Packet& packet) const { 2090 const DecoderDatabase::DecoderInfo* dec_info = 2091 decoder_database_->GetDecoderInfo(packet.payload_type); 2092 2093 NetEqController::PacketArrivedInfo info; 2094 info.is_cng_or_dtmf = 2095 dec_info && (dec_info->IsComfortNoise() || dec_info->IsDtmf()); 2096 info.packet_length_samples = 2097 packet.frame ? packet.frame->Duration() : decoder_frame_length_; 2098 info.main_timestamp = packet.timestamp; 2099 info.main_sequence_number = packet.sequence_number; 2100 info.is_dtx = packet.frame && packet.frame->IsDtxPacket(); 2101 return info; 2102 } 2103 2104 } // namespace webrtc