packet_buffer.cc (15762B)
/*
 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/video_coding/packet_buffer.h"

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <utility>
#include <variant>
#include <vector>

#include "api/video/video_codec_type.h"
#include "api/video/video_frame_type.h"
#include "common_video/h264/h264_common.h"
#include "modules/rtp_rtcp/source/rtp_packet_received.h"
#include "modules/rtp_rtcp/source/rtp_video_header.h"
#include "modules/video_coding/codecs/h264/include/h264_globals.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/mod_ops.h"
#include "rtc_base/numerics/sequence_number_util.h"

namespace webrtc {
namespace video_coding {

// Builds a Packet from a received RTP packet. The marker bit, payload type and
// RTP timestamp are copied from `rtp_packet`; `sequence_number` (the unwrapped
// sequence number) and `video_header` are stored as given by the caller.
// `times_nacked` starts at -1.
PacketBuffer::Packet::Packet(const RtpPacketReceived& rtp_packet,
                             int64_t sequence_number,
                             const RTPVideoHeader& video_header)
    : marker_bit(rtp_packet.Marker()),
      payload_type(rtp_packet.PayloadType()),
      sequence_number(sequence_number),
      timestamp(rtp_packet.Timestamp()),
      times_nacked(-1),
      video_header(video_header) {
  // Unwrapped sequence number should match the original wrapped one.
  RTC_DCHECK_EQ(static_cast<uint16_t>(sequence_number),
                rtp_packet.SequenceNumber());
}

// Creates a buffer with `start_buffer_size` slots that ExpandBufferSize() may
// grow up to `max_buffer_size` slots. Debug builds check that the start size
// does not exceed the maximum and that both sizes are powers of two.
PacketBuffer::PacketBuffer(size_t start_buffer_size, size_t max_buffer_size)
    : max_size_(max_buffer_size),
      first_seq_num_(0),
      first_packet_received_(false),
      is_cleared_to_first_seq_num_(false),
      buffer_(start_buffer_size),
      sps_pps_idr_is_h264_keyframe_(false) {
  RTC_DCHECK_LE(start_buffer_size, max_buffer_size);
  // Buffer size must always be a power of 2.
  RTC_DCHECK((start_buffer_size & (start_buffer_size - 1)) == 0);
  RTC_DCHECK((max_buffer_size & (max_buffer_size - 1)) == 0);
}

// Releases all stored packets via Clear().
PacketBuffer::~PacketBuffer() {
  Clear();
}

// Stores `packet` in slot `seq_num % buffer_.size()` and returns the packets
// of any frames that FindFrames() can assemble starting at this sequence
// number.
//
// The returned result is empty when the packet is dropped, which happens when
// it is older than a point previously cleared with ClearTo() or when the same
// sequence number is already stored. If the slot is taken by a different
// packet and the buffer cannot grow enough to free it, the whole buffer is
// cleared and `result.buffer_cleared` is set.
PacketBuffer::InsertResult PacketBuffer::InsertPacket(
    std::unique_ptr<PacketBuffer::Packet> packet) {
  PacketBuffer::InsertResult result;

  uint16_t seq_num = packet->seq_num();
  size_t index = seq_num % buffer_.size();

  if (!first_packet_received_) {
    first_seq_num_ = seq_num;
    first_packet_received_ = true;
  } else if (AheadOf(first_seq_num_, seq_num)) {
    // If we have explicitly cleared past this packet then it's old,
    // don't insert it, just silently ignore it.
    if (is_cleared_to_first_seq_num_) {
      return result;
    }

    if (ForwardDiff<uint16_t>(first_seq_num_, seq_num) >= max_size_ &&
        ForwardDiff<uint16_t>(seq_num, first_seq_num_) >= max_size_ / 2) {
      // Large negative jump in rtp sequence number: clear the buffer and treat
      // latest packet as the new first packet.
      Clear();
      // Clear() reset the flag; this packet is the new first packet.
      first_packet_received_ = true;
    }

    first_seq_num_ = seq_num;
  }

  if (buffer_[index] != nullptr) {
    // Duplicate packet: drop it. `packet` is destroyed when this function
    // returns.
    if (buffer_[index]->seq_num() == packet->seq_num()) {
      return result;
    }

    // The slot is occupied by a different packet; keep expanding the buffer
    // until the slot for `seq_num` is free or expansion is no longer possible.
    while (ExpandBufferSize() && buffer_[seq_num % buffer_.size()] != nullptr) {
    }
    // The buffer size may have changed, so recompute the slot.
    index = seq_num % buffer_.size();

    // Packet buffer is still full since we were unable to expand the buffer.
    if (buffer_[index] != nullptr) {
      // Clear the buffer, drop the packet, and report the clear through
      // `result.buffer_cleared` so that the caller can request a new keyframe.
      RTC_LOG(LS_WARNING) << "Clear PacketBuffer and request key frame.";
      ClearInternal();
      result.buffer_cleared = true;
      return result;
    }
  }

  packet->continuous = false;
  buffer_[index] = std::move(packet);

  UpdateMissingPackets(seq_num);

  // Prune the padding records that precede `seq_num - buffer_.size() / 4` in
  // the set's ordering.
  received_padding_.erase(
      received_padding_.begin(),
      received_padding_.lower_bound(seq_num - (buffer_.size() / 4)));

  result.packets = FindFrames(seq_num);
  return result;
}

// Removes stored packets up to and including `seq_num`, advances
// `first_seq_num_` to `seq_num + 1`, and marks the buffer as explicitly
// cleared so that InsertPacket() ignores older packets from now on.
// Returns the number of packets that were removed from the buffer.
uint32_t PacketBuffer::ClearTo(uint16_t seq_num) {
  // We have already cleared past this sequence number, no need to do anything.
  // NOTE(review): unlike InsertPacket(), this early-out does not consult
  // `is_cleared_to_first_seq_num_` - confirm that this is intended.
  if (AheadOf<uint16_t>(first_seq_num_, seq_num)) {
    return 0;
  }

  // The packet buffer may have been cleared between the time a frame was
  // created and the time it was returned; nothing to do in that case.
  if (!first_packet_received_)
    return 0;

  // Avoid iterating over the buffer more than once by capping the number of
  // iterations to `buffer_.size()`.
  ++seq_num;
  uint32_t num_cleared_packets = 0;
  size_t diff = ForwardDiff<uint16_t>(first_seq_num_, seq_num);
  size_t iterations = std::min(diff, buffer_.size());
  for (size_t i = 0; i < iterations; ++i) {
    auto& stored = buffer_[first_seq_num_ % buffer_.size()];
    // Only drop the slot if the packet in it is behind the (incremented)
    // `seq_num`; a slot may hold a newer packet that maps to the same index.
    if (stored != nullptr && AheadOf<uint16_t>(seq_num, stored->seq_num())) {
      ++num_cleared_packets;
      stored = nullptr;
    }
    ++first_seq_num_;
  }

  // If `diff` is larger than `iterations` it means that we don't increment
  // `first_seq_num_` until we reach `seq_num`, so we set it here.
  first_seq_num_ = seq_num;

  is_cleared_to_first_seq_num_ = true;
  missing_packets_.erase(missing_packets_.begin(),
                         missing_packets_.lower_bound(seq_num));

  received_padding_.erase(received_padding_.begin(),
                          received_padding_.lower_bound(seq_num));

  return num_cleared_packets;
}

// Drops all stored packets and resets the tracking state; see ClearInternal().
void PacketBuffer::Clear() {
  ClearInternal();
}

// Records `seq_num` as a padding packet, updates the missing-packet tracking,
// and returns the packets of any frames that FindFrames() can assemble
// starting at the sequence number following the padding packet.
PacketBuffer::InsertResult PacketBuffer::InsertPadding(uint16_t seq_num) {
  PacketBuffer::InsertResult result;
  UpdateMissingPackets(seq_num);
  received_padding_.insert(seq_num);
  result.packets = FindFrames(static_cast<uint16_t>(seq_num + 1));
  return result;
}

// Makes FindFrames() treat an H.264 frame as a keyframe only when SPS, PPS
// and IDR NAL units have all been seen for it.
void PacketBuffer::ForceSpsPpsIdrIsH264Keyframe() {
  sps_pps_idr_is_h264_keyframe_ = true;
}

// Makes FindFrames() treat an H.264 frame as a keyframe as soon as an IDR NAL
// unit has been seen for it, regardless of SPS/PPS.
void PacketBuffer::ResetSpsPpsIdrIsH264Keyframe() {
  sps_pps_idr_is_h264_keyframe_ = false;
}

// Releases every stored packet and resets the bookkeeping: the
// first-packet and cleared-to flags become false and the newest inserted
// sequence number, the missing-packet set and the padding set are emptied.
// The buffer keeps its current size and `first_seq_num_` is not modified.
void PacketBuffer::ClearInternal() {
  for (auto& entry : buffer_) {
    entry = nullptr;
  }

  first_packet_received_ = false;
  is_cleared_to_first_seq_num_ = false;
  newest_inserted_seq_num_.reset();
  missing_packets_.clear();
  received_padding_.clear();
}

// Doubles the number of slots (capped at `max_size_`) and moves every stored
// packet to slot `seq_num % new_size` of the new buffer.
// Returns false, leaving the buffer untouched, if it is already at
// `max_size_`; returns true otherwise.
bool PacketBuffer::ExpandBufferSize() {
  if (buffer_.size() == max_size_) {
    RTC_LOG(LS_WARNING) << "PacketBuffer is already at max size (" << max_size_
                        << "), failed to increase size.";
    return false;
  }

  size_t new_size = std::min(max_size_, 2 * buffer_.size());
  std::vector<std::unique_ptr<Packet>> new_buffer(new_size);
  for (std::unique_ptr<Packet>& entry : buffer_) {
    if (entry != nullptr) {
      new_buffer[entry->seq_num() % new_size] = std::move(entry);
    }
  }
  buffer_ = std::move(new_buffer);
  RTC_LOG(LS_INFO) << "PacketBuffer size expanded to " << new_size;
  return true;
}

// Returns true if the packet stored for `seq_num` could extend a continuous
// run of packets: it is either flagged as the first packet of a frame, or the
// preceding slot holds the packet with the previous sequence number and the
// same timestamp, and that packet is already marked continuous.
// Returns false if the slot for `seq_num` is empty or holds another packet.
bool PacketBuffer::PotentialNewFrame(uint16_t seq_num) const {
  size_t index = seq_num % buffer_.size();
  // Index of the preceding slot, wrapping around at the start of the buffer.
  int prev_index = index > 0 ? index - 1 : buffer_.size() - 1;
  const auto& entry = buffer_[index];
  const auto& prev_entry = buffer_[prev_index];

  if (entry == nullptr)
    return false;
  if (entry->seq_num() != seq_num)
    return false;
  if (entry->is_first_packet_in_frame())
    return true;
  if (prev_entry == nullptr)
    return false;
  if (prev_entry->seq_num() != static_cast<uint16_t>(entry->seq_num() - 1))
    return false;
  if (prev_entry->timestamp != entry->timestamp)
    return false;
  if (prev_entry->continuous)
    return true;

  return false;
}

// Scans forward from `seq_num`, marking packets as continuous for as long as
// PotentialNewFrame() holds (sequence numbers recorded as padding are
// skipped). Whenever a packet flagged as last-in-frame is reached, the start
// of that frame is located by walking backward through the buffer; if the
// frame is accepted, its packets are moved out of the buffer and appended to
// the returned vector in sequence-number order.
//
// At most `buffer_.size()` sequence numbers are examined per call. The scan
// stops early at the first sequence number that is neither padding nor a
// potential continuation, and also when an H.264 frame has to be held back
// (see the early returns below).
std::vector<std::unique_ptr<PacketBuffer::Packet>> PacketBuffer::FindFrames(
    uint16_t seq_num) {
  std::vector<std::unique_ptr<PacketBuffer::Packet>> found_frames;
  // Where the scan began; used as the lower bound when pruning
  // `received_padding_` after a frame has been handed out.
  auto start = seq_num;

  for (size_t i = 0; i < buffer_.size(); ++i) {
    if (received_padding_.find(seq_num) != received_padding_.end()) {
      seq_num += 1;
      continue;
    }

    if (!PotentialNewFrame(seq_num)) {
      break;
    }

    size_t index = seq_num % buffer_.size();
    buffer_[index]->continuous = true;

    // If all packets of the frame are continuous, find the first packet of the
    // frame and add all packets of the frame to the returned packets.
    if (buffer_[index]->is_last_packet_in_frame()) {
      uint16_t start_seq_num = seq_num;

      // Find the start index by searching backward until the packet with
      // the `frame_begin` flag is set.
      int start_index = index;
      size_t tested_packets = 0;
      int64_t frame_timestamp = buffer_[start_index]->timestamp;

      // Identify H.264 keyframes by means of SPS, PPS, and IDR.
      bool is_generic = buffer_[start_index]->video_header.generic.has_value();
      bool is_h264_descriptor =
          (buffer_[start_index]->codec() == kVideoCodecH264) && !is_generic;
      bool has_h264_sps = false;
      bool has_h264_pps = false;
      bool has_h264_idr = false;
      bool is_h264_keyframe = false;
      int idr_width = -1;
      int idr_height = -1;
      bool full_frame_found = false;
      while (true) {
        // GFD is only attached to first packet of frame, so update check on
        // every packet.
        if (buffer_[start_index] != nullptr) {
          is_generic = buffer_[start_index]->video_header.generic.has_value();
          if (is_generic) {
            is_h264_descriptor = false;
          }
        }
        ++tested_packets;

        // Non-H.264 path: stop at the packet flagged as first-in-frame, or at
        // an empty slot, in which case the frame is incomplete.
        if (!is_h264_descriptor) {
          if (buffer_[start_index] == nullptr ||
              buffer_[start_index]->is_first_packet_in_frame()) {
            full_frame_found = buffer_[start_index] != nullptr;
            break;
          }
        }

        if (is_h264_descriptor) {
          const auto* h264_header = std::get_if<RTPVideoHeaderH264>(
              &buffer_[start_index]->video_header.video_type_header);
          // The codec is H.264 but the packet carries no H.264 header: give up
          // and return what has been found so far.
          if (!h264_header)
            return found_frames;

          for (const NaluInfo& nalu : h264_header->nalus) {
            if (nalu.type == H264::NaluType::kSps) {
              has_h264_sps = true;
            } else if (nalu.type == H264::NaluType::kPps) {
              has_h264_pps = true;
            } else if (nalu.type == H264::NaluType::kIdr) {
              has_h264_idr = true;
            }
          }
          if ((sps_pps_idr_is_h264_keyframe_ && has_h264_idr && has_h264_sps &&
               has_h264_pps) ||
              (!sps_pps_idr_is_h264_keyframe_ && has_h264_idr)) {
            is_h264_keyframe = true;
            // Store the resolution of the key frame, taken from the packet
            // with the smallest index that has a valid resolution; typically
            // that is the IDR or SPS packet. There may be packets preceding
            // this packet; the IDR's resolution will be applied to them.
            if (buffer_[start_index]->width() > 0 &&
                buffer_[start_index]->height() > 0) {
              idr_width = buffer_[start_index]->width();
              idr_height = buffer_[start_index]->height();
            }
          }
        }

        // Every slot has been visited once; stop to avoid looping forever.
        if (tested_packets == buffer_.size())
          break;

        // Step to the preceding slot, wrapping around at the buffer start.
        start_index = start_index > 0 ? start_index - 1 : buffer_.size() - 1;

        // In the case of H264 we don't have a frame_begin bit (yes,
        // `frame_begin` might be set to true but that is a lie). So instead
        // we traverse backwards as long as we have a previous packet and
        // the timestamp of that packet is the same as this one. This may cause
        // the PacketBuffer to hand out incomplete frames.
        // See: https://bugs.chromium.org/p/webrtc/issues/detail?id=7106
        if (is_h264_descriptor &&
            (buffer_[start_index] == nullptr ||
             buffer_[start_index]->timestamp != frame_timestamp)) {
          break;
        }

        // The preceding slot belongs to this frame; extend the frame start.
        --start_seq_num;
      }

      if (is_h264_descriptor) {
        // Warn if this is an unsafe frame.
        if (has_h264_idr && (!has_h264_sps || !has_h264_pps)) {
          RTC_LOG(LS_WARNING)
              << "Received H.264-IDR frame "
                 "(SPS: "
              << has_h264_sps << ", PPS: " << has_h264_pps << "). Treating as "
              << (sps_pps_idr_is_h264_keyframe_ ? "delta" : "key")
              << " frame since WebRTC-SpsPpsIdrIsH264Keyframe is "
              << (sps_pps_idr_is_h264_keyframe_ ? "enabled." : "disabled");
        }

        // Now that we have decided whether to treat this frame as a key frame
        // or delta frame in the frame buffer, we update the field that
        // determines if the RtpFrameObject is a key frame or delta frame.
        const size_t first_packet_index = start_seq_num % buffer_.size();
        if (is_h264_keyframe) {
          buffer_[first_packet_index]->video_header.frame_type =
              VideoFrameType::kVideoFrameKey;
          if (idr_width > 0 && idr_height > 0) {
            // IDR frame was finalized and we have the correct resolution for
            // IDR; update first packet to have same resolution as IDR.
            buffer_[first_packet_index]->video_header.width = idr_width;
            buffer_[first_packet_index]->video_header.height = idr_height;
          }
        } else {
          buffer_[first_packet_index]->video_header.frame_type =
              VideoFrameType::kVideoFrameDelta;
        }

        // If this is not a keyframe, make sure there are no gaps in the packet
        // sequence numbers up until this point.
        if (!is_h264_keyframe && missing_packets_.upper_bound(start_seq_num) !=
                                     missing_packets_.begin()) {
          return found_frames;
        }
      }

      if (is_h264_descriptor || full_frame_found) {
        const uint16_t end_seq_num = seq_num + 1;
        // Use uint16_t type to handle sequence number wrap around case.
        uint16_t num_packets = end_seq_num - start_seq_num;
        found_frames.reserve(found_frames.size() + num_packets);
        for (uint16_t j = start_seq_num; j != end_seq_num; ++j) {
          std::unique_ptr<Packet>& packet = buffer_[j % buffer_.size()];
          RTC_DCHECK(packet);
          RTC_DCHECK_EQ(j, packet->seq_num());
          // Ensure frame boundary flags are properly set.
          packet->video_header.is_first_packet_in_frame = (j == start_seq_num);
          packet->video_header.is_last_packet_in_frame = (j == seq_num);
          // Moving the unique_ptr out leaves the buffer slot empty.
          found_frames.push_back(std::move(packet));
        }

        // The frame has been handed out: drop the missing-packet entries up to
        // `seq_num` and the padding entries between `start` and `seq_num`.
        missing_packets_.erase(missing_packets_.begin(),
                               missing_packets_.upper_bound(seq_num));
        received_padding_.erase(received_padding_.lower_bound(start),
                                received_padding_.upper_bound(seq_num));
      }
    }
    ++seq_num;
  }
  return found_frames;
}

// Keeps `missing_packets_` up to date for a newly seen sequence number.
// If `seq_num` is ahead of the newest sequence number seen so far, entries
// that precede `seq_num - kMaxPaddingAge` are dropped and the sequence numbers
// skipped between the previous newest one and `seq_num` are recorded as
// missing. Otherwise `seq_num` itself is removed from the missing set.
void PacketBuffer::UpdateMissingPackets(uint16_t seq_num) {
  // First sequence number seen since construction or the last clear.
  if (!newest_inserted_seq_num_)
    newest_inserted_seq_num_ = seq_num;

  const int kMaxPaddingAge = 1000;
  if (AheadOf(seq_num, *newest_inserted_seq_num_)) {
    uint16_t old_seq_num = seq_num - kMaxPaddingAge;
    auto erase_to = missing_packets_.lower_bound(old_seq_num);
    missing_packets_.erase(missing_packets_.begin(), erase_to);

    // Guard against inserting a large amount of missing packets if there is a
    // jump in the sequence number.
    if (AheadOf(old_seq_num, *newest_inserted_seq_num_))
      *newest_inserted_seq_num_ = old_seq_num;

    // Record every sequence number strictly between the previous newest one
    // and `seq_num` as missing.
    ++*newest_inserted_seq_num_;
    while (AheadOf(seq_num, *newest_inserted_seq_num_)) {
      missing_packets_.insert(*newest_inserted_seq_num_);
      ++*newest_inserted_seq_num_;
    }
  } else {
    missing_packets_.erase(seq_num);
  }
}

}  // namespace video_coding
}  // namespace webrtc