h26x_packet_buffer.cc (18256B)
1 /* 2 * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/video_coding/h26x_packet_buffer.h" 12 13 #include <algorithm> 14 #include <cstddef> 15 #include <cstdint> 16 #include <cstring> 17 #include <limits> 18 #include <memory> 19 #include <optional> 20 #include <string> 21 #include <utility> 22 #include <vector> 23 24 #include "absl/algorithm/container.h" 25 #include "api/array_view.h" 26 #include "api/video/video_codec_type.h" 27 #include "api/video/video_frame_type.h" 28 #include "common_video/h264/h264_common.h" 29 #include "common_video/h264/pps_parser.h" 30 #include "common_video/h264/sps_parser.h" 31 #include "modules/rtp_rtcp/source/rtp_video_header.h" 32 #include "modules/video_coding/codecs/h264/include/h264_globals.h" 33 #include "modules/video_coding/h264_sprop_parameter_sets.h" 34 #include "rtc_base/checks.h" 35 #include "rtc_base/copy_on_write_buffer.h" 36 #include "rtc_base/logging.h" 37 #include "rtc_base/numerics/sequence_number_util.h" 38 #ifdef RTC_ENABLE_H265 39 #include "common_video/h265/h265_common.h" 40 #endif 41 42 namespace webrtc { 43 namespace { 44 45 int64_t EuclideanMod(int64_t n, int64_t div) { 46 RTC_DCHECK_GT(div, 0); 47 return (n %= div) < 0 ? n + div : n; 48 } 49 50 bool IsFirstPacketOfFragment(const RTPVideoHeaderH264& h264_header) { 51 return !h264_header.nalus.empty(); 52 } 53 54 bool BeginningOfIdr(const H26xPacketBuffer::Packet& packet) { 55 const auto& h264_header = 56 std::get<RTPVideoHeaderH264>(packet.video_header.video_type_header); 57 const bool contains_idr_nalu = 58 absl::c_any_of(h264_header.nalus, [](const auto& nalu_info) { 59 return nalu_info.type == H264::NaluType::kIdr; 60 }); 61 switch (h264_header.packetization_type) { 62 case kH264StapA: 63 case kH264SingleNalu: { 64 return contains_idr_nalu; 65 } 66 case kH264FuA: { 67 return contains_idr_nalu && IsFirstPacketOfFragment(h264_header); 68 } 69 } 70 } 71 72 bool HasSps(const H26xPacketBuffer::Packet& packet) { 73 auto& h264_header = 74 std::get<RTPVideoHeaderH264>(packet.video_header.video_type_header); 75 return absl::c_any_of(h264_header.nalus, [](const auto& nalu_info) { 76 return nalu_info.type == H264::NaluType::kSps; 77 }); 78 } 79 80 int64_t* GetContinuousSequence(ArrayView<int64_t> last_continuous, 81 int64_t unwrapped_seq_num) { 82 for (int64_t& last : last_continuous) { 83 if (unwrapped_seq_num - 1 == last) { 84 return &last; 85 } 86 } 87 return nullptr; 88 } 89 90 #ifdef RTC_ENABLE_H265 91 bool HasVps(const H26xPacketBuffer::Packet& packet) { 92 std::vector<H265::NaluIndex> nalu_indices = 93 H265::FindNaluIndices(packet.video_payload); 94 return absl::c_any_of((nalu_indices), [&packet]( 95 const H265::NaluIndex& nalu_index) { 96 return H265::ParseNaluType( 97 packet.video_payload.cdata()[nalu_index.payload_start_offset]) == 98 H265::NaluType::kVps; 99 }); 100 } 101 #endif 102 103 } // namespace 104 105 H26xPacketBuffer::H26xPacketBuffer(bool h264_idr_only_keyframes_allowed) 106 : h264_idr_only_keyframes_allowed_(h264_idr_only_keyframes_allowed) { 107 last_continuous_in_sequence_.fill(std::numeric_limits<int64_t>::min()); 108 } 109 110 H26xPacketBuffer::InsertResult H26xPacketBuffer::InsertPadding( 111 uint16_t unwrapped_seq_num) { 112 int64_t* last_continuous_unwrapped_seq_num = 113 GetContinuousSequence(last_continuous_in_sequence_, unwrapped_seq_num); 114 if (last_continuous_unwrapped_seq_num == nullptr) { 115 last_continuous_in_sequence_[last_continuous_in_sequence_index_] = 116 unwrapped_seq_num; 117 last_continuous_unwrapped_seq_num = 118 &last_continuous_in_sequence_[last_continuous_in_sequence_index_]; 119 last_continuous_in_sequence_index_ = 120 (last_continuous_in_sequence_index_ + 1) % 121 last_continuous_in_sequence_.size(); 122 } else { 123 *last_continuous_unwrapped_seq_num = unwrapped_seq_num; 124 } 125 return {}; 126 } 127 128 H26xPacketBuffer::InsertResult H26xPacketBuffer::InsertPacket( 129 std::unique_ptr<Packet> packet) { 130 RTC_DCHECK(packet->video_header.codec == kVideoCodecH264 || 131 packet->video_header.codec == kVideoCodecH265); 132 133 InsertResult result; 134 135 int64_t unwrapped_seq_num = packet->sequence_number; 136 auto& packet_slot = GetPacket(unwrapped_seq_num); 137 if (packet_slot != nullptr && 138 AheadOrAt(packet_slot->timestamp, packet->timestamp)) { 139 // The incoming `packet` is old or a duplicate. 140 return result; 141 } else { 142 packet_slot = std::move(packet); 143 } 144 145 return FindFrames(unwrapped_seq_num); 146 } 147 148 std::unique_ptr<H26xPacketBuffer::Packet>& H26xPacketBuffer::GetPacket( 149 int64_t unwrapped_seq_num) { 150 return buffer_[EuclideanMod(unwrapped_seq_num, kBufferSize)]; 151 } 152 153 bool H26xPacketBuffer::BeginningOfStream( 154 const H26xPacketBuffer::Packet& packet) const { 155 if (packet.codec() == kVideoCodecH264) { 156 return HasSps(packet) || 157 (h264_idr_only_keyframes_allowed_ && BeginningOfIdr(packet)); 158 #ifdef RTC_ENABLE_H265 159 } else if (packet.codec() == kVideoCodecH265) { 160 return HasVps(packet); 161 #endif 162 } 163 RTC_DCHECK_NOTREACHED(); 164 return false; 165 } 166 167 H26xPacketBuffer::InsertResult H26xPacketBuffer::FindFrames( 168 int64_t unwrapped_seq_num) { 169 InsertResult result; 170 171 Packet* packet = GetPacket(unwrapped_seq_num).get(); 172 RTC_CHECK(packet != nullptr); 173 174 // Check if the packet is continuous or the beginning of a new coded video 175 // sequence. 176 int64_t* last_continuous_unwrapped_seq_num = 177 GetContinuousSequence(last_continuous_in_sequence_, unwrapped_seq_num); 178 if (last_continuous_unwrapped_seq_num == nullptr) { 179 if (!BeginningOfStream(*packet)) { 180 return result; 181 } 182 183 last_continuous_in_sequence_[last_continuous_in_sequence_index_] = 184 unwrapped_seq_num; 185 last_continuous_unwrapped_seq_num = 186 &last_continuous_in_sequence_[last_continuous_in_sequence_index_]; 187 last_continuous_in_sequence_index_ = 188 (last_continuous_in_sequence_index_ + 1) % 189 last_continuous_in_sequence_.size(); 190 } 191 192 for (int64_t seq_num = unwrapped_seq_num; 193 seq_num < unwrapped_seq_num + kBufferSize;) { 194 RTC_DCHECK_GE(seq_num, *last_continuous_unwrapped_seq_num); 195 196 // Packets that were never assembled into a completed frame will stay in 197 // the 'buffer_'. Check that the `packet` sequence number match the expected 198 // unwrapped sequence number. 199 if (seq_num != packet->sequence_number) { 200 return result; 201 } 202 203 *last_continuous_unwrapped_seq_num = seq_num; 204 // Last packet of the frame, try to assemble the frame. 205 if (packet->marker_bit) { 206 uint32_t rtp_timestamp = packet->timestamp; 207 208 // Iterate backwards to find where the frame starts. 209 for (int64_t seq_num_start = seq_num; 210 seq_num_start > seq_num - kBufferSize; --seq_num_start) { 211 auto& prev_packet = GetPacket(seq_num_start - 1); 212 213 if (prev_packet == nullptr || prev_packet->timestamp != rtp_timestamp) { 214 if (MaybeAssembleFrame(seq_num_start, seq_num, result)) { 215 // Frame was assembled, continue to look for more frames. 216 break; 217 } else { 218 // Frame was not assembled, no subsequent frame will be continuous. 219 return result; 220 } 221 } 222 } 223 } 224 225 seq_num++; 226 packet = GetPacket(seq_num).get(); 227 if (packet == nullptr) { 228 return result; 229 } 230 } 231 232 return result; 233 } 234 235 bool H26xPacketBuffer::MaybeAssembleFrame(int64_t start_seq_num_unwrapped, 236 int64_t end_sequence_number_unwrapped, 237 InsertResult& result) { 238 #ifdef RTC_ENABLE_H265 239 bool has_vps = false; 240 #endif 241 bool has_sps = false; 242 bool has_pps = false; 243 // Includes IDR, CRA and BLA for HEVC. 244 bool has_idr = false; 245 246 int width = -1; 247 int height = -1; 248 249 for (int64_t seq_num = start_seq_num_unwrapped; 250 seq_num <= end_sequence_number_unwrapped; ++seq_num) { 251 const auto& packet = GetPacket(seq_num); 252 if (packet->codec() == kVideoCodecH264) { 253 const auto& h264_header = 254 std::get<RTPVideoHeaderH264>(packet->video_header.video_type_header); 255 for (const auto& nalu : h264_header.nalus) { 256 has_idr |= nalu.type == H264::NaluType::kIdr; 257 has_sps |= nalu.type == H264::NaluType::kSps; 258 has_pps |= nalu.type == H264::NaluType::kPps; 259 } 260 if (has_idr) { 261 if (!h264_idr_only_keyframes_allowed_ && (!has_sps || !has_pps)) { 262 return false; 263 } 264 } 265 #ifdef RTC_ENABLE_H265 266 } else if (packet->codec() == kVideoCodecH265) { 267 std::vector<H265::NaluIndex> nalu_indices = 268 H265::FindNaluIndices(packet->video_payload); 269 for (const auto& nalu_index : nalu_indices) { 270 uint8_t nalu_type = H265::ParseNaluType( 271 packet->video_payload.cdata()[nalu_index.payload_start_offset]); 272 has_idr |= (nalu_type >= H265::NaluType::kBlaWLp && 273 nalu_type <= H265::NaluType::kRsvIrapVcl23); 274 has_vps |= nalu_type == H265::NaluType::kVps; 275 has_sps |= nalu_type == H265::NaluType::kSps; 276 has_pps |= nalu_type == H265::NaluType::kPps; 277 } 278 if (has_idr) { 279 if (!has_vps || !has_sps || !has_pps) { 280 return false; 281 } 282 } 283 #endif // RTC_ENABLE_H265 284 } 285 286 width = std::max<int>(packet->video_header.width, width); 287 height = std::max<int>(packet->video_header.height, height); 288 } 289 290 for (int64_t seq_num = start_seq_num_unwrapped; 291 seq_num <= end_sequence_number_unwrapped; ++seq_num) { 292 auto& packet = GetPacket(seq_num); 293 294 packet->video_header.is_first_packet_in_frame = 295 (seq_num == start_seq_num_unwrapped); 296 packet->video_header.is_last_packet_in_frame = 297 (seq_num == end_sequence_number_unwrapped); 298 299 if (packet->video_header.is_first_packet_in_frame) { 300 if (width > 0 && height > 0) { 301 packet->video_header.width = width; 302 packet->video_header.height = height; 303 } 304 305 packet->video_header.frame_type = has_idr 306 ? VideoFrameType::kVideoFrameKey 307 : VideoFrameType::kVideoFrameDelta; 308 } 309 310 // Only applies to H.264 because start code is inserted by depacktizer for 311 // H.265 and out-of-band parameter sets is not supported by H.265. 312 if (packet->codec() == kVideoCodecH264) { 313 if (!FixH264Packet(*packet)) { 314 // The buffer is not cleared actually, but a key frame request is 315 // needed. 316 result.buffer_cleared = true; 317 return false; 318 } 319 } 320 321 result.packets.push_back(std::move(packet)); 322 } 323 324 return true; 325 } 326 327 void H26xPacketBuffer::SetSpropParameterSets( 328 const std::string& sprop_parameter_sets) { 329 if (!h264_idr_only_keyframes_allowed_) { 330 RTC_LOG(LS_WARNING) << "Ignore sprop parameter sets because IDR only " 331 "keyframe is not allowed."; 332 return; 333 } 334 H264SpropParameterSets sprop_decoder; 335 if (!sprop_decoder.DecodeSprop(sprop_parameter_sets)) { 336 return; 337 } 338 InsertSpsPpsNalus(sprop_decoder.sps_nalu(), sprop_decoder.pps_nalu()); 339 } 340 341 void H26xPacketBuffer::InsertSpsPpsNalus(const std::vector<uint8_t>& sps, 342 const std::vector<uint8_t>& pps) { 343 RTC_CHECK(h264_idr_only_keyframes_allowed_); 344 constexpr size_t kNaluHeaderOffset = 1; 345 if (sps.size() < kNaluHeaderOffset) { 346 RTC_LOG(LS_WARNING) << "SPS size " << sps.size() << " is smaller than " 347 << kNaluHeaderOffset; 348 return; 349 } 350 if ((sps[0] & 0x1f) != H264::NaluType::kSps) { 351 RTC_LOG(LS_WARNING) << "SPS Nalu header missing"; 352 return; 353 } 354 if (pps.size() < kNaluHeaderOffset) { 355 RTC_LOG(LS_WARNING) << "PPS size " << pps.size() << " is smaller than " 356 << kNaluHeaderOffset; 357 return; 358 } 359 if ((pps[0] & 0x1f) != H264::NaluType::kPps) { 360 RTC_LOG(LS_WARNING) << "SPS Nalu header missing"; 361 return; 362 } 363 std::optional<SpsParser::SpsState> parsed_sps = SpsParser::ParseSps( 364 ArrayView<const uint8_t>(sps).subview(kNaluHeaderOffset)); 365 std::optional<PpsParser::PpsState> parsed_pps = PpsParser::ParsePps( 366 ArrayView<const uint8_t>(pps).subview(kNaluHeaderOffset)); 367 368 if (!parsed_sps) { 369 RTC_LOG(LS_WARNING) << "Failed to parse SPS."; 370 } 371 372 if (!parsed_pps) { 373 RTC_LOG(LS_WARNING) << "Failed to parse PPS."; 374 } 375 376 if (!parsed_pps || !parsed_sps) { 377 return; 378 } 379 380 SpsInfo sps_info; 381 sps_info.size = sps.size(); 382 sps_info.width = parsed_sps->width; 383 sps_info.height = parsed_sps->height; 384 uint8_t* sps_data = new uint8_t[sps_info.size]; 385 memcpy(sps_data, sps.data(), sps_info.size); 386 sps_info.payload.reset(sps_data); 387 sps_data_[parsed_sps->id] = std::move(sps_info); 388 389 PpsInfo pps_info; 390 pps_info.size = pps.size(); 391 pps_info.sps_id = parsed_pps->sps_id; 392 uint8_t* pps_data = new uint8_t[pps_info.size]; 393 memcpy(pps_data, pps.data(), pps_info.size); 394 pps_info.payload.reset(pps_data); 395 pps_data_[parsed_pps->id] = std::move(pps_info); 396 397 RTC_LOG(LS_INFO) << "Inserted SPS id " << parsed_sps->id << " and PPS id " 398 << parsed_pps->id << " (referencing SPS " 399 << parsed_pps->sps_id << ")"; 400 } 401 402 // TODO(bugs.webrtc.org/13157): Update the H264 depacketizer so we don't have to 403 // fiddle with the payload at this point. 404 bool H26xPacketBuffer::FixH264Packet(Packet& packet) { 405 constexpr uint8_t kStartCode[] = {0, 0, 0, 1}; 406 407 RTPVideoHeader& video_header = packet.video_header; 408 RTPVideoHeaderH264& h264_header = 409 std::get<RTPVideoHeaderH264>(video_header.video_type_header); 410 411 CopyOnWriteBuffer result; 412 413 if (h264_idr_only_keyframes_allowed_) { 414 // Check if sps and pps insertion is needed. 415 bool prepend_sps_pps = false; 416 auto sps = sps_data_.end(); 417 auto pps = pps_data_.end(); 418 419 for (const NaluInfo& nalu : h264_header.nalus) { 420 switch (nalu.type) { 421 case H264::NaluType::kSps: { 422 SpsInfo& sps_info = sps_data_[nalu.sps_id]; 423 sps_info.width = video_header.width; 424 sps_info.height = video_header.height; 425 break; 426 } 427 case H264::NaluType::kPps: { 428 pps_data_[nalu.pps_id].sps_id = nalu.sps_id; 429 break; 430 } 431 case H264::NaluType::kIdr: { 432 // If this is the first packet of an IDR, make sure we have the 433 // required SPS/PPS and also calculate how much extra space we need 434 // in the buffer to prepend the SPS/PPS to the bitstream with start 435 // codes. 436 if (video_header.is_first_packet_in_frame) { 437 if (nalu.pps_id == -1) { 438 RTC_LOG(LS_WARNING) << "No PPS id in IDR nalu."; 439 return false; 440 } 441 442 pps = pps_data_.find(nalu.pps_id); 443 if (pps == pps_data_.end()) { 444 RTC_LOG(LS_WARNING) 445 << "No PPS with id << " << nalu.pps_id << " received"; 446 return false; 447 } 448 449 sps = sps_data_.find(pps->second.sps_id); 450 if (sps == sps_data_.end()) { 451 RTC_LOG(LS_WARNING) 452 << "No SPS with id << " << pps->second.sps_id << " received"; 453 return false; 454 } 455 456 // Since the first packet of every keyframe should have its width 457 // and height set we set it here in the case of it being supplied 458 // out of band. 459 video_header.width = sps->second.width; 460 video_header.height = sps->second.height; 461 462 // If the SPS/PPS was supplied out of band then we will have saved 463 // the actual bitstream in `data`. 464 if (sps->second.payload && pps->second.payload) { 465 RTC_DCHECK_GT(sps->second.size, 0); 466 RTC_DCHECK_GT(pps->second.size, 0); 467 prepend_sps_pps = true; 468 } 469 } 470 break; 471 } 472 default: 473 break; 474 } 475 } 476 477 RTC_CHECK(!prepend_sps_pps || 478 (sps != sps_data_.end() && pps != pps_data_.end())); 479 480 // Insert SPS and PPS if they are missing. 481 if (prepend_sps_pps) { 482 // Insert SPS. 483 result.AppendData(kStartCode); 484 result.AppendData(sps->second.payload.get(), sps->second.size); 485 486 // Insert PPS. 487 result.AppendData(kStartCode); 488 result.AppendData(pps->second.payload.get(), pps->second.size); 489 490 // Update codec header to reflect the newly added SPS and PPS. 491 h264_header.nalus.push_back( 492 {.type = H264::NaluType::kSps, .sps_id = sps->first, .pps_id = -1}); 493 h264_header.nalus.push_back({.type = H264::NaluType::kPps, 494 .sps_id = sps->first, 495 .pps_id = pps->first}); 496 } 497 } 498 499 // Insert start code. 500 switch (h264_header.packetization_type) { 501 case kH264StapA: { 502 const uint8_t* payload_end = 503 packet.video_payload.data() + packet.video_payload.size(); 504 const uint8_t* nalu_ptr = packet.video_payload.data() + 1; 505 while (nalu_ptr < payload_end - 1) { 506 // The first two bytes describe the length of the segment, where a 507 // segment is the nalu type plus nalu payload. 508 uint16_t segment_length = nalu_ptr[0] << 8 | nalu_ptr[1]; 509 nalu_ptr += 2; 510 511 if (nalu_ptr + segment_length <= payload_end) { 512 result.AppendData(kStartCode); 513 result.AppendData(nalu_ptr, segment_length); 514 } 515 nalu_ptr += segment_length; 516 } 517 packet.video_payload = result; 518 return true; 519 } 520 521 case kH264FuA: { 522 if (IsFirstPacketOfFragment(h264_header)) { 523 result.AppendData(kStartCode); 524 } 525 result.AppendData(packet.video_payload); 526 packet.video_payload = result; 527 return true; 528 } 529 530 case kH264SingleNalu: { 531 result.AppendData(kStartCode); 532 result.AppendData(packet.video_payload); 533 packet.video_payload = result; 534 return true; 535 } 536 } 537 538 RTC_DCHECK_NOTREACHED(); 539 return false; 540 } 541 542 } // namespace webrtc