rtp_payload_params.cc (32535B)
1 /* 2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "call/rtp_payload_params.h" 12 13 #include <algorithm> 14 #include <cstddef> 15 #include <cstdint> 16 #include <iterator> 17 #include <optional> 18 19 #include "absl/container/inlined_vector.h" 20 #include "api/field_trials_view.h" 21 #include "api/transport/rtp/dependency_descriptor.h" 22 #include "api/video/encoded_image.h" 23 #include "api/video/render_resolution.h" 24 #include "api/video/video_codec_constants.h" 25 #include "api/video/video_codec_type.h" 26 #include "api/video/video_frame_type.h" 27 #include "api/video/video_timing.h" 28 #include "call/rtp_config.h" 29 #include "common_video/generic_frame_descriptor/generic_frame_info.h" 30 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h" 31 #include "modules/rtp_rtcp/source/rtp_video_header.h" 32 #include "modules/video_coding/codecs/h264/include/h264_globals.h" 33 #include "modules/video_coding/codecs/interface/common_constants.h" 34 #include "modules/video_coding/codecs/vp8/include/vp8_globals.h" 35 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h" 36 #include "modules/video_coding/frame_dependencies_calculator.h" 37 #include "modules/video_coding/include/video_codec_interface.h" 38 #include "rtc_base/checks.h" 39 #include "rtc_base/logging.h" 40 #include "rtc_base/random.h" 41 #include "rtc_base/time_utils.h" 42 43 namespace webrtc { 44 namespace { 45 46 constexpr int kMaxSimulatedSpatialLayers = 3; 47 48 void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info, 49 std::optional<int> spatial_index, 50 RTPVideoHeader* rtp) { 51 rtp->codec = info.codecType; 52 rtp->is_last_frame_in_picture = info.end_of_picture; 53 rtp->frame_instrumentation_data = info.frame_instrumentation_data; 54 switch (info.codecType) { 55 case kVideoCodecVP8: { 56 auto& vp8_header = rtp->video_type_header.emplace<RTPVideoHeaderVP8>(); 57 vp8_header.InitRTPVideoHeaderVP8(); 58 vp8_header.nonReference = info.codecSpecific.VP8.nonReference; 59 vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx; 60 vp8_header.layerSync = info.codecSpecific.VP8.layerSync; 61 vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx; 62 return; 63 } 64 case kVideoCodecVP9: { 65 auto& vp9_header = rtp->video_type_header.emplace<RTPVideoHeaderVP9>(); 66 vp9_header.InitRTPVideoHeaderVP9(); 67 vp9_header.inter_pic_predicted = 68 info.codecSpecific.VP9.inter_pic_predicted; 69 vp9_header.flexible_mode = info.codecSpecific.VP9.flexible_mode; 70 vp9_header.ss_data_available = info.codecSpecific.VP9.ss_data_available; 71 vp9_header.non_ref_for_inter_layer_pred = 72 info.codecSpecific.VP9.non_ref_for_inter_layer_pred; 73 vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx; 74 vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch; 75 vp9_header.inter_layer_predicted = 76 info.codecSpecific.VP9.inter_layer_predicted; 77 vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx; 78 vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers; 79 vp9_header.first_active_layer = info.codecSpecific.VP9.first_active_layer; 80 if (vp9_header.num_spatial_layers > 1) { 81 vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx); 82 } else { 83 vp9_header.spatial_idx = kNoSpatialIdx; 84 } 85 if (info.codecSpecific.VP9.ss_data_available) { 86 vp9_header.spatial_layer_resolution_present = 87 info.codecSpecific.VP9.spatial_layer_resolution_present; 88 if (info.codecSpecific.VP9.spatial_layer_resolution_present) { 89 for (size_t i = 0; i < info.codecSpecific.VP9.num_spatial_layers; 90 ++i) { 91 vp9_header.width[i] = info.codecSpecific.VP9.width[i]; 92 vp9_header.height[i] = info.codecSpecific.VP9.height[i]; 93 } 94 } 95 vp9_header.gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof); 96 } 97 98 vp9_header.num_ref_pics = info.codecSpecific.VP9.num_ref_pics; 99 for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) { 100 vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i]; 101 } 102 vp9_header.end_of_picture = info.end_of_picture; 103 return; 104 } 105 case kVideoCodecH264: { 106 auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>(); 107 h264_header.packetization_mode = 108 info.codecSpecific.H264.packetization_mode; 109 return; 110 } 111 // These codec types do not have codec-specifics. 112 case kVideoCodecGeneric: 113 case kVideoCodecH265: 114 case kVideoCodecAV1: 115 return; 116 } 117 } 118 119 void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) { 120 if (image.timing_.flags == VideoSendTiming::TimingFrameFlags::kInvalid || 121 image.timing_.flags == VideoSendTiming::TimingFrameFlags::kNotTriggered) { 122 timing->flags = VideoSendTiming::TimingFrameFlags::kInvalid; 123 return; 124 } 125 126 timing->encode_start_delta_ms = VideoSendTiming::GetDeltaCappedMs( 127 image.capture_time_ms_, image.timing_.encode_start_ms); 128 timing->encode_finish_delta_ms = VideoSendTiming::GetDeltaCappedMs( 129 image.capture_time_ms_, image.timing_.encode_finish_ms); 130 timing->packetization_finish_delta_ms = 0; 131 timing->pacer_exit_delta_ms = 0; 132 timing->network_timestamp_delta_ms = 0; 133 timing->network2_timestamp_delta_ms = 0; 134 timing->flags = image.timing_.flags; 135 } 136 137 // Returns structure that aligns with simulated generic info. The templates 138 // allow to produce valid dependency descriptor for any stream where 139 // `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by 140 // https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see 141 // template_fdiffs()). The set of the templates is not tuned for any paricular 142 // structure thus dependency descriptor would use more bytes on the wire than 143 // with tuned templates. 144 FrameDependencyStructure MinimalisticStructure(int num_spatial_layers, 145 int num_temporal_layers) { 146 RTC_DCHECK_LE(num_spatial_layers, DependencyDescriptor::kMaxSpatialIds); 147 RTC_DCHECK_LE(num_temporal_layers, DependencyDescriptor::kMaxTemporalIds); 148 RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32); 149 FrameDependencyStructure structure; 150 structure.num_decode_targets = num_spatial_layers * num_temporal_layers; 151 structure.num_chains = num_spatial_layers; 152 structure.templates.reserve(num_spatial_layers * num_temporal_layers); 153 for (int sid = 0; sid < num_spatial_layers; ++sid) { 154 for (int tid = 0; tid < num_temporal_layers; ++tid) { 155 FrameDependencyTemplate a_template; 156 a_template.spatial_id = sid; 157 a_template.temporal_id = tid; 158 for (int s = 0; s < num_spatial_layers; ++s) { 159 for (int t = 0; t < num_temporal_layers; ++t) { 160 // Prefer kSwitch indication for frames that is part of the decode 161 // target because dependency descriptor information generated in this 162 // class use kSwitch indications more often that kRequired, increasing 163 // the chance of a good (or complete) template match. 164 a_template.decode_target_indications.push_back( 165 sid <= s && tid <= t ? DecodeTargetIndication::kSwitch 166 : DecodeTargetIndication::kNotPresent); 167 } 168 } 169 a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers * 170 num_temporal_layers 171 : num_spatial_layers); 172 a_template.chain_diffs.assign(structure.num_chains, 1); 173 structure.templates.push_back(a_template); 174 175 structure.decode_target_protected_by_chain.push_back(sid); 176 } 177 } 178 return structure; 179 } 180 } // namespace 181 182 RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc, 183 const RtpPayloadState* state, 184 const FieldTrialsView& trials) 185 : ssrc_(ssrc), 186 generic_picture_id_experiment_( 187 trials.IsEnabled("WebRTC-GenericPictureId")), 188 simulate_generic_structure_( 189 trials.IsEnabled("WebRTC-GenericCodecDependencyDescriptor")) { 190 for (auto& spatial_layer : last_frame_id_) 191 spatial_layer.fill(-1); 192 193 chain_last_frame_id_.fill(-1); 194 buffer_id_to_frame_id_.fill(-1); 195 196 Random random(TimeMicros()); 197 state_.picture_id = 198 state ? state->picture_id : (random.Rand<int16_t>() & 0x7FFF); 199 state_.tl0_pic_idx = state ? state->tl0_pic_idx : (random.Rand<uint8_t>()); 200 state_.frame_id = state ? state->frame_id : random.Rand<uint16_t>(); 201 } 202 203 RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default; 204 205 RtpPayloadParams::~RtpPayloadParams() {} 206 207 RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader( 208 const EncodedImage& image, 209 const CodecSpecificInfo* codec_specific_info, 210 std::optional<int64_t> shared_frame_id) { 211 int64_t frame_id; 212 if (shared_frame_id) { 213 frame_id = *shared_frame_id; 214 } else { 215 frame_id = state_.frame_id++; 216 } 217 218 RTPVideoHeader rtp_video_header; 219 if (codec_specific_info) { 220 PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(), 221 &rtp_video_header); 222 } 223 rtp_video_header.simulcastIdx = image.SimulcastIndex().value_or(0); 224 rtp_video_header.frame_type = image._frameType; 225 rtp_video_header.rotation = image.rotation_; 226 rtp_video_header.content_type = image.content_type_; 227 rtp_video_header.playout_delay = image.PlayoutDelay(); 228 rtp_video_header.width = image._encodedWidth; 229 rtp_video_header.height = image._encodedHeight; 230 rtp_video_header.color_space = image.ColorSpace() 231 ? std::make_optional(*image.ColorSpace()) 232 : std::nullopt; 233 rtp_video_header.video_frame_tracking_id = image.VideoFrameTrackingId(); 234 SetVideoTiming(image, &rtp_video_header.video_timing); 235 236 const bool is_keyframe = image._frameType == VideoFrameType::kVideoFrameKey; 237 const bool first_frame_in_picture = 238 (codec_specific_info && codec_specific_info->codecType == kVideoCodecVP9) 239 ? codec_specific_info->codecSpecific.VP9.first_frame_in_picture 240 : true; 241 242 SetCodecSpecific(&rtp_video_header, first_frame_in_picture); 243 244 SetGeneric(codec_specific_info, frame_id, is_keyframe, &rtp_video_header); 245 246 return rtp_video_header; 247 } 248 249 uint32_t RtpPayloadParams::ssrc() const { 250 return ssrc_; 251 } 252 253 RtpPayloadState RtpPayloadParams::state() const { 254 return state_; 255 } 256 257 void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header, 258 bool first_frame_in_picture) { 259 // Always set picture id. Set tl0_pic_idx iff temporal index is set. 260 if (first_frame_in_picture) { 261 state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF; 262 } 263 if (rtp_video_header->codec == kVideoCodecVP8) { 264 auto& vp8_header = 265 std::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header); 266 vp8_header.pictureId = state_.picture_id; 267 268 if (vp8_header.temporalIdx != kNoTemporalIdx) { 269 if (vp8_header.temporalIdx == 0) { 270 ++state_.tl0_pic_idx; 271 } 272 vp8_header.tl0PicIdx = state_.tl0_pic_idx; 273 } 274 } 275 if (rtp_video_header->codec == kVideoCodecVP9) { 276 auto& vp9_header = 277 std::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header); 278 vp9_header.picture_id = state_.picture_id; 279 280 // Note that in the case that we have no temporal layers but we do have 281 // spatial layers, packets will carry layering info with a temporal_idx of 282 // zero, and we then have to set and increment tl0_pic_idx. 283 if (vp9_header.temporal_idx != kNoTemporalIdx || 284 vp9_header.spatial_idx != kNoSpatialIdx) { 285 if (first_frame_in_picture && 286 (vp9_header.temporal_idx == 0 || 287 vp9_header.temporal_idx == kNoTemporalIdx)) { 288 ++state_.tl0_pic_idx; 289 } 290 vp9_header.tl0_pic_idx = state_.tl0_pic_idx; 291 } 292 } 293 if (generic_picture_id_experiment_ && 294 rtp_video_header->codec == kVideoCodecGeneric) { 295 rtp_video_header->video_type_header.emplace<RTPVideoHeaderLegacyGeneric>() 296 .picture_id = state_.picture_id; 297 } 298 } 299 300 RTPVideoHeader::GenericDescriptorInfo 301 RtpPayloadParams::GenericDescriptorFromFrameInfo( 302 const GenericFrameInfo& frame_info, 303 int64_t frame_id) { 304 RTPVideoHeader::GenericDescriptorInfo generic; 305 generic.frame_id = frame_id; 306 generic.dependencies = dependencies_calculator_.FromBuffersUsage( 307 frame_id, frame_info.encoder_buffers); 308 generic.chain_diffs = 309 chains_calculator_.From(frame_id, frame_info.part_of_chain); 310 generic.spatial_index = frame_info.spatial_id; 311 generic.temporal_index = frame_info.temporal_id; 312 generic.decode_target_indications = frame_info.decode_target_indications; 313 generic.active_decode_targets = frame_info.active_decode_targets; 314 return generic; 315 } 316 317 void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info, 318 int64_t frame_id, 319 bool is_keyframe, 320 RTPVideoHeader* rtp_video_header) { 321 if (codec_specific_info && codec_specific_info->generic_frame_info && 322 !codec_specific_info->generic_frame_info->encoder_buffers.empty()) { 323 if (is_keyframe) { 324 // Key frame resets all chains it is in. 325 chains_calculator_.Reset( 326 codec_specific_info->generic_frame_info->part_of_chain); 327 } 328 rtp_video_header->generic = GenericDescriptorFromFrameInfo( 329 *codec_specific_info->generic_frame_info, frame_id); 330 return; 331 } 332 333 switch (rtp_video_header->codec) { 334 case VideoCodecType::kVideoCodecGeneric: 335 GenericToGeneric(frame_id, is_keyframe, rtp_video_header); 336 return; 337 case VideoCodecType::kVideoCodecVP8: 338 if (codec_specific_info) { 339 Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id, 340 is_keyframe, rtp_video_header); 341 } 342 return; 343 case VideoCodecType::kVideoCodecVP9: 344 if (codec_specific_info != nullptr) { 345 Vp9ToGeneric(codec_specific_info->codecSpecific.VP9, frame_id, 346 *rtp_video_header); 347 } 348 return; 349 case VideoCodecType::kVideoCodecAV1: 350 // Codec-specifics is not supported for AV1. We convert from the 351 // generic_frame_info. 352 return; 353 case VideoCodecType::kVideoCodecH264: 354 if (codec_specific_info) { 355 H264ToGeneric(codec_specific_info->codecSpecific.H264, frame_id, 356 is_keyframe, rtp_video_header); 357 } 358 return; 359 case VideoCodecType::kVideoCodecH265: 360 // Codec-specifics is not supported for H.265. We convert from the 361 // generic_frame_info. 362 return; 363 } 364 RTC_DCHECK_NOTREACHED() << "Unsupported codec."; 365 } 366 367 std::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure( 368 const CodecSpecificInfo* codec_specific_info) { 369 if (codec_specific_info == nullptr) { 370 return std::nullopt; 371 } 372 // This helper shouldn't be used when template structure is specified 373 // explicetly. 374 RTC_DCHECK(!codec_specific_info->template_structure.has_value()); 375 switch (codec_specific_info->codecType) { 376 case VideoCodecType::kVideoCodecGeneric: 377 if (simulate_generic_structure_) { 378 return MinimalisticStructure(/*num_spatial_layers=*/1, 379 /*num_temporal_layers=*/1); 380 } 381 return std::nullopt; 382 case VideoCodecType::kVideoCodecVP8: 383 return MinimalisticStructure(/*num_spatial_layers=*/1, 384 /*num_temporal_layers=*/kMaxTemporalStreams); 385 case VideoCodecType::kVideoCodecVP9: { 386 std::optional<FrameDependencyStructure> structure = MinimalisticStructure( 387 /*num_spatial_layers=*/kMaxSimulatedSpatialLayers, 388 /*num_temporal_layers=*/kMaxTemporalStreams); 389 const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9; 390 if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) { 391 RenderResolution first_valid; 392 RenderResolution last_valid; 393 for (size_t i = 0; i < vp9.num_spatial_layers; ++i) { 394 RenderResolution r(vp9.width[i], vp9.height[i]); 395 if (r.Valid()) { 396 if (!first_valid.Valid()) { 397 first_valid = r; 398 } 399 last_valid = r; 400 } 401 structure->resolutions.push_back(r); 402 } 403 if (!last_valid.Valid()) { 404 // No valid resolution found. Do not send resolutions. 405 structure->resolutions.clear(); 406 } else { 407 structure->resolutions.resize(kMaxSimulatedSpatialLayers, last_valid); 408 // VP9 encoder wrapper may disable first few spatial layers by 409 // setting invalid resolution (0,0). `structure->resolutions` 410 // doesn't support invalid resolution, so reset them to something 411 // valid. 412 for (RenderResolution& r : structure->resolutions) { 413 if (!r.Valid()) { 414 r = first_valid; 415 } 416 } 417 } 418 } 419 return structure; 420 } 421 case VideoCodecType::kVideoCodecH264: 422 return MinimalisticStructure( 423 /*num_spatial_layers=*/1, 424 /*num_temporal_layers=*/kMaxTemporalStreams); 425 case VideoCodecType::kVideoCodecAV1: 426 case VideoCodecType::kVideoCodecH265: 427 return std::nullopt; 428 } 429 RTC_DCHECK_NOTREACHED() << "Unsupported codec."; 430 } 431 432 void RtpPayloadParams::GenericToGeneric(int64_t frame_id, 433 bool is_keyframe, 434 RTPVideoHeader* rtp_video_header) { 435 RTPVideoHeader::GenericDescriptorInfo& generic = 436 rtp_video_header->generic.emplace(); 437 438 generic.frame_id = frame_id; 439 generic.decode_target_indications.push_back(DecodeTargetIndication::kSwitch); 440 441 if (is_keyframe) { 442 generic.chain_diffs.push_back(0); 443 last_frame_id_[0].fill(-1); 444 } else { 445 int64_t last_frame_id = last_frame_id_[0][0]; 446 RTC_DCHECK_NE(last_frame_id, -1); 447 RTC_DCHECK_LT(last_frame_id, frame_id); 448 generic.chain_diffs.push_back(frame_id - last_frame_id); 449 generic.dependencies.push_back(last_frame_id); 450 } 451 452 last_frame_id_[0][0] = frame_id; 453 } 454 455 void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info, 456 int64_t frame_id, 457 bool is_keyframe, 458 RTPVideoHeader* rtp_video_header) { 459 const int temporal_index = 460 h264_info.temporal_idx != kNoTemporalIdx ? h264_info.temporal_idx : 0; 461 462 if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers) { 463 RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be " 464 "used with generic frame descriptor."; 465 return; 466 } 467 468 RTPVideoHeader::GenericDescriptorInfo& generic = 469 rtp_video_header->generic.emplace(); 470 471 generic.frame_id = frame_id; 472 generic.temporal_index = temporal_index; 473 474 // Generate decode target indications. 475 RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams); 476 generic.decode_target_indications.resize(kMaxTemporalStreams); 477 auto it = std::fill_n(generic.decode_target_indications.begin(), 478 temporal_index, DecodeTargetIndication::kNotPresent); 479 std::fill(it, generic.decode_target_indications.end(), 480 DecodeTargetIndication::kSwitch); 481 generic.chain_diffs = { 482 (is_keyframe || last_frame_id_[0][0] < 0) 483 ? 0 484 : static_cast<int>(frame_id - last_frame_id_[0][0])}; 485 486 if (is_keyframe) { 487 RTC_DCHECK_EQ(temporal_index, 0); 488 last_frame_id_[/*spatial index*/ 0].fill(-1); 489 last_frame_id_[/*spatial index*/ 0][temporal_index] = frame_id; 490 return; 491 } 492 493 if (h264_info.base_layer_sync) { 494 int64_t tl0_frame_id = last_frame_id_[/*spatial index*/ 0][0]; 495 496 for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) { 497 if (last_frame_id_[/*spatial index*/ 0][i] < tl0_frame_id) { 498 last_frame_id_[/*spatial index*/ 0][i] = -1; 499 } 500 } 501 502 RTC_DCHECK_GE(tl0_frame_id, 0); 503 RTC_DCHECK_LT(tl0_frame_id, frame_id); 504 generic.dependencies.push_back(tl0_frame_id); 505 } else { 506 for (int i = 0; i <= temporal_index; ++i) { 507 int64_t last_frame_id = last_frame_id_[/*spatial index*/ 0][i]; 508 509 if (last_frame_id != -1) { 510 RTC_DCHECK_LT(last_frame_id, frame_id); 511 generic.dependencies.push_back(last_frame_id); 512 } 513 } 514 } 515 516 last_frame_id_[/*spatial_index*/ 0][temporal_index] = frame_id; 517 } 518 519 void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info, 520 int64_t frame_id, 521 bool is_keyframe, 522 RTPVideoHeader* rtp_video_header) { 523 const auto& vp8_header = 524 std::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header); 525 const int spatial_index = 0; 526 const int temporal_index = 527 vp8_header.temporalIdx != kNoTemporalIdx ? vp8_header.temporalIdx : 0; 528 529 if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers || 530 spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers) { 531 RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be " 532 "used with generic frame descriptor."; 533 return; 534 } 535 536 RTPVideoHeader::GenericDescriptorInfo& generic = 537 rtp_video_header->generic.emplace(); 538 539 generic.frame_id = frame_id; 540 generic.spatial_index = spatial_index; 541 generic.temporal_index = temporal_index; 542 543 // Generate decode target indications. 544 RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams); 545 generic.decode_target_indications.resize(kMaxTemporalStreams); 546 auto it = std::fill_n(generic.decode_target_indications.begin(), 547 temporal_index, DecodeTargetIndication::kNotPresent); 548 std::fill(it, generic.decode_target_indications.end(), 549 DecodeTargetIndication::kSwitch); 550 551 // Frame dependencies. 552 if (vp8_info.useExplicitDependencies) { 553 SetDependenciesVp8New(vp8_info, frame_id, is_keyframe, vp8_header.layerSync, 554 &generic); 555 } else { 556 SetDependenciesVp8Deprecated(vp8_info, frame_id, is_keyframe, spatial_index, 557 temporal_index, vp8_header.layerSync, 558 &generic); 559 } 560 561 // Calculate chains. 562 generic.chain_diffs = { 563 (is_keyframe || chain_last_frame_id_[0] < 0) 564 ? 0 565 : static_cast<int>(frame_id - chain_last_frame_id_[0])}; 566 if (temporal_index == 0) { 567 chain_last_frame_id_[0] = frame_id; 568 } 569 } 570 571 void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& /* vp9_info */, 572 int64_t frame_id, 573 RTPVideoHeader& rtp_video_header) { 574 const auto& vp9_header = 575 std::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header); 576 const int num_spatial_layers = kMaxSimulatedSpatialLayers; 577 const int first_active_spatial_id = vp9_header.first_active_layer; 578 const int last_active_spatial_id = vp9_header.num_spatial_layers - 1; 579 const int num_temporal_layers = kMaxTemporalStreams; 580 static_assert(num_spatial_layers <= 581 RtpGenericFrameDescriptor::kMaxSpatialLayers); 582 static_assert(num_temporal_layers <= 583 RtpGenericFrameDescriptor::kMaxTemporalLayers); 584 static_assert(num_spatial_layers <= DependencyDescriptor::kMaxSpatialIds); 585 static_assert(num_temporal_layers <= DependencyDescriptor::kMaxTemporalIds); 586 587 int spatial_index = 588 vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0; 589 int temporal_index = 590 vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0; 591 592 if (!(temporal_index < num_temporal_layers && 593 first_active_spatial_id <= spatial_index && 594 spatial_index <= last_active_spatial_id && 595 last_active_spatial_id < num_spatial_layers)) { 596 // Prefer to generate no generic layering than an inconsistent one. 597 RTC_LOG(LS_ERROR) << "Inconsistent layer id sid=" << spatial_index 598 << ",tid=" << temporal_index 599 << " in VP9 header. Active spatial ids: [" 600 << first_active_spatial_id << "," 601 << last_active_spatial_id << "]"; 602 return; 603 } 604 605 RTPVideoHeader::GenericDescriptorInfo& result = 606 rtp_video_header.generic.emplace(); 607 608 result.frame_id = frame_id; 609 result.spatial_index = spatial_index; 610 result.temporal_index = temporal_index; 611 612 result.decode_target_indications.reserve(num_spatial_layers * 613 num_temporal_layers); 614 for (int sid = 0; sid < num_spatial_layers; ++sid) { 615 for (int tid = 0; tid < num_temporal_layers; ++tid) { 616 DecodeTargetIndication dti; 617 if (sid < spatial_index || tid < temporal_index) { 618 dti = DecodeTargetIndication::kNotPresent; 619 } else if (spatial_index != sid && 620 vp9_header.non_ref_for_inter_layer_pred) { 621 dti = DecodeTargetIndication::kNotPresent; 622 } else if (sid == spatial_index && tid == temporal_index) { 623 // Assume that if frame is decodable, all of its own layer is decodable. 624 dti = DecodeTargetIndication::kSwitch; 625 } else if (sid == spatial_index && vp9_header.temporal_up_switch) { 626 dti = DecodeTargetIndication::kSwitch; 627 } else if (!vp9_header.inter_pic_predicted) { 628 // Key frame or spatial upswitch 629 dti = DecodeTargetIndication::kSwitch; 630 } else { 631 // Make no other assumptions. That should be safe, though suboptimal. 632 // To provide more accurate dti, encoder wrapper should fill in 633 // CodecSpecificInfo::generic_frame_info 634 dti = DecodeTargetIndication::kRequired; 635 } 636 result.decode_target_indications.push_back(dti); 637 } 638 } 639 640 // Calculate frame dependencies. 641 static constexpr int kPictureDiffLimit = 128; 642 if (last_vp9_frame_id_.empty()) { 643 // Create the array only if it is ever used. 644 last_vp9_frame_id_.resize(kPictureDiffLimit); 645 } 646 647 if (vp9_header.flexible_mode) { 648 if (vp9_header.inter_layer_predicted && spatial_index > 0) { 649 result.dependencies.push_back( 650 last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit] 651 [spatial_index - 1]); 652 } 653 if (vp9_header.inter_pic_predicted) { 654 for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) { 655 // picture_id is 15 bit number that wraps around. Though undeflow may 656 // produce picture that exceeds 2^15, it is ok because in this 657 // code block only last 7 bits of the picture_id are used. 658 uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i]; 659 result.dependencies.push_back( 660 last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]); 661 } 662 } 663 last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit] 664 [spatial_index] = frame_id; 665 } else { 666 // Implementing general conversion logic for non-flexible mode requires some 667 // work and we will almost certainly never need it, so for now support only 668 // non-layerd streams. 669 if (spatial_index > 0 || temporal_index > 0) { 670 // Prefer to generate no generic layering than an inconsistent one. 671 rtp_video_header.generic.reset(); 672 return; 673 } 674 675 if (vp9_header.inter_pic_predicted) { 676 // Since we only support non-scalable streams we only need to save the 677 // last frame id. 678 result.dependencies.push_back(last_vp9_frame_id_[0][0]); 679 } 680 last_vp9_frame_id_[0][0] = frame_id; 681 } 682 683 result.active_decode_targets = 684 ((uint32_t{1} << num_temporal_layers * (last_active_spatial_id + 1)) - 685 1) ^ 686 ((uint32_t{1} << num_temporal_layers * first_active_spatial_id) - 1); 687 688 // Calculate chains, asuming chain includes all frames with temporal_id = 0 689 if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) { 690 // Assume frames without dependencies also reset chains. 691 for (int sid = spatial_index; sid <= last_active_spatial_id; ++sid) { 692 chain_last_frame_id_[sid] = -1; 693 } 694 } 695 result.chain_diffs.resize(num_spatial_layers, 0); 696 for (int sid = first_active_spatial_id; sid <= last_active_spatial_id; 697 ++sid) { 698 if (chain_last_frame_id_[sid] == -1) { 699 result.chain_diffs[sid] = 0; 700 continue; 701 } 702 int64_t chain_diff = frame_id - chain_last_frame_id_[sid]; 703 if (chain_diff >= 256) { 704 RTC_LOG(LS_ERROR) 705 << "Too many frames since last VP9 T0 frame for spatial layer #" 706 << sid << " at frame#" << frame_id; 707 chain_last_frame_id_[sid] = -1; 708 chain_diff = 0; 709 } 710 result.chain_diffs[sid] = chain_diff; 711 } 712 713 if (temporal_index == 0) { 714 chain_last_frame_id_[spatial_index] = frame_id; 715 if (!vp9_header.non_ref_for_inter_layer_pred) { 716 for (int sid = spatial_index + 1; sid <= last_active_spatial_id; ++sid) { 717 chain_last_frame_id_[sid] = frame_id; 718 } 719 } 720 } 721 } 722 723 void RtpPayloadParams::SetDependenciesVp8Deprecated( 724 const CodecSpecificInfoVP8& vp8_info, 725 int64_t frame_id, 726 bool is_keyframe, 727 int spatial_index, 728 int temporal_index, 729 bool layer_sync, 730 RTPVideoHeader::GenericDescriptorInfo* generic) { 731 RTC_DCHECK(!vp8_info.useExplicitDependencies); 732 RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value()); 733 new_version_used_ = false; 734 735 if (is_keyframe) { 736 RTC_DCHECK_EQ(temporal_index, 0); 737 last_frame_id_[spatial_index].fill(-1); 738 last_frame_id_[spatial_index][temporal_index] = frame_id; 739 return; 740 } 741 742 if (layer_sync) { 743 int64_t tl0_frame_id = last_frame_id_[spatial_index][0]; 744 745 for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) { 746 if (last_frame_id_[spatial_index][i] < tl0_frame_id) { 747 last_frame_id_[spatial_index][i] = -1; 748 } 749 } 750 751 RTC_DCHECK_GE(tl0_frame_id, 0); 752 RTC_DCHECK_LT(tl0_frame_id, frame_id); 753 generic->dependencies.push_back(tl0_frame_id); 754 } else { 755 for (int i = 0; i <= temporal_index; ++i) { 756 int64_t last_frame_id = last_frame_id_[spatial_index][i]; 757 758 if (last_frame_id != -1) { 759 RTC_DCHECK_LT(last_frame_id, frame_id); 760 generic->dependencies.push_back(last_frame_id); 761 } 762 } 763 } 764 765 last_frame_id_[spatial_index][temporal_index] = frame_id; 766 } 767 768 void RtpPayloadParams::SetDependenciesVp8New( 769 const CodecSpecificInfoVP8& vp8_info, 770 int64_t frame_id, 771 bool is_keyframe, 772 bool /* layer_sync */, 773 RTPVideoHeader::GenericDescriptorInfo* generic) { 774 RTC_DCHECK(vp8_info.useExplicitDependencies); 775 RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value()); 776 new_version_used_ = true; 777 778 if (is_keyframe) { 779 RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u); 780 buffer_id_to_frame_id_.fill(frame_id); 781 return; 782 } 783 784 constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount; 785 786 RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u); 787 RTC_DCHECK_LE(vp8_info.referencedBuffersCount, 788 std::size(vp8_info.referencedBuffers)); 789 790 for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) { 791 const size_t referenced_buffer = vp8_info.referencedBuffers[i]; 792 RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8); 793 RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size()); 794 795 const int64_t dependency_frame_id = 796 buffer_id_to_frame_id_[referenced_buffer]; 797 RTC_DCHECK_GE(dependency_frame_id, 0); 798 RTC_DCHECK_LT(dependency_frame_id, frame_id); 799 800 const bool is_new_dependency = 801 std::find(generic->dependencies.begin(), generic->dependencies.end(), 802 dependency_frame_id) == generic->dependencies.end(); 803 if (is_new_dependency) { 804 generic->dependencies.push_back(dependency_frame_id); 805 } 806 } 807 808 RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8); 809 for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) { 810 const size_t updated_id = vp8_info.updatedBuffers[i]; 811 buffer_id_to_frame_id_[updated_id] = frame_id; 812 } 813 814 RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8); 815 } 816 817 } // namespace webrtc