scalability_structure_full_svc.cc (18023B)
1 /* 2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 #include "modules/video_coding/svc/scalability_structure_full_svc.h" 11 12 #include <cstdint> 13 #include <optional> 14 #include <vector> 15 16 #include "absl/strings/string_view.h" 17 #include "api/transport/rtp/dependency_descriptor.h" 18 #include "api/video/video_bitrate_allocation.h" 19 #include "common_video/generic_frame_descriptor/generic_frame_info.h" 20 #include "modules/video_coding/svc/scalable_video_controller.h" 21 #include "rtc_base/checks.h" 22 #include "rtc_base/logging.h" 23 24 namespace webrtc { 25 26 ScalabilityStructureFullSvc::ScalabilityStructureFullSvc( 27 int num_spatial_layers, 28 int num_temporal_layers, 29 ScalingFactor resolution_factor) 30 : num_spatial_layers_(num_spatial_layers), 31 num_temporal_layers_(num_temporal_layers), 32 resolution_factor_(resolution_factor), 33 active_decode_targets_( 34 (uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) { 35 RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers); 36 RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers); 37 } 38 39 ScalabilityStructureFullSvc::~ScalabilityStructureFullSvc() = default; 40 41 ScalabilityStructureFullSvc::StreamLayersConfig 42 ScalabilityStructureFullSvc::StreamConfig() const { 43 StreamLayersConfig result; 44 result.num_spatial_layers = num_spatial_layers_; 45 result.num_temporal_layers = num_temporal_layers_; 46 result.scaling_factor_num[num_spatial_layers_ - 1] = 1; 47 result.scaling_factor_den[num_spatial_layers_ - 1] = 1; 48 for (int sid = num_spatial_layers_ - 1; sid > 0; --sid) { 49 result.scaling_factor_num[sid - 1] = 50 resolution_factor_.num * result.scaling_factor_num[sid]; 51 result.scaling_factor_den[sid - 1] = 52 resolution_factor_.den * result.scaling_factor_den[sid]; 53 } 54 result.uses_reference_scaling = num_spatial_layers_ > 1; 55 return result; 56 } 57 58 bool ScalabilityStructureFullSvc::TemporalLayerIsActive(int tid) const { 59 if (tid >= num_temporal_layers_) { 60 return false; 61 } 62 for (int sid = 0; sid < num_spatial_layers_; ++sid) { 63 if (DecodeTargetIsActive(sid, tid)) { 64 return true; 65 } 66 } 67 return false; 68 } 69 70 DecodeTargetIndication ScalabilityStructureFullSvc::Dti( 71 int sid, 72 int tid, 73 const LayerFrameConfig& config) { 74 if (sid < config.SpatialId() || tid < config.TemporalId()) { 75 return DecodeTargetIndication::kNotPresent; 76 } 77 if (sid == config.SpatialId()) { 78 if (tid == 0) { 79 RTC_DCHECK_EQ(config.TemporalId(), 0); 80 return DecodeTargetIndication::kSwitch; 81 } 82 if (tid == config.TemporalId()) { 83 return DecodeTargetIndication::kDiscardable; 84 } 85 if (tid > config.TemporalId()) { 86 RTC_DCHECK_GT(tid, config.TemporalId()); 87 return DecodeTargetIndication::kSwitch; 88 } 89 } 90 RTC_DCHECK_GT(sid, config.SpatialId()); 91 RTC_DCHECK_GE(tid, config.TemporalId()); 92 if (config.IsKeyframe() || config.Id() == kKey) { 93 return DecodeTargetIndication::kSwitch; 94 } 95 return DecodeTargetIndication::kRequired; 96 } 97 98 ScalabilityStructureFullSvc::FramePattern 99 ScalabilityStructureFullSvc::NextPattern() const { 100 switch (last_pattern_) { 101 case kNone: 102 return kKey; 103 case kDeltaT2B: 104 return kDeltaT0; 105 case kDeltaT2A: 106 if (TemporalLayerIsActive(1)) { 107 return kDeltaT1; 108 } 109 return kDeltaT0; 110 case kDeltaT1: 111 if (TemporalLayerIsActive(2)) { 112 return kDeltaT2B; 113 } 114 return kDeltaT0; 115 case kKey: 116 case kDeltaT0: 117 if (TemporalLayerIsActive(2)) { 118 return kDeltaT2A; 119 } 120 if (TemporalLayerIsActive(1)) { 121 return kDeltaT1; 122 } 123 return kDeltaT0; 124 } 125 RTC_DCHECK_NOTREACHED(); 126 return kNone; 127 } 128 129 std::vector<ScalableVideoController::LayerFrameConfig> 130 ScalabilityStructureFullSvc::NextFrameConfig(bool restart) { 131 std::vector<LayerFrameConfig> configs; 132 if (active_decode_targets_.none()) { 133 last_pattern_ = kNone; 134 return configs; 135 } 136 configs.reserve(num_spatial_layers_); 137 138 if (last_pattern_ == kNone || restart) { 139 can_reference_t0_frame_for_spatial_id_.reset(); 140 last_pattern_ = kNone; 141 } 142 FramePattern current_pattern = NextPattern(); 143 144 std::optional<int> spatial_dependency_buffer_id; 145 switch (current_pattern) { 146 case kDeltaT0: 147 case kKey: 148 // Disallow temporal references cross T0 on higher temporal layers. 149 can_reference_t1_frame_for_spatial_id_.reset(); 150 for (int sid = 0; sid < num_spatial_layers_; ++sid) { 151 if (!DecodeTargetIsActive(sid, /*tid=*/0)) { 152 // Next frame from the spatial layer `sid` shouldn't depend on 153 // potentially old previous frame from the spatial layer `sid`. 154 can_reference_t0_frame_for_spatial_id_.reset(sid); 155 continue; 156 } 157 configs.emplace_back(); 158 ScalableVideoController::LayerFrameConfig& config = configs.back(); 159 config.Id(current_pattern).S(sid).T(0); 160 161 if (spatial_dependency_buffer_id) { 162 config.Reference(*spatial_dependency_buffer_id); 163 } else if (current_pattern == kKey) { 164 config.Keyframe(); 165 } 166 167 if (can_reference_t0_frame_for_spatial_id_[sid]) { 168 config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0)); 169 } else { 170 // TODO(bugs.webrtc.org/11999): Propagate chain restart on delta frame 171 // to ChainDiffCalculator 172 config.Update(BufferIndex(sid, /*tid=*/0)); 173 } 174 175 spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0); 176 } 177 break; 178 case kDeltaT1: 179 for (int sid = 0; sid < num_spatial_layers_; ++sid) { 180 if (!DecodeTargetIsActive(sid, /*tid=*/1) || 181 !can_reference_t0_frame_for_spatial_id_[sid]) { 182 continue; 183 } 184 configs.emplace_back(); 185 ScalableVideoController::LayerFrameConfig& config = configs.back(); 186 config.Id(current_pattern).S(sid).T(1); 187 // Temporal reference. 188 config.Reference(BufferIndex(sid, /*tid=*/0)); 189 // Spatial reference unless this is the lowest active spatial layer. 190 if (spatial_dependency_buffer_id) { 191 config.Reference(*spatial_dependency_buffer_id); 192 } 193 // No frame reference top layer frame, so no need save it into a buffer. 194 if (num_temporal_layers_ > 2 || sid < num_spatial_layers_ - 1) { 195 config.Update(BufferIndex(sid, /*tid=*/1)); 196 } 197 spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/1); 198 } 199 break; 200 case kDeltaT2A: 201 case kDeltaT2B: 202 for (int sid = 0; sid < num_spatial_layers_; ++sid) { 203 if (!DecodeTargetIsActive(sid, /*tid=*/2) || 204 !can_reference_t0_frame_for_spatial_id_[sid]) { 205 continue; 206 } 207 configs.emplace_back(); 208 ScalableVideoController::LayerFrameConfig& config = configs.back(); 209 config.Id(current_pattern).S(sid).T(2); 210 // Temporal reference. 211 if (current_pattern == kDeltaT2B && 212 can_reference_t1_frame_for_spatial_id_[sid]) { 213 config.Reference(BufferIndex(sid, /*tid=*/1)); 214 } else { 215 config.Reference(BufferIndex(sid, /*tid=*/0)); 216 } 217 // Spatial reference unless this is the lowest active spatial layer. 218 if (spatial_dependency_buffer_id) { 219 config.Reference(*spatial_dependency_buffer_id); 220 } 221 // No frame reference top layer frame, so no need save it into a buffer. 222 if (sid < num_spatial_layers_ - 1) { 223 config.Update(BufferIndex(sid, /*tid=*/2)); 224 } 225 spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/2); 226 } 227 break; 228 case kNone: 229 RTC_DCHECK_NOTREACHED(); 230 break; 231 } 232 233 if (configs.empty() && !restart) { 234 RTC_LOG(LS_WARNING) << "Failed to generate configuration for L" 235 << num_spatial_layers_ << "T" << num_temporal_layers_ 236 << " with active decode targets " 237 << active_decode_targets_.to_string('-').substr( 238 active_decode_targets_.size() - 239 num_spatial_layers_ * num_temporal_layers_) 240 << " and transition from " 241 << kFramePatternNames[last_pattern_] << " to " 242 << kFramePatternNames[current_pattern] 243 << ". Resetting."; 244 return NextFrameConfig(/*restart=*/true); 245 } 246 247 return configs; 248 } 249 250 GenericFrameInfo ScalabilityStructureFullSvc::OnEncodeDone( 251 const LayerFrameConfig& config) { 252 // When encoder drops all frames for a temporal unit, it is better to reuse 253 // old temporal pattern rather than switch to next one, thus switch to next 254 // pattern defered here from the `NextFrameConfig`. 255 // In particular creating VP9 references rely on this behavior. 256 last_pattern_ = static_cast<FramePattern>(config.Id()); 257 if (config.TemporalId() == 0) { 258 can_reference_t0_frame_for_spatial_id_.set(config.SpatialId()); 259 } 260 if (config.TemporalId() == 1) { 261 can_reference_t1_frame_for_spatial_id_.set(config.SpatialId()); 262 } 263 264 GenericFrameInfo frame_info; 265 frame_info.spatial_id = config.SpatialId(); 266 frame_info.temporal_id = config.TemporalId(); 267 frame_info.encoder_buffers = config.Buffers(); 268 frame_info.decode_target_indications.reserve(num_spatial_layers_ * 269 num_temporal_layers_); 270 for (int sid = 0; sid < num_spatial_layers_; ++sid) { 271 for (int tid = 0; tid < num_temporal_layers_; ++tid) { 272 frame_info.decode_target_indications.push_back(Dti(sid, tid, config)); 273 } 274 } 275 if (config.TemporalId() == 0) { 276 frame_info.part_of_chain.resize(num_spatial_layers_); 277 for (int sid = 0; sid < num_spatial_layers_; ++sid) { 278 frame_info.part_of_chain[sid] = config.SpatialId() <= sid; 279 } 280 } else { 281 frame_info.part_of_chain.assign(num_spatial_layers_, false); 282 } 283 frame_info.active_decode_targets = active_decode_targets_; 284 return frame_info; 285 } 286 287 void ScalabilityStructureFullSvc::OnRatesUpdated( 288 const VideoBitrateAllocation& bitrates) { 289 for (int sid = 0; sid < num_spatial_layers_; ++sid) { 290 // Enable/disable spatial layers independetely. 291 bool active = true; 292 for (int tid = 0; tid < num_temporal_layers_; ++tid) { 293 // To enable temporal layer, require bitrates for lower temporal layers. 294 active = active && bitrates.GetBitrate(sid, tid) > 0; 295 SetDecodeTargetIsActive(sid, tid, active); 296 } 297 } 298 } 299 300 FrameDependencyStructure ScalabilityStructureL1T2::DependencyStructure() const { 301 FrameDependencyStructure structure; 302 structure.num_decode_targets = 2; 303 structure.num_chains = 1; 304 structure.decode_target_protected_by_chain = {0, 0}; 305 structure.templates.resize(3); 306 structure.templates[0].T(0).Dtis("SS").ChainDiffs({0}); 307 structure.templates[1].T(0).Dtis("SS").ChainDiffs({2}).FrameDiffs({2}); 308 structure.templates[2].T(1).Dtis("-D").ChainDiffs({1}).FrameDiffs({1}); 309 return structure; 310 } 311 312 FrameDependencyStructure ScalabilityStructureL1T3::DependencyStructure() const { 313 FrameDependencyStructure structure; 314 structure.num_decode_targets = 3; 315 structure.num_chains = 1; 316 structure.decode_target_protected_by_chain = {0, 0, 0}; 317 structure.templates.resize(5); 318 structure.templates[0].T(0).Dtis("SSS").ChainDiffs({0}); 319 structure.templates[1].T(0).Dtis("SSS").ChainDiffs({4}).FrameDiffs({4}); 320 structure.templates[2].T(1).Dtis("-DS").ChainDiffs({2}).FrameDiffs({2}); 321 structure.templates[3].T(2).Dtis("--D").ChainDiffs({1}).FrameDiffs({1}); 322 structure.templates[4].T(2).Dtis("--D").ChainDiffs({3}).FrameDiffs({1}); 323 return structure; 324 } 325 326 FrameDependencyStructure ScalabilityStructureL2T1::DependencyStructure() const { 327 FrameDependencyStructure structure; 328 structure.num_decode_targets = 2; 329 structure.num_chains = 2; 330 structure.decode_target_protected_by_chain = {0, 1}; 331 structure.templates.resize(4); 332 structure.templates[0].S(0).Dtis("SR").ChainDiffs({2, 1}).FrameDiffs({2}); 333 structure.templates[1].S(0).Dtis("SS").ChainDiffs({0, 0}); 334 structure.templates[2].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({2, 1}); 335 structure.templates[3].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({1}); 336 return structure; 337 } 338 339 FrameDependencyStructure ScalabilityStructureL2T2::DependencyStructure() const { 340 FrameDependencyStructure structure; 341 structure.num_decode_targets = 4; 342 structure.num_chains = 2; 343 structure.decode_target_protected_by_chain = {0, 0, 1, 1}; 344 structure.templates.resize(6); 345 auto& templates = structure.templates; 346 templates[0].S(0).T(0).Dtis("SSSS").ChainDiffs({0, 0}); 347 templates[1].S(0).T(0).Dtis("SSRR").ChainDiffs({4, 3}).FrameDiffs({4}); 348 templates[2].S(0).T(1).Dtis("-D-R").ChainDiffs({2, 1}).FrameDiffs({2}); 349 templates[3].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({1}); 350 templates[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({4, 1}); 351 templates[5].S(1).T(1).Dtis("---D").ChainDiffs({3, 2}).FrameDiffs({2, 1}); 352 return structure; 353 } 354 355 FrameDependencyStructure ScalabilityStructureL2T3::DependencyStructure() const { 356 FrameDependencyStructure structure; 357 structure.num_decode_targets = 6; 358 structure.num_chains = 2; 359 structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1}; 360 auto& t = structure.templates; 361 t.resize(10); 362 t[1].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0}); 363 t[6].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 1}).FrameDiffs({1}); 364 t[3].S(0).T(2).Dtis("--D--R").ChainDiffs({2, 1}).FrameDiffs({2}); 365 t[8].S(1).T(2).Dtis("-----D").ChainDiffs({3, 2}).FrameDiffs({2, 1}); 366 t[2].S(0).T(1).Dtis("-DS-RR").ChainDiffs({4, 3}).FrameDiffs({4}); 367 t[7].S(1).T(1).Dtis("----DS").ChainDiffs({5, 4}).FrameDiffs({4, 1}); 368 t[4].S(0).T(2).Dtis("--D--R").ChainDiffs({6, 5}).FrameDiffs({2}); 369 t[9].S(1).T(2).Dtis("-----D").ChainDiffs({7, 6}).FrameDiffs({2, 1}); 370 t[0].S(0).T(0).Dtis("SSSRRR").ChainDiffs({8, 7}).FrameDiffs({8}); 371 t[5].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 1}).FrameDiffs({8, 1}); 372 return structure; 373 } 374 375 FrameDependencyStructure ScalabilityStructureL3T1::DependencyStructure() const { 376 FrameDependencyStructure structure; 377 structure.num_decode_targets = 3; 378 structure.num_chains = 3; 379 structure.decode_target_protected_by_chain = {0, 1, 2}; 380 auto& templates = structure.templates; 381 templates.resize(6); 382 templates[0].S(0).Dtis("SRR").ChainDiffs({3, 2, 1}).FrameDiffs({3}); 383 templates[1].S(0).Dtis("SSS").ChainDiffs({0, 0, 0}); 384 templates[2].S(1).Dtis("-SR").ChainDiffs({1, 1, 1}).FrameDiffs({3, 1}); 385 templates[3].S(1).Dtis("-SS").ChainDiffs({1, 1, 1}).FrameDiffs({1}); 386 templates[4].S(2).Dtis("--S").ChainDiffs({2, 1, 1}).FrameDiffs({3, 1}); 387 templates[5].S(2).Dtis("--S").ChainDiffs({2, 1, 1}).FrameDiffs({1}); 388 return structure; 389 } 390 391 FrameDependencyStructure ScalabilityStructureL3T2::DependencyStructure() const { 392 FrameDependencyStructure structure; 393 structure.num_decode_targets = 6; 394 structure.num_chains = 3; 395 structure.decode_target_protected_by_chain = {0, 0, 1, 1, 2, 2}; 396 auto& t = structure.templates; 397 t.resize(9); 398 // Templates are shown in the order frames following them appear in the 399 // stream, but in `structure.templates` array templates are sorted by 400 // (`spatial_id`, `temporal_id`) since that is a dependency descriptor 401 // requirement. 402 t[1].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0, 0}); 403 t[4].S(1).T(0).Dtis("--SSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1}); 404 t[7].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 1}).FrameDiffs({1}); 405 t[2].S(0).T(1).Dtis("-D-R-R").ChainDiffs({3, 2, 1}).FrameDiffs({3}); 406 t[5].S(1).T(1).Dtis("---D-R").ChainDiffs({4, 3, 2}).FrameDiffs({3, 1}); 407 t[8].S(2).T(1).Dtis("-----D").ChainDiffs({5, 4, 3}).FrameDiffs({3, 1}); 408 t[0].S(0).T(0).Dtis("SSRRRR").ChainDiffs({6, 5, 4}).FrameDiffs({6}); 409 t[3].S(1).T(0).Dtis("--SSRR").ChainDiffs({1, 1, 1}).FrameDiffs({6, 1}); 410 t[6].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 1}).FrameDiffs({6, 1}); 411 return structure; 412 } 413 414 FrameDependencyStructure ScalabilityStructureL3T3::DependencyStructure() const { 415 FrameDependencyStructure structure; 416 structure.num_decode_targets = 9; 417 structure.num_chains = 3; 418 structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1, 2, 2, 2}; 419 auto& t = structure.templates; 420 t.resize(15); 421 // Templates are shown in the order frames following them appear in the 422 // stream, but in `structure.templates` array templates are sorted by 423 // (`spatial_id`, `temporal_id`) since that is a dependency descriptor 424 // requirement. Indexes are written in hex for nicer alignment. 425 t[0x1].S(0).T(0).Dtis("SSSSSSSSS").ChainDiffs({0, 0, 0}); 426 t[0x6].S(1).T(0).Dtis("---SSSSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1}); 427 t[0xB].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({1}); 428 t[0x3].S(0).T(2).Dtis("--D--R--R").ChainDiffs({3, 2, 1}).FrameDiffs({3}); 429 t[0x8].S(1).T(2).Dtis("-----D--R").ChainDiffs({4, 3, 2}).FrameDiffs({3, 1}); 430 t[0xD].S(2).T(2).Dtis("--------D").ChainDiffs({5, 4, 3}).FrameDiffs({3, 1}); 431 t[0x2].S(0).T(1).Dtis("-DS-RR-RR").ChainDiffs({6, 5, 4}).FrameDiffs({6}); 432 t[0x7].S(1).T(1).Dtis("----DS-RR").ChainDiffs({7, 6, 5}).FrameDiffs({6, 1}); 433 t[0xC].S(2).T(1).Dtis("-------DS").ChainDiffs({8, 7, 6}).FrameDiffs({6, 1}); 434 t[0x4].S(0).T(2).Dtis("--D--R--R").ChainDiffs({9, 8, 7}).FrameDiffs({3}); 435 t[0x9].S(1).T(2).Dtis("-----D--R").ChainDiffs({10, 9, 8}).FrameDiffs({3, 1}); 436 t[0xE].S(2).T(2).Dtis("--------D").ChainDiffs({11, 10, 9}).FrameDiffs({3, 1}); 437 t[0x0].S(0).T(0).Dtis("SSSRRRRRR").ChainDiffs({12, 11, 10}).FrameDiffs({12}); 438 t[0x5].S(1).T(0).Dtis("---SSSRRR").ChainDiffs({1, 1, 1}).FrameDiffs({12, 1}); 439 t[0xA].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({12, 1}); 440 return structure; 441 } 442 443 } // namespace webrtc