[ tor-browser ].git.dasho

rtp_payload_params.cc (32535B)
      1 /*
      2 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "call/rtp_payload_params.h"
     12 
     13 #include <algorithm>
     14 #include <cstddef>
     15 #include <cstdint>
     16 #include <iterator>
     17 #include <optional>
     18 
     19 #include "absl/container/inlined_vector.h"
     20 #include "api/field_trials_view.h"
     21 #include "api/transport/rtp/dependency_descriptor.h"
     22 #include "api/video/encoded_image.h"
     23 #include "api/video/render_resolution.h"
     24 #include "api/video/video_codec_constants.h"
     25 #include "api/video/video_codec_type.h"
     26 #include "api/video/video_frame_type.h"
     27 #include "api/video/video_timing.h"
     28 #include "call/rtp_config.h"
     29 #include "common_video/generic_frame_descriptor/generic_frame_info.h"
     30 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
     31 #include "modules/rtp_rtcp/source/rtp_video_header.h"
     32 #include "modules/video_coding/codecs/h264/include/h264_globals.h"
     33 #include "modules/video_coding/codecs/interface/common_constants.h"
     34 #include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
     35 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
     36 #include "modules/video_coding/frame_dependencies_calculator.h"
     37 #include "modules/video_coding/include/video_codec_interface.h"
     38 #include "rtc_base/checks.h"
     39 #include "rtc_base/logging.h"
     40 #include "rtc_base/random.h"
     41 #include "rtc_base/time_utils.h"
     42 
     43 namespace webrtc {
     44 namespace {
     45 
     46 constexpr int kMaxSimulatedSpatialLayers = 3;
     47 
     48 void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
     49                                   std::optional<int> spatial_index,
     50                                   RTPVideoHeader* rtp) {
     51  rtp->codec = info.codecType;
     52  rtp->is_last_frame_in_picture = info.end_of_picture;
     53  rtp->frame_instrumentation_data = info.frame_instrumentation_data;
     54  switch (info.codecType) {
     55    case kVideoCodecVP8: {
     56      auto& vp8_header = rtp->video_type_header.emplace<RTPVideoHeaderVP8>();
     57      vp8_header.InitRTPVideoHeaderVP8();
     58      vp8_header.nonReference = info.codecSpecific.VP8.nonReference;
     59      vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx;
     60      vp8_header.layerSync = info.codecSpecific.VP8.layerSync;
     61      vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx;
     62      return;
     63    }
     64    case kVideoCodecVP9: {
     65      auto& vp9_header = rtp->video_type_header.emplace<RTPVideoHeaderVP9>();
     66      vp9_header.InitRTPVideoHeaderVP9();
     67      vp9_header.inter_pic_predicted =
     68          info.codecSpecific.VP9.inter_pic_predicted;
     69      vp9_header.flexible_mode = info.codecSpecific.VP9.flexible_mode;
     70      vp9_header.ss_data_available = info.codecSpecific.VP9.ss_data_available;
     71      vp9_header.non_ref_for_inter_layer_pred =
     72          info.codecSpecific.VP9.non_ref_for_inter_layer_pred;
     73      vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx;
     74      vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch;
     75      vp9_header.inter_layer_predicted =
     76          info.codecSpecific.VP9.inter_layer_predicted;
     77      vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx;
     78      vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers;
     79      vp9_header.first_active_layer = info.codecSpecific.VP9.first_active_layer;
     80      if (vp9_header.num_spatial_layers > 1) {
     81        vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx);
     82      } else {
     83        vp9_header.spatial_idx = kNoSpatialIdx;
     84      }
     85      if (info.codecSpecific.VP9.ss_data_available) {
     86        vp9_header.spatial_layer_resolution_present =
     87            info.codecSpecific.VP9.spatial_layer_resolution_present;
     88        if (info.codecSpecific.VP9.spatial_layer_resolution_present) {
     89          for (size_t i = 0; i < info.codecSpecific.VP9.num_spatial_layers;
     90               ++i) {
     91            vp9_header.width[i] = info.codecSpecific.VP9.width[i];
     92            vp9_header.height[i] = info.codecSpecific.VP9.height[i];
     93          }
     94        }
     95        vp9_header.gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof);
     96      }
     97 
     98      vp9_header.num_ref_pics = info.codecSpecific.VP9.num_ref_pics;
     99      for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) {
    100        vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i];
    101      }
    102      vp9_header.end_of_picture = info.end_of_picture;
    103      return;
    104    }
    105    case kVideoCodecH264: {
    106      auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
    107      h264_header.packetization_mode =
    108          info.codecSpecific.H264.packetization_mode;
    109      return;
    110    }
    111    // These codec types do not have codec-specifics.
    112    case kVideoCodecGeneric:
    113    case kVideoCodecH265:
    114    case kVideoCodecAV1:
    115      return;
    116  }
    117 }
    118 
    119 void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) {
    120  if (image.timing_.flags == VideoSendTiming::TimingFrameFlags::kInvalid ||
    121      image.timing_.flags == VideoSendTiming::TimingFrameFlags::kNotTriggered) {
    122    timing->flags = VideoSendTiming::TimingFrameFlags::kInvalid;
    123    return;
    124  }
    125 
    126  timing->encode_start_delta_ms = VideoSendTiming::GetDeltaCappedMs(
    127      image.capture_time_ms_, image.timing_.encode_start_ms);
    128  timing->encode_finish_delta_ms = VideoSendTiming::GetDeltaCappedMs(
    129      image.capture_time_ms_, image.timing_.encode_finish_ms);
    130  timing->packetization_finish_delta_ms = 0;
    131  timing->pacer_exit_delta_ms = 0;
    132  timing->network_timestamp_delta_ms = 0;
    133  timing->network2_timestamp_delta_ms = 0;
    134  timing->flags = image.timing_.flags;
    135 }
    136 
    137 // Returns structure that aligns with simulated generic info. The templates
    138 // allow to produce valid dependency descriptor for any stream where
    139 // `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
    140 // https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
    141 // template_fdiffs()). The set of the templates is not tuned for any paricular
    142 // structure thus dependency descriptor would use more bytes on the wire than
    143 // with tuned templates.
    144 FrameDependencyStructure MinimalisticStructure(int num_spatial_layers,
    145                                               int num_temporal_layers) {
    146  RTC_DCHECK_LE(num_spatial_layers, DependencyDescriptor::kMaxSpatialIds);
    147  RTC_DCHECK_LE(num_temporal_layers, DependencyDescriptor::kMaxTemporalIds);
    148  RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32);
    149  FrameDependencyStructure structure;
    150  structure.num_decode_targets = num_spatial_layers * num_temporal_layers;
    151  structure.num_chains = num_spatial_layers;
    152  structure.templates.reserve(num_spatial_layers * num_temporal_layers);
    153  for (int sid = 0; sid < num_spatial_layers; ++sid) {
    154    for (int tid = 0; tid < num_temporal_layers; ++tid) {
    155      FrameDependencyTemplate a_template;
    156      a_template.spatial_id = sid;
    157      a_template.temporal_id = tid;
    158      for (int s = 0; s < num_spatial_layers; ++s) {
    159        for (int t = 0; t < num_temporal_layers; ++t) {
    160          // Prefer kSwitch indication for frames that is part of the decode
    161          // target because dependency descriptor information generated in this
    162          // class use kSwitch indications more often that kRequired, increasing
    163          // the chance of a good (or complete) template match.
    164          a_template.decode_target_indications.push_back(
    165              sid <= s && tid <= t ? DecodeTargetIndication::kSwitch
    166                                   : DecodeTargetIndication::kNotPresent);
    167        }
    168      }
    169      a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers *
    170                                                      num_temporal_layers
    171                                                : num_spatial_layers);
    172      a_template.chain_diffs.assign(structure.num_chains, 1);
    173      structure.templates.push_back(a_template);
    174 
    175      structure.decode_target_protected_by_chain.push_back(sid);
    176    }
    177  }
    178  return structure;
    179 }
    180 }  // namespace
    181 
    182 RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
    183                                   const RtpPayloadState* state,
    184                                   const FieldTrialsView& trials)
    185    : ssrc_(ssrc),
    186      generic_picture_id_experiment_(
    187          trials.IsEnabled("WebRTC-GenericPictureId")),
    188      simulate_generic_structure_(
    189          trials.IsEnabled("WebRTC-GenericCodecDependencyDescriptor")) {
    190  for (auto& spatial_layer : last_frame_id_)
    191    spatial_layer.fill(-1);
    192 
    193  chain_last_frame_id_.fill(-1);
    194  buffer_id_to_frame_id_.fill(-1);
    195 
    196  Random random(TimeMicros());
    197  state_.picture_id =
    198      state ? state->picture_id : (random.Rand<int16_t>() & 0x7FFF);
    199  state_.tl0_pic_idx = state ? state->tl0_pic_idx : (random.Rand<uint8_t>());
    200  state_.frame_id = state ? state->frame_id : random.Rand<uint16_t>();
    201 }
    202 
    203 RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default;
    204 
    205 RtpPayloadParams::~RtpPayloadParams() {}
    206 
    207 RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader(
    208    const EncodedImage& image,
    209    const CodecSpecificInfo* codec_specific_info,
    210    std::optional<int64_t> shared_frame_id) {
    211  int64_t frame_id;
    212  if (shared_frame_id) {
    213    frame_id = *shared_frame_id;
    214  } else {
    215    frame_id = state_.frame_id++;
    216  }
    217 
    218  RTPVideoHeader rtp_video_header;
    219  if (codec_specific_info) {
    220    PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(),
    221                                  &rtp_video_header);
    222  }
    223  rtp_video_header.simulcastIdx = image.SimulcastIndex().value_or(0);
    224  rtp_video_header.frame_type = image._frameType;
    225  rtp_video_header.rotation = image.rotation_;
    226  rtp_video_header.content_type = image.content_type_;
    227  rtp_video_header.playout_delay = image.PlayoutDelay();
    228  rtp_video_header.width = image._encodedWidth;
    229  rtp_video_header.height = image._encodedHeight;
    230  rtp_video_header.color_space = image.ColorSpace()
    231                                     ? std::make_optional(*image.ColorSpace())
    232                                     : std::nullopt;
    233  rtp_video_header.video_frame_tracking_id = image.VideoFrameTrackingId();
    234  SetVideoTiming(image, &rtp_video_header.video_timing);
    235 
    236  const bool is_keyframe = image._frameType == VideoFrameType::kVideoFrameKey;
    237  const bool first_frame_in_picture =
    238      (codec_specific_info && codec_specific_info->codecType == kVideoCodecVP9)
    239          ? codec_specific_info->codecSpecific.VP9.first_frame_in_picture
    240          : true;
    241 
    242  SetCodecSpecific(&rtp_video_header, first_frame_in_picture);
    243 
    244  SetGeneric(codec_specific_info, frame_id, is_keyframe, &rtp_video_header);
    245 
    246  return rtp_video_header;
    247 }
    248 
    249 uint32_t RtpPayloadParams::ssrc() const {
    250  return ssrc_;
    251 }
    252 
    253 RtpPayloadState RtpPayloadParams::state() const {
    254  return state_;
    255 }
    256 
    257 void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header,
    258                                        bool first_frame_in_picture) {
    259  // Always set picture id. Set tl0_pic_idx iff temporal index is set.
    260  if (first_frame_in_picture) {
    261    state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF;
    262  }
    263  if (rtp_video_header->codec == kVideoCodecVP8) {
    264    auto& vp8_header =
    265        std::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
    266    vp8_header.pictureId = state_.picture_id;
    267 
    268    if (vp8_header.temporalIdx != kNoTemporalIdx) {
    269      if (vp8_header.temporalIdx == 0) {
    270        ++state_.tl0_pic_idx;
    271      }
    272      vp8_header.tl0PicIdx = state_.tl0_pic_idx;
    273    }
    274  }
    275  if (rtp_video_header->codec == kVideoCodecVP9) {
    276    auto& vp9_header =
    277        std::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header);
    278    vp9_header.picture_id = state_.picture_id;
    279 
    280    // Note that in the case that we have no temporal layers but we do have
    281    // spatial layers, packets will carry layering info with a temporal_idx of
    282    // zero, and we then have to set and increment tl0_pic_idx.
    283    if (vp9_header.temporal_idx != kNoTemporalIdx ||
    284        vp9_header.spatial_idx != kNoSpatialIdx) {
    285      if (first_frame_in_picture &&
    286          (vp9_header.temporal_idx == 0 ||
    287           vp9_header.temporal_idx == kNoTemporalIdx)) {
    288        ++state_.tl0_pic_idx;
    289      }
    290      vp9_header.tl0_pic_idx = state_.tl0_pic_idx;
    291    }
    292  }
    293  if (generic_picture_id_experiment_ &&
    294      rtp_video_header->codec == kVideoCodecGeneric) {
    295    rtp_video_header->video_type_header.emplace<RTPVideoHeaderLegacyGeneric>()
    296        .picture_id = state_.picture_id;
    297  }
    298 }
    299 
    300 RTPVideoHeader::GenericDescriptorInfo
    301 RtpPayloadParams::GenericDescriptorFromFrameInfo(
    302    const GenericFrameInfo& frame_info,
    303    int64_t frame_id) {
    304  RTPVideoHeader::GenericDescriptorInfo generic;
    305  generic.frame_id = frame_id;
    306  generic.dependencies = dependencies_calculator_.FromBuffersUsage(
    307      frame_id, frame_info.encoder_buffers);
    308  generic.chain_diffs =
    309      chains_calculator_.From(frame_id, frame_info.part_of_chain);
    310  generic.spatial_index = frame_info.spatial_id;
    311  generic.temporal_index = frame_info.temporal_id;
    312  generic.decode_target_indications = frame_info.decode_target_indications;
    313  generic.active_decode_targets = frame_info.active_decode_targets;
    314  return generic;
    315 }
    316 
    317 void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
    318                                  int64_t frame_id,
    319                                  bool is_keyframe,
    320                                  RTPVideoHeader* rtp_video_header) {
    321  if (codec_specific_info && codec_specific_info->generic_frame_info &&
    322      !codec_specific_info->generic_frame_info->encoder_buffers.empty()) {
    323    if (is_keyframe) {
    324      // Key frame resets all chains it is in.
    325      chains_calculator_.Reset(
    326          codec_specific_info->generic_frame_info->part_of_chain);
    327    }
    328    rtp_video_header->generic = GenericDescriptorFromFrameInfo(
    329        *codec_specific_info->generic_frame_info, frame_id);
    330    return;
    331  }
    332 
    333  switch (rtp_video_header->codec) {
    334    case VideoCodecType::kVideoCodecGeneric:
    335      GenericToGeneric(frame_id, is_keyframe, rtp_video_header);
    336      return;
    337    case VideoCodecType::kVideoCodecVP8:
    338      if (codec_specific_info) {
    339        Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id,
    340                     is_keyframe, rtp_video_header);
    341      }
    342      return;
    343    case VideoCodecType::kVideoCodecVP9:
    344      if (codec_specific_info != nullptr) {
    345        Vp9ToGeneric(codec_specific_info->codecSpecific.VP9, frame_id,
    346                     *rtp_video_header);
    347      }
    348      return;
    349    case VideoCodecType::kVideoCodecAV1:
    350      // Codec-specifics is not supported for AV1. We convert from the
    351      // generic_frame_info.
    352      return;
    353    case VideoCodecType::kVideoCodecH264:
    354      if (codec_specific_info) {
    355        H264ToGeneric(codec_specific_info->codecSpecific.H264, frame_id,
    356                      is_keyframe, rtp_video_header);
    357      }
    358      return;
    359    case VideoCodecType::kVideoCodecH265:
    360      // Codec-specifics is not supported for H.265. We convert from the
    361      // generic_frame_info.
    362      return;
    363  }
    364  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
    365 }
    366 
    367 std::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure(
    368    const CodecSpecificInfo* codec_specific_info) {
    369  if (codec_specific_info == nullptr) {
    370    return std::nullopt;
    371  }
    372  // This helper shouldn't be used when template structure is specified
    373  // explicetly.
    374  RTC_DCHECK(!codec_specific_info->template_structure.has_value());
    375  switch (codec_specific_info->codecType) {
    376    case VideoCodecType::kVideoCodecGeneric:
    377      if (simulate_generic_structure_) {
    378        return MinimalisticStructure(/*num_spatial_layers=*/1,
    379                                     /*num_temporal_layers=*/1);
    380      }
    381      return std::nullopt;
    382    case VideoCodecType::kVideoCodecVP8:
    383      return MinimalisticStructure(/*num_spatial_layers=*/1,
    384                                   /*num_temporal_layers=*/kMaxTemporalStreams);
    385    case VideoCodecType::kVideoCodecVP9: {
    386      std::optional<FrameDependencyStructure> structure = MinimalisticStructure(
    387          /*num_spatial_layers=*/kMaxSimulatedSpatialLayers,
    388          /*num_temporal_layers=*/kMaxTemporalStreams);
    389      const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
    390      if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
    391        RenderResolution first_valid;
    392        RenderResolution last_valid;
    393        for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
    394          RenderResolution r(vp9.width[i], vp9.height[i]);
    395          if (r.Valid()) {
    396            if (!first_valid.Valid()) {
    397              first_valid = r;
    398            }
    399            last_valid = r;
    400          }
    401          structure->resolutions.push_back(r);
    402        }
    403        if (!last_valid.Valid()) {
    404          // No valid resolution found. Do not send resolutions.
    405          structure->resolutions.clear();
    406        } else {
    407          structure->resolutions.resize(kMaxSimulatedSpatialLayers, last_valid);
    408          // VP9 encoder wrapper may disable first few spatial layers by
    409          // setting invalid resolution (0,0). `structure->resolutions`
    410          // doesn't support invalid resolution, so reset them to something
    411          // valid.
    412          for (RenderResolution& r : structure->resolutions) {
    413            if (!r.Valid()) {
    414              r = first_valid;
    415            }
    416          }
    417        }
    418      }
    419      return structure;
    420    }
    421    case VideoCodecType::kVideoCodecH264:
    422      return MinimalisticStructure(
    423          /*num_spatial_layers=*/1,
    424          /*num_temporal_layers=*/kMaxTemporalStreams);
    425    case VideoCodecType::kVideoCodecAV1:
    426    case VideoCodecType::kVideoCodecH265:
    427      return std::nullopt;
    428  }
    429  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
    430 }
    431 
    432 void RtpPayloadParams::GenericToGeneric(int64_t frame_id,
    433                                        bool is_keyframe,
    434                                        RTPVideoHeader* rtp_video_header) {
    435  RTPVideoHeader::GenericDescriptorInfo& generic =
    436      rtp_video_header->generic.emplace();
    437 
    438  generic.frame_id = frame_id;
    439  generic.decode_target_indications.push_back(DecodeTargetIndication::kSwitch);
    440 
    441  if (is_keyframe) {
    442    generic.chain_diffs.push_back(0);
    443    last_frame_id_[0].fill(-1);
    444  } else {
    445    int64_t last_frame_id = last_frame_id_[0][0];
    446    RTC_DCHECK_NE(last_frame_id, -1);
    447    RTC_DCHECK_LT(last_frame_id, frame_id);
    448    generic.chain_diffs.push_back(frame_id - last_frame_id);
    449    generic.dependencies.push_back(last_frame_id);
    450  }
    451 
    452  last_frame_id_[0][0] = frame_id;
    453 }
    454 
    455 void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info,
    456                                     int64_t frame_id,
    457                                     bool is_keyframe,
    458                                     RTPVideoHeader* rtp_video_header) {
    459  const int temporal_index =
    460      h264_info.temporal_idx != kNoTemporalIdx ? h264_info.temporal_idx : 0;
    461 
    462  if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers) {
    463    RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
    464                           "used with generic frame descriptor.";
    465    return;
    466  }
    467 
    468  RTPVideoHeader::GenericDescriptorInfo& generic =
    469      rtp_video_header->generic.emplace();
    470 
    471  generic.frame_id = frame_id;
    472  generic.temporal_index = temporal_index;
    473 
    474  // Generate decode target indications.
    475  RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams);
    476  generic.decode_target_indications.resize(kMaxTemporalStreams);
    477  auto it = std::fill_n(generic.decode_target_indications.begin(),
    478                        temporal_index, DecodeTargetIndication::kNotPresent);
    479  std::fill(it, generic.decode_target_indications.end(),
    480            DecodeTargetIndication::kSwitch);
    481  generic.chain_diffs = {
    482      (is_keyframe || last_frame_id_[0][0] < 0)
    483          ? 0
    484          : static_cast<int>(frame_id - last_frame_id_[0][0])};
    485 
    486  if (is_keyframe) {
    487    RTC_DCHECK_EQ(temporal_index, 0);
    488    last_frame_id_[/*spatial index*/ 0].fill(-1);
    489    last_frame_id_[/*spatial index*/ 0][temporal_index] = frame_id;
    490    return;
    491  }
    492 
    493  if (h264_info.base_layer_sync) {
    494    int64_t tl0_frame_id = last_frame_id_[/*spatial index*/ 0][0];
    495 
    496    for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
    497      if (last_frame_id_[/*spatial index*/ 0][i] < tl0_frame_id) {
    498        last_frame_id_[/*spatial index*/ 0][i] = -1;
    499      }
    500    }
    501 
    502    RTC_DCHECK_GE(tl0_frame_id, 0);
    503    RTC_DCHECK_LT(tl0_frame_id, frame_id);
    504    generic.dependencies.push_back(tl0_frame_id);
    505  } else {
    506    for (int i = 0; i <= temporal_index; ++i) {
    507      int64_t last_frame_id = last_frame_id_[/*spatial index*/ 0][i];
    508 
    509      if (last_frame_id != -1) {
    510        RTC_DCHECK_LT(last_frame_id, frame_id);
    511        generic.dependencies.push_back(last_frame_id);
    512      }
    513    }
    514  }
    515 
    516  last_frame_id_[/*spatial_index*/ 0][temporal_index] = frame_id;
    517 }
    518 
    519 void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
    520                                    int64_t frame_id,
    521                                    bool is_keyframe,
    522                                    RTPVideoHeader* rtp_video_header) {
    523  const auto& vp8_header =
    524      std::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
    525  const int spatial_index = 0;
    526  const int temporal_index =
    527      vp8_header.temporalIdx != kNoTemporalIdx ? vp8_header.temporalIdx : 0;
    528 
    529  if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers ||
    530      spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers) {
    531    RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
    532                           "used with generic frame descriptor.";
    533    return;
    534  }
    535 
    536  RTPVideoHeader::GenericDescriptorInfo& generic =
    537      rtp_video_header->generic.emplace();
    538 
    539  generic.frame_id = frame_id;
    540  generic.spatial_index = spatial_index;
    541  generic.temporal_index = temporal_index;
    542 
    543  // Generate decode target indications.
    544  RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams);
    545  generic.decode_target_indications.resize(kMaxTemporalStreams);
    546  auto it = std::fill_n(generic.decode_target_indications.begin(),
    547                        temporal_index, DecodeTargetIndication::kNotPresent);
    548  std::fill(it, generic.decode_target_indications.end(),
    549            DecodeTargetIndication::kSwitch);
    550 
    551  // Frame dependencies.
    552  if (vp8_info.useExplicitDependencies) {
    553    SetDependenciesVp8New(vp8_info, frame_id, is_keyframe, vp8_header.layerSync,
    554                          &generic);
    555  } else {
    556    SetDependenciesVp8Deprecated(vp8_info, frame_id, is_keyframe, spatial_index,
    557                                 temporal_index, vp8_header.layerSync,
    558                                 &generic);
    559  }
    560 
    561  // Calculate chains.
    562  generic.chain_diffs = {
    563      (is_keyframe || chain_last_frame_id_[0] < 0)
    564          ? 0
    565          : static_cast<int>(frame_id - chain_last_frame_id_[0])};
    566  if (temporal_index == 0) {
    567    chain_last_frame_id_[0] = frame_id;
    568  }
    569 }
    570 
    571 void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& /* vp9_info */,
    572                                    int64_t frame_id,
    573                                    RTPVideoHeader& rtp_video_header) {
    574  const auto& vp9_header =
    575      std::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
    576  const int num_spatial_layers = kMaxSimulatedSpatialLayers;
    577  const int first_active_spatial_id = vp9_header.first_active_layer;
    578  const int last_active_spatial_id = vp9_header.num_spatial_layers - 1;
    579  const int num_temporal_layers = kMaxTemporalStreams;
    580  static_assert(num_spatial_layers <=
    581                RtpGenericFrameDescriptor::kMaxSpatialLayers);
    582  static_assert(num_temporal_layers <=
    583                RtpGenericFrameDescriptor::kMaxTemporalLayers);
    584  static_assert(num_spatial_layers <= DependencyDescriptor::kMaxSpatialIds);
    585  static_assert(num_temporal_layers <= DependencyDescriptor::kMaxTemporalIds);
    586 
    587  int spatial_index =
    588      vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0;
    589  int temporal_index =
    590      vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0;
    591 
    592  if (!(temporal_index < num_temporal_layers &&
    593        first_active_spatial_id <= spatial_index &&
    594        spatial_index <= last_active_spatial_id &&
    595        last_active_spatial_id < num_spatial_layers)) {
    596    // Prefer to generate no generic layering than an inconsistent one.
    597    RTC_LOG(LS_ERROR) << "Inconsistent layer id sid=" << spatial_index
    598                      << ",tid=" << temporal_index
    599                      << " in VP9 header. Active spatial ids: ["
    600                      << first_active_spatial_id << ","
    601                      << last_active_spatial_id << "]";
    602    return;
    603  }
    604 
    605  RTPVideoHeader::GenericDescriptorInfo& result =
    606      rtp_video_header.generic.emplace();
    607 
    608  result.frame_id = frame_id;
    609  result.spatial_index = spatial_index;
    610  result.temporal_index = temporal_index;
    611 
    612  result.decode_target_indications.reserve(num_spatial_layers *
    613                                           num_temporal_layers);
    614  for (int sid = 0; sid < num_spatial_layers; ++sid) {
    615    for (int tid = 0; tid < num_temporal_layers; ++tid) {
    616      DecodeTargetIndication dti;
    617      if (sid < spatial_index || tid < temporal_index) {
    618        dti = DecodeTargetIndication::kNotPresent;
    619      } else if (spatial_index != sid &&
    620                 vp9_header.non_ref_for_inter_layer_pred) {
    621        dti = DecodeTargetIndication::kNotPresent;
    622      } else if (sid == spatial_index && tid == temporal_index) {
    623        // Assume that if frame is decodable, all of its own layer is decodable.
    624        dti = DecodeTargetIndication::kSwitch;
    625      } else if (sid == spatial_index && vp9_header.temporal_up_switch) {
    626        dti = DecodeTargetIndication::kSwitch;
    627      } else if (!vp9_header.inter_pic_predicted) {
    628        // Key frame or spatial upswitch
    629        dti = DecodeTargetIndication::kSwitch;
    630      } else {
    631        // Make no other assumptions. That should be safe, though suboptimal.
    632        // To provide more accurate dti, encoder wrapper should fill in
    633        // CodecSpecificInfo::generic_frame_info
    634        dti = DecodeTargetIndication::kRequired;
    635      }
    636      result.decode_target_indications.push_back(dti);
    637    }
    638  }
    639 
    640  // Calculate frame dependencies.
    641  static constexpr int kPictureDiffLimit = 128;
    642  if (last_vp9_frame_id_.empty()) {
    643    // Create the array only if it is ever used.
    644    last_vp9_frame_id_.resize(kPictureDiffLimit);
    645  }
    646 
    647  if (vp9_header.flexible_mode) {
    648    if (vp9_header.inter_layer_predicted && spatial_index > 0) {
    649      result.dependencies.push_back(
    650          last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
    651                            [spatial_index - 1]);
    652    }
    653    if (vp9_header.inter_pic_predicted) {
    654      for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) {
    655        // picture_id is 15 bit number that wraps around. Though undeflow may
    656        // produce picture that exceeds 2^15, it is ok because in this
    657        // code block only last 7 bits of the picture_id are used.
    658        uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i];
    659        result.dependencies.push_back(
    660            last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
    661      }
    662    }
    663    last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
    664                      [spatial_index] = frame_id;
    665  } else {
    666    // Implementing general conversion logic for non-flexible mode requires some
    667    // work and we will almost certainly never need it, so for now support only
    668    // non-layered streams.
    669    if (spatial_index > 0 || temporal_index > 0) {
    670      // Prefer to generate no generic layering than an inconsistent one.
    671      rtp_video_header.generic.reset();
    672      return;
    673    }
    674 
    675    if (vp9_header.inter_pic_predicted) {
    676      // Since we only support non-scalable streams we only need to save the
    677      // last frame id.
    678      result.dependencies.push_back(last_vp9_frame_id_[0][0]);
    679    }
    680    last_vp9_frame_id_[0][0] = frame_id;
    681  }
    682 
    683  result.active_decode_targets =
    684      ((uint32_t{1} << num_temporal_layers * (last_active_spatial_id + 1)) -
    685       1) ^
    686      ((uint32_t{1} << num_temporal_layers * first_active_spatial_id) - 1);
    687 
    688  // Calculate chains, assuming chain includes all frames with temporal_id = 0
    689  if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
    690    // Assume frames without dependencies also reset chains.
    691    for (int sid = spatial_index; sid <= last_active_spatial_id; ++sid) {
    692      chain_last_frame_id_[sid] = -1;
    693    }
    694  }
    695  result.chain_diffs.resize(num_spatial_layers, 0);
    696  for (int sid = first_active_spatial_id; sid <= last_active_spatial_id;
    697       ++sid) {
    698    if (chain_last_frame_id_[sid] == -1) {
    699      result.chain_diffs[sid] = 0;
    700      continue;
    701    }
    702    int64_t chain_diff = frame_id - chain_last_frame_id_[sid];
    703    if (chain_diff >= 256) {
    704      RTC_LOG(LS_ERROR)
    705          << "Too many frames since last VP9 T0 frame for spatial layer #"
    706          << sid << " at frame#" << frame_id;
    707      chain_last_frame_id_[sid] = -1;
    708      chain_diff = 0;
    709    }
    710    result.chain_diffs[sid] = chain_diff;
    711  }
    712 
    713  if (temporal_index == 0) {
    714    chain_last_frame_id_[spatial_index] = frame_id;
    715    if (!vp9_header.non_ref_for_inter_layer_pred) {
    716      for (int sid = spatial_index + 1; sid <= last_active_spatial_id; ++sid) {
    717        chain_last_frame_id_[sid] = frame_id;
    718      }
    719    }
    720  }
    721 }
    722 
    723 void RtpPayloadParams::SetDependenciesVp8Deprecated(
    724    const CodecSpecificInfoVP8& vp8_info,
    725    int64_t frame_id,
    726    bool is_keyframe,
    727    int spatial_index,
    728    int temporal_index,
    729    bool layer_sync,
    730    RTPVideoHeader::GenericDescriptorInfo* generic) {
    731  RTC_DCHECK(!vp8_info.useExplicitDependencies);
    732  RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value());
    733  new_version_used_ = false;
    734 
    735  if (is_keyframe) {
    736    RTC_DCHECK_EQ(temporal_index, 0);
    737    last_frame_id_[spatial_index].fill(-1);
    738    last_frame_id_[spatial_index][temporal_index] = frame_id;
    739    return;
    740  }
    741 
    742  if (layer_sync) {
    743    int64_t tl0_frame_id = last_frame_id_[spatial_index][0];
    744 
    745    for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
    746      if (last_frame_id_[spatial_index][i] < tl0_frame_id) {
    747        last_frame_id_[spatial_index][i] = -1;
    748      }
    749    }
    750 
    751    RTC_DCHECK_GE(tl0_frame_id, 0);
    752    RTC_DCHECK_LT(tl0_frame_id, frame_id);
    753    generic->dependencies.push_back(tl0_frame_id);
    754  } else {
    755    for (int i = 0; i <= temporal_index; ++i) {
    756      int64_t last_frame_id = last_frame_id_[spatial_index][i];
    757 
    758      if (last_frame_id != -1) {
    759        RTC_DCHECK_LT(last_frame_id, frame_id);
    760        generic->dependencies.push_back(last_frame_id);
    761      }
    762    }
    763  }
    764 
    765  last_frame_id_[spatial_index][temporal_index] = frame_id;
    766 }
    767 
    768 void RtpPayloadParams::SetDependenciesVp8New(
    769    const CodecSpecificInfoVP8& vp8_info,
    770    int64_t frame_id,
    771    bool is_keyframe,
    772    bool /* layer_sync */,
    773    RTPVideoHeader::GenericDescriptorInfo* generic) {
    774  RTC_DCHECK(vp8_info.useExplicitDependencies);
    775  RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value());
    776  new_version_used_ = true;
    777 
    778  if (is_keyframe) {
    779    RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u);
    780    buffer_id_to_frame_id_.fill(frame_id);
    781    return;
    782  }
    783 
    784  constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount;
    785 
    786  RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u);
    787  RTC_DCHECK_LE(vp8_info.referencedBuffersCount,
    788                std::size(vp8_info.referencedBuffers));
    789 
    790  for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) {
    791    const size_t referenced_buffer = vp8_info.referencedBuffers[i];
    792    RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8);
    793    RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size());
    794 
    795    const int64_t dependency_frame_id =
    796        buffer_id_to_frame_id_[referenced_buffer];
    797    RTC_DCHECK_GE(dependency_frame_id, 0);
    798    RTC_DCHECK_LT(dependency_frame_id, frame_id);
    799 
    800    const bool is_new_dependency =
    801        std::find(generic->dependencies.begin(), generic->dependencies.end(),
    802                  dependency_frame_id) == generic->dependencies.end();
    803    if (is_new_dependency) {
    804      generic->dependencies.push_back(dependency_frame_id);
    805    }
    806  }
    807 
    808  RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8);
    809  for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) {
    810    const size_t updated_id = vp8_info.updatedBuffers[i];
    811    buffer_id_to_frame_id_[updated_id] = frame_id;
    812  }
    813 
    814  RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8);
    815 }
    816 
    817 }  // namespace webrtc
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE