tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rtp_vp9_ref_finder.cc (13420B)


      1 /*
      2 *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/video_coding/rtp_vp9_ref_finder.h"
     12 
     13 #include <algorithm>
     14 #include <cstddef>
     15 #include <cstdint>
     16 #include <memory>
     17 #include <utility>
     18 
     19 #include "api/video/encoded_frame.h"
     20 #include "api/video/video_codec_constants.h"
     21 #include "api/video/video_frame_type.h"
     22 #include "modules/rtp_rtcp/source/frame_object.h"
     23 #include "modules/video_coding/codecs/interface/common_constants.h"
     24 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
     25 #include "modules/video_coding/rtp_frame_reference_finder.h"
     26 #include "rtc_base/checks.h"
     27 #include "rtc_base/logging.h"
     28 #include "rtc_base/numerics/mod_ops.h"
     29 #include "rtc_base/numerics/sequence_number_util.h"
     30 
     31 namespace webrtc {
// Entry point for every received VP9 frame. Tags the frame with its
// temporal/spatial layer and (wrapped) picture id, then decides whether the
// frame can be handed off with complete references, must be stashed until
// more stream information arrives, or has to be dropped.
RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
    std::unique_ptr<RtpFrameObject> frame) {
  const RTPVideoHeaderVP9& codec_header =
      std::get<RTPVideoHeaderVP9>(frame->GetRtpVideoHeader().video_type_header);

  if (codec_header.temporal_idx != kNoTemporalIdx)
    frame->SetTemporalIndex(codec_header.temporal_idx);
  frame->SetSpatialIndex(codec_header.spatial_idx);
  // The picture id is kept modulo kFrameIdLength here; it is unwrapped to a
  // monotonically increasing id later in FlattenFrameIdAndRefs().
  frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));

  FrameDecision decision;
  if (codec_header.temporal_idx >= kMaxTemporalLayers ||
      codec_header.spatial_idx >= kMaxSpatialLayers) {
    // Layer indices outside the supported range: reject the frame outright.
    decision = kDrop;
  } else if (codec_header.flexible_mode) {
    // Flexible mode carries explicit reference diffs in the header.
    decision = ManageFrameFlexible(frame.get(), codec_header);
  } else {
    // Non-flexible mode: references are derived from the GOF structure,
    // which requires a valid TL0PICIDX.
    if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
      RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
                             "non-flexible mode.";
      decision = kDrop;
    } else {
      int64_t unwrapped_tl0 =
          tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
      decision = ManageFrameGof(frame.get(), codec_header, unwrapped_tl0);

      if (decision == kStash) {
        // Bound the stash size; the oldest entry (at the back) is evicted.
        if (stashed_frames_.size() > kMaxStashedFrames) {
          stashed_frames_.pop_back();
        }

        // Ownership of `frame` moves into the stash here.
        stashed_frames_.push_front(
            {.unwrapped_tl0 = unwrapped_tl0, .frame = std::move(frame)});
      }
    }
  }

  RtpFrameReferenceFinder::ReturnVector res;
  switch (decision) {
    case kStash:
      // Frame was moved into `stashed_frames_` above; nothing to return yet.
      return res;
    case kHandOff:
      res.push_back(std::move(frame));
      // A handed-off frame may unblock previously stashed frames.
      RetryStashedFrames(res);
      return res;
    case kDrop:
      return res;
  }

  return res;
}
     83 
     84 RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameFlexible(
     85    RtpFrameObject* frame,
     86    const RTPVideoHeaderVP9& codec_header) {
     87  if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
     88    return kDrop;
     89  }
     90 
     91  frame->num_references = codec_header.num_ref_pics;
     92  for (size_t i = 0; i < frame->num_references; ++i) {
     93    frame->references[i] =
     94        Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
     95  }
     96 
     97  FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
     98  return kHandOff;
     99 }
    100 
// Resolves references for a frame in non-flexible (GOF) mode. The frame's
// position inside the group-of-frames structure, looked up via its unwrapped
// TL0PICIDX, determines its references. Returns kHandOff when references are
// complete, kStash when required GOF info or frames are still missing, and
// kDrop on malformed headers.
RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameGof(
    RtpFrameObject* frame,
    const RTPVideoHeaderVP9& codec_header,
    int64_t unwrapped_tl0) {
  GofInfo* info;
  if (codec_header.ss_data_available) {
    if (codec_header.temporal_idx != 0) {
      RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
                             "layer frame. Scalability structure ignored.";
    } else {
      // Validate the advertised GOF before storing it.
      if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
        return kDrop;
      }

      for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
        if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
          return kDrop;
        }
      }

      GofInfoVP9 gof = codec_header.gof;
      if (gof.num_frames_in_gof == 0) {
        RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assume "
                               "that stream has only one temporal layer.";
        gof.SetGofInfoVP9(kTemporalStructureMode1);
      }

      // Store the structure in a ring buffer of the last kMaxGofSaved
      // structures, anchored at this frame's picture id, and associate it
      // with this TL0PICIDX.
      current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
      scalability_structures_[current_ss_idx_] = gof;
      scalability_structures_[current_ss_idx_].pid_start = frame->Id();
      gof_info_.emplace(
          unwrapped_tl0,
          GofInfo(&scalability_structures_[current_ss_idx_], frame->Id()));
    }

    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
    if (gof_info_it == gof_info_.end())
      return kStash;

    info = &gof_info_it->second;

    // Keyframes have no inter-picture references; they can be handed off as
    // soon as the GOF info for their TL0PICIDX is known.
    if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
      frame->num_references = 0;
      FrameReceivedVp9(frame->Id(), info);
      FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
      return kHandOff;
    }
  } else {
    if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
      RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
      return kDrop;
    }

    // tl0_idx is incremented on temporal_idx=0 frames of the lowest spatial
    // layer (which spatial_idx is not necessarily zero). Upper spatial layer
    // frames with inter-layer prediction use GOF info of their base spatial
    // layer frames.
    const bool use_prev_gof =
        codec_header.temporal_idx == 0 && !codec_header.inter_layer_predicted;
    auto gof_info_it =
        gof_info_.find(use_prev_gof ? unwrapped_tl0 - 1 : unwrapped_tl0);

    // Gof info for this frame is not available yet, stash this frame.
    if (gof_info_it == gof_info_.end())
      return kStash;

    if (codec_header.temporal_idx == 0) {
      // Base-layer frame starts a new TL0 period: record GOF info for it,
      // reusing the previous period's structure.
      gof_info_it = gof_info_
                        .emplace(unwrapped_tl0,
                                 GofInfo(gof_info_it->second.gof, frame->Id()))
                        .first;
    }

    info = &gof_info_it->second;
  }

  // Clean up info for base layers that are too old.
  int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved;
  auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
  gof_info_.erase(gof_info_.begin(), clean_gof_info_to);

  // Register this frame and record any gap as missing frames per layer.
  FrameReceivedVp9(frame->Id(), info);

  // Make sure we don't miss any frame that could potentially have the
  // up switch flag set.
  if (MissingRequiredFrameVp9(frame->Id(), *info))
    return kStash;

  if (codec_header.temporal_up_switch)
    up_switch_.emplace(frame->Id(), codec_header.temporal_idx);

  // Clean out old info about up switch frames.
  uint16_t old_picture_id = Subtract<kFrameIdLength>(frame->Id(), 50);
  auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
  up_switch_.erase(up_switch_.begin(), up_switch_erase_to);

  if (codec_header.inter_pic_predicted) {
    // Locate this frame's slot in the GOF by its distance from the GOF's
    // starting picture id.
    size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
                                                        frame->Id());
    size_t gof_idx = diff % info->gof->num_frames_in_gof;

    if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
      return kDrop;
    }

    // Populate references according to the scalability structure.
    frame->num_references = info->gof->num_ref_pics[gof_idx];
    for (size_t i = 0; i < frame->num_references; ++i) {
      frame->references[i] = Subtract<kFrameIdLength>(
          frame->Id(), info->gof->pid_diff[gof_idx][i]);

      // If this is a reference to a frame earlier than the last up switch
      // point, then ignore this reference.
      if (UpSwitchInIntervalVp9(frame->Id(), codec_header.temporal_idx,
                                frame->references[i])) {
        --frame->num_references;
      }
    }
  } else {
    frame->num_references = 0;
  }

  FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
  return kHandOff;
}
    226 
    227 bool RtpVp9RefFinder::MissingRequiredFrameVp9(uint16_t picture_id,
    228                                              const GofInfo& info) {
    229  size_t diff =
    230      ForwardDiff<uint16_t, kFrameIdLength>(info.gof->pid_start, picture_id);
    231  size_t gof_idx = diff % info.gof->num_frames_in_gof;
    232  size_t temporal_idx = info.gof->temporal_idx[gof_idx];
    233 
    234  if (temporal_idx >= kMaxTemporalLayers) {
    235    RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
    236                        << " temporal "
    237                           "layers are supported.";
    238    return true;
    239  }
    240 
    241  // For every reference this frame has, check if there is a frame missing in
    242  // the interval (`ref_pid`, `picture_id`) in any of the lower temporal
    243  // layers. If so, we are missing a required frame.
    244  uint8_t num_references = info.gof->num_ref_pics[gof_idx];
    245  for (size_t i = 0; i < num_references; ++i) {
    246    uint16_t ref_pid =
    247        Subtract<kFrameIdLength>(picture_id, info.gof->pid_diff[gof_idx][i]);
    248    for (size_t l = 0; l < temporal_idx; ++l) {
    249      auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
    250      if (missing_frame_it != missing_frames_for_layer_[l].end() &&
    251          AheadOf<uint16_t, kFrameIdLength>(picture_id, *missing_frame_it)) {
    252        return true;
    253      }
    254    }
    255  }
    256  return false;
    257 }
    258 
// Registers `picture_id` against the GOF bookkeeping. If it advances past
// `info->last_picture_id`, every skipped picture id in between is recorded
// as a missing frame on its temporal layer (derived from the GOF position);
// otherwise the frame is a late arrival and is removed from the missing set.
void RtpVp9RefFinder::FrameReceivedVp9(uint16_t picture_id, GofInfo* info) {
  int last_picture_id = info->last_picture_id;
  // Clamp to the array bound so gof_idx below can safely index the GOF.
  size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);

  // If there is a gap, find which temporal layer the missing frames
  // belong to and add the frame as missing for that temporal layer.
  // Otherwise, remove this frame from the set of missing frames.
  if (AheadOf<uint16_t, kFrameIdLength>(picture_id, last_picture_id)) {
    size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
                                                        last_picture_id);
    size_t gof_idx = diff % gof_size;

    // Walk every picture id in (last_picture_id, picture_id), tracking the
    // GOF slot alongside, and mark each one missing on its layer.
    last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
    while (last_picture_id != picture_id) {
      gof_idx = (gof_idx + 1) % gof_size;
      RTC_CHECK(gof_idx < kMaxVp9FramesInGof);

      size_t temporal_idx = info->gof->temporal_idx[gof_idx];
      if (temporal_idx >= kMaxTemporalLayers) {
        RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
                            << " temporal "
                               "layers are supported.";
        return;
      }

      missing_frames_for_layer_[temporal_idx].insert(last_picture_id);
      last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
    }

    info->last_picture_id = last_picture_id;
  } else {
    // Late (out-of-order) frame: it is no longer missing on its layer.
    size_t diff =
        ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, picture_id);
    size_t gof_idx = diff % gof_size;
    RTC_CHECK(gof_idx < kMaxVp9FramesInGof);

    size_t temporal_idx = info->gof->temporal_idx[gof_idx];
    if (temporal_idx >= kMaxTemporalLayers) {
      RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
                          << " temporal "
                             "layers are supported.";
      return;
    }

    missing_frames_for_layer_[temporal_idx].erase(picture_id);
  }
}
    306 
    307 bool RtpVp9RefFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
    308                                            uint8_t temporal_idx,
    309                                            uint16_t pid_ref) {
    310  for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
    311       up_switch_it != up_switch_.end() &&
    312       AheadOf<uint16_t, kFrameIdLength>(picture_id, up_switch_it->first);
    313       ++up_switch_it) {
    314    if (up_switch_it->second < temporal_idx)
    315      return true;
    316  }
    317 
    318  return false;
    319 }
    320 
    321 void RtpVp9RefFinder::RetryStashedFrames(
    322    RtpFrameReferenceFinder::ReturnVector& res) {
    323  bool complete_frame = false;
    324  do {
    325    complete_frame = false;
    326    for (auto it = stashed_frames_.begin(); it != stashed_frames_.end();) {
    327      const RTPVideoHeaderVP9& codec_header = std::get<RTPVideoHeaderVP9>(
    328          it->frame->GetRtpVideoHeader().video_type_header);
    329      RTC_DCHECK(!codec_header.flexible_mode);
    330      FrameDecision decision =
    331          ManageFrameGof(it->frame.get(), codec_header, it->unwrapped_tl0);
    332 
    333      switch (decision) {
    334        case kStash:
    335          ++it;
    336          break;
    337        case kHandOff:
    338          complete_frame = true;
    339          res.push_back(std::move(it->frame));
    340          [[fallthrough]];
    341        case kDrop:
    342          it = stashed_frames_.erase(it);
    343      }
    344    }
    345  } while (complete_frame);
    346 }
    347 
    348 void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame,
    349                                            bool inter_layer_predicted) {
    350  for (size_t i = 0; i < frame->num_references; ++i) {
    351    frame->references[i] =
    352        unwrapper_.Unwrap(frame->references[i]) * kMaxSpatialLayers +
    353        *frame->SpatialIndex();
    354  }
    355  frame->SetId(unwrapper_.Unwrap(frame->Id()) * kMaxSpatialLayers +
    356               *frame->SpatialIndex());
    357 
    358  if (inter_layer_predicted &&
    359      frame->num_references + 1 <= EncodedFrame::kMaxFrameReferences) {
    360    frame->references[frame->num_references] = frame->Id() - 1;
    361    ++frame->num_references;
    362  }
    363 }
    364 
    365 void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
    366  auto it = stashed_frames_.begin();
    367  while (it != stashed_frames_.end()) {
    368    if (AheadOf<uint16_t>(seq_num, it->frame->first_seq_num())) {
    369      it = stashed_frames_.erase(it);
    370    } else {
    371      ++it;
    372    }
    373  }
    374 }
    375 
    376 }  // namespace webrtc