tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rtp_video_frame_assembler.cc (14613B)


      1 /*
      2 *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "api/video/rtp_video_frame_assembler.h"
     12 
     13 #include <cstdint>
     14 #include <memory>
     15 #include <optional>
     16 #include <utility>
     17 #include <vector>
     18 
     19 #include "absl/container/inlined_vector.h"
     20 #include "api/array_view.h"
     21 #include "api/rtp_packet_infos.h"
     22 #include "api/scoped_refptr.h"
     23 #include "api/transport/rtp/dependency_descriptor.h"
     24 #include "api/video/encoded_image.h"
     25 #include "api/video/video_frame_type.h"
     26 #include "api/video/video_timing.h"
     27 #include "modules/rtp_rtcp/source/frame_object.h"
     28 #include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h"
     29 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
     30 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
     31 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
     32 #include "modules/rtp_rtcp/source/rtp_video_header.h"
     33 #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
     34 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h"
     35 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_generic.h"
     36 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h"
     37 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_raw.h"
     38 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp8.h"
     39 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h"
     40 #include "modules/video_coding/packet_buffer.h"
     41 #include "modules/video_coding/rtp_frame_reference_finder.h"
     42 #include "rtc_base/checks.h"
     43 #include "rtc_base/logging.h"
     44 #include "rtc_base/numerics/sequence_number_unwrapper.h"
     45 
     46 #ifdef RTC_ENABLE_H265
     47 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h"
     48 #endif
     49 
     50 namespace webrtc {
     51 namespace {
     52 std::unique_ptr<VideoRtpDepacketizer> CreateDepacketizer(
     53    RtpVideoFrameAssembler::PayloadFormat payload_format) {
     54  switch (payload_format) {
     55    case RtpVideoFrameAssembler::kRaw:
     56      return std::make_unique<VideoRtpDepacketizerRaw>();
     57    case RtpVideoFrameAssembler::kH264:
     58      return std::make_unique<VideoRtpDepacketizerH264>();
     59    case RtpVideoFrameAssembler::kVp8:
     60      return std::make_unique<VideoRtpDepacketizerVp8>();
     61    case RtpVideoFrameAssembler::kVp9:
     62      return std::make_unique<VideoRtpDepacketizerVp9>();
     63    case RtpVideoFrameAssembler::kAv1:
     64      return std::make_unique<VideoRtpDepacketizerAv1>();
     65    case RtpVideoFrameAssembler::kGeneric:
     66      return std::make_unique<VideoRtpDepacketizerGeneric>();
     67    case RtpVideoFrameAssembler::kH265:
     68 #ifdef RTC_ENABLE_H265
     69      return std::make_unique<VideoRtpDepacketizerH265>();
     70 #else
     71      return nullptr;
     72 #endif
     73  }
     74  RTC_DCHECK_NOTREACHED();
     75  return nullptr;
     76 }
     77 }  // namespace
     78 
     79 class RtpVideoFrameAssembler::Impl {
     80 public:
     81  explicit Impl(std::unique_ptr<VideoRtpDepacketizer> depacketizer);
     82  ~Impl() = default;
     83 
     84  FrameVector InsertPacket(const RtpPacketReceived& packet);
     85 
     86 private:
     87  using RtpFrameVector =
     88      absl::InlinedVector<std::unique_ptr<RtpFrameObject>, 3>;
     89 
     90  RtpFrameVector AssembleFrames(
     91      video_coding::PacketBuffer::InsertResult insert_result);
     92  FrameVector FindReferences(RtpFrameVector frames);
     93  FrameVector UpdateWithPadding(uint16_t seq_num);
     94  bool ParseDependenciesDescriptorExtension(const RtpPacketReceived& rtp_packet,
     95                                            RTPVideoHeader& video_header);
     96  bool ParseGenericDescriptorExtension(const RtpPacketReceived& rtp_packet,
     97                                       RTPVideoHeader& video_header);
     98  void ClearOldData(uint16_t incoming_seq_num);
     99 
    100  std::unique_ptr<FrameDependencyStructure> video_structure_;
    101  SeqNumUnwrapper<uint16_t> rtp_sequence_number_unwrapper_;
    102  SeqNumUnwrapper<uint16_t> frame_id_unwrapper_;
    103  std::optional<int64_t> video_structure_frame_id_;
    104  std::unique_ptr<VideoRtpDepacketizer> depacketizer_;
    105  video_coding::PacketBuffer packet_buffer_;
    106  RtpFrameReferenceFinder reference_finder_;
    107 };
    108 
    109 RtpVideoFrameAssembler::Impl::Impl(
    110    std::unique_ptr<VideoRtpDepacketizer> depacketizer)
    111    : depacketizer_(std::move(depacketizer)),
    112      packet_buffer_(/*start_buffer_size=*/2048, /*max_buffer_size=*/2048) {}
    113 
    114 RtpVideoFrameAssembler::FrameVector RtpVideoFrameAssembler::Impl::InsertPacket(
    115    const RtpPacketReceived& rtp_packet) {
    116  if (rtp_packet.payload_size() == 0) {
    117    ClearOldData(rtp_packet.SequenceNumber());
    118    return UpdateWithPadding(rtp_packet.SequenceNumber());
    119  }
    120 
    121  std::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload =
    122      depacketizer_->Parse(rtp_packet.PayloadBuffer());
    123 
    124  if (parsed_payload == std::nullopt) {
    125    return {};
    126  }
    127 
    128  if (rtp_packet.HasExtension<RtpDependencyDescriptorExtension>()) {
    129    if (!ParseDependenciesDescriptorExtension(rtp_packet,
    130                                              parsed_payload->video_header)) {
    131      return {};
    132    }
    133  } else if (rtp_packet.HasExtension<RtpGenericFrameDescriptorExtension00>()) {
    134    if (!ParseGenericDescriptorExtension(rtp_packet,
    135                                         parsed_payload->video_header)) {
    136      return {};
    137    }
    138  }
    139 
    140  parsed_payload->video_header.is_last_packet_in_frame |= rtp_packet.Marker();
    141 
    142  auto packet = std::make_unique<video_coding::PacketBuffer::Packet>(
    143      rtp_packet,
    144      rtp_sequence_number_unwrapper_.Unwrap(rtp_packet.SequenceNumber()),
    145      parsed_payload->video_header);
    146  packet->video_payload = std::move(parsed_payload->video_payload);
    147 
    148  ClearOldData(rtp_packet.SequenceNumber());
    149  return FindReferences(
    150      AssembleFrames(packet_buffer_.InsertPacket(std::move(packet))));
    151 }
    152 
    153 void RtpVideoFrameAssembler::Impl::ClearOldData(uint16_t incoming_seq_num) {
    154  constexpr uint16_t kOldSeqNumThreshold = 2000;
    155  uint16_t old_seq_num = incoming_seq_num - kOldSeqNumThreshold;
    156  packet_buffer_.ClearTo(old_seq_num);
    157  reference_finder_.ClearTo(old_seq_num);
    158 }
    159 
    160 RtpVideoFrameAssembler::Impl::RtpFrameVector
    161 RtpVideoFrameAssembler::Impl::AssembleFrames(
    162    video_coding::PacketBuffer::InsertResult insert_result) {
    163  video_coding::PacketBuffer::Packet* first_packet = nullptr;
    164  std::vector<ArrayView<const uint8_t>> payloads;
    165  RtpFrameVector result;
    166 
    167  for (auto& packet : insert_result.packets) {
    168    if (packet->is_first_packet_in_frame()) {
    169      first_packet = packet.get();
    170      payloads.clear();
    171    }
    172    payloads.emplace_back(packet->video_payload);
    173 
    174    if (packet->is_last_packet_in_frame()) {
    175      scoped_refptr<EncodedImageBuffer> bitstream =
    176          depacketizer_->AssembleFrame(payloads);
    177 
    178      if (!bitstream) {
    179        continue;
    180      }
    181 
    182      const video_coding::PacketBuffer::Packet& last_packet = *packet;
    183      result.push_back(std::make_unique<RtpFrameObject>(
    184          first_packet->seq_num(),                              //
    185          last_packet.seq_num(),                                //
    186          last_packet.marker_bit,                               //
    187          /*times_nacked=*/0,                                   //
    188          /*first_packet_received_time=*/0,                     //
    189          /*last_packet_received_time=*/0,                      //
    190          first_packet->timestamp,                              //
    191          /*ntp_time_ms=*/0,                                    //
    192          /*timing=*/VideoSendTiming(),                         //
    193          first_packet->payload_type,                           //
    194          first_packet->codec(),                                //
    195          last_packet.video_header.rotation,                    //
    196          last_packet.video_header.content_type,                //
    197          first_packet->video_header,                           //
    198          last_packet.video_header.color_space,                 //
    199          last_packet.video_header.frame_instrumentation_data,  //
    200          /*packet_infos=*/RtpPacketInfos(),                    //
    201          std::move(bitstream)));
    202    }
    203  }
    204 
    205  return result;
    206 }
    207 
    208 RtpVideoFrameAssembler::FrameVector
    209 RtpVideoFrameAssembler::Impl::FindReferences(RtpFrameVector frames) {
    210  FrameVector res;
    211  for (auto& frame : frames) {
    212    auto complete_frames = reference_finder_.ManageFrame(std::move(frame));
    213    for (std::unique_ptr<RtpFrameObject>& complete_frame : complete_frames) {
    214      uint16_t rtp_seq_num_start = complete_frame->first_seq_num();
    215      uint16_t rtp_seq_num_end = complete_frame->last_seq_num();
    216      res.emplace_back(rtp_seq_num_start, rtp_seq_num_end,
    217                       std::move(complete_frame));
    218    }
    219  }
    220  return res;
    221 }
    222 
    223 RtpVideoFrameAssembler::FrameVector
    224 RtpVideoFrameAssembler::Impl::UpdateWithPadding(uint16_t seq_num) {
    225  auto res =
    226      FindReferences(AssembleFrames(packet_buffer_.InsertPadding(seq_num)));
    227  auto ref_finder_update = reference_finder_.PaddingReceived(seq_num);
    228 
    229  for (std::unique_ptr<RtpFrameObject>& complete_frame : ref_finder_update) {
    230    uint16_t rtp_seq_num_start = complete_frame->first_seq_num();
    231    uint16_t rtp_seq_num_end = complete_frame->last_seq_num();
    232    res.emplace_back(rtp_seq_num_start, rtp_seq_num_end,
    233                     std::move(complete_frame));
    234  }
    235 
    236  return res;
    237 }
    238 
    239 bool RtpVideoFrameAssembler::Impl::ParseDependenciesDescriptorExtension(
    240    const RtpPacketReceived& rtp_packet,
    241    RTPVideoHeader& video_header) {
    242  DependencyDescriptor dependency_descriptor;
    243 
    244  if (!rtp_packet.GetExtension<RtpDependencyDescriptorExtension>(
    245          video_structure_.get(), &dependency_descriptor)) {
    246    // Descriptor is either malformed, or the template referenced is not in
    247    // the `video_structure_` currently being held.
    248    // TODO(bugs.webrtc.org/10342): Improve packet reordering behavior.
    249    RTC_LOG(LS_WARNING) << "ssrc: " << rtp_packet.Ssrc()
    250                        << " Failed to parse dependency descriptor.";
    251    return false;
    252  }
    253 
    254  if (dependency_descriptor.attached_structure != nullptr &&
    255      !dependency_descriptor.first_packet_in_frame) {
    256    RTC_LOG(LS_WARNING) << "ssrc: " << rtp_packet.Ssrc()
    257                        << "Invalid dependency descriptor: structure "
    258                           "attached to non first packet of a frame.";
    259    return false;
    260  }
    261 
    262  video_header.is_first_packet_in_frame =
    263      dependency_descriptor.first_packet_in_frame;
    264  video_header.is_last_packet_in_frame =
    265      dependency_descriptor.last_packet_in_frame;
    266 
    267  int64_t frame_id =
    268      frame_id_unwrapper_.Unwrap(dependency_descriptor.frame_number);
    269  auto& generic_descriptor_info = video_header.generic.emplace();
    270  generic_descriptor_info.frame_id = frame_id;
    271  generic_descriptor_info.spatial_index =
    272      dependency_descriptor.frame_dependencies.spatial_id;
    273  generic_descriptor_info.temporal_index =
    274      dependency_descriptor.frame_dependencies.temporal_id;
    275 
    276  for (int fdiff : dependency_descriptor.frame_dependencies.frame_diffs) {
    277    generic_descriptor_info.dependencies.push_back(frame_id - fdiff);
    278  }
    279  for (int cdiff : dependency_descriptor.frame_dependencies.chain_diffs) {
    280    generic_descriptor_info.chain_diffs.push_back(frame_id - cdiff);
    281  }
    282  generic_descriptor_info.decode_target_indications =
    283      dependency_descriptor.frame_dependencies.decode_target_indications;
    284  if (dependency_descriptor.resolution) {
    285    video_header.width = dependency_descriptor.resolution->Width();
    286    video_header.height = dependency_descriptor.resolution->Height();
    287  }
    288  if (dependency_descriptor.active_decode_targets_bitmask.has_value()) {
    289    generic_descriptor_info.active_decode_targets =
    290        *dependency_descriptor.active_decode_targets_bitmask;
    291  }
    292 
    293  // FrameDependencyStructure is sent in the dependency descriptor of the first
    294  // packet of a key frame and is required to parse all subsequent packets until
    295  // the next key frame.
    296  if (dependency_descriptor.attached_structure) {
    297    RTC_DCHECK(dependency_descriptor.first_packet_in_frame);
    298    if (video_structure_frame_id_ > frame_id) {
    299      RTC_LOG(LS_WARNING)
    300          << "Arrived key frame with id " << frame_id << " and structure id "
    301          << dependency_descriptor.attached_structure->structure_id
    302          << " is older than the latest received key frame with id "
    303          << *video_structure_frame_id_ << " and structure id "
    304          << video_structure_->structure_id;
    305      return false;
    306    }
    307    video_structure_ = std::move(dependency_descriptor.attached_structure);
    308    video_structure_frame_id_ = frame_id;
    309    video_header.frame_type = VideoFrameType::kVideoFrameKey;
    310  } else {
    311    video_header.frame_type = VideoFrameType::kVideoFrameDelta;
    312  }
    313  return true;
    314 }
    315 
    316 bool RtpVideoFrameAssembler::Impl::ParseGenericDescriptorExtension(
    317    const RtpPacketReceived& rtp_packet,
    318    RTPVideoHeader& video_header) {
    319  RtpGenericFrameDescriptor generic_frame_descriptor;
    320  if (!rtp_packet.GetExtension<RtpGenericFrameDescriptorExtension00>(
    321          &generic_frame_descriptor)) {
    322    return false;
    323  }
    324 
    325  video_header.is_first_packet_in_frame =
    326      generic_frame_descriptor.FirstPacketInSubFrame();
    327  video_header.is_last_packet_in_frame =
    328      generic_frame_descriptor.LastPacketInSubFrame();
    329 
    330  if (generic_frame_descriptor.FirstPacketInSubFrame()) {
    331    video_header.frame_type =
    332        generic_frame_descriptor.FrameDependenciesDiffs().empty()
    333            ? VideoFrameType::kVideoFrameKey
    334            : VideoFrameType::kVideoFrameDelta;
    335 
    336    auto& generic_descriptor_info = video_header.generic.emplace();
    337    int64_t frame_id =
    338        frame_id_unwrapper_.Unwrap(generic_frame_descriptor.FrameId());
    339    generic_descriptor_info.frame_id = frame_id;
    340    generic_descriptor_info.spatial_index =
    341        generic_frame_descriptor.SpatialLayer();
    342    generic_descriptor_info.temporal_index =
    343        generic_frame_descriptor.TemporalLayer();
    344    for (uint16_t fdiff : generic_frame_descriptor.FrameDependenciesDiffs()) {
    345      generic_descriptor_info.dependencies.push_back(frame_id - fdiff);
    346    }
    347  }
    348  video_header.width = generic_frame_descriptor.Width();
    349  video_header.height = generic_frame_descriptor.Height();
    350  return true;
    351 }
    352 
    353 RtpVideoFrameAssembler::RtpVideoFrameAssembler(PayloadFormat payload_format)
    354    : impl_(std::make_unique<Impl>(CreateDepacketizer(payload_format))) {}
    355 
    356 RtpVideoFrameAssembler::~RtpVideoFrameAssembler() = default;
    357 
    358 RtpVideoFrameAssembler::FrameVector RtpVideoFrameAssembler::InsertPacket(
    359    const RtpPacketReceived& packet) {
    360  return impl_->InsertPacket(packet);
    361 }
    362 
    363 }  // namespace webrtc