tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

video_rtp_depacketizer_av1.cc (14040B)


      1 /*
      2 *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h"
     12 
     13 #include <array>
     14 #include <cstddef>
     15 #include <cstdint>
     16 #include <cstring>
     17 #include <iterator>
     18 #include <optional>
     19 #include <utility>
     20 
     21 #include "absl/container/inlined_vector.h"
     22 #include "api/array_view.h"
     23 #include "api/scoped_refptr.h"
     24 #include "api/video/encoded_image.h"
     25 #include "api/video/video_codec_type.h"
     26 #include "api/video/video_frame_type.h"
     27 #include "modules/rtp_rtcp/source/leb128.h"
     28 #include "modules/rtp_rtcp/source/rtp_video_header.h"
     29 #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
     30 #include "rtc_base/byte_buffer.h"
     31 #include "rtc_base/checks.h"
     32 #include "rtc_base/copy_on_write_buffer.h"
     33 #include "rtc_base/logging.h"
     34 #include "rtc_base/numerics/safe_conversions.h"
     35 
     36 namespace webrtc {
     37 namespace {
     38 // AV1 format:
     39 //
     40 // RTP payload syntax:
     41 //     0 1 2 3 4 5 6 7
     42 //    +-+-+-+-+-+-+-+-+
     43 //    |Z|Y| W |N|-|-|-| (REQUIRED)
     44 //    +=+=+=+=+=+=+=+=+ (REPEATED W-1 times, or any times if W = 0)
     45 //    |1|             |
     46 //    +-+ OBU fragment|
     47 //    |1|             | (REQUIRED, leb128 encoded)
     48 //    +-+    size     |
     49 //    |0|             |
     50 //    +-+-+-+-+-+-+-+-+
     51 //    |  OBU fragment |
     52 //    |     ...       |
     53 //    +=+=+=+=+=+=+=+=+
     54 //    |     ...       |
     55 //    +=+=+=+=+=+=+=+=+ if W > 0, last fragment MUST NOT have size field
     56 //    |  OBU fragment |
     57 //    |     ...       |
     58 //    +=+=+=+=+=+=+=+=+
     59 //
     60 //
     61 // OBU syntax:
     62 //     0 1 2 3 4 5 6 7
     63 //    +-+-+-+-+-+-+-+-+
     64 //    |0| type  |X|S|-| (REQUIRED)
     65 //    +-+-+-+-+-+-+-+-+
     66 // X: | TID |SID|-|-|-| (OPTIONAL)
     67 //    +-+-+-+-+-+-+-+-+
     68 //    |1|             |
     69 //    +-+ OBU payload |
     70 // S: |1|             | (OPTIONAL, variable length leb128 encoded)
     71 //    +-+    size     |
     72 //    |0|             |
     73 //    +-+-+-+-+-+-+-+-+
     74 //    |  OBU payload  |
     75 //    |     ...       |
     76 class ArrayOfArrayViews {
     77 public:
     78  class const_iterator;
     79  ArrayOfArrayViews() = default;
     80  ArrayOfArrayViews(const ArrayOfArrayViews&) = default;
     81  ArrayOfArrayViews& operator=(const ArrayOfArrayViews&) = default;
     82  ~ArrayOfArrayViews() = default;
     83 
     84  const_iterator begin() const;
     85  const_iterator end() const;
     86  bool empty() const { return data_.empty(); }
     87  size_t size() const { return size_; }
     88  void CopyTo(uint8_t* destination, const_iterator first) const;
     89 
     90  void Append(const uint8_t* data, size_t size) {
     91    data_.emplace_back(data, size);
     92    size_ += size;
     93  }
     94 
     95 private:
     96  using Storage = absl::InlinedVector<ArrayView<const uint8_t>, 2>;
     97 
     98  size_t size_ = 0;
     99  Storage data_;
    100 };
    101 
    102 class ArrayOfArrayViews::const_iterator {
    103 public:
    104  const_iterator() = default;
    105  const_iterator(const const_iterator&) = default;
    106  const_iterator& operator=(const const_iterator&) = default;
    107 
    108  const_iterator& operator++() {
    109    if (++inner_ == outer_->size()) {
    110      ++outer_;
    111      inner_ = 0;
    112    }
    113    return *this;
    114  }
    115  uint8_t operator*() const { return (*outer_)[inner_]; }
    116 
    117  friend bool operator==(const const_iterator& lhs, const const_iterator& rhs) {
    118    return lhs.outer_ == rhs.outer_ && lhs.inner_ == rhs.inner_;
    119  }
    120 
    121 private:
    122  friend ArrayOfArrayViews;
    123  const_iterator(ArrayOfArrayViews::Storage::const_iterator outer, size_t inner)
    124      : outer_(outer), inner_(inner) {}
    125 
    126  Storage::const_iterator outer_;
    127  size_t inner_;
    128 };
    129 
    130 ArrayOfArrayViews::const_iterator ArrayOfArrayViews::begin() const {
    131  return const_iterator(data_.begin(), 0);
    132 }
    133 
    134 ArrayOfArrayViews::const_iterator ArrayOfArrayViews::end() const {
    135  return const_iterator(data_.end(), 0);
    136 }
    137 
    138 void ArrayOfArrayViews::CopyTo(uint8_t* destination,
    139                               const_iterator first) const {
    140  if (first == end()) {
    141    // Empty OBU payload. E.g. Temporal Delimiters are always empty.
    142    return;
    143  }
    144  size_t first_chunk_size = first.outer_->size() - first.inner_;
    145  memcpy(destination, first.outer_->data() + first.inner_, first_chunk_size);
    146  destination += first_chunk_size;
    147  for (auto it = std::next(first.outer_); it != data_.end(); ++it) {
    148    memcpy(destination, it->data(), it->size());
    149    destination += it->size();
    150  }
    151 }
    152 
    153 struct ObuInfo {
    154  // Size of the obu_header and obu_size fields in the ouput frame.
    155  size_t prefix_size = 0;
    156  // obu_header() and obu_size (leb128 encoded payload_size).
    157  // obu_header can be up to 2 bytes, obu_size - up to 5.
    158  std::array<uint8_t, 7> prefix;
    159  // Size of the obu payload in the output frame, i.e. excluding header
    160  size_t payload_size = 0;
    161  // iterator pointing to the beginning of the obu payload.
    162  ArrayOfArrayViews::const_iterator payload_offset;
    163  // OBU payloads as written in the rtp packet payloads.
    164  ArrayOfArrayViews data;
    165 };
    166 // Expect that majority of the frame won't use more than 4 obus.
    167 // In a simple stream delta frame consist of single Frame OBU, while key frame
    168 // also has Sequence Header OBU.
    169 using VectorObuInfo = absl::InlinedVector<ObuInfo, 4>;
    170 
    171 constexpr uint8_t kObuSizePresentBit = 0b0'0000'010;
    172 
    173 bool ObuHasExtension(uint8_t obu_header) {
    174  return obu_header & 0b0'0000'100u;
    175 }
    176 
    177 bool ObuHasSize(uint8_t obu_header) {
    178  return obu_header & kObuSizePresentBit;
    179 }
    180 
    181 bool RtpStartsWithFragment(uint8_t aggregation_header) {
    182  return aggregation_header & 0b1000'0000u;
    183 }
    184 bool RtpEndsWithFragment(uint8_t aggregation_header) {
    185  return aggregation_header & 0b0100'0000u;
    186 }
    187 int RtpNumObus(uint8_t aggregation_header) {  // 0 for any number of obus.
    188  return (aggregation_header & 0b0011'0000u) >> 4;
    189 }
    190 int RtpStartsNewCodedVideoSequence(uint8_t aggregation_header) {
    191  return aggregation_header & 0b0000'1000u;
    192 }
    193 
    194 // Reorgonizes array of rtp payloads into array of obus:
    195 // fills ObuInfo::data field.
    196 // Returns empty vector on error.
    197 VectorObuInfo ParseObus(
    198    ArrayView<const ArrayView<const uint8_t>> rtp_payloads) {
    199  VectorObuInfo obu_infos;
    200  bool expect_continues_obu = false;
    201  for (ArrayView<const uint8_t> rtp_payload : rtp_payloads) {
    202    ByteBufferReader payload(rtp_payload);
    203    uint8_t aggregation_header;
    204    if (!payload.ReadUInt8(&aggregation_header)) {
    205      RTC_DLOG(LS_WARNING)
    206          << "Failed to find aggregation header in the packet.";
    207      return {};
    208    }
    209    // Z-bit: 1 if the first OBU contained in the packet is a continuation of a
    210    // previous OBU.
    211    bool continues_obu = RtpStartsWithFragment(aggregation_header);
    212    if (continues_obu != expect_continues_obu) {
    213      RTC_DLOG(LS_WARNING) << "Unexpected Z-bit " << continues_obu;
    214      return {};
    215    }
    216    int num_expected_obus = RtpNumObus(aggregation_header);
    217    if (payload.Length() == 0) {
    218      // rtp packet has just the aggregation header. That may be valid only when
    219      // there is exactly one fragment in the packet of size 0.
    220      if (num_expected_obus != 1) {
    221        RTC_DLOG(LS_WARNING)
    222            << "Invalid packet with just an aggregation header.";
    223        return {};
    224      }
    225      if (!continues_obu) {
    226        // Empty packet just to notify there is a new OBU.
    227        obu_infos.emplace_back();
    228      }
    229      expect_continues_obu = RtpEndsWithFragment(aggregation_header);
    230      continue;
    231    }
    232 
    233    for (int obu_index = 1; payload.Length() > 0; ++obu_index) {
    234      ObuInfo& obu_info = (obu_index == 1 && continues_obu)
    235                              ? obu_infos.back()
    236                              : obu_infos.emplace_back();
    237      uint64_t fragment_size;
    238      // When num_expected_obus > 0, last OBU (fragment) is not preceeded by
    239      // the size field. See W field in
    240      // https://aomediacodec.github.io/av1-rtp-spec/#43-av1-aggregation-header
    241      bool has_fragment_size = (obu_index != num_expected_obus);
    242      if (has_fragment_size) {
    243        if (!payload.ReadUVarint(&fragment_size)) {
    244          RTC_DLOG(LS_WARNING) << "Failed to read fragment size for obu #"
    245                               << obu_index << "/" << num_expected_obus;
    246          return {};
    247        }
    248        if (fragment_size > payload.Length()) {
    249          // Malformed input: written size is larger than remaining buffer.
    250          RTC_DLOG(LS_WARNING) << "Malformed fragment size " << fragment_size
    251                               << " is larger than remaining size "
    252                               << payload.Length() << " while reading obu #"
    253                               << obu_index << "/" << num_expected_obus;
    254          return {};
    255        }
    256      } else {
    257        fragment_size = payload.Length();
    258      }
    259      // While it is in-practical to pass empty fragments, it is still possible.
    260      if (fragment_size > 0) {
    261        obu_info.data.Append(reinterpret_cast<const uint8_t*>(payload.Data()),
    262                             fragment_size);
    263        payload.Consume(fragment_size);
    264      }
    265    }
    266    // Z flag should be same as Y flag of the next packet.
    267    expect_continues_obu = RtpEndsWithFragment(aggregation_header);
    268  }
    269  if (expect_continues_obu) {
    270    RTC_DLOG(LS_WARNING) << "Last packet shouldn't have last obu fragmented.";
    271    return {};
    272  }
    273  return obu_infos;
    274 }
    275 
    276 // Calculates sizes for the Obu, i.e. base on ObuInfo::data field calculates
    277 // all other fields in the ObuInfo structure.
    278 // Returns false if obu found to be misformed.
    279 bool CalculateObuSizes(ObuInfo* obu_info) {
    280  if (obu_info->data.empty()) {
    281    RTC_DLOG(LS_WARNING) << "Invalid bitstream: empty obu provided.";
    282    return false;
    283  }
    284  auto it = obu_info->data.begin();
    285  uint8_t obu_header = *it;
    286  obu_info->prefix[0] = obu_header | kObuSizePresentBit;
    287  obu_info->prefix_size = 1;
    288  ++it;
    289  if (ObuHasExtension(obu_header)) {
    290    if (it == obu_info->data.end()) {
    291      return false;
    292    }
    293    obu_info->prefix[1] = *it;  // obu_extension_header
    294    obu_info->prefix_size = 2;
    295    ++it;
    296  }
    297  // Read, validate, and skip size, if present.
    298  if (!ObuHasSize(obu_header)) {
    299    obu_info->payload_size = obu_info->data.size() - obu_info->prefix_size;
    300  } else {
    301    // Read leb128 encoded field obu_size.
    302    uint64_t obu_size_bytes = 0;
    303    // Number of bytes obu_size field occupy in the bitstream.
    304    int size_of_obu_size_bytes = 0;
    305    uint8_t leb128_byte;
    306    do {
    307      if (it == obu_info->data.end() || size_of_obu_size_bytes >= 8) {
    308        RTC_DLOG(LS_WARNING)
    309            << "Failed to read obu_size. obu_size field is too long: "
    310            << size_of_obu_size_bytes << " bytes processed.";
    311        return false;
    312      }
    313      leb128_byte = *it;
    314      obu_size_bytes |= uint64_t{leb128_byte & 0x7Fu}
    315                        << (size_of_obu_size_bytes * 7);
    316      ++size_of_obu_size_bytes;
    317      ++it;
    318    } while ((leb128_byte & 0x80) != 0);
    319 
    320    obu_info->payload_size =
    321        obu_info->data.size() - obu_info->prefix_size - size_of_obu_size_bytes;
    322    if (obu_size_bytes != obu_info->payload_size) {
    323      // obu_size was present in the bitstream and mismatches calculated size.
    324      RTC_DLOG(LS_WARNING) << "Mismatch in obu_size. signaled: "
    325                           << obu_size_bytes
    326                           << ", actual: " << obu_info->payload_size;
    327      return false;
    328    }
    329  }
    330  obu_info->payload_offset = it;
    331  obu_info->prefix_size +=
    332      WriteLeb128(dchecked_cast<uint64_t>(obu_info->payload_size),
    333                  obu_info->prefix.data() + obu_info->prefix_size);
    334  return true;
    335 }
    336 
    337 }  // namespace
    338 
    339 scoped_refptr<EncodedImageBuffer> VideoRtpDepacketizerAv1::AssembleFrame(
    340    ArrayView<const ArrayView<const uint8_t>> rtp_payloads) {
    341  VectorObuInfo obu_infos = ParseObus(rtp_payloads);
    342  if (obu_infos.empty()) {
    343    return nullptr;
    344  }
    345 
    346  size_t frame_size = 0;
    347  for (ObuInfo& obu_info : obu_infos) {
    348    if (!CalculateObuSizes(&obu_info)) {
    349      return nullptr;
    350    }
    351    frame_size += (obu_info.prefix_size + obu_info.payload_size);
    352  }
    353 
    354  scoped_refptr<EncodedImageBuffer> bitstream =
    355      EncodedImageBuffer::Create(frame_size);
    356  uint8_t* write_at = bitstream->data();
    357  for (const ObuInfo& obu_info : obu_infos) {
    358    // Copy the obu_header and obu_size fields.
    359    memcpy(write_at, obu_info.prefix.data(), obu_info.prefix_size);
    360    write_at += obu_info.prefix_size;
    361    // Copy the obu payload.
    362    obu_info.data.CopyTo(write_at, obu_info.payload_offset);
    363    write_at += obu_info.payload_size;
    364  }
    365  RTC_CHECK_EQ(write_at - bitstream->data(), bitstream->size());
    366  return bitstream;
    367 }
    368 
    369 std::optional<VideoRtpDepacketizer::ParsedRtpPayload>
    370 VideoRtpDepacketizerAv1::Parse(CopyOnWriteBuffer rtp_payload) {
    371  if (rtp_payload.empty()) {
    372    RTC_DLOG(LS_ERROR) << "Empty rtp payload.";
    373    return std::nullopt;
    374  }
    375  uint8_t aggregation_header = rtp_payload.cdata()[0];
    376  if (RtpStartsNewCodedVideoSequence(aggregation_header) &&
    377      RtpStartsWithFragment(aggregation_header)) {
    378    // new coded video sequence can't start from an OBU fragment.
    379    return std::nullopt;
    380  }
    381  std::optional<ParsedRtpPayload> parsed(std::in_place);
    382 
    383  // To assemble frame, all of the rtp payload is required, including
    384  // aggregation header.
    385  parsed->video_payload = std::move(rtp_payload);
    386 
    387  parsed->video_header.codec = VideoCodecType::kVideoCodecAV1;
    388  // These are not accurate since frame may consist of several packet aligned
    389  // chunks of obus, but should be good enough for most cases. It might produce
    390  // frame that do not map to any real frame, but av1 decoder should be able to
    391  // handle it since it promise to handle individual obus rather than full
    392  // frames.
    393  parsed->video_header.is_first_packet_in_frame =
    394      !RtpStartsWithFragment(aggregation_header);
    395  parsed->video_header.is_last_packet_in_frame =
    396      !RtpEndsWithFragment(aggregation_header);
    397 
    398  parsed->video_header.frame_type =
    399      RtpStartsNewCodedVideoSequence(aggregation_header)
    400          ? VideoFrameType::kVideoFrameKey
    401          : VideoFrameType::kVideoFrameDelta;
    402  return parsed;
    403 }
    404 
    405 }  // namespace webrtc