tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

video_rtp_depacketizer_h265.cc (11143B)


      1 /*
      2 *  Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h"
     12 
     13 #include <cstddef>
     14 #include <cstdint>
     15 #include <optional>
     16 #include <utility>
     17 #include <vector>
     18 
     19 #include "absl/base/attributes.h"
     20 #include "api/array_view.h"
     21 #include "api/video/video_codec_type.h"
     22 #include "api/video/video_frame_type.h"
     23 #include "common_video/h265/h265_bitstream_parser.h"
     24 #include "common_video/h265/h265_common.h"
     25 #include "common_video/h265/h265_sps_parser.h"
     26 #include "modules/rtp_rtcp/source/byte_io.h"
     27 #include "modules/rtp_rtcp/source/rtp_packet_h265_common.h"
     28 #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
     29 #include "rtc_base/copy_on_write_buffer.h"
     30 #include "rtc_base/logging.h"
     31 
     32 // RTP Payload Format for HEVC: https://datatracker.ietf.org/doc/html/rfc7798
     33 
     34 namespace webrtc {
     35 namespace {
     36 
     37 bool ParseApStartOffsets(const uint8_t* nalu_ptr,
     38                         size_t length_remaining,
     39                         std::vector<size_t>* offsets) {
     40  size_t offset = 0;
     41  while (length_remaining > 0) {
     42    // Buffer doesn't contain room for additional NALU length.
     43    if (length_remaining < kH265LengthFieldSizeBytes)
     44      return false;
     45    // Read 16-bit NALU size defined in RFC7798 section 4.4.2.
     46    uint16_t nalu_size = ByteReader<uint16_t>::ReadBigEndian(nalu_ptr);
     47    nalu_ptr += kH265LengthFieldSizeBytes;
     48    length_remaining -= kH265LengthFieldSizeBytes;
     49    if (nalu_size > length_remaining)
     50      return false;
     51    nalu_ptr += nalu_size;
     52    length_remaining -= nalu_size;
     53 
     54    offsets->push_back(offset + kH265ApHeaderSizeBytes);
     55    offset += kH265LengthFieldSizeBytes + nalu_size;
     56  }
     57  return true;
     58 }
     59 
     60 // Single NALU packet structure
     61 // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.1
     62 // Aggregation Packet (AP) strcture
     63 // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.2
     64 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> ProcessApOrSingleNalu(
     65    CopyOnWriteBuffer rtp_payload) {
     66  if (rtp_payload.size() < kH265PayloadHeaderSizeBytes) {
     67    RTC_LOG(LS_ERROR) << "RTP payload truncated.";
     68    return std::nullopt;
     69  }
     70  const uint8_t* const payload_data = rtp_payload.cdata();
     71  std::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload(
     72      std::in_place);
     73  parsed_payload->video_header.width = 0;
     74  parsed_payload->video_header.height = 0;
     75  parsed_payload->video_header.codec = kVideoCodecH265;
     76  parsed_payload->video_header.is_first_packet_in_frame = false;
     77 
     78  const uint8_t* nalu_start = payload_data + kH265PayloadHeaderSizeBytes;
     79  const size_t nalu_length = rtp_payload.size() - kH265PayloadHeaderSizeBytes;
     80  uint8_t nal_type = (payload_data[0] & kH265TypeMask) >> 1;
     81  std::vector<size_t> nalu_start_offsets;
     82  CopyOnWriteBuffer video_payload;
     83  if (nal_type == H265::NaluType::kAp) {
     84    // Skip the aggregated packet header (Aggregated packet NAL type + length).
     85    if (rtp_payload.size() <= kH265ApHeaderSizeBytes) {
     86      RTC_LOG(LS_ERROR) << "Aggregated packet header truncated.";
     87      return std::nullopt;
     88    }
     89 
     90    if (!ParseApStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) {
     91      RTC_LOG(LS_ERROR)
     92          << "Aggregated packet with incorrect NALU packet lengths.";
     93      return std::nullopt;
     94    }
     95 
     96    nal_type = (payload_data[kH265ApHeaderSizeBytes] & kH265TypeMask) >> 1;
     97  } else {
     98    nalu_start_offsets.push_back(0);
     99  }
    100  parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta;
    101 
    102  nalu_start_offsets.push_back(rtp_payload.size() +
    103                               kH265LengthFieldSizeBytes);  // End offset.
    104  for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) {
    105    size_t start_offset = nalu_start_offsets[i];
    106    // End offset is actually start offset for next unit, excluding length field
    107    // so remove that from this units length.
    108    size_t end_offset = nalu_start_offsets[i + 1] - kH265LengthFieldSizeBytes;
    109    if (end_offset - start_offset < kH265NalHeaderSizeBytes) {
    110      RTC_LOG(LS_ERROR) << "Aggregated packet too short";
    111      return std::nullopt;
    112    }
    113 
    114    // Insert start code before each NALU in aggregated packet.
    115    video_payload.AppendData(kStartCode);
    116    video_payload.AppendData(&payload_data[start_offset],
    117                             end_offset - start_offset);
    118 
    119    uint8_t nalu_type = (payload_data[start_offset] & kH265TypeMask) >> 1;
    120    start_offset += kH265NalHeaderSizeBytes;
    121    ArrayView<const uint8_t> nalu_data(&payload_data[start_offset],
    122                                       end_offset - start_offset);
    123    switch (nalu_type) {
    124      case H265::NaluType::kBlaWLp:
    125      case H265::NaluType::kBlaWRadl:
    126      case H265::NaluType::kBlaNLp:
    127      case H265::NaluType::kIdrWRadl:
    128      case H265::NaluType::kIdrNLp:
    129      case H265::NaluType::kCra:
    130        // Mark IRAP(Intra Random Access Point) frames as key frames. Their NALU
    131        // types are in the range of BLA_W_LP (16) to CRA (21), inclusive.
    132        // https://datatracker.ietf.org/doc/html/rfc7798#section-3.1.1
    133        parsed_payload->video_header.frame_type =
    134            VideoFrameType::kVideoFrameKey;
    135        break;
    136      case H265::NaluType::kSps: {
    137        std::optional<H265SpsParser::SpsState> sps =
    138            H265SpsParser::ParseSps(nalu_data);
    139 
    140        if (sps) {
    141          // TODO(bugs.webrtc.org/13485): Implement the size calculation taking
    142          // VPS->vui_parameters.def_disp_win_xx_offset into account.
    143          parsed_payload->video_header.width = sps->width;
    144          parsed_payload->video_header.height = sps->height;
    145        } else {
    146          RTC_LOG(LS_WARNING) << "Failed to parse SPS from SPS slice.";
    147        }
    148      }
    149        ABSL_FALLTHROUGH_INTENDED;
    150      case H265::NaluType::kVps:
    151      case H265::NaluType::kPps:
    152      case H265::NaluType::kTrailN:
    153      case H265::NaluType::kTrailR:
    154      case H265::NaluType::kTsaN:
    155      case H265::NaluType::kTsaR:
    156      case H265::NaluType::kStsaN:
    157      case H265::NaluType::kStsaR:
    158      case H265::NaluType::kRadlN:
    159      case H265::NaluType::kRadlR:
    160      // Slices below don't contain SPS or PPS ids.
    161      case H265::NaluType::kAud:
    162      case H265::NaluType::kPrefixSei:
    163      case H265::NaluType::kSuffixSei:
    164        break;
    165      case H265::NaluType::kAp:
    166      case H265::NaluType::kFu:
    167      case H265::NaluType::kPaci:
    168        RTC_LOG(LS_WARNING) << "Unexpected AP, FU or PACI received.";
    169        return std::nullopt;
    170    }
    171 
    172    // Spec 7.4.2.4.4: Order of NAL units and codec pictures.
    173    if ((nalu_type >= H265::NaluType::kVps &&
    174         nalu_type <= H265::NaluType::kAud) ||
    175        nalu_type == H265::NaluType::kPrefixSei) {
    176      parsed_payload->video_header.is_first_packet_in_frame = true;
    177    } else if (nalu_type >= H265::NaluType::kTrailN &&
    178               nalu_type <= H265::NaluType::kRsvVcl31) {
    179      std::optional<bool> first_slice_segment_in_pic_flag =
    180          H265BitstreamParser::IsFirstSliceSegmentInPic(nalu_data);
    181      if (first_slice_segment_in_pic_flag.value_or(false)) {
    182        parsed_payload->video_header.is_first_packet_in_frame = true;
    183      }
    184    }
    185  }
    186  parsed_payload->video_payload = video_payload;
    187  return parsed_payload;
    188 }
    189 
    190 // Fragmentation Unit (FU) structure:
    191 // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.3
    192 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> ParseFuNalu(
    193    CopyOnWriteBuffer rtp_payload) {
    194  if (rtp_payload.size() < kH265FuHeaderSizeBytes + kH265NalHeaderSizeBytes) {
    195    RTC_LOG(LS_ERROR) << "FU NAL units truncated.";
    196    return std::nullopt;
    197  }
    198  std::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload(
    199      std::in_place);
    200 
    201  uint8_t f = rtp_payload.cdata()[0] & kH265FBit;
    202  uint8_t layer_id_h = rtp_payload.cdata()[0] & kH265LayerIDHMask;
    203  uint8_t layer_id_l_unshifted = rtp_payload.cdata()[1] & kH265LayerIDLMask;
    204  uint8_t tid = rtp_payload.cdata()[1] & kH265TIDMask;
    205 
    206  uint8_t original_nal_type = rtp_payload.cdata()[2] & kH265TypeMaskInFuHeader;
    207  bool first_fragment = rtp_payload.cdata()[2] & kH265SBitMask;
    208  bool is_first_packet_in_frame = false;
    209  if (first_fragment) {
    210    if (original_nal_type >= H265::NaluType::kTrailN &&
    211        original_nal_type <= H265::NaluType::kRsvVcl31) {
    212      size_t slice_offset =
    213          kH265FuHeaderSizeBytes + kH265PayloadHeaderSizeBytes;
    214      std::optional<bool> first_slice_segment_in_pic_flag =
    215          H265BitstreamParser::IsFirstSliceSegmentInPic(
    216              ArrayView<const uint8_t>(rtp_payload.cdata() + slice_offset,
    217                                       rtp_payload.size() - slice_offset));
    218      if (first_slice_segment_in_pic_flag.value_or(false)) {
    219        is_first_packet_in_frame = true;
    220      }
    221    }
    222    rtp_payload = rtp_payload.Slice(
    223        kH265FuHeaderSizeBytes, rtp_payload.size() - kH265FuHeaderSizeBytes);
    224    rtp_payload.MutableData()[0] = f | original_nal_type << 1 | layer_id_h;
    225    rtp_payload.MutableData()[1] = layer_id_l_unshifted | tid;
    226    CopyOnWriteBuffer video_payload;
    227    // Insert start code before the first fragment in FU.
    228    video_payload.AppendData(kStartCode);
    229    video_payload.AppendData(rtp_payload);
    230    parsed_payload->video_payload = video_payload;
    231  } else {
    232    parsed_payload->video_payload = rtp_payload.Slice(
    233        kH265NalHeaderSizeBytes + kH265FuHeaderSizeBytes,
    234        rtp_payload.size() - kH265NalHeaderSizeBytes - kH265FuHeaderSizeBytes);
    235  }
    236 
    237  if (original_nal_type >= H265::NaluType::kBlaWLp &&
    238      original_nal_type <= H265::NaluType::kRsvIrapVcl23) {
    239    // IRAP picture.
    240    // https://datatracker.ietf.org/doc/html/rfc7798#section-3.1.1
    241    parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameKey;
    242  } else {
    243    parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta;
    244  }
    245  parsed_payload->video_header.width = 0;
    246  parsed_payload->video_header.height = 0;
    247  parsed_payload->video_header.codec = kVideoCodecH265;
    248  parsed_payload->video_header.is_first_packet_in_frame =
    249      is_first_packet_in_frame;
    250 
    251  return parsed_payload;
    252 }
    253 
    254 }  // namespace
    255 
    256 std::optional<VideoRtpDepacketizer::ParsedRtpPayload>
    257 VideoRtpDepacketizerH265::Parse(CopyOnWriteBuffer rtp_payload) {
    258  if (rtp_payload.empty()) {
    259    RTC_LOG(LS_ERROR) << "Empty payload.";
    260    return std::nullopt;
    261  }
    262 
    263  uint8_t nal_type = (rtp_payload.cdata()[0] & kH265TypeMask) >> 1;
    264 
    265  if (nal_type == H265::NaluType::kFu) {
    266    // Fragmented NAL units (FU).
    267    return ParseFuNalu(std::move(rtp_payload));
    268  } else if (nal_type == H265::NaluType::kPaci) {
    269    // TODO(bugs.webrtc.org/13485): Implement PACI parse for H265
    270    RTC_LOG(LS_ERROR) << "Not support type:" << nal_type;
    271    return std::nullopt;
    272  } else {
    273    // Single NAL unit packet or Aggregated packets (AP).
    274    return ProcessApOrSingleNalu(std::move(rtp_payload));
    275  }
    276 }
    277 
    278 }  // namespace webrtc