tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

video_rtp_depacketizer_h264.cc (11489B)


      1 /*
      2 *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h"
     12 
     13 #include <cstddef>
     14 #include <cstdint>
     15 #include <optional>
     16 #include <utility>
     17 #include <vector>
     18 
     19 #include "api/array_view.h"
     20 #include "api/video/video_codec_type.h"
     21 #include "api/video/video_frame_type.h"
     22 #include "common_video/h264/h264_common.h"
     23 #include "common_video/h264/pps_parser.h"
     24 #include "common_video/h264/sps_parser.h"
     25 #include "common_video/h264/sps_vui_rewriter.h"
     26 #include "modules/rtp_rtcp/source/byte_io.h"
     27 #include "modules/rtp_rtcp/source/rtp_format_h264.h"
     28 #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
     29 #include "modules/video_coding/codecs/h264/include/h264_globals.h"
     30 #include "rtc_base/buffer.h"
     31 #include "rtc_base/byte_buffer.h"
     32 #include "rtc_base/checks.h"
     33 #include "rtc_base/copy_on_write_buffer.h"
     34 #include "rtc_base/logging.h"
     35 
     36 namespace webrtc {
     37 namespace {
     38 
     39 constexpr size_t kNalHeaderSize = 1;
     40 constexpr size_t kFuAHeaderSize = 2;
     41 constexpr size_t kLengthFieldSize = 2;
     42 
     43 std::vector<ArrayView<const uint8_t>> ParseStapA(
     44    ArrayView<const uint8_t> data) {
     45  std::vector<ArrayView<const uint8_t>> nal_units;
     46  ByteBufferReader reader(data);
     47  if (!reader.Consume(kNalHeaderSize)) {
     48    return nal_units;
     49  }
     50 
     51  while (reader.Length() > 0) {
     52    uint16_t nalu_size;
     53    if (!reader.ReadUInt16(&nalu_size)) {
     54      return {};
     55    }
     56    if (nalu_size == 0 || nalu_size > reader.Length()) {
     57      return {};
     58    }
     59    nal_units.emplace_back(reader.Data(), nalu_size);
     60    reader.Consume(nalu_size);
     61  }
     62  return nal_units;
     63 }
     64 
     65 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> ProcessStapAOrSingleNalu(
     66    CopyOnWriteBuffer rtp_payload) {
     67  ArrayView<const uint8_t> payload_data(rtp_payload);
     68  std::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload(
     69      std::in_place);
     70  bool modified_buffer = false;
     71  Buffer output_buffer;
     72  parsed_payload->video_payload = rtp_payload;
     73  parsed_payload->video_header.width = 0;
     74  parsed_payload->video_header.height = 0;
     75  parsed_payload->video_header.codec = kVideoCodecH264;
     76  parsed_payload->video_header.simulcastIdx = 0;
     77  parsed_payload->video_header.is_first_packet_in_frame = false;
     78  auto& h264_header = parsed_payload->video_header.video_type_header
     79                          .emplace<RTPVideoHeaderH264>();
     80 
     81  uint8_t nal_type = payload_data[0] & kH264TypeMask;
     82  std::vector<ArrayView<const uint8_t>> nal_units;
     83  if (nal_type == H264::NaluType::kStapA) {
     84    nal_units = ParseStapA(payload_data);
     85    if (nal_units.empty()) {
     86      RTC_LOG(LS_ERROR) << "Incorrect StapA packet.";
     87      return std::nullopt;
     88    }
     89    h264_header.packetization_type = kH264StapA;
     90    h264_header.nalu_type = nal_units[0][0] & kH264TypeMask;
     91  } else {
     92    h264_header.packetization_type = kH264SingleNalu;
     93    h264_header.nalu_type = nal_type;
     94    nal_units.push_back(payload_data);
     95  }
     96 
     97  parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta;
     98 
     99  for (const ArrayView<const uint8_t>& nal_unit : nal_units) {
    100    NaluInfo nalu;
    101    nalu.type = nal_unit[0] & kH264TypeMask;
    102    nalu.sps_id = -1;
    103    nalu.pps_id = -1;
    104    ArrayView<const uint8_t> nalu_data = nal_unit.subview(H264::kNaluTypeSize);
    105 
    106    if (nalu_data.empty()) {
    107      RTC_LOG(LS_WARNING) << "Skipping empty NAL unit.";
    108      continue;
    109    }
    110 
    111    switch (nalu.type) {
    112      case H264::NaluType::kSps: {
    113        // Check if VUI is present in SPS and if it needs to be modified to
    114        // avoid excessive decoder latency.
    115 
    116        // Copy any previous data first (likely just the first header).
    117        output_buffer.Clear();
    118        size_t start_offset = nalu_data.data() - payload_data.data();
    119        size_t end_offset = start_offset + nalu_data.size();
    120        if (start_offset) {
    121          output_buffer.AppendData(payload_data.data(), start_offset);
    122        }
    123 
    124        std::optional<SpsParser::SpsState> sps;
    125 
    126        SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
    127            nalu_data, &sps, nullptr, &output_buffer,
    128            SpsVuiRewriter::Direction::kIncoming);
    129        switch (result) {
    130          case SpsVuiRewriter::ParseResult::kFailure:
    131            RTC_LOG(LS_WARNING) << "Failed to parse SPS NAL unit.";
    132            return std::nullopt;
    133          case SpsVuiRewriter::ParseResult::kVuiRewritten:
    134            if (modified_buffer) {
    135              RTC_LOG(LS_WARNING)
    136                  << "More than one H264 SPS NAL units needing "
    137                     "rewriting found within a single STAP-A packet. "
    138                     "Keeping the first and rewriting the last.";
    139            }
    140 
    141            // Rewrite length field to new SPS size.
    142            if (h264_header.packetization_type == kH264StapA) {
    143              size_t length_field_offset =
    144                  start_offset - (H264::kNaluTypeSize + kLengthFieldSize);
    145              // Stap-A Length includes payload data and type header.
    146              size_t rewritten_size =
    147                  output_buffer.size() - start_offset + H264::kNaluTypeSize;
    148              ByteWriter<uint16_t>::WriteBigEndian(
    149                  &output_buffer[length_field_offset], rewritten_size);
    150            }
    151 
    152            // Append rest of packet.
    153            output_buffer.AppendData(payload_data.subview(end_offset));
    154 
    155            modified_buffer = true;
    156            [[fallthrough]];
    157          case SpsVuiRewriter::ParseResult::kVuiOk:
    158            RTC_DCHECK(sps);
    159            nalu.sps_id = sps->id;
    160            parsed_payload->video_header.width = sps->width;
    161            parsed_payload->video_header.height = sps->height;
    162            parsed_payload->video_header.frame_type =
    163                VideoFrameType::kVideoFrameKey;
    164            break;
    165        }
    166        parsed_payload->video_header.is_first_packet_in_frame = true;
    167        break;
    168      }
    169      case H264::NaluType::kPps: {
    170        uint32_t pps_id;
    171        uint32_t sps_id;
    172        if (PpsParser::ParsePpsIds(nalu_data, &pps_id, &sps_id)) {
    173          nalu.pps_id = pps_id;
    174          nalu.sps_id = sps_id;
    175        } else {
    176          RTC_LOG(LS_WARNING)
    177              << "Failed to parse PPS id and SPS id from PPS slice.";
    178          return std::nullopt;
    179        }
    180        parsed_payload->video_header.is_first_packet_in_frame = true;
    181        break;
    182      }
    183      case H264::NaluType::kIdr:
    184        parsed_payload->video_header.frame_type =
    185            VideoFrameType::kVideoFrameKey;
    186        [[fallthrough]];
    187      case H264::NaluType::kSlice: {
    188        std::optional<PpsParser::SliceHeader> slice_header =
    189            PpsParser::ParseSliceHeader(nalu_data);
    190        if (slice_header) {
    191          nalu.pps_id = slice_header->pic_parameter_set_id;
    192          if (slice_header->first_mb_in_slice == 0) {
    193            parsed_payload->video_header.is_first_packet_in_frame = true;
    194          }
    195        } else {
    196          RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: "
    197                              << static_cast<int>(nalu.type);
    198          return std::nullopt;
    199        }
    200        break;
    201      }
    202      case H264::NaluType::kAud:
    203        parsed_payload->video_header.is_first_packet_in_frame = true;
    204        break;
    205      case H264::NaluType::kSei:
    206        parsed_payload->video_header.is_first_packet_in_frame = true;
    207        break;
    208      // Slices below don't contain SPS or PPS ids.
    209      case H264::NaluType::kEndOfSequence:
    210      case H264::NaluType::kEndOfStream:
    211      case H264::NaluType::kFiller:
    212        break;
    213      case H264::NaluType::kStapA:
    214      case H264::NaluType::kFuA:
    215        RTC_LOG(LS_WARNING) << "Unexpected STAP-A or FU-A received.";
    216        return std::nullopt;
    217    }
    218 
    219    h264_header.nalus.push_back(nalu);
    220  }
    221 
    222  if (modified_buffer) {
    223    parsed_payload->video_payload.SetData(output_buffer.data(),
    224                                          output_buffer.size());
    225  }
    226  return parsed_payload;
    227 }
    228 
    229 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> ParseFuaNalu(
    230    CopyOnWriteBuffer rtp_payload) {
    231  if (rtp_payload.size() < kFuAHeaderSize) {
    232    RTC_LOG(LS_ERROR) << "FU-A NAL units truncated.";
    233    return std::nullopt;
    234  }
    235  std::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload(
    236      std::in_place);
    237  uint8_t fnri = rtp_payload.cdata()[0] & (kH264FBit | kH264NriMask);
    238  uint8_t original_nal_type = rtp_payload.cdata()[1] & kH264TypeMask;
    239  bool first_fragment = (rtp_payload.cdata()[1] & kH264SBit) > 0;
    240  bool is_first_packet_in_frame = false;
    241  NaluInfo nalu;
    242  nalu.type = original_nal_type;
    243  nalu.sps_id = -1;
    244  nalu.pps_id = -1;
    245  if (first_fragment) {
    246    if (original_nal_type == H264::NaluType::kIdr ||
    247        original_nal_type == H264::NaluType::kSlice) {
    248      std::optional<PpsParser::SliceHeader> slice_header =
    249          PpsParser::ParseSliceHeader(ArrayView<const uint8_t>(rtp_payload)
    250                                          .subview(2 * kNalHeaderSize));
    251      if (slice_header) {
    252        nalu.pps_id = slice_header->pic_parameter_set_id;
    253        is_first_packet_in_frame = slice_header->first_mb_in_slice == 0;
    254      } else {
    255        RTC_LOG(LS_WARNING)
    256            << "Failed to parse PPS from first fragment of FU-A NAL "
    257               "unit with original type: "
    258            << static_cast<int>(nalu.type);
    259      }
    260    }
    261    uint8_t original_nal_header = fnri | original_nal_type;
    262    rtp_payload =
    263        rtp_payload.Slice(kNalHeaderSize, rtp_payload.size() - kNalHeaderSize);
    264    rtp_payload.MutableData()[0] = original_nal_header;
    265    parsed_payload->video_payload = std::move(rtp_payload);
    266  } else {
    267    parsed_payload->video_payload =
    268        rtp_payload.Slice(kFuAHeaderSize, rtp_payload.size() - kFuAHeaderSize);
    269  }
    270 
    271  if (original_nal_type == H264::NaluType::kIdr) {
    272    parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameKey;
    273  } else {
    274    parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta;
    275  }
    276  parsed_payload->video_header.width = 0;
    277  parsed_payload->video_header.height = 0;
    278  parsed_payload->video_header.codec = kVideoCodecH264;
    279  parsed_payload->video_header.simulcastIdx = 0;
    280  parsed_payload->video_header.is_first_packet_in_frame =
    281      is_first_packet_in_frame;
    282  auto& h264_header = parsed_payload->video_header.video_type_header
    283                          .emplace<RTPVideoHeaderH264>();
    284  h264_header.packetization_type = kH264FuA;
    285  h264_header.nalu_type = original_nal_type;
    286  if (first_fragment) {
    287    h264_header.nalus = {nalu};
    288  }
    289  return parsed_payload;
    290 }
    291 
    292 }  // namespace
    293 
    294 std::optional<VideoRtpDepacketizer::ParsedRtpPayload>
    295 VideoRtpDepacketizerH264::Parse(CopyOnWriteBuffer rtp_payload) {
    296  if (rtp_payload.empty()) {
    297    RTC_LOG(LS_ERROR) << "Empty payload.";
    298    return std::nullopt;
    299  }
    300 
    301  uint8_t nal_type = rtp_payload.cdata()[0] & kH264TypeMask;
    302 
    303  if (nal_type == H264::NaluType::kFuA) {
    304    // Fragmented NAL units (FU-A).
    305    return ParseFuaNalu(std::move(rtp_payload));
    306  } else {
    307    // We handle STAP-A and single NALU's the same way here. The jitter buffer
    308    // will depacketize the STAP-A into NAL units later.
    309    return ProcessStapAOrSingleNalu(std::move(rtp_payload));
    310  }
    311 }
    312 
    313 }  // namespace webrtc