video_rtp_depacketizer_h265.cc (11143B)
1 /* 2 * Copyright (c) 2024 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h" 12 13 #include <cstddef> 14 #include <cstdint> 15 #include <optional> 16 #include <utility> 17 #include <vector> 18 19 #include "absl/base/attributes.h" 20 #include "api/array_view.h" 21 #include "api/video/video_codec_type.h" 22 #include "api/video/video_frame_type.h" 23 #include "common_video/h265/h265_bitstream_parser.h" 24 #include "common_video/h265/h265_common.h" 25 #include "common_video/h265/h265_sps_parser.h" 26 #include "modules/rtp_rtcp/source/byte_io.h" 27 #include "modules/rtp_rtcp/source/rtp_packet_h265_common.h" 28 #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h" 29 #include "rtc_base/copy_on_write_buffer.h" 30 #include "rtc_base/logging.h" 31 32 // RTP Payload Format for HEVC: https://datatracker.ietf.org/doc/html/rfc7798 33 34 namespace webrtc { 35 namespace { 36 37 bool ParseApStartOffsets(const uint8_t* nalu_ptr, 38 size_t length_remaining, 39 std::vector<size_t>* offsets) { 40 size_t offset = 0; 41 while (length_remaining > 0) { 42 // Buffer doesn't contain room for additional NALU length. 43 if (length_remaining < kH265LengthFieldSizeBytes) 44 return false; 45 // Read 16-bit NALU size defined in RFC7798 section 4.4.2. 46 uint16_t nalu_size = ByteReader<uint16_t>::ReadBigEndian(nalu_ptr); 47 nalu_ptr += kH265LengthFieldSizeBytes; 48 length_remaining -= kH265LengthFieldSizeBytes; 49 if (nalu_size > length_remaining) 50 return false; 51 nalu_ptr += nalu_size; 52 length_remaining -= nalu_size; 53 54 offsets->push_back(offset + kH265ApHeaderSizeBytes); 55 offset += kH265LengthFieldSizeBytes + nalu_size; 56 } 57 return true; 58 } 59 60 // Single NALU packet structure 61 // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.1 62 // Aggregation Packet (AP) strcture 63 // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.2 64 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> ProcessApOrSingleNalu( 65 CopyOnWriteBuffer rtp_payload) { 66 if (rtp_payload.size() < kH265PayloadHeaderSizeBytes) { 67 RTC_LOG(LS_ERROR) << "RTP payload truncated."; 68 return std::nullopt; 69 } 70 const uint8_t* const payload_data = rtp_payload.cdata(); 71 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload( 72 std::in_place); 73 parsed_payload->video_header.width = 0; 74 parsed_payload->video_header.height = 0; 75 parsed_payload->video_header.codec = kVideoCodecH265; 76 parsed_payload->video_header.is_first_packet_in_frame = false; 77 78 const uint8_t* nalu_start = payload_data + kH265PayloadHeaderSizeBytes; 79 const size_t nalu_length = rtp_payload.size() - kH265PayloadHeaderSizeBytes; 80 uint8_t nal_type = (payload_data[0] & kH265TypeMask) >> 1; 81 std::vector<size_t> nalu_start_offsets; 82 CopyOnWriteBuffer video_payload; 83 if (nal_type == H265::NaluType::kAp) { 84 // Skip the aggregated packet header (Aggregated packet NAL type + length). 85 if (rtp_payload.size() <= kH265ApHeaderSizeBytes) { 86 RTC_LOG(LS_ERROR) << "Aggregated packet header truncated."; 87 return std::nullopt; 88 } 89 90 if (!ParseApStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) { 91 RTC_LOG(LS_ERROR) 92 << "Aggregated packet with incorrect NALU packet lengths."; 93 return std::nullopt; 94 } 95 96 nal_type = (payload_data[kH265ApHeaderSizeBytes] & kH265TypeMask) >> 1; 97 } else { 98 nalu_start_offsets.push_back(0); 99 } 100 parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta; 101 102 nalu_start_offsets.push_back(rtp_payload.size() + 103 kH265LengthFieldSizeBytes); // End offset. 104 for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) { 105 size_t start_offset = nalu_start_offsets[i]; 106 // End offset is actually start offset for next unit, excluding length field 107 // so remove that from this units length. 108 size_t end_offset = nalu_start_offsets[i + 1] - kH265LengthFieldSizeBytes; 109 if (end_offset - start_offset < kH265NalHeaderSizeBytes) { 110 RTC_LOG(LS_ERROR) << "Aggregated packet too short"; 111 return std::nullopt; 112 } 113 114 // Insert start code before each NALU in aggregated packet. 115 video_payload.AppendData(kStartCode); 116 video_payload.AppendData(&payload_data[start_offset], 117 end_offset - start_offset); 118 119 uint8_t nalu_type = (payload_data[start_offset] & kH265TypeMask) >> 1; 120 start_offset += kH265NalHeaderSizeBytes; 121 ArrayView<const uint8_t> nalu_data(&payload_data[start_offset], 122 end_offset - start_offset); 123 switch (nalu_type) { 124 case H265::NaluType::kBlaWLp: 125 case H265::NaluType::kBlaWRadl: 126 case H265::NaluType::kBlaNLp: 127 case H265::NaluType::kIdrWRadl: 128 case H265::NaluType::kIdrNLp: 129 case H265::NaluType::kCra: 130 // Mark IRAP(Intra Random Access Point) frames as key frames. Their NALU 131 // types are in the range of BLA_W_LP (16) to CRA (21), inclusive. 132 // https://datatracker.ietf.org/doc/html/rfc7798#section-3.1.1 133 parsed_payload->video_header.frame_type = 134 VideoFrameType::kVideoFrameKey; 135 break; 136 case H265::NaluType::kSps: { 137 std::optional<H265SpsParser::SpsState> sps = 138 H265SpsParser::ParseSps(nalu_data); 139 140 if (sps) { 141 // TODO(bugs.webrtc.org/13485): Implement the size calculation taking 142 // VPS->vui_parameters.def_disp_win_xx_offset into account. 143 parsed_payload->video_header.width = sps->width; 144 parsed_payload->video_header.height = sps->height; 145 } else { 146 RTC_LOG(LS_WARNING) << "Failed to parse SPS from SPS slice."; 147 } 148 } 149 ABSL_FALLTHROUGH_INTENDED; 150 case H265::NaluType::kVps: 151 case H265::NaluType::kPps: 152 case H265::NaluType::kTrailN: 153 case H265::NaluType::kTrailR: 154 case H265::NaluType::kTsaN: 155 case H265::NaluType::kTsaR: 156 case H265::NaluType::kStsaN: 157 case H265::NaluType::kStsaR: 158 case H265::NaluType::kRadlN: 159 case H265::NaluType::kRadlR: 160 // Slices below don't contain SPS or PPS ids. 161 case H265::NaluType::kAud: 162 case H265::NaluType::kPrefixSei: 163 case H265::NaluType::kSuffixSei: 164 break; 165 case H265::NaluType::kAp: 166 case H265::NaluType::kFu: 167 case H265::NaluType::kPaci: 168 RTC_LOG(LS_WARNING) << "Unexpected AP, FU or PACI received."; 169 return std::nullopt; 170 } 171 172 // Spec 7.4.2.4.4: Order of NAL units and codec pictures. 173 if ((nalu_type >= H265::NaluType::kVps && 174 nalu_type <= H265::NaluType::kAud) || 175 nalu_type == H265::NaluType::kPrefixSei) { 176 parsed_payload->video_header.is_first_packet_in_frame = true; 177 } else if (nalu_type >= H265::NaluType::kTrailN && 178 nalu_type <= H265::NaluType::kRsvVcl31) { 179 std::optional<bool> first_slice_segment_in_pic_flag = 180 H265BitstreamParser::IsFirstSliceSegmentInPic(nalu_data); 181 if (first_slice_segment_in_pic_flag.value_or(false)) { 182 parsed_payload->video_header.is_first_packet_in_frame = true; 183 } 184 } 185 } 186 parsed_payload->video_payload = video_payload; 187 return parsed_payload; 188 } 189 190 // Fragmentation Unit (FU) structure: 191 // https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.3 192 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> ParseFuNalu( 193 CopyOnWriteBuffer rtp_payload) { 194 if (rtp_payload.size() < kH265FuHeaderSizeBytes + kH265NalHeaderSizeBytes) { 195 RTC_LOG(LS_ERROR) << "FU NAL units truncated."; 196 return std::nullopt; 197 } 198 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload( 199 std::in_place); 200 201 uint8_t f = rtp_payload.cdata()[0] & kH265FBit; 202 uint8_t layer_id_h = rtp_payload.cdata()[0] & kH265LayerIDHMask; 203 uint8_t layer_id_l_unshifted = rtp_payload.cdata()[1] & kH265LayerIDLMask; 204 uint8_t tid = rtp_payload.cdata()[1] & kH265TIDMask; 205 206 uint8_t original_nal_type = rtp_payload.cdata()[2] & kH265TypeMaskInFuHeader; 207 bool first_fragment = rtp_payload.cdata()[2] & kH265SBitMask; 208 bool is_first_packet_in_frame = false; 209 if (first_fragment) { 210 if (original_nal_type >= H265::NaluType::kTrailN && 211 original_nal_type <= H265::NaluType::kRsvVcl31) { 212 size_t slice_offset = 213 kH265FuHeaderSizeBytes + kH265PayloadHeaderSizeBytes; 214 std::optional<bool> first_slice_segment_in_pic_flag = 215 H265BitstreamParser::IsFirstSliceSegmentInPic( 216 ArrayView<const uint8_t>(rtp_payload.cdata() + slice_offset, 217 rtp_payload.size() - slice_offset)); 218 if (first_slice_segment_in_pic_flag.value_or(false)) { 219 is_first_packet_in_frame = true; 220 } 221 } 222 rtp_payload = rtp_payload.Slice( 223 kH265FuHeaderSizeBytes, rtp_payload.size() - kH265FuHeaderSizeBytes); 224 rtp_payload.MutableData()[0] = f | original_nal_type << 1 | layer_id_h; 225 rtp_payload.MutableData()[1] = layer_id_l_unshifted | tid; 226 CopyOnWriteBuffer video_payload; 227 // Insert start code before the first fragment in FU. 228 video_payload.AppendData(kStartCode); 229 video_payload.AppendData(rtp_payload); 230 parsed_payload->video_payload = video_payload; 231 } else { 232 parsed_payload->video_payload = rtp_payload.Slice( 233 kH265NalHeaderSizeBytes + kH265FuHeaderSizeBytes, 234 rtp_payload.size() - kH265NalHeaderSizeBytes - kH265FuHeaderSizeBytes); 235 } 236 237 if (original_nal_type >= H265::NaluType::kBlaWLp && 238 original_nal_type <= H265::NaluType::kRsvIrapVcl23) { 239 // IRAP picture. 240 // https://datatracker.ietf.org/doc/html/rfc7798#section-3.1.1 241 parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameKey; 242 } else { 243 parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta; 244 } 245 parsed_payload->video_header.width = 0; 246 parsed_payload->video_header.height = 0; 247 parsed_payload->video_header.codec = kVideoCodecH265; 248 parsed_payload->video_header.is_first_packet_in_frame = 249 is_first_packet_in_frame; 250 251 return parsed_payload; 252 } 253 254 } // namespace 255 256 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> 257 VideoRtpDepacketizerH265::Parse(CopyOnWriteBuffer rtp_payload) { 258 if (rtp_payload.empty()) { 259 RTC_LOG(LS_ERROR) << "Empty payload."; 260 return std::nullopt; 261 } 262 263 uint8_t nal_type = (rtp_payload.cdata()[0] & kH265TypeMask) >> 1; 264 265 if (nal_type == H265::NaluType::kFu) { 266 // Fragmented NAL units (FU). 267 return ParseFuNalu(std::move(rtp_payload)); 268 } else if (nal_type == H265::NaluType::kPaci) { 269 // TODO(bugs.webrtc.org/13485): Implement PACI parse for H265 270 RTC_LOG(LS_ERROR) << "Not support type:" << nal_type; 271 return std::nullopt; 272 } else { 273 // Single NAL unit packet or Aggregated packets (AP). 274 return ProcessApOrSingleNalu(std::move(rtp_payload)); 275 } 276 } 277 278 } // namespace webrtc