video_rtp_depacketizer_h264.cc (11489B)
1 /* 2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h" 12 13 #include <cstddef> 14 #include <cstdint> 15 #include <optional> 16 #include <utility> 17 #include <vector> 18 19 #include "api/array_view.h" 20 #include "api/video/video_codec_type.h" 21 #include "api/video/video_frame_type.h" 22 #include "common_video/h264/h264_common.h" 23 #include "common_video/h264/pps_parser.h" 24 #include "common_video/h264/sps_parser.h" 25 #include "common_video/h264/sps_vui_rewriter.h" 26 #include "modules/rtp_rtcp/source/byte_io.h" 27 #include "modules/rtp_rtcp/source/rtp_format_h264.h" 28 #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h" 29 #include "modules/video_coding/codecs/h264/include/h264_globals.h" 30 #include "rtc_base/buffer.h" 31 #include "rtc_base/byte_buffer.h" 32 #include "rtc_base/checks.h" 33 #include "rtc_base/copy_on_write_buffer.h" 34 #include "rtc_base/logging.h" 35 36 namespace webrtc { 37 namespace { 38 39 constexpr size_t kNalHeaderSize = 1; 40 constexpr size_t kFuAHeaderSize = 2; 41 constexpr size_t kLengthFieldSize = 2; 42 43 std::vector<ArrayView<const uint8_t>> ParseStapA( 44 ArrayView<const uint8_t> data) { 45 std::vector<ArrayView<const uint8_t>> nal_units; 46 ByteBufferReader reader(data); 47 if (!reader.Consume(kNalHeaderSize)) { 48 return nal_units; 49 } 50 51 while (reader.Length() > 0) { 52 uint16_t nalu_size; 53 if (!reader.ReadUInt16(&nalu_size)) { 54 return {}; 55 } 56 if (nalu_size == 0 || nalu_size > reader.Length()) { 57 return {}; 58 } 59 nal_units.emplace_back(reader.Data(), nalu_size); 60 reader.Consume(nalu_size); 61 } 62 return nal_units; 63 } 64 65 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> ProcessStapAOrSingleNalu( 66 CopyOnWriteBuffer rtp_payload) { 67 ArrayView<const uint8_t> payload_data(rtp_payload); 68 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload( 69 std::in_place); 70 bool modified_buffer = false; 71 Buffer output_buffer; 72 parsed_payload->video_payload = rtp_payload; 73 parsed_payload->video_header.width = 0; 74 parsed_payload->video_header.height = 0; 75 parsed_payload->video_header.codec = kVideoCodecH264; 76 parsed_payload->video_header.simulcastIdx = 0; 77 parsed_payload->video_header.is_first_packet_in_frame = false; 78 auto& h264_header = parsed_payload->video_header.video_type_header 79 .emplace<RTPVideoHeaderH264>(); 80 81 uint8_t nal_type = payload_data[0] & kH264TypeMask; 82 std::vector<ArrayView<const uint8_t>> nal_units; 83 if (nal_type == H264::NaluType::kStapA) { 84 nal_units = ParseStapA(payload_data); 85 if (nal_units.empty()) { 86 RTC_LOG(LS_ERROR) << "Incorrect StapA packet."; 87 return std::nullopt; 88 } 89 h264_header.packetization_type = kH264StapA; 90 h264_header.nalu_type = nal_units[0][0] & kH264TypeMask; 91 } else { 92 h264_header.packetization_type = kH264SingleNalu; 93 h264_header.nalu_type = nal_type; 94 nal_units.push_back(payload_data); 95 } 96 97 parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta; 98 99 for (const ArrayView<const uint8_t>& nal_unit : nal_units) { 100 NaluInfo nalu; 101 nalu.type = nal_unit[0] & kH264TypeMask; 102 nalu.sps_id = -1; 103 nalu.pps_id = -1; 104 ArrayView<const uint8_t> nalu_data = nal_unit.subview(H264::kNaluTypeSize); 105 106 if (nalu_data.empty()) { 107 RTC_LOG(LS_WARNING) << "Skipping empty NAL unit."; 108 continue; 109 } 110 111 switch (nalu.type) { 112 case H264::NaluType::kSps: { 113 // Check if VUI is present in SPS and if it needs to be modified to 114 // avoid excessive decoder latency. 115 116 // Copy any previous data first (likely just the first header). 117 output_buffer.Clear(); 118 size_t start_offset = nalu_data.data() - payload_data.data(); 119 size_t end_offset = start_offset + nalu_data.size(); 120 if (start_offset) { 121 output_buffer.AppendData(payload_data.data(), start_offset); 122 } 123 124 std::optional<SpsParser::SpsState> sps; 125 126 SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( 127 nalu_data, &sps, nullptr, &output_buffer, 128 SpsVuiRewriter::Direction::kIncoming); 129 switch (result) { 130 case SpsVuiRewriter::ParseResult::kFailure: 131 RTC_LOG(LS_WARNING) << "Failed to parse SPS NAL unit."; 132 return std::nullopt; 133 case SpsVuiRewriter::ParseResult::kVuiRewritten: 134 if (modified_buffer) { 135 RTC_LOG(LS_WARNING) 136 << "More than one H264 SPS NAL units needing " 137 "rewriting found within a single STAP-A packet. " 138 "Keeping the first and rewriting the last."; 139 } 140 141 // Rewrite length field to new SPS size. 142 if (h264_header.packetization_type == kH264StapA) { 143 size_t length_field_offset = 144 start_offset - (H264::kNaluTypeSize + kLengthFieldSize); 145 // Stap-A Length includes payload data and type header. 146 size_t rewritten_size = 147 output_buffer.size() - start_offset + H264::kNaluTypeSize; 148 ByteWriter<uint16_t>::WriteBigEndian( 149 &output_buffer[length_field_offset], rewritten_size); 150 } 151 152 // Append rest of packet. 153 output_buffer.AppendData(payload_data.subview(end_offset)); 154 155 modified_buffer = true; 156 [[fallthrough]]; 157 case SpsVuiRewriter::ParseResult::kVuiOk: 158 RTC_DCHECK(sps); 159 nalu.sps_id = sps->id; 160 parsed_payload->video_header.width = sps->width; 161 parsed_payload->video_header.height = sps->height; 162 parsed_payload->video_header.frame_type = 163 VideoFrameType::kVideoFrameKey; 164 break; 165 } 166 parsed_payload->video_header.is_first_packet_in_frame = true; 167 break; 168 } 169 case H264::NaluType::kPps: { 170 uint32_t pps_id; 171 uint32_t sps_id; 172 if (PpsParser::ParsePpsIds(nalu_data, &pps_id, &sps_id)) { 173 nalu.pps_id = pps_id; 174 nalu.sps_id = sps_id; 175 } else { 176 RTC_LOG(LS_WARNING) 177 << "Failed to parse PPS id and SPS id from PPS slice."; 178 return std::nullopt; 179 } 180 parsed_payload->video_header.is_first_packet_in_frame = true; 181 break; 182 } 183 case H264::NaluType::kIdr: 184 parsed_payload->video_header.frame_type = 185 VideoFrameType::kVideoFrameKey; 186 [[fallthrough]]; 187 case H264::NaluType::kSlice: { 188 std::optional<PpsParser::SliceHeader> slice_header = 189 PpsParser::ParseSliceHeader(nalu_data); 190 if (slice_header) { 191 nalu.pps_id = slice_header->pic_parameter_set_id; 192 if (slice_header->first_mb_in_slice == 0) { 193 parsed_payload->video_header.is_first_packet_in_frame = true; 194 } 195 } else { 196 RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: " 197 << static_cast<int>(nalu.type); 198 return std::nullopt; 199 } 200 break; 201 } 202 case H264::NaluType::kAud: 203 parsed_payload->video_header.is_first_packet_in_frame = true; 204 break; 205 case H264::NaluType::kSei: 206 parsed_payload->video_header.is_first_packet_in_frame = true; 207 break; 208 // Slices below don't contain SPS or PPS ids. 209 case H264::NaluType::kEndOfSequence: 210 case H264::NaluType::kEndOfStream: 211 case H264::NaluType::kFiller: 212 break; 213 case H264::NaluType::kStapA: 214 case H264::NaluType::kFuA: 215 RTC_LOG(LS_WARNING) << "Unexpected STAP-A or FU-A received."; 216 return std::nullopt; 217 } 218 219 h264_header.nalus.push_back(nalu); 220 } 221 222 if (modified_buffer) { 223 parsed_payload->video_payload.SetData(output_buffer.data(), 224 output_buffer.size()); 225 } 226 return parsed_payload; 227 } 228 229 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> ParseFuaNalu( 230 CopyOnWriteBuffer rtp_payload) { 231 if (rtp_payload.size() < kFuAHeaderSize) { 232 RTC_LOG(LS_ERROR) << "FU-A NAL units truncated."; 233 return std::nullopt; 234 } 235 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload( 236 std::in_place); 237 uint8_t fnri = rtp_payload.cdata()[0] & (kH264FBit | kH264NriMask); 238 uint8_t original_nal_type = rtp_payload.cdata()[1] & kH264TypeMask; 239 bool first_fragment = (rtp_payload.cdata()[1] & kH264SBit) > 0; 240 bool is_first_packet_in_frame = false; 241 NaluInfo nalu; 242 nalu.type = original_nal_type; 243 nalu.sps_id = -1; 244 nalu.pps_id = -1; 245 if (first_fragment) { 246 if (original_nal_type == H264::NaluType::kIdr || 247 original_nal_type == H264::NaluType::kSlice) { 248 std::optional<PpsParser::SliceHeader> slice_header = 249 PpsParser::ParseSliceHeader(ArrayView<const uint8_t>(rtp_payload) 250 .subview(2 * kNalHeaderSize)); 251 if (slice_header) { 252 nalu.pps_id = slice_header->pic_parameter_set_id; 253 is_first_packet_in_frame = slice_header->first_mb_in_slice == 0; 254 } else { 255 RTC_LOG(LS_WARNING) 256 << "Failed to parse PPS from first fragment of FU-A NAL " 257 "unit with original type: " 258 << static_cast<int>(nalu.type); 259 } 260 } 261 uint8_t original_nal_header = fnri | original_nal_type; 262 rtp_payload = 263 rtp_payload.Slice(kNalHeaderSize, rtp_payload.size() - kNalHeaderSize); 264 rtp_payload.MutableData()[0] = original_nal_header; 265 parsed_payload->video_payload = std::move(rtp_payload); 266 } else { 267 parsed_payload->video_payload = 268 rtp_payload.Slice(kFuAHeaderSize, rtp_payload.size() - kFuAHeaderSize); 269 } 270 271 if (original_nal_type == H264::NaluType::kIdr) { 272 parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameKey; 273 } else { 274 parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta; 275 } 276 parsed_payload->video_header.width = 0; 277 parsed_payload->video_header.height = 0; 278 parsed_payload->video_header.codec = kVideoCodecH264; 279 parsed_payload->video_header.simulcastIdx = 0; 280 parsed_payload->video_header.is_first_packet_in_frame = 281 is_first_packet_in_frame; 282 auto& h264_header = parsed_payload->video_header.video_type_header 283 .emplace<RTPVideoHeaderH264>(); 284 h264_header.packetization_type = kH264FuA; 285 h264_header.nalu_type = original_nal_type; 286 if (first_fragment) { 287 h264_header.nalus = {nalu}; 288 } 289 return parsed_payload; 290 } 291 292 } // namespace 293 294 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> 295 VideoRtpDepacketizerH264::Parse(CopyOnWriteBuffer rtp_payload) { 296 if (rtp_payload.empty()) { 297 RTC_LOG(LS_ERROR) << "Empty payload."; 298 return std::nullopt; 299 } 300 301 uint8_t nal_type = rtp_payload.cdata()[0] & kH264TypeMask; 302 303 if (nal_type == H264::NaluType::kFuA) { 304 // Fragmented NAL units (FU-A). 305 return ParseFuaNalu(std::move(rtp_payload)); 306 } else { 307 // We handle STAP-A and single NALU's the same way here. The jitter buffer 308 // will depacketize the STAP-A into NAL units later. 309 return ProcessStapAOrSingleNalu(std::move(rtp_payload)); 310 } 311 } 312 313 } // namespace webrtc