video_rtp_depacketizer_av1.cc (14040B)
1 /* 2 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h" 12 13 #include <array> 14 #include <cstddef> 15 #include <cstdint> 16 #include <cstring> 17 #include <iterator> 18 #include <optional> 19 #include <utility> 20 21 #include "absl/container/inlined_vector.h" 22 #include "api/array_view.h" 23 #include "api/scoped_refptr.h" 24 #include "api/video/encoded_image.h" 25 #include "api/video/video_codec_type.h" 26 #include "api/video/video_frame_type.h" 27 #include "modules/rtp_rtcp/source/leb128.h" 28 #include "modules/rtp_rtcp/source/rtp_video_header.h" 29 #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h" 30 #include "rtc_base/byte_buffer.h" 31 #include "rtc_base/checks.h" 32 #include "rtc_base/copy_on_write_buffer.h" 33 #include "rtc_base/logging.h" 34 #include "rtc_base/numerics/safe_conversions.h" 35 36 namespace webrtc { 37 namespace { 38 // AV1 format: 39 // 40 // RTP payload syntax: 41 // 0 1 2 3 4 5 6 7 42 // +-+-+-+-+-+-+-+-+ 43 // |Z|Y| W |N|-|-|-| (REQUIRED) 44 // +=+=+=+=+=+=+=+=+ (REPEATED W-1 times, or any times if W = 0) 45 // |1| | 46 // +-+ OBU fragment| 47 // |1| | (REQUIRED, leb128 encoded) 48 // +-+ size | 49 // |0| | 50 // +-+-+-+-+-+-+-+-+ 51 // | OBU fragment | 52 // | ... | 53 // +=+=+=+=+=+=+=+=+ 54 // | ... | 55 // +=+=+=+=+=+=+=+=+ if W > 0, last fragment MUST NOT have size field 56 // | OBU fragment | 57 // | ... | 58 // +=+=+=+=+=+=+=+=+ 59 // 60 // 61 // OBU syntax: 62 // 0 1 2 3 4 5 6 7 63 // +-+-+-+-+-+-+-+-+ 64 // |0| type |X|S|-| (REQUIRED) 65 // +-+-+-+-+-+-+-+-+ 66 // X: | TID |SID|-|-|-| (OPTIONAL) 67 // +-+-+-+-+-+-+-+-+ 68 // |1| | 69 // +-+ OBU payload | 70 // S: |1| | (OPTIONAL, variable length leb128 encoded) 71 // +-+ size | 72 // |0| | 73 // +-+-+-+-+-+-+-+-+ 74 // | OBU payload | 75 // | ... | 76 class ArrayOfArrayViews { 77 public: 78 class const_iterator; 79 ArrayOfArrayViews() = default; 80 ArrayOfArrayViews(const ArrayOfArrayViews&) = default; 81 ArrayOfArrayViews& operator=(const ArrayOfArrayViews&) = default; 82 ~ArrayOfArrayViews() = default; 83 84 const_iterator begin() const; 85 const_iterator end() const; 86 bool empty() const { return data_.empty(); } 87 size_t size() const { return size_; } 88 void CopyTo(uint8_t* destination, const_iterator first) const; 89 90 void Append(const uint8_t* data, size_t size) { 91 data_.emplace_back(data, size); 92 size_ += size; 93 } 94 95 private: 96 using Storage = absl::InlinedVector<ArrayView<const uint8_t>, 2>; 97 98 size_t size_ = 0; 99 Storage data_; 100 }; 101 102 class ArrayOfArrayViews::const_iterator { 103 public: 104 const_iterator() = default; 105 const_iterator(const const_iterator&) = default; 106 const_iterator& operator=(const const_iterator&) = default; 107 108 const_iterator& operator++() { 109 if (++inner_ == outer_->size()) { 110 ++outer_; 111 inner_ = 0; 112 } 113 return *this; 114 } 115 uint8_t operator*() const { return (*outer_)[inner_]; } 116 117 friend bool operator==(const const_iterator& lhs, const const_iterator& rhs) { 118 return lhs.outer_ == rhs.outer_ && lhs.inner_ == rhs.inner_; 119 } 120 121 private: 122 friend ArrayOfArrayViews; 123 const_iterator(ArrayOfArrayViews::Storage::const_iterator outer, size_t inner) 124 : outer_(outer), inner_(inner) {} 125 126 Storage::const_iterator outer_; 127 size_t inner_; 128 }; 129 130 ArrayOfArrayViews::const_iterator ArrayOfArrayViews::begin() const { 131 return const_iterator(data_.begin(), 0); 132 } 133 134 ArrayOfArrayViews::const_iterator ArrayOfArrayViews::end() const { 135 return const_iterator(data_.end(), 0); 136 } 137 138 void ArrayOfArrayViews::CopyTo(uint8_t* destination, 139 const_iterator first) const { 140 if (first == end()) { 141 // Empty OBU payload. E.g. Temporal Delimiters are always empty. 142 return; 143 } 144 size_t first_chunk_size = first.outer_->size() - first.inner_; 145 memcpy(destination, first.outer_->data() + first.inner_, first_chunk_size); 146 destination += first_chunk_size; 147 for (auto it = std::next(first.outer_); it != data_.end(); ++it) { 148 memcpy(destination, it->data(), it->size()); 149 destination += it->size(); 150 } 151 } 152 153 struct ObuInfo { 154 // Size of the obu_header and obu_size fields in the ouput frame. 155 size_t prefix_size = 0; 156 // obu_header() and obu_size (leb128 encoded payload_size). 157 // obu_header can be up to 2 bytes, obu_size - up to 5. 158 std::array<uint8_t, 7> prefix; 159 // Size of the obu payload in the output frame, i.e. excluding header 160 size_t payload_size = 0; 161 // iterator pointing to the beginning of the obu payload. 162 ArrayOfArrayViews::const_iterator payload_offset; 163 // OBU payloads as written in the rtp packet payloads. 164 ArrayOfArrayViews data; 165 }; 166 // Expect that majority of the frame won't use more than 4 obus. 167 // In a simple stream delta frame consist of single Frame OBU, while key frame 168 // also has Sequence Header OBU. 169 using VectorObuInfo = absl::InlinedVector<ObuInfo, 4>; 170 171 constexpr uint8_t kObuSizePresentBit = 0b0'0000'010; 172 173 bool ObuHasExtension(uint8_t obu_header) { 174 return obu_header & 0b0'0000'100u; 175 } 176 177 bool ObuHasSize(uint8_t obu_header) { 178 return obu_header & kObuSizePresentBit; 179 } 180 181 bool RtpStartsWithFragment(uint8_t aggregation_header) { 182 return aggregation_header & 0b1000'0000u; 183 } 184 bool RtpEndsWithFragment(uint8_t aggregation_header) { 185 return aggregation_header & 0b0100'0000u; 186 } 187 int RtpNumObus(uint8_t aggregation_header) { // 0 for any number of obus. 188 return (aggregation_header & 0b0011'0000u) >> 4; 189 } 190 int RtpStartsNewCodedVideoSequence(uint8_t aggregation_header) { 191 return aggregation_header & 0b0000'1000u; 192 } 193 194 // Reorgonizes array of rtp payloads into array of obus: 195 // fills ObuInfo::data field. 196 // Returns empty vector on error. 197 VectorObuInfo ParseObus( 198 ArrayView<const ArrayView<const uint8_t>> rtp_payloads) { 199 VectorObuInfo obu_infos; 200 bool expect_continues_obu = false; 201 for (ArrayView<const uint8_t> rtp_payload : rtp_payloads) { 202 ByteBufferReader payload(rtp_payload); 203 uint8_t aggregation_header; 204 if (!payload.ReadUInt8(&aggregation_header)) { 205 RTC_DLOG(LS_WARNING) 206 << "Failed to find aggregation header in the packet."; 207 return {}; 208 } 209 // Z-bit: 1 if the first OBU contained in the packet is a continuation of a 210 // previous OBU. 211 bool continues_obu = RtpStartsWithFragment(aggregation_header); 212 if (continues_obu != expect_continues_obu) { 213 RTC_DLOG(LS_WARNING) << "Unexpected Z-bit " << continues_obu; 214 return {}; 215 } 216 int num_expected_obus = RtpNumObus(aggregation_header); 217 if (payload.Length() == 0) { 218 // rtp packet has just the aggregation header. That may be valid only when 219 // there is exactly one fragment in the packet of size 0. 220 if (num_expected_obus != 1) { 221 RTC_DLOG(LS_WARNING) 222 << "Invalid packet with just an aggregation header."; 223 return {}; 224 } 225 if (!continues_obu) { 226 // Empty packet just to notify there is a new OBU. 227 obu_infos.emplace_back(); 228 } 229 expect_continues_obu = RtpEndsWithFragment(aggregation_header); 230 continue; 231 } 232 233 for (int obu_index = 1; payload.Length() > 0; ++obu_index) { 234 ObuInfo& obu_info = (obu_index == 1 && continues_obu) 235 ? obu_infos.back() 236 : obu_infos.emplace_back(); 237 uint64_t fragment_size; 238 // When num_expected_obus > 0, last OBU (fragment) is not preceeded by 239 // the size field. See W field in 240 // https://aomediacodec.github.io/av1-rtp-spec/#43-av1-aggregation-header 241 bool has_fragment_size = (obu_index != num_expected_obus); 242 if (has_fragment_size) { 243 if (!payload.ReadUVarint(&fragment_size)) { 244 RTC_DLOG(LS_WARNING) << "Failed to read fragment size for obu #" 245 << obu_index << "/" << num_expected_obus; 246 return {}; 247 } 248 if (fragment_size > payload.Length()) { 249 // Malformed input: written size is larger than remaining buffer. 250 RTC_DLOG(LS_WARNING) << "Malformed fragment size " << fragment_size 251 << " is larger than remaining size " 252 << payload.Length() << " while reading obu #" 253 << obu_index << "/" << num_expected_obus; 254 return {}; 255 } 256 } else { 257 fragment_size = payload.Length(); 258 } 259 // While it is in-practical to pass empty fragments, it is still possible. 260 if (fragment_size > 0) { 261 obu_info.data.Append(reinterpret_cast<const uint8_t*>(payload.Data()), 262 fragment_size); 263 payload.Consume(fragment_size); 264 } 265 } 266 // Z flag should be same as Y flag of the next packet. 267 expect_continues_obu = RtpEndsWithFragment(aggregation_header); 268 } 269 if (expect_continues_obu) { 270 RTC_DLOG(LS_WARNING) << "Last packet shouldn't have last obu fragmented."; 271 return {}; 272 } 273 return obu_infos; 274 } 275 276 // Calculates sizes for the Obu, i.e. base on ObuInfo::data field calculates 277 // all other fields in the ObuInfo structure. 278 // Returns false if obu found to be misformed. 279 bool CalculateObuSizes(ObuInfo* obu_info) { 280 if (obu_info->data.empty()) { 281 RTC_DLOG(LS_WARNING) << "Invalid bitstream: empty obu provided."; 282 return false; 283 } 284 auto it = obu_info->data.begin(); 285 uint8_t obu_header = *it; 286 obu_info->prefix[0] = obu_header | kObuSizePresentBit; 287 obu_info->prefix_size = 1; 288 ++it; 289 if (ObuHasExtension(obu_header)) { 290 if (it == obu_info->data.end()) { 291 return false; 292 } 293 obu_info->prefix[1] = *it; // obu_extension_header 294 obu_info->prefix_size = 2; 295 ++it; 296 } 297 // Read, validate, and skip size, if present. 298 if (!ObuHasSize(obu_header)) { 299 obu_info->payload_size = obu_info->data.size() - obu_info->prefix_size; 300 } else { 301 // Read leb128 encoded field obu_size. 302 uint64_t obu_size_bytes = 0; 303 // Number of bytes obu_size field occupy in the bitstream. 304 int size_of_obu_size_bytes = 0; 305 uint8_t leb128_byte; 306 do { 307 if (it == obu_info->data.end() || size_of_obu_size_bytes >= 8) { 308 RTC_DLOG(LS_WARNING) 309 << "Failed to read obu_size. obu_size field is too long: " 310 << size_of_obu_size_bytes << " bytes processed."; 311 return false; 312 } 313 leb128_byte = *it; 314 obu_size_bytes |= uint64_t{leb128_byte & 0x7Fu} 315 << (size_of_obu_size_bytes * 7); 316 ++size_of_obu_size_bytes; 317 ++it; 318 } while ((leb128_byte & 0x80) != 0); 319 320 obu_info->payload_size = 321 obu_info->data.size() - obu_info->prefix_size - size_of_obu_size_bytes; 322 if (obu_size_bytes != obu_info->payload_size) { 323 // obu_size was present in the bitstream and mismatches calculated size. 324 RTC_DLOG(LS_WARNING) << "Mismatch in obu_size. signaled: " 325 << obu_size_bytes 326 << ", actual: " << obu_info->payload_size; 327 return false; 328 } 329 } 330 obu_info->payload_offset = it; 331 obu_info->prefix_size += 332 WriteLeb128(dchecked_cast<uint64_t>(obu_info->payload_size), 333 obu_info->prefix.data() + obu_info->prefix_size); 334 return true; 335 } 336 337 } // namespace 338 339 scoped_refptr<EncodedImageBuffer> VideoRtpDepacketizerAv1::AssembleFrame( 340 ArrayView<const ArrayView<const uint8_t>> rtp_payloads) { 341 VectorObuInfo obu_infos = ParseObus(rtp_payloads); 342 if (obu_infos.empty()) { 343 return nullptr; 344 } 345 346 size_t frame_size = 0; 347 for (ObuInfo& obu_info : obu_infos) { 348 if (!CalculateObuSizes(&obu_info)) { 349 return nullptr; 350 } 351 frame_size += (obu_info.prefix_size + obu_info.payload_size); 352 } 353 354 scoped_refptr<EncodedImageBuffer> bitstream = 355 EncodedImageBuffer::Create(frame_size); 356 uint8_t* write_at = bitstream->data(); 357 for (const ObuInfo& obu_info : obu_infos) { 358 // Copy the obu_header and obu_size fields. 359 memcpy(write_at, obu_info.prefix.data(), obu_info.prefix_size); 360 write_at += obu_info.prefix_size; 361 // Copy the obu payload. 362 obu_info.data.CopyTo(write_at, obu_info.payload_offset); 363 write_at += obu_info.payload_size; 364 } 365 RTC_CHECK_EQ(write_at - bitstream->data(), bitstream->size()); 366 return bitstream; 367 } 368 369 std::optional<VideoRtpDepacketizer::ParsedRtpPayload> 370 VideoRtpDepacketizerAv1::Parse(CopyOnWriteBuffer rtp_payload) { 371 if (rtp_payload.empty()) { 372 RTC_DLOG(LS_ERROR) << "Empty rtp payload."; 373 return std::nullopt; 374 } 375 uint8_t aggregation_header = rtp_payload.cdata()[0]; 376 if (RtpStartsNewCodedVideoSequence(aggregation_header) && 377 RtpStartsWithFragment(aggregation_header)) { 378 // new coded video sequence can't start from an OBU fragment. 379 return std::nullopt; 380 } 381 std::optional<ParsedRtpPayload> parsed(std::in_place); 382 383 // To assemble frame, all of the rtp payload is required, including 384 // aggregation header. 385 parsed->video_payload = std::move(rtp_payload); 386 387 parsed->video_header.codec = VideoCodecType::kVideoCodecAV1; 388 // These are not accurate since frame may consist of several packet aligned 389 // chunks of obus, but should be good enough for most cases. It might produce 390 // frame that do not map to any real frame, but av1 decoder should be able to 391 // handle it since it promise to handle individual obus rather than full 392 // frames. 393 parsed->video_header.is_first_packet_in_frame = 394 !RtpStartsWithFragment(aggregation_header); 395 parsed->video_header.is_last_packet_in_frame = 396 !RtpEndsWithFragment(aggregation_header); 397 398 parsed->video_header.frame_type = 399 RtpStartsNewCodedVideoSequence(aggregation_header) 400 ? VideoFrameType::kVideoFrameKey 401 : VideoFrameType::kVideoFrameDelta; 402 return parsed; 403 } 404 405 } // namespace webrtc