rtp_header_extensions.cc (36207B)
1 /* 2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/rtp_rtcp/source/rtp_header_extensions.h" 12 13 #include <string.h> 14 15 #include <cmath> 16 #include <cstddef> 17 #include <cstdint> 18 #include <cstring> 19 #include <limits> 20 #include <optional> 21 #include <string> 22 #include <vector> 23 24 #include "absl/strings/string_view.h" 25 #include "api/array_view.h" 26 #include "api/rtp_headers.h" 27 #include "api/units/time_delta.h" 28 #include "api/video/color_space.h" 29 #include "api/video/hdr_metadata.h" 30 #include "api/video/video_content_type.h" 31 #include "api/video/video_rotation.h" 32 #include "api/video/video_timing.h" 33 #include "modules/rtp_rtcp/include/rtp_cvo.h" 34 #include "modules/rtp_rtcp/source/byte_io.h" 35 #include "rtc_base/checks.h" 36 37 namespace webrtc { 38 // Absolute send time in RTP streams. 39 // 40 // The absolute send time is signaled to the receiver in-band using the 41 // general mechanism for RTP header extensions [RFC8285]. The payload 42 // of this extension (the transmitted value) is a 24-bit unsigned integer 43 // containing the sender's current time in seconds as a fixed point number 44 // with 18 bits fractional part. 
45 // 46 // The form of the absolute send time extension block: 47 // 48 // 0 1 2 3 49 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 50 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 51 // | ID | len=2 | absolute send time | 52 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 53 bool AbsoluteSendTime::Parse(ArrayView<const uint8_t> data, 54 uint32_t* time_24bits) { 55 if (data.size() != 3) 56 return false; 57 *time_24bits = ByteReader<uint32_t, 3>::ReadBigEndian(data.data()); 58 return true; 59 } 60 61 bool AbsoluteSendTime::Write(ArrayView<uint8_t> data, uint32_t time_24bits) { 62 RTC_DCHECK_EQ(data.size(), 3); 63 RTC_DCHECK_LE(time_24bits, 0x00FFFFFF); 64 ByteWriter<uint32_t, 3>::WriteBigEndian(data.data(), time_24bits); 65 return true; 66 } 67 68 // Absolute Capture Time 69 // 70 // The Absolute Capture Time extension is used to stamp RTP packets with a NTP 71 // timestamp showing when the first audio or video frame in a packet was 72 // originally captured. The intent of this extension is to provide a way to 73 // accomplish audio-to-video synchronization when RTCP-terminating intermediate 74 // systems (e.g. mixers) are involved. 75 // 76 // Data layout of the shortened version of abs-capture-time: 77 // 78 // 0 1 2 3 79 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 80 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 81 // | ID | len=7 | absolute capture timestamp (bit 0-23) | 82 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 83 // | absolute capture timestamp (bit 24-55) | 84 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 85 // | ... 
// (56-63)     |
// +-+-+-+-+-+-+-+-+
//
// Data layout of the extended version of abs-capture-time:
//
//  0                   1                   2                   3
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |  ID   | len=15|     absolute capture timestamp (bit 0-23)     |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |             absolute capture timestamp (bit 24-55)            |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | ... (56-63)   |   estimated capture clock offset (bit 0-23)   |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |        estimated capture clock offset (bit 24-55)             |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | ... (56-63)   |
// +-+-+-+-+-+-+-+-+

// Parses either wire layout of abs-capture-time: the extended one (64-bit
// timestamp followed by a 64-bit estimated capture clock offset) or the
// shortened one (timestamp only). Any other payload size is rejected.
// On the shortened layout `estimated_capture_clock_offset` is left untouched.
bool AbsoluteCaptureTimeExtension::Parse(ArrayView<const uint8_t> data,
                                         AbsoluteCaptureTime* extension) {
  if (data.size() != kValueSizeBytes &&
      data.size() != kValueSizeBytesWithoutEstimatedCaptureClockOffset) {
    return false;
  }

  extension->absolute_capture_timestamp =
      ByteReader<uint64_t>::ReadBigEndian(data.data());

  // Size equal to the full layout implies the offset field is present.
  if (data.size() != kValueSizeBytesWithoutEstimatedCaptureClockOffset) {
    extension->estimated_capture_clock_offset =
        ByteReader<int64_t>::ReadBigEndian(data.data() + 8);
  }

  return true;
}

// Returns the on-wire size required for `extension`: the full layout when an
// estimated capture clock offset is set, the shortened layout otherwise.
size_t AbsoluteCaptureTimeExtension::ValueSize(
    const AbsoluteCaptureTime& extension) {
  if (extension.estimated_capture_clock_offset != std::nullopt) {
    return kValueSizeBytes;
  } else {
    return kValueSizeBytesWithoutEstimatedCaptureClockOffset;
  }
}

// Serializes `extension` into `data`, which must be pre-sized with
// ValueSize(extension). The clock offset is written only when `data` is sized
// for the full layout; the DCHECK ties that size to the offset being set.
bool AbsoluteCaptureTimeExtension::Write(ArrayView<uint8_t> data,
                                         const AbsoluteCaptureTime& extension) {
  RTC_DCHECK_EQ(data.size(), ValueSize(extension));

  ByteWriter<uint64_t>::WriteBigEndian(data.data(),
                                       extension.absolute_capture_timestamp);

  if (data.size() != kValueSizeBytesWithoutEstimatedCaptureClockOffset) {
    ByteWriter<int64_t>::WriteBigEndian(
        data.data() + 8, extension.estimated_capture_clock_offset.value());
  }

  return true;
}

// An RTP Header Extension for Client-to-Mixer Audio Level Indication
//
// https://tools.ietf.org/html/rfc6464
//
// The form of the audio level extension block:
//
//  0                   1
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |  ID   | len=0 |V|    level    |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// Sample Audio Level Encoding Using the One-Byte Header Format
//
//  0                   1                   2
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |      ID       |     len=1     |V|    level    |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// Sample Audio Level Encoding Using the Two-Byte Header Format

// Parses the single payload byte: MSB is the voice-activity flag, the low
// 7 bits are the audio level (RFC 6464).
bool AudioLevelExtension::Parse(ArrayView<const uint8_t> data,
                                AudioLevel* extension) {
  // One-byte and two-byte format share the same data definition.
  if (data.size() != 1)
    return false;
  bool voice_activity = (data[0] & 0x80) != 0;
  int audio_level = data[0] & 0x7F;
  *extension = AudioLevel(voice_activity, audio_level);
  return true;
}

// Packs voice-activity flag and level into one byte. The level must already
// be within [0, 127]; the CHECKs enforce this even in release builds.
bool AudioLevelExtension::Write(ArrayView<uint8_t> data,
                                const AudioLevel& extension) {
  // One-byte and two-byte format share the same data definition.
  RTC_DCHECK_EQ(data.size(), 1);
  RTC_CHECK_GE(extension.level(), 0);
  RTC_CHECK_LE(extension.level(), 0x7f);
  data[0] = (extension.voice_activity() ? 0x80 : 0x00) | extension.level();
  return true;
}

#if !defined(WEBRTC_MOZILLA_BUILD)
// An RTP Header Extension for Mixer-to-Client Audio Level Indication
//
// https://tools.ietf.org/html/rfc6465
//
// The form of the audio level extension block:
//
//  0                   1                   2                   3
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |  ID   | len=2 |0|   level 1   |0|   level 2   |0|   level 3   |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// Sample Audio Level Encoding Using the One-Byte Header Format
//
//  0                   1                   2                   3
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |      ID       |     len=3     |0|   level 1   |0|   level 2   |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |0|   level 3   |    0 (pad)    |               ...             |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// Sample Audio Level Encoding Using the Two-Byte Header Format

// Parses one 7-bit level per byte, one per contributing source; at most
// kRtpCsrcSize entries. The top bit of each byte is masked off per RFC 6465.
bool CsrcAudioLevel::Parse(ArrayView<const uint8_t> data,
                           std::vector<uint8_t>* csrc_audio_levels) {
  if (data.size() > kRtpCsrcSize) {
    return false;
  }
  csrc_audio_levels->resize(data.size());
  for (size_t i = 0; i < data.size(); i++) {
    (*csrc_audio_levels)[i] = data[i] & 0x7F;
  }
  return true;
}

// One byte on the wire per CSRC level.
size_t CsrcAudioLevel::ValueSize(ArrayView<const uint8_t> csrc_audio_levels) {
  return csrc_audio_levels.size();
}

// Writes one masked 7-bit level per byte. Fails if the destination buffer
// was not sized to exactly one byte per level.
bool CsrcAudioLevel::Write(ArrayView<uint8_t> data,
                           ArrayView<const uint8_t> csrc_audio_levels) {
  RTC_CHECK_LE(csrc_audio_levels.size(), kRtpCsrcSize);
  if (csrc_audio_levels.size() != data.size()) {
    return false;
  }
  for (size_t i = 0; i < csrc_audio_levels.size(); i++) {
    data[i] = csrc_audio_levels[i] & 0x7F;
  }
  return true;
}
#endif

// From RFC 5450:
// Transmission Time Offsets in RTP Streams.
//
// The transmission time is signaled to the receiver in-band using the
// general mechanism for RTP header extensions [RFC8285]. The payload
// of this extension (the transmitted value) is a 24-bit signed integer.
// When added to the RTP timestamp of the packet, it represents the
// "effective" RTP transmission time of the packet, on the RTP
// timescale.
//
// The form of the transmission offset extension block:
//
//  0                   1                   2                   3
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |  ID   | len=2 |              transmission offset              |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

// Parses the 3-byte big-endian signed transmission offset (RFC 5450).
bool TransmissionOffset::Parse(ArrayView<const uint8_t> data,
                               int32_t* rtp_time) {
  if (data.size() != 3)
    return false;
  *rtp_time = ByteReader<int32_t, 3>::ReadBigEndian(data.data());
  return true;
}

// Writes `rtp_time` as a 3-byte big-endian value. Only an upper bound is
// DCHECKed here; negative values are encoded by ByteWriter's 3-byte
// specialization. NOTE(review): there is no matching lower-bound check for
// the signed 24-bit range — presumably callers stay in range; verify.
bool TransmissionOffset::Write(ArrayView<uint8_t> data, int32_t rtp_time) {
  RTC_DCHECK_EQ(data.size(), 3);
  RTC_DCHECK_LE(rtp_time, 0x00ffffff);
  ByteWriter<int32_t, 3>::WriteBigEndian(data.data(), rtp_time);
  return true;
}

// TransportSequenceNumber
//
//   0                   1                   2
//   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  |  ID   | L=1   |transport-wide sequence number |
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

// Parses the 16-bit transport-wide sequence number.
bool TransportSequenceNumber::Parse(ArrayView<const uint8_t> data,
                                    uint16_t* transport_sequence_number) {
  if (data.size() != kValueSizeBytes)
    return false;
  *transport_sequence_number = ByteReader<uint16_t>::ReadBigEndian(data.data());
  return true;
}

// Writes the 16-bit transport-wide sequence number in big-endian order.
bool TransportSequenceNumber::Write(ArrayView<uint8_t> data,
                                    uint16_t transport_sequence_number) {
  RTC_DCHECK_EQ(data.size(), ValueSize(transport_sequence_number));
  ByteWriter<uint16_t>::WriteBigEndian(data.data(), transport_sequence_number);
  return true;
}

// TransportSequenceNumberV2
//
// In addition to the format used for TransportSequencNumber, V2 also supports
// the following packet format where two extra bytes are used to specify that
// the sender requests immediate feedback.
//   0                   1                   2                   3
//   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  |  ID   | L=3   |transport-wide sequence number |T|  seq count  |
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  |seq count cont.|
//  +-+-+-+-+-+-+-+-+
//
// The bit `T` determines whether the feedback should include timing
// information or not and `seq_count` determines how many packets the feedback
// packet should cover including the current packet. If `seq_count` is zero no
// feedback is requested.

// Parses the sequence number and, when the 4-byte layout is used, the
// optional feedback request. `*feedback_request` is reset to nullopt first so
// the 2-byte layout (and a zero sequence count) both yield "no request".
bool TransportSequenceNumberV2::Parse(
    ArrayView<const uint8_t> data,
    uint16_t* transport_sequence_number,
    std::optional<FeedbackRequest>* feedback_request) {
  if (data.size() != kValueSizeBytes &&
      data.size() != kValueSizeBytesWithoutFeedbackRequest)
    return false;

  *transport_sequence_number = ByteReader<uint16_t>::ReadBigEndian(data.data());

  *feedback_request = std::nullopt;
  if (data.size() == kValueSizeBytes) {
    // Upper bit carries `T` (include timestamps); the rest is the count.
    uint16_t feedback_request_raw =
        ByteReader<uint16_t>::ReadBigEndian(data.data() + 2);
    bool include_timestamps =
        (feedback_request_raw & kIncludeTimestampsBit) != 0;
    uint16_t sequence_count = feedback_request_raw & ~kIncludeTimestampsBit;

    // If `sequence_count` is zero no feedback is requested.
    if (sequence_count != 0) {
      *feedback_request = {.include_timestamps = include_timestamps,
                           .sequence_count = sequence_count};
    }
  }
  return true;
}

// Writes the sequence number, plus the packed feedback-request word when one
// is supplied. `data` must be pre-sized with ValueSize() so the two layouts
// stay in sync with Parse().
bool TransportSequenceNumberV2::Write(
    ArrayView<uint8_t> data,
    uint16_t transport_sequence_number,
    const std::optional<FeedbackRequest>& feedback_request) {
  RTC_DCHECK_EQ(data.size(),
                ValueSize(transport_sequence_number, feedback_request));

  ByteWriter<uint16_t>::WriteBigEndian(data.data(), transport_sequence_number);

  if (feedback_request) {
    // The count must leave the top bit free for the timestamps flag.
    RTC_DCHECK_GE(feedback_request->sequence_count, 0);
    RTC_DCHECK_LT(feedback_request->sequence_count, kIncludeTimestampsBit);
    uint16_t feedback_request_raw =
        feedback_request->sequence_count |
        (feedback_request->include_timestamps ? kIncludeTimestampsBit : 0);
    ByteWriter<uint16_t>::WriteBigEndian(data.data() + 2, feedback_request_raw);
  }
  return true;
}

// Coordination of Video Orientation in RTP streams.
//
// Coordination of Video Orientation consists in signaling of the current
// orientation of the image captured on the sender side to the receiver for
// appropriate rendering and displaying.
//
//  0                   1
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |  ID   | len=0 |0 0 0 0 C F R R|
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

// Decodes the single CVO byte into a VideoRotation enum value.
bool VideoOrientation::Parse(ArrayView<const uint8_t> data,
                             VideoRotation* rotation) {
  if (data.size() != 1)
    return false;
  *rotation = ConvertCVOByteToVideoRotation(data[0]);
  return true;
}

// Encodes `rotation` into the single CVO byte.
bool VideoOrientation::Write(ArrayView<uint8_t> data, VideoRotation rotation) {
  RTC_DCHECK_EQ(data.size(), 1);
  data[0] = ConvertVideoRotationToCVOByte(rotation);
  return true;
}

// Raw-byte variant: copies the CVO byte out without interpreting it.
bool VideoOrientation::Parse(ArrayView<const uint8_t> data, uint8_t* value) {
  if (data.size() != 1)
    return false;
  *value = data[0];
  return true;
}

// Raw-byte variant: writes a pre-encoded CVO byte verbatim.
bool VideoOrientation::Write(ArrayView<uint8_t> data, uint8_t value) {
  RTC_DCHECK_EQ(data.size(), 1);
  data[0] = value;
  return true;
}

//  0                   1                   2                   3
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |  ID   | len=2 |   MIN delay           |   MAX delay           |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

// Parses two packed 12-bit delays (in units of kGranularity) and forwards
// them to VideoPlayoutDelay::Set, whose validation determines the result.
bool PlayoutDelayLimits::Parse(ArrayView<const uint8_t> data,
                               VideoPlayoutDelay* playout_delay) {
  RTC_DCHECK(playout_delay);
  if (data.size() != 3)
    return false;
  uint32_t raw = ByteReader<uint32_t, 3>::ReadBigEndian(data.data());
  uint16_t min_raw = (raw >> 12);
  uint16_t max_raw = (raw & 0xfff);
  return playout_delay->Set(min_raw * kGranularity, max_raw * kGranularity);
}

// Packs min/max playout delay as two 12-bit fields in three bytes.
bool PlayoutDelayLimits::Write(ArrayView<uint8_t> data,
                               const VideoPlayoutDelay& playout_delay) {
  RTC_DCHECK_EQ(data.size(), 3);

  // Convert TimeDelta to value to be sent on extension header.
  auto idiv = [](TimeDelta num, TimeDelta den) { return num.us() / den.us(); };
  int64_t min_delay = idiv(playout_delay.min(), kGranularity);
  int64_t max_delay = idiv(playout_delay.max(), kGranularity);

  // Double check min/max boundaries guaranteed by the `VideoPlayoutDelay`
  // type: each value must fit in its 12-bit field.
  RTC_DCHECK_GE(min_delay, 0);
  RTC_DCHECK_LT(min_delay, 1 << 12);
  RTC_DCHECK_GE(max_delay, 0);
  RTC_DCHECK_LT(max_delay, 1 << 12);

  ByteWriter<uint32_t, 3>::WriteBigEndian(data.data(),
                                          (min_delay << 12) | max_delay);
  return true;
}

#if defined(WEBRTC_MOZILLA_BUILD)
// CSRCAudioLevel
// Sample Audio Level Encoding Using the One-Byte Header Format
// Note that the range of len is 1 to 15 which is encoded as 0 to 14
//  0                   1                   2                   3
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |  ID   | len=2 |0|   level 1   |0|   level 2   |0|   level 3   |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

// Out-of-class definitions for the static members (Mozilla build only).
constexpr RTPExtensionType CsrcAudioLevel::kId;
constexpr const char* CsrcAudioLevel::kUri;

// Mozilla variant of the RFC 6465 parser: fills a fixed-size
// CsrcAudioLevelList instead of a std::vector. Empty payloads are rejected.
bool CsrcAudioLevel::Parse(ArrayView<const uint8_t> data,
                           CsrcAudioLevelList* csrcAudioLevels) {
  if (data.size() < 1 || data.size() > kRtpCsrcSize)
    return false;
  csrcAudioLevels->numAudioLevels = data.size();
  for(uint8_t i = 0; i < csrcAudioLevels->numAudioLevels; i++) {
    // Ensure range is 0 to 127 inclusive
    csrcAudioLevels->arrOfAudioLevels[i] = 0x7f & data[i];
  }
  return true;
}

// One byte on the wire per stored audio level.
size_t CsrcAudioLevel::ValueSize(const CsrcAudioLevelList& csrcAudioLevels) {
  return csrcAudioLevels.numAudioLevels;
}

// Writes one masked 7-bit level per byte.
bool CsrcAudioLevel::Write(ArrayView<uint8_t> data,
                           const CsrcAudioLevelList& csrcAudioLevels) {
  RTC_DCHECK_GE(csrcAudioLevels.numAudioLevels, 0);
  for(uint8_t i = 0; i < csrcAudioLevels.numAudioLevels; i++) {
    data[i] = csrcAudioLevels.arrOfAudioLevels[i] & 0x7f;
  }
  // This extension if used must have at least one audio level.
  // (The count converts to bool: zero levels -> false.)
  return csrcAudioLevels.numAudioLevels;
}
#endif

// Video Content Type.
//
// E.g. default video or screenshare.
//
//  0                   1
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |  ID   | len=0 | Content type  |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

// Parses the one-byte content type, accepting legacy values with extra bits
// set but reducing them to the single meaningful low bit.
bool VideoContentTypeExtension::Parse(ArrayView<const uint8_t> data,
                                      VideoContentType* content_type) {
  if (data.size() == 1 &&
      videocontenttypehelpers::IsValidContentType(data[0])) {
    // Only the lowest bit of ContentType has a defined meaning.
    // Due to previous, now removed, usage of 5 more bits, values with
    // those bits set are accepted as valid, but we mask them out before
    // converting to a VideoContentType.
    *content_type = static_cast<VideoContentType>(data[0] & 0x1);
    return true;
  }
  return false;
}

// Writes the content type as its raw enum byte.
bool VideoContentTypeExtension::Write(ArrayView<uint8_t> data,
                                      VideoContentType content_type) {
  RTC_DCHECK_EQ(data.size(), 1);
  data[0] = static_cast<uint8_t>(content_type);
  return true;
}

// Video Timing.
// 6 timestamps in milliseconds counted from capture time stored in rtp header:
// encode start/finish, packetization complete, pacer exit and reserved for
// modification by the network modification. `flags` is a bitmask and has the
// following allowed values:
// 0 = Valid data, but no flags available (backwards compatibility)
// 1 = Frame marked as timing frame due to cyclic timer.
// 2 = Frame marked as timing frame due to size being outside limit.
// 255 = Invalid. The whole timing frame extension should be ignored.
507 // 508 // 0 1 2 3 509 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 510 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 511 // | ID | len=12| flags | encode start ms delta | 512 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 513 // | encode finish ms delta | packetizer finish ms delta | 514 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 515 // | pacer exit ms delta | network timestamp ms delta | 516 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 517 // | network2 timestamp ms delta | 518 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 519 bool VideoTimingExtension::Parse(ArrayView<const uint8_t> data, 520 VideoSendTiming* timing) { 521 RTC_DCHECK(timing); 522 // TODO(sprang): Deprecate support for old wire format. 523 ptrdiff_t off = 0; 524 switch (data.size()) { 525 case kValueSizeBytes - 1: 526 timing->flags = 0; 527 off = 1; // Old wire format without the flags field. 528 break; 529 case kValueSizeBytes: 530 timing->flags = ByteReader<uint8_t>::ReadBigEndian(data.data()); 531 break; 532 default: 533 return false; 534 } 535 536 timing->encode_start_delta_ms = ByteReader<uint16_t>::ReadBigEndian( 537 data.data() + kEncodeStartDeltaOffset - off); 538 timing->encode_finish_delta_ms = ByteReader<uint16_t>::ReadBigEndian( 539 data.data() + kEncodeFinishDeltaOffset - off); 540 timing->packetization_finish_delta_ms = ByteReader<uint16_t>::ReadBigEndian( 541 data.data() + kPacketizationFinishDeltaOffset - off); 542 timing->pacer_exit_delta_ms = ByteReader<uint16_t>::ReadBigEndian( 543 data.data() + kPacerExitDeltaOffset - off); 544 timing->network_timestamp_delta_ms = ByteReader<uint16_t>::ReadBigEndian( 545 data.data() + kNetworkTimestampDeltaOffset - off); 546 timing->network2_timestamp_delta_ms = ByteReader<uint16_t>::ReadBigEndian( 547 data.data() + kNetwork2TimestampDeltaOffset - off); 548 return true; 549 } 550 551 bool VideoTimingExtension::Write(ArrayView<uint8_t> 
data, 552 const VideoSendTiming& timing) { 553 RTC_DCHECK_EQ(data.size(), 1 + 2 * 6); 554 ByteWriter<uint8_t>::WriteBigEndian(data.data() + kFlagsOffset, timing.flags); 555 ByteWriter<uint16_t>::WriteBigEndian(data.data() + kEncodeStartDeltaOffset, 556 timing.encode_start_delta_ms); 557 ByteWriter<uint16_t>::WriteBigEndian(data.data() + kEncodeFinishDeltaOffset, 558 timing.encode_finish_delta_ms); 559 ByteWriter<uint16_t>::WriteBigEndian( 560 data.data() + kPacketizationFinishDeltaOffset, 561 timing.packetization_finish_delta_ms); 562 ByteWriter<uint16_t>::WriteBigEndian(data.data() + kPacerExitDeltaOffset, 563 timing.pacer_exit_delta_ms); 564 ByteWriter<uint16_t>::WriteBigEndian( 565 data.data() + kNetworkTimestampDeltaOffset, 566 timing.network_timestamp_delta_ms); 567 ByteWriter<uint16_t>::WriteBigEndian( 568 data.data() + kNetwork2TimestampDeltaOffset, 569 timing.network2_timestamp_delta_ms); 570 return true; 571 } 572 573 bool VideoTimingExtension::Write(ArrayView<uint8_t> data, 574 uint16_t time_delta_ms, 575 uint8_t offset) { 576 RTC_DCHECK_GE(data.size(), offset + 2); 577 RTC_DCHECK_LE(offset, kValueSizeBytes - sizeof(uint16_t)); 578 ByteWriter<uint16_t>::WriteBigEndian(data.data() + offset, time_delta_ms); 579 return true; 580 } 581 582 // Color space including HDR metadata as an optional field. 583 // 584 // RTP header extension to carry color space information and optionally HDR 585 // metadata. The float values in the HDR metadata struct are upscaled by a 586 // static factor and transmitted as unsigned integers. 587 // 588 // Data layout of color space with HDR metadata (two-byte RTP header extension) 589 // 0 1 2 3 590 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 591 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 592 // | ID | length=28 | primaries | transfer | 593 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 594 // | matrix |range+chr.sit. 
// | luminance_max |
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  |         luminance_min         |            mastering_metadata.|
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  |primary_r.x and .y             |            mastering_metadata.|
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  |primary_g.x and .y             |            mastering_metadata.|
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  |primary_b.x and .y             |            mastering_metadata.|
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  |white.x and .y                 |    max_content_light_level    |
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  | max_frame_average_light_level |
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// Data layout of color space w/o HDR metadata (one-byte RTP header extension)
//   0                   1                   2                   3
//   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  |  ID   | L = 3 |   primaries   |   transfer    |    matrix     |
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  |range+chr.sit. |
//  +-+-+-+-+-+-+-+-+

// Parses a color-space extension in either layout (with or without HDR
// metadata). Each enum byte is validated through the ColorSpace setters;
// HDR metadata, when present, must additionally pass Validate().
bool ColorSpaceExtension::Parse(ArrayView<const uint8_t> data,
                                ColorSpace* color_space) {
  RTC_DCHECK(color_space);
  if (data.size() != kValueSizeBytes &&
      data.size() != kValueSizeBytesWithoutHdrMetadata)
    return false;

  size_t offset = 0;
  // Read color space information.
  if (!color_space->set_primaries_from_uint8(data[offset++]))
    return false;
  if (!color_space->set_transfer_from_uint8(data[offset++]))
    return false;
  if (!color_space->set_matrix_from_uint8(data[offset++]))
    return false;

  // One byte packs range (bits 4-5) and horizontal/vertical chroma siting
  // (bits 2-3 / 0-1); see CombineRangeAndChromaSiting below.
  uint8_t range_and_chroma_siting = data[offset++];
  if (!color_space->set_range_from_uint8((range_and_chroma_siting >> 4) & 0x03))
    return false;
  if (!color_space->set_chroma_siting_horizontal_from_uint8(
          (range_and_chroma_siting >> 2) & 0x03))
    return false;
  if (!color_space->set_chroma_siting_vertical_from_uint8(
          range_and_chroma_siting & 0x03))
    return false;

  // Read HDR metadata if it exists, otherwise clear it.
  if (data.size() == kValueSizeBytesWithoutHdrMetadata) {
    color_space->set_hdr_metadata(nullptr);
  } else {
    HdrMetadata hdr_metadata;
    offset += ParseHdrMetadata(data.subview(offset), &hdr_metadata);
    if (!hdr_metadata.Validate())
      return false;
    color_space->set_hdr_metadata(&hdr_metadata);
  }
  // Cross-check: bytes consumed must match the declared size for this value.
  RTC_DCHECK_EQ(ValueSize(*color_space), offset);
  return true;
}

// Serializes `color_space` into `data`, which must be pre-sized with
// ValueSize(color_space); HDR metadata is appended only when present.
bool ColorSpaceExtension::Write(ArrayView<uint8_t> data,
                                const ColorSpace& color_space) {
  RTC_DCHECK_EQ(data.size(), ValueSize(color_space));
  size_t offset = 0;
  // Write color space information.
  data[offset++] = static_cast<uint8_t>(color_space.primaries());
  data[offset++] = static_cast<uint8_t>(color_space.transfer());
  data[offset++] = static_cast<uint8_t>(color_space.matrix());
  data[offset++] = CombineRangeAndChromaSiting(
      color_space.range(), color_space.chroma_siting_horizontal(),
      color_space.chroma_siting_vertical());

  // Write HDR metadata if it exists.
  if (color_space.hdr_metadata()) {
    offset +=
        WriteHdrMetadata(data.subview(offset), *color_space.hdr_metadata());
  }
  RTC_DCHECK_EQ(ValueSize(color_space), offset);
  return true;
}

// Combines range and chroma siting into one byte with the following bit
// layout:
//   bits 0-1 Chroma siting vertical.
//        2-3 Chroma siting horizontal.
//        4-5 Range.
//        6-7 Unused.
uint8_t ColorSpaceExtension::CombineRangeAndChromaSiting(
    ColorSpace::RangeID range,
    ColorSpace::ChromaSiting chroma_siting_horizontal,
    ColorSpace::ChromaSiting chroma_siting_vertical) {
  RTC_DCHECK_LE(static_cast<uint8_t>(range), 3);
  RTC_DCHECK_LE(static_cast<uint8_t>(chroma_siting_horizontal), 3);
  RTC_DCHECK_LE(static_cast<uint8_t>(chroma_siting_vertical), 3);
  return (static_cast<uint8_t>(range) << 4) |
         (static_cast<uint8_t>(chroma_siting_horizontal) << 2) |
         static_cast<uint8_t>(chroma_siting_vertical);
}

// Reads the fixed-order HDR metadata block (luminance max/min, four
// chromaticity pairs, then the two light-level fields) and returns the
// number of bytes consumed.
size_t ColorSpaceExtension::ParseHdrMetadata(ArrayView<const uint8_t> data,
                                             HdrMetadata* hdr_metadata) {
  RTC_DCHECK_EQ(data.size(),
                kValueSizeBytes - kValueSizeBytesWithoutHdrMetadata);
  size_t offset = 0;
  offset += ParseLuminance(data.data() + offset,
                           &hdr_metadata->mastering_metadata.luminance_max,
                           kLuminanceMaxDenominator);
  offset += ParseLuminance(data.data() + offset,
                           &hdr_metadata->mastering_metadata.luminance_min,
                           kLuminanceMinDenominator);
  offset += ParseChromaticity(data.data() + offset,
                              &hdr_metadata->mastering_metadata.primary_r);
  offset += ParseChromaticity(data.data() + offset,
                              &hdr_metadata->mastering_metadata.primary_g);
  offset += ParseChromaticity(data.data() + offset,
                              &hdr_metadata->mastering_metadata.primary_b);
  offset += ParseChromaticity(data.data() + offset,
                              &hdr_metadata->mastering_metadata.white_point);
  hdr_metadata->max_content_light_level =
      ByteReader<uint16_t>::ReadBigEndian(data.data() + offset);
  offset += 2;
  hdr_metadata->max_frame_average_light_level =
      ByteReader<uint16_t>::ReadBigEndian(data.data() + offset);
  offset += 2;
  return offset;
}

// Reads one chromaticity coordinate pair (x then y), downscaling each from
// its transmitted uint16 representation by kChromaticityDenominator.
size_t ColorSpaceExtension::ParseChromaticity(
    const uint8_t* data,
    HdrMasteringMetadata::Chromaticity* p) {
  uint16_t chromaticity_x_scaled = ByteReader<uint16_t>::ReadBigEndian(data);
  uint16_t chromaticity_y_scaled =
      ByteReader<uint16_t>::ReadBigEndian(data + 2);
  p->x = static_cast<float>(chromaticity_x_scaled) / kChromaticityDenominator;
  p->y = static_cast<float>(chromaticity_y_scaled) / kChromaticityDenominator;
  return 4;  // Return number of bytes read.
}

// Reads one luminance value, downscaling by `denominator`.
size_t ColorSpaceExtension::ParseLuminance(const uint8_t* data,
                                           float* f,
                                           int denominator) {
  uint16_t luminance_scaled = ByteReader<uint16_t>::ReadBigEndian(data);
  *f = static_cast<float>(luminance_scaled) / denominator;
  return 2;  // Return number of bytes read.
}

// Writes the HDR metadata block in the same fixed field order that
// ParseHdrMetadata reads; returns the number of bytes written.
size_t ColorSpaceExtension::WriteHdrMetadata(ArrayView<uint8_t> data,
                                             const HdrMetadata& hdr_metadata) {
  RTC_DCHECK_EQ(data.size(),
                kValueSizeBytes - kValueSizeBytesWithoutHdrMetadata);
  RTC_DCHECK(hdr_metadata.Validate());
  size_t offset = 0;
  offset += WriteLuminance(data.data() + offset,
                           hdr_metadata.mastering_metadata.luminance_max,
                           kLuminanceMaxDenominator);
  offset += WriteLuminance(data.data() + offset,
                           hdr_metadata.mastering_metadata.luminance_min,
                           kLuminanceMinDenominator);
  offset += WriteChromaticity(data.data() + offset,
                              hdr_metadata.mastering_metadata.primary_r);
  offset += WriteChromaticity(data.data() + offset,
                              hdr_metadata.mastering_metadata.primary_g);
  offset += WriteChromaticity(data.data() + offset,
                              hdr_metadata.mastering_metadata.primary_b);
  offset += WriteChromaticity(data.data() + offset,
                              hdr_metadata.mastering_metadata.white_point);

  ByteWriter<uint16_t>::WriteBigEndian(data.data() + offset,
                                       hdr_metadata.max_content_light_level);
  offset += 2;
  ByteWriter<uint16_t>::WriteBigEndian(
      data.data() + offset, hdr_metadata.max_frame_average_light_level);
  offset += 2;
  return offset;
}

// Writes one chromaticity pair, upscaling each normalized [0, 1] coordinate
// by kChromaticityDenominator and rounding to the nearest integer.
size_t ColorSpaceExtension::WriteChromaticity(
    uint8_t* data,
    const HdrMasteringMetadata::Chromaticity& p) {
  RTC_DCHECK_GE(p.x, 0.0f);
  RTC_DCHECK_LE(p.x, 1.0f);
  RTC_DCHECK_GE(p.y, 0.0f);
  RTC_DCHECK_LE(p.y, 1.0f);
  ByteWriter<uint16_t>::WriteBigEndian(
      data, std::round(p.x * kChromaticityDenominator));
  ByteWriter<uint16_t>::WriteBigEndian(
      data + 2, std::round(p.y * kChromaticityDenominator));
  return 4;  // Return number of bytes written.
}

// Writes one luminance value, upscaled by `denominator`; the upscaled value
// must fit in a uint16.
size_t ColorSpaceExtension::WriteLuminance(uint8_t* data,
                                           float f,
                                           int denominator) {
  RTC_DCHECK_GE(f, 0.0f);
  float upscaled_value = f * denominator;
  RTC_DCHECK_LE(upscaled_value, std::numeric_limits<uint16_t>::max());
  ByteWriter<uint16_t>::WriteBigEndian(data, std::round(upscaled_value));
  return 2;  // Return number of bytes written.
}

// Copies the payload bytes into `str`, treating an embedded NUL (which a
// well-formed extension should not contain) as the end of the string.
// Empty payloads and payloads starting with NUL are rejected.
bool BaseRtpStringExtension::Parse(ArrayView<const uint8_t> data,
                                   std::string* str) {
  if (data.empty() || data[0] == 0)  // Valid string extension can't be empty.
    return false;
  const char* cstr = reinterpret_cast<const char*>(data.data());
  // If there is a \0 character in the middle of the `data`, treat it as end
  // of the string. Well-formed string extensions shouldn't contain it.
  str->assign(cstr, strnlen(cstr, data.size()));
  RTC_DCHECK(!str->empty());
  return true;
}

// Copies `str` verbatim into `data`, which must be sized exactly to the
// (non-empty, at most kMaxValueSizeBytes) string.
bool BaseRtpStringExtension::Write(ArrayView<uint8_t> data,
                                   absl::string_view str) {
  if (str.size() > kMaxValueSizeBytes) {
    return false;
  }
  RTC_DCHECK_EQ(data.size(), str.size());
  RTC_DCHECK_GE(str.size(), 1);
  memcpy(data.data(), str.data(), str.size());
  return true;
}

// An RTP Header Extension for Inband Comfort Noise
//
// The form of the audio level extension block:
//
//  0                   1
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |  ID   | len=0 |N|    level    |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// Sample Audio Level Encoding Using the One-Byte Header Format
//
//  0                   1                   2
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |      ID       |     len=1     |N|    level    |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// Sample Audio Level Encoding Using the Two-Byte Header Format

// Parses the comfort-noise byte: if the N bit (MSB) is set, no level is
// available (nullopt); otherwise the low 7 bits carry the level.
bool InbandComfortNoiseExtension::Parse(ArrayView<const uint8_t> data,
                                        std::optional<uint8_t>* level) {
  if (data.size() != kValueSizeBytes)
    return false;
  *level = (data[0] & 0b1000'0000) != 0
               ? std::nullopt
               : std::make_optional(data[0] & 0b0111'1111);
  return true;
}

// Writes the comfort-noise byte: N bit set plus the 7-bit level when a level
// is provided, all-zero otherwise. Levels above 127 are rejected.
bool InbandComfortNoiseExtension::Write(ArrayView<uint8_t> data,
                                        std::optional<uint8_t> level) {
  RTC_DCHECK_EQ(data.size(), kValueSizeBytes);
  data[0] = 0b0000'0000;
  if (level) {
    if (*level > 127) {
      return false;
    }
    data[0] = 0b1000'0000 | *level;
  }
  return true;
}

// VideoFrameTrackingIdExtension
//
//   0                   1                   2
//   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  |  ID   | L=1   |    video-frame-tracking-id    |
//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

// Parses the 16-bit big-endian frame tracking id.
bool VideoFrameTrackingIdExtension::Parse(ArrayView<const uint8_t> data,
                                          uint16_t* video_frame_tracking_id) {
  if (data.size() != kValueSizeBytes) {
    return false;
  }
  *video_frame_tracking_id = ByteReader<uint16_t>::ReadBigEndian(data.data());
  return true;
}

// Writes the 16-bit big-endian frame tracking id.
bool VideoFrameTrackingIdExtension::Write(ArrayView<uint8_t> data,
                                          uint16_t video_frame_tracking_id) {
  RTC_DCHECK_EQ(data.size(), kValueSizeBytes);
  ByteWriter<uint16_t>::WriteBigEndian(data.data(), video_frame_tracking_id);
  return true;
}

}  // namespace webrtc