tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rtp_format_vp9.cc (15708B)


      1 /*
      2 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/rtp_rtcp/source/rtp_format_vp9.h"
     12 
     13 #include <cstdint>
     14 #include <cstring>
     15 
     16 #include "api/array_view.h"
     17 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
     18 #include "modules/video_coding/codecs/interface/common_constants.h"
     19 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
     20 #include "rtc_base/bit_buffer.h"
     21 #include "rtc_base/checks.h"
     22 #include "rtc_base/logging.h"
     23 
     24 #define RETURN_FALSE_ON_ERROR(x) \
     25  if (!(x)) {                    \
     26    return false;                \
     27  }
     28 
     29 namespace webrtc {
     30 namespace {
     31 // Length of VP9 payload descriptors' fixed part.
     32 const size_t kFixedPayloadDescriptorBytes = 1;
     33 
     34 const uint32_t kReservedBitValue0 = 0;
     35 
     36 uint8_t TemporalIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
     37  return (hdr.temporal_idx == kNoTemporalIdx) ? def : hdr.temporal_idx;
     38 }
     39 
     40 uint8_t SpatialIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
     41  return (hdr.spatial_idx == kNoSpatialIdx) ? def : hdr.spatial_idx;
     42 }
     43 
     44 int16_t Tl0PicIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
     45  return (hdr.tl0_pic_idx == kNoTl0PicIdx) ? def : hdr.tl0_pic_idx;
     46 }
     47 
     48 // Picture ID:
     49 //
     50 //      +-+-+-+-+-+-+-+-+
     51 // I:   |M| PICTURE ID  |   M:0 => picture id is 7 bits.
     52 //      +-+-+-+-+-+-+-+-+   M:1 => picture id is 15 bits.
     53 // M:   | EXTENDED PID  |
     54 //      +-+-+-+-+-+-+-+-+
     55 //
     56 size_t PictureIdLength(const RTPVideoHeaderVP9& hdr) {
     57  if (hdr.picture_id == kNoPictureId)
     58    return 0;
     59  return (hdr.max_picture_id == kMaxOneBytePictureId) ? 1 : 2;
     60 }
     61 
     62 bool PictureIdPresent(const RTPVideoHeaderVP9& hdr) {
     63  return PictureIdLength(hdr) > 0;
     64 }
     65 
     66 // Layer indices:
     67 //
     68 // Flexible mode (F=1):     Non-flexible mode (F=0):
     69 //
     70 //      +-+-+-+-+-+-+-+-+   +-+-+-+-+-+-+-+-+
     71 // L:   |  T  |U|  S  |D|   |  T  |U|  S  |D|
     72 //      +-+-+-+-+-+-+-+-+   +-+-+-+-+-+-+-+-+
     73 //                          |   TL0PICIDX   |
     74 //                          +-+-+-+-+-+-+-+-+
     75 //
     76 size_t LayerInfoLength(const RTPVideoHeaderVP9& hdr) {
     77  if (hdr.temporal_idx == kNoTemporalIdx && hdr.spatial_idx == kNoSpatialIdx) {
     78    return 0;
     79  }
     80  return hdr.flexible_mode ? 1 : 2;
     81 }
     82 
     83 bool LayerInfoPresent(const RTPVideoHeaderVP9& hdr) {
     84  return LayerInfoLength(hdr) > 0;
     85 }
     86 
     87 // Reference indices:
     88 //
     89 //      +-+-+-+-+-+-+-+-+                P=1,F=1: At least one reference index
     90 // P,F: | P_DIFF      |N|  up to 3 times          has to be specified.
     91 //      +-+-+-+-+-+-+-+-+                    N=1: An additional P_DIFF follows
     92 //                                                current P_DIFF.
     93 //
     94 size_t RefIndicesLength(const RTPVideoHeaderVP9& hdr) {
     95  if (!hdr.inter_pic_predicted || !hdr.flexible_mode)
     96    return 0;
     97 
     98  RTC_CHECK_GT(hdr.num_ref_pics, 0U);
     99  RTC_CHECK_LE(hdr.num_ref_pics, kMaxVp9RefPics);
    100  return hdr.num_ref_pics;
    101 }
    102 
    103 // Scalability structure (SS).
    104 //
    105 //      +-+-+-+-+-+-+-+-+
    106 // V:   | N_S |Y|G|-|-|-|
    107 //      +-+-+-+-+-+-+-+-+              -|
    108 // Y:   |     WIDTH     | (OPTIONAL)    .
    109 //      +               +               .
    110 //      |               | (OPTIONAL)    .
    111 //      +-+-+-+-+-+-+-+-+               . N_S + 1 times
    112 //      |     HEIGHT    | (OPTIONAL)    .
    113 //      +               +               .
    114 //      |               | (OPTIONAL)    .
    115 //      +-+-+-+-+-+-+-+-+              -|
    116 // G:   |      N_G      | (OPTIONAL)
    117 //      +-+-+-+-+-+-+-+-+                           -|
    118 // N_G: |  T  |U| R |-|-| (OPTIONAL)                 .
    119 //      +-+-+-+-+-+-+-+-+              -|            . N_G times
    120 //      |    P_DIFF     | (OPTIONAL)    . R times    .
    121 //      +-+-+-+-+-+-+-+-+              -|           -|
    122 //
    123 size_t SsDataLength(const RTPVideoHeaderVP9& hdr) {
    124  if (!hdr.ss_data_available)
    125    return 0;
    126 
    127  RTC_CHECK_GT(hdr.num_spatial_layers, 0U);
    128  RTC_CHECK_LE(hdr.num_spatial_layers, kMaxVp9NumberOfSpatialLayers);
    129  RTC_CHECK_LE(hdr.gof.num_frames_in_gof, kMaxVp9FramesInGof);
    130  size_t length = 1;  // V
    131  if (hdr.spatial_layer_resolution_present) {
    132    length += 4 * hdr.num_spatial_layers;  // Y
    133  }
    134  if (hdr.gof.num_frames_in_gof > 0) {
    135    ++length;  // G
    136  }
    137  // N_G
    138  length += hdr.gof.num_frames_in_gof;  // T, U, R
    139  for (size_t i = 0; i < hdr.gof.num_frames_in_gof; ++i) {
    140    RTC_CHECK_LE(hdr.gof.num_ref_pics[i], kMaxVp9RefPics);
    141    length += hdr.gof.num_ref_pics[i];  // R times
    142  }
    143  return length;
    144 }
    145 
    146 size_t PayloadDescriptorLengthMinusSsData(const RTPVideoHeaderVP9& hdr) {
    147  return kFixedPayloadDescriptorBytes + PictureIdLength(hdr) +
    148         LayerInfoLength(hdr) + RefIndicesLength(hdr);
    149 }
    150 
    151 // Picture ID:
    152 //
    153 //      +-+-+-+-+-+-+-+-+
    154 // I:   |M| PICTURE ID  |   M:0 => picture id is 7 bits.
    155 //      +-+-+-+-+-+-+-+-+   M:1 => picture id is 15 bits.
    156 // M:   | EXTENDED PID  |
    157 //      +-+-+-+-+-+-+-+-+
    158 //
    159 bool WritePictureId(const RTPVideoHeaderVP9& vp9, BitBufferWriter* writer) {
    160  bool m_bit = (PictureIdLength(vp9) == 2);
    161  RETURN_FALSE_ON_ERROR(writer->WriteBits(m_bit ? 1 : 0, 1));
    162  RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.picture_id, m_bit ? 15 : 7));
    163  return true;
    164 }
    165 
    166 // Layer indices:
    167 //
    168 // Flexible mode (F=1):
    169 //
    170 //      +-+-+-+-+-+-+-+-+
    171 // L:   |  T  |U|  S  |D|
    172 //      +-+-+-+-+-+-+-+-+
    173 //
    174 bool WriteLayerInfoCommon(const RTPVideoHeaderVP9& vp9,
    175                          BitBufferWriter* writer) {
    176  RETURN_FALSE_ON_ERROR(writer->WriteBits(TemporalIdxField(vp9, 0), 3));
    177  RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.temporal_up_switch ? 1 : 0, 1));
    178  RETURN_FALSE_ON_ERROR(writer->WriteBits(SpatialIdxField(vp9, 0), 3));
    179  RETURN_FALSE_ON_ERROR(
    180      writer->WriteBits(vp9.inter_layer_predicted ? 1 : 0, 1));
    181  return true;
    182 }
    183 
    184 // Non-flexible mode (F=0):
    185 //
    186 //      +-+-+-+-+-+-+-+-+
    187 // L:   |  T  |U|  S  |D|
    188 //      +-+-+-+-+-+-+-+-+
    189 //      |   TL0PICIDX   |
    190 //      +-+-+-+-+-+-+-+-+
    191 //
    192 bool WriteLayerInfoNonFlexibleMode(const RTPVideoHeaderVP9& vp9,
    193                                   BitBufferWriter* writer) {
    194  RETURN_FALSE_ON_ERROR(writer->WriteUInt8(Tl0PicIdxField(vp9, 0)));
    195  return true;
    196 }
    197 
    198 bool WriteLayerInfo(const RTPVideoHeaderVP9& vp9, BitBufferWriter* writer) {
    199  if (!WriteLayerInfoCommon(vp9, writer))
    200    return false;
    201 
    202  if (vp9.flexible_mode)
    203    return true;
    204 
    205  return WriteLayerInfoNonFlexibleMode(vp9, writer);
    206 }
    207 
    208 // Reference indices:
    209 //
    210 //      +-+-+-+-+-+-+-+-+                P=1,F=1: At least one reference index
    211 // P,F: | P_DIFF      |N|  up to 3 times          has to be specified.
    212 //      +-+-+-+-+-+-+-+-+                    N=1: An additional P_DIFF follows
    213 //                                                current P_DIFF.
    214 //
    215 bool WriteRefIndices(const RTPVideoHeaderVP9& vp9, BitBufferWriter* writer) {
    216  if (!PictureIdPresent(vp9) || vp9.num_ref_pics == 0 ||
    217      vp9.num_ref_pics > kMaxVp9RefPics) {
    218    return false;
    219  }
    220  for (uint8_t i = 0; i < vp9.num_ref_pics; ++i) {
    221    bool n_bit = !(i == vp9.num_ref_pics - 1);
    222    RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.pid_diff[i], 7));
    223    RETURN_FALSE_ON_ERROR(writer->WriteBits(n_bit ? 1 : 0, 1));
    224  }
    225  return true;
    226 }
    227 
    228 // Scalability structure (SS).
    229 //
    230 //      +-+-+-+-+-+-+-+-+
    231 // V:   | N_S |Y|G|-|-|-|
    232 //      +-+-+-+-+-+-+-+-+              -|
    233 // Y:   |     WIDTH     | (OPTIONAL)    .
    234 //      +               +               .
    235 //      |               | (OPTIONAL)    .
    236 //      +-+-+-+-+-+-+-+-+               . N_S + 1 times
    237 //      |     HEIGHT    | (OPTIONAL)    .
    238 //      +               +               .
    239 //      |               | (OPTIONAL)    .
    240 //      +-+-+-+-+-+-+-+-+              -|
    241 // G:   |      N_G      | (OPTIONAL)
    242 //      +-+-+-+-+-+-+-+-+                           -|
    243 // N_G: |  T  |U| R |-|-| (OPTIONAL)                 .
    244 //      +-+-+-+-+-+-+-+-+              -|            . N_G times
    245 //      |    P_DIFF     | (OPTIONAL)    . R times    .
    246 //      +-+-+-+-+-+-+-+-+              -|           -|
    247 //
    248 bool WriteSsData(const RTPVideoHeaderVP9& vp9, BitBufferWriter* writer) {
    249  RTC_CHECK_GT(vp9.num_spatial_layers, 0U);
    250  RTC_CHECK_LE(vp9.num_spatial_layers, kMaxVp9NumberOfSpatialLayers);
    251  RTC_CHECK_LE(vp9.gof.num_frames_in_gof, kMaxVp9FramesInGof);
    252  bool g_bit = vp9.gof.num_frames_in_gof > 0;
    253 
    254  RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.num_spatial_layers - 1, 3));
    255  RETURN_FALSE_ON_ERROR(
    256      writer->WriteBits(vp9.spatial_layer_resolution_present ? 1 : 0, 1));
    257  RETURN_FALSE_ON_ERROR(writer->WriteBits(g_bit ? 1 : 0, 1));  // G
    258  RETURN_FALSE_ON_ERROR(writer->WriteBits(kReservedBitValue0, 3));
    259 
    260  if (vp9.spatial_layer_resolution_present) {
    261    for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
    262      RETURN_FALSE_ON_ERROR(writer->WriteUInt16(vp9.width[i]));
    263      RETURN_FALSE_ON_ERROR(writer->WriteUInt16(vp9.height[i]));
    264    }
    265  }
    266  if (g_bit) {
    267    RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.gof.num_frames_in_gof));
    268  }
    269  for (size_t i = 0; i < vp9.gof.num_frames_in_gof; ++i) {
    270    RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.temporal_idx[i], 3));
    271    RETURN_FALSE_ON_ERROR(
    272        writer->WriteBits(vp9.gof.temporal_up_switch[i] ? 1 : 0, 1));
    273    RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.num_ref_pics[i], 2));
    274    RETURN_FALSE_ON_ERROR(writer->WriteBits(kReservedBitValue0, 2));
    275    for (uint8_t r = 0; r < vp9.gof.num_ref_pics[i]; ++r) {
    276      RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.gof.pid_diff[i][r]));
    277    }
    278  }
    279  return true;
    280 }
    281 
    282 // TODO(https://bugs.webrtc.org/11319):
    283 // Workaround for switching off spatial layers on the fly.
    284 // Sent layers must start from SL0 on RTP layer, but can start from any
    285 // spatial layer because WebRTC-SVC api isn't implemented yet and
    286 // current API to invoke SVC is not flexible enough.
    287 RTPVideoHeaderVP9 RemoveInactiveSpatialLayers(
    288    const RTPVideoHeaderVP9& original_header) {
    289  RTC_CHECK_LE(original_header.num_spatial_layers,
    290               kMaxVp9NumberOfSpatialLayers);
    291  RTPVideoHeaderVP9 hdr(original_header);
    292  if (original_header.first_active_layer == 0)
    293    return hdr;
    294  for (size_t i = hdr.first_active_layer; i < hdr.num_spatial_layers; ++i) {
    295    hdr.width[i - hdr.first_active_layer] = hdr.width[i];
    296    hdr.height[i - hdr.first_active_layer] = hdr.height[i];
    297  }
    298  for (size_t i = hdr.num_spatial_layers - hdr.first_active_layer;
    299       i < hdr.num_spatial_layers; ++i) {
    300    hdr.width[i] = 0;
    301    hdr.height[i] = 0;
    302  }
    303  hdr.num_spatial_layers -= hdr.first_active_layer;
    304  hdr.spatial_idx -= hdr.first_active_layer;
    305  hdr.first_active_layer = 0;
    306  return hdr;
    307 }
    308 }  // namespace
    309 
    310 RtpPacketizerVp9::RtpPacketizerVp9(ArrayView<const uint8_t> payload,
    311                                   PayloadSizeLimits limits,
    312                                   const RTPVideoHeaderVP9& hdr)
    313    : hdr_(RemoveInactiveSpatialLayers(hdr)),
    314      header_size_(PayloadDescriptorLengthMinusSsData(hdr_)),
    315      first_packet_extra_header_size_(SsDataLength(hdr_)),
    316      remaining_payload_(payload) {
    317  RTC_CHECK_EQ(hdr_.first_active_layer, 0);
    318 
    319  limits.max_payload_len -= header_size_;
    320  limits.first_packet_reduction_len += first_packet_extra_header_size_;
    321  limits.single_packet_reduction_len += first_packet_extra_header_size_;
    322  if (!payload.empty()) {
    323    payload_sizes_ = SplitAboutEqually(payload.size(), limits);
    324  }
    325  current_packet_ = payload_sizes_.begin();
    326 }
    327 
    328 RtpPacketizerVp9::~RtpPacketizerVp9() = default;
    329 
    330 size_t RtpPacketizerVp9::NumPackets() const {
    331  return payload_sizes_.end() - current_packet_;
    332 }
    333 
    334 bool RtpPacketizerVp9::NextPacket(RtpPacketToSend* packet) {
    335  RTC_DCHECK(packet);
    336  if (current_packet_ == payload_sizes_.end()) {
    337    return false;
    338  }
    339 
    340  bool layer_begin = current_packet_ == payload_sizes_.begin();
    341  int packet_payload_len = *current_packet_;
    342  ++current_packet_;
    343  bool layer_end = current_packet_ == payload_sizes_.end();
    344 
    345  int header_size = header_size_;
    346  if (layer_begin)
    347    header_size += first_packet_extra_header_size_;
    348 
    349  uint8_t* buffer = packet->AllocatePayload(header_size + packet_payload_len);
    350  RTC_CHECK(buffer);
    351 
    352  if (!WriteHeader(layer_begin, layer_end, MakeArrayView(buffer, header_size)))
    353    return false;
    354 
    355  memcpy(buffer + header_size, remaining_payload_.data(), packet_payload_len);
    356  remaining_payload_ = remaining_payload_.subview(packet_payload_len);
    357 
    358  // Ensure end_of_picture is always set on top spatial layer when it is not
    359  // dropped.
    360  RTC_CHECK(hdr_.spatial_idx < hdr_.num_spatial_layers - 1 ||
    361            hdr_.end_of_picture);
    362 
    363  packet->SetMarker(layer_end && hdr_.end_of_picture);
    364  return true;
    365 }
    366 
    367 // VP9 format:
    368 //
    369 // Payload descriptor for F = 1 (flexible mode)
    370 //       0 1 2 3 4 5 6 7
    371 //      +-+-+-+-+-+-+-+-+
    372 //      |I|P|L|F|B|E|V|Z| (REQUIRED)
    373 //      +-+-+-+-+-+-+-+-+
    374 // I:   |M| PICTURE ID  | (RECOMMENDED)
    375 //      +-+-+-+-+-+-+-+-+
    376 // M:   | EXTENDED PID  | (RECOMMENDED)
    377 //      +-+-+-+-+-+-+-+-+
    378 // L:   |  T  |U|  S  |D| (CONDITIONALLY RECOMMENDED)
    379 //      +-+-+-+-+-+-+-+-+                             -|
    380 // P,F: | P_DIFF      |N| (CONDITIONALLY RECOMMENDED)  . up to 3 times
    381 //      +-+-+-+-+-+-+-+-+                             -|
    382 // V:   | SS            |
    383 //      | ..            |
    384 //      +-+-+-+-+-+-+-+-+
    385 //
    386 // Payload descriptor for F = 0 (non-flexible mode)
    387 //       0 1 2 3 4 5 6 7
    388 //      +-+-+-+-+-+-+-+-+
    389 //      |I|P|L|F|B|E|V|Z| (REQUIRED)
    390 //      +-+-+-+-+-+-+-+-+
    391 // I:   |M| PICTURE ID  | (RECOMMENDED)
    392 //      +-+-+-+-+-+-+-+-+
    393 // M:   | EXTENDED PID  | (RECOMMENDED)
    394 //      +-+-+-+-+-+-+-+-+
    395 // L:   |  T  |U|  S  |D| (CONDITIONALLY RECOMMENDED)
    396 //      +-+-+-+-+-+-+-+-+
    397 //      |   TL0PICIDX   | (CONDITIONALLY REQUIRED)
    398 //      +-+-+-+-+-+-+-+-+
    399 // V:   | SS            |
    400 //      | ..            |
    401 //      +-+-+-+-+-+-+-+-+
    402 bool RtpPacketizerVp9::WriteHeader(bool layer_begin,
    403                                   bool layer_end,
    404                                   ArrayView<uint8_t> buffer) const {
    405  // Required payload descriptor byte.
    406  bool i_bit = PictureIdPresent(hdr_);
    407  bool p_bit = hdr_.inter_pic_predicted;
    408  bool l_bit = LayerInfoPresent(hdr_);
    409  bool f_bit = hdr_.flexible_mode;
    410  bool b_bit = layer_begin;
    411  bool e_bit = layer_end;
    412  bool v_bit = hdr_.ss_data_available && b_bit;
    413  bool z_bit = hdr_.non_ref_for_inter_layer_pred;
    414 
    415  BitBufferWriter writer(buffer.data(), buffer.size());
    416  RETURN_FALSE_ON_ERROR(writer.WriteBits(i_bit ? 1 : 0, 1));
    417  RETURN_FALSE_ON_ERROR(writer.WriteBits(p_bit ? 1 : 0, 1));
    418  RETURN_FALSE_ON_ERROR(writer.WriteBits(l_bit ? 1 : 0, 1));
    419  RETURN_FALSE_ON_ERROR(writer.WriteBits(f_bit ? 1 : 0, 1));
    420  RETURN_FALSE_ON_ERROR(writer.WriteBits(b_bit ? 1 : 0, 1));
    421  RETURN_FALSE_ON_ERROR(writer.WriteBits(e_bit ? 1 : 0, 1));
    422  RETURN_FALSE_ON_ERROR(writer.WriteBits(v_bit ? 1 : 0, 1));
    423  RETURN_FALSE_ON_ERROR(writer.WriteBits(z_bit ? 1 : 0, 1));
    424 
    425  // Add fields that are present.
    426  if (i_bit && !WritePictureId(hdr_, &writer)) {
    427    RTC_LOG(LS_ERROR) << "Failed writing VP9 picture id.";
    428    return false;
    429  }
    430  if (l_bit && !WriteLayerInfo(hdr_, &writer)) {
    431    RTC_LOG(LS_ERROR) << "Failed writing VP9 layer info.";
    432    return false;
    433  }
    434  if (p_bit && f_bit && !WriteRefIndices(hdr_, &writer)) {
    435    RTC_LOG(LS_ERROR) << "Failed writing VP9 ref indices.";
    436    return false;
    437  }
    438  if (v_bit && !WriteSsData(hdr_, &writer)) {
    439    RTC_LOG(LS_ERROR) << "Failed writing VP9 SS data.";
    440    return false;
    441  }
    442 
    443  size_t offset_bytes = 0;
    444  size_t offset_bits = 0;
    445  writer.GetCurrentOffset(&offset_bytes, &offset_bits);
    446  RTC_DCHECK_EQ(offset_bits, 0);
    447  RTC_DCHECK_EQ(offset_bytes, buffer.size());
    448  return true;
    449 }
    450 
    451 }  // namespace webrtc