sps_parser.cc (9140B)
1 /* 2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "common_video/h264/sps_parser.h" 12 13 #include <cstddef> 14 #include <cstdint> 15 #include <optional> 16 #include <vector> 17 18 #include "api/array_view.h" 19 #include "common_video/h264/h264_common.h" 20 #include "rtc_base/bitstream_reader.h" 21 22 namespace { 23 constexpr int kScalingDeltaMin = -128; 24 constexpr int kScaldingDeltaMax = 127; 25 } // namespace 26 27 namespace webrtc { 28 29 SpsParser::SpsState::SpsState() = default; 30 SpsParser::SpsState::SpsState(const SpsState&) = default; 31 SpsParser::SpsState::~SpsState() = default; 32 33 // General note: this is based off the 02/2014 version of the H.264 standard. 34 // You can find it on this page: 35 // http://www.itu.int/rec/T-REC-H.264 36 37 // Unpack RBSP and parse SPS state from the supplied buffer. 38 std::optional<SpsParser::SpsState> SpsParser::ParseSps( 39 ArrayView<const uint8_t> data) { 40 std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data); 41 BitstreamReader reader(unpacked_buffer); 42 return ParseSpsUpToVui(reader); 43 } 44 45 std::optional<SpsParser::SpsState> SpsParser::ParseSpsUpToVui( 46 BitstreamReader& reader) { 47 // Now, we need to use a bitstream reader to parse through the actual AVC SPS 48 // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the 49 // H.264 standard for a complete description. 50 // Since we only care about resolution, we ignore the majority of fields, but 51 // we still have to actively parse through a lot of the data, since many of 52 // the fields have variable size. 53 // We're particularly interested in: 54 // chroma_format_idc -> affects crop units 55 // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16). 56 // frame_crop_*_offset -> crop information 57 58 SpsState sps; 59 60 // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is 61 // 0. It defaults to 1, when not specified. 62 sps.chroma_format_idc = 1; 63 64 // profile_idc: u(8). We need it to determine if we need to read/skip chroma 65 // formats. 66 uint8_t profile_idc = reader.Read<uint8_t>(); 67 // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits 68 // 1 bit each for the flags + 2 bits + 8 bits for level_idc = 16 bits. 69 reader.ConsumeBits(16); 70 // seq_parameter_set_id: ue(v) 71 sps.id = reader.ReadExponentialGolomb(); 72 sps.separate_colour_plane_flag = 0; 73 // See if profile_idc has chroma format information. 74 if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || 75 profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || 76 profile_idc == 86 || profile_idc == 118 || profile_idc == 128 || 77 profile_idc == 138 || profile_idc == 139 || profile_idc == 134) { 78 // chroma_format_idc: ue(v) 79 sps.chroma_format_idc = reader.ReadExponentialGolomb(); 80 if (sps.chroma_format_idc == 3) { 81 // separate_colour_plane_flag: u(1) 82 sps.separate_colour_plane_flag = reader.ReadBit(); 83 } 84 // bit_depth_luma_minus8: ue(v) 85 reader.ReadExponentialGolomb(); 86 // bit_depth_chroma_minus8: ue(v) 87 reader.ReadExponentialGolomb(); 88 // qpprime_y_zero_transform_bypass_flag: u(1) 89 reader.ConsumeBits(1); 90 // seq_scaling_matrix_present_flag: u(1) 91 if (reader.Read<bool>()) { 92 // Process the scaling lists just enough to be able to properly 93 // skip over them, so we can still read the resolution on streams 94 // where this is included. 95 int scaling_list_count = (sps.chroma_format_idc == 3 ? 12 : 8); 96 for (int i = 0; i < scaling_list_count; ++i) { 97 // seq_scaling_list_present_flag[i] : u(1) 98 if (reader.Read<bool>()) { 99 int last_scale = 8; 100 int next_scale = 8; 101 int size_of_scaling_list = i < 6 ? 16 : 64; 102 for (int j = 0; j < size_of_scaling_list; j++) { 103 if (next_scale != 0) { 104 // delta_scale: se(v) 105 int delta_scale = reader.ReadSignedExponentialGolomb(); 106 if (!reader.Ok() || delta_scale < kScalingDeltaMin || 107 delta_scale > kScaldingDeltaMax) { 108 return std::nullopt; 109 } 110 next_scale = (last_scale + delta_scale + 256) % 256; 111 } 112 if (next_scale != 0) 113 last_scale = next_scale; 114 } 115 } 116 } 117 } 118 } 119 // log2_max_frame_num and log2_max_pic_order_cnt_lsb are used with 120 // BitstreamReader::ReadBits, which can read at most 64 bits at a time. We 121 // also have to avoid overflow when adding 4 to the on-wire golomb value, 122 // e.g., for evil input data, ReadExponentialGolomb might return 0xfffc. 123 const uint32_t kMaxLog2Minus4 = 12; 124 125 // log2_max_frame_num_minus4: ue(v) 126 uint32_t log2_max_frame_num_minus4 = reader.ReadExponentialGolomb(); 127 if (!reader.Ok() || log2_max_frame_num_minus4 > kMaxLog2Minus4) { 128 return std::nullopt; 129 } 130 sps.log2_max_frame_num = log2_max_frame_num_minus4 + 4; 131 132 // pic_order_cnt_type: ue(v) 133 sps.pic_order_cnt_type = reader.ReadExponentialGolomb(); 134 if (sps.pic_order_cnt_type == 0) { 135 // log2_max_pic_order_cnt_lsb_minus4: ue(v) 136 uint32_t log2_max_pic_order_cnt_lsb_minus4 = reader.ReadExponentialGolomb(); 137 if (!reader.Ok() || log2_max_pic_order_cnt_lsb_minus4 > kMaxLog2Minus4) { 138 return std::nullopt; 139 } 140 sps.log2_max_pic_order_cnt_lsb = log2_max_pic_order_cnt_lsb_minus4 + 4; 141 } else if (sps.pic_order_cnt_type == 1) { 142 // delta_pic_order_always_zero_flag: u(1) 143 sps.delta_pic_order_always_zero_flag = reader.ReadBit(); 144 // offset_for_non_ref_pic: se(v) 145 reader.ReadExponentialGolomb(); 146 // offset_for_top_to_bottom_field: se(v) 147 reader.ReadExponentialGolomb(); 148 // num_ref_frames_in_pic_order_cnt_cycle: ue(v) 149 uint32_t num_ref_frames_in_pic_order_cnt_cycle = 150 reader.ReadExponentialGolomb(); 151 for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) { 152 // offset_for_ref_frame[i]: se(v) 153 reader.ReadExponentialGolomb(); 154 if (!reader.Ok()) { 155 return std::nullopt; 156 } 157 } 158 } 159 // max_num_ref_frames: ue(v) 160 sps.max_num_ref_frames = reader.ReadExponentialGolomb(); 161 // gaps_in_frame_num_value_allowed_flag: u(1) 162 reader.ConsumeBits(1); 163 // 164 // IMPORTANT ONES! Now we're getting to resolution. First we read the pic 165 // width/height in macroblocks (16x16), which gives us the base resolution, 166 // and then we continue on until we hit the frame crop offsets, which are used 167 // to signify resolutions that aren't multiples of 16. 168 // 169 // pic_width_in_mbs_minus1: ue(v) 170 sps.width = 16 * (reader.ReadExponentialGolomb() + 1); 171 // pic_height_in_map_units_minus1: ue(v) 172 uint32_t pic_height_in_map_units_minus1 = reader.ReadExponentialGolomb(); 173 // frame_mbs_only_flag: u(1) 174 sps.frame_mbs_only_flag = reader.ReadBit(); 175 if (!sps.frame_mbs_only_flag) { 176 // mb_adaptive_frame_field_flag: u(1) 177 reader.ConsumeBits(1); 178 } 179 sps.height = 180 16 * (2 - sps.frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1); 181 // direct_8x8_inference_flag: u(1) 182 reader.ConsumeBits(1); 183 // 184 // MORE IMPORTANT ONES! Now we're at the frame crop information. 185 // 186 uint32_t frame_crop_left_offset = 0; 187 uint32_t frame_crop_right_offset = 0; 188 uint32_t frame_crop_top_offset = 0; 189 uint32_t frame_crop_bottom_offset = 0; 190 // frame_cropping_flag: u(1) 191 if (reader.Read<bool>()) { 192 // frame_crop_{left, right, top, bottom}_offset: ue(v) 193 frame_crop_left_offset = reader.ReadExponentialGolomb(); 194 frame_crop_right_offset = reader.ReadExponentialGolomb(); 195 frame_crop_top_offset = reader.ReadExponentialGolomb(); 196 frame_crop_bottom_offset = reader.ReadExponentialGolomb(); 197 } 198 // vui_parameters_present_flag: u(1) 199 sps.vui_params_present = reader.ReadBit(); 200 201 // Far enough! We don't use the rest of the SPS. 202 if (!reader.Ok()) { 203 return std::nullopt; 204 } 205 206 // Figure out the crop units in pixels. That's based on the chroma format's 207 // sampling, which is indicated by chroma_format_idc. 208 if (sps.separate_colour_plane_flag || sps.chroma_format_idc == 0) { 209 frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag); 210 frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag); 211 } else if (!sps.separate_colour_plane_flag && sps.chroma_format_idc > 0) { 212 // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2). 213 if (sps.chroma_format_idc == 1 || sps.chroma_format_idc == 2) { 214 frame_crop_left_offset *= 2; 215 frame_crop_right_offset *= 2; 216 } 217 // Height multipliers for format 1 (4:2:0). 218 if (sps.chroma_format_idc == 1) { 219 frame_crop_top_offset *= 2; 220 frame_crop_bottom_offset *= 2; 221 } 222 } 223 // Subtract the crop for each dimension. 224 sps.width -= (frame_crop_left_offset + frame_crop_right_offset); 225 sps.height -= (frame_crop_top_offset + frame_crop_bottom_offset); 226 227 return sps; 228 } 229 230 } // namespace webrtc