h264_bitstream_parser.cc (12944B)
1 /* 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 #include "common_video/h264/h264_bitstream_parser.h" 11 12 #include <cstdint> 13 #include <cstdlib> 14 #include <optional> 15 #include <vector> 16 17 #include "api/array_view.h" 18 #include "common_video/h264/h264_common.h" 19 #include "common_video/h264/pps_parser.h" 20 #include "common_video/h264/sps_parser.h" 21 #include "rtc_base/bitstream_reader.h" 22 #include "rtc_base/logging.h" 23 24 namespace webrtc { 25 namespace { 26 27 constexpr int kMaxAbsQpDeltaValue = 51; 28 constexpr int kMinQpValue = 0; 29 constexpr int kMaxQpValue = 51; 30 31 } // namespace 32 33 H264BitstreamParser::H264BitstreamParser() = default; 34 H264BitstreamParser::~H264BitstreamParser() = default; 35 36 H264BitstreamParser::Result H264BitstreamParser::ParseNonParameterSetNalu( 37 ArrayView<const uint8_t> source, 38 uint8_t nalu_type) { 39 if (!sps_ || !pps_) 40 return kInvalidStream; 41 42 last_slice_qp_delta_ = std::nullopt; 43 const std::vector<uint8_t> slice_rbsp = H264::ParseRbsp(source); 44 if (slice_rbsp.size() < H264::kNaluTypeSize) 45 return kInvalidStream; 46 47 BitstreamReader slice_reader(slice_rbsp); 48 slice_reader.ConsumeBits(H264::kNaluTypeSize * 8); 49 50 // Check to see if this is an IDR slice, which has an extra field to parse 51 // out. 52 bool is_idr = (source[0] & 0x0F) == H264::NaluType::kIdr; 53 uint8_t nal_ref_idc = (source[0] & 0x60) >> 5; 54 55 uint32_t num_ref_idx_l0_active_minus1 = 56 pps_->num_ref_idx_l0_default_active_minus1; 57 uint32_t num_ref_idx_l1_active_minus1 = 58 pps_->num_ref_idx_l1_default_active_minus1; 59 60 // first_mb_in_slice: ue(v) 61 slice_reader.ReadExponentialGolomb(); 62 // slice_type: ue(v) 63 uint32_t slice_type = slice_reader.ReadExponentialGolomb(); 64 // slice_type's 5..9 range is used to indicate that all slices of a picture 65 // have the same value of slice_type % 5, we don't care about that, so we map 66 // to the corresponding 0..4 range. 67 slice_type %= 5; 68 // pic_parameter_set_id: ue(v) 69 slice_reader.ReadExponentialGolomb(); 70 if (sps_->separate_colour_plane_flag == 1) { 71 // colour_plane_id 72 slice_reader.ConsumeBits(2); 73 } 74 // frame_num: u(v) 75 // Represented by log2_max_frame_num bits. 76 slice_reader.ConsumeBits(sps_->log2_max_frame_num); 77 bool field_pic_flag = false; 78 if (sps_->frame_mbs_only_flag == 0) { 79 // field_pic_flag: u(1) 80 field_pic_flag = slice_reader.Read<bool>(); 81 if (field_pic_flag) { 82 // bottom_field_flag: u(1) 83 slice_reader.ConsumeBits(1); 84 } 85 } 86 if (is_idr) { 87 // idr_pic_id: ue(v) 88 slice_reader.ReadExponentialGolomb(); 89 } 90 // pic_order_cnt_lsb: u(v) 91 // Represented by sps_.log2_max_pic_order_cnt_lsb bits. 92 if (sps_->pic_order_cnt_type == 0) { 93 slice_reader.ConsumeBits(sps_->log2_max_pic_order_cnt_lsb); 94 if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) { 95 // delta_pic_order_cnt_bottom: se(v) 96 slice_reader.ReadExponentialGolomb(); 97 } 98 } 99 if (sps_->pic_order_cnt_type == 1 && 100 !sps_->delta_pic_order_always_zero_flag) { 101 // delta_pic_order_cnt[0]: se(v) 102 slice_reader.ReadExponentialGolomb(); 103 if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) { 104 // delta_pic_order_cnt[1]: se(v) 105 slice_reader.ReadExponentialGolomb(); 106 } 107 } 108 if (pps_->redundant_pic_cnt_present_flag) { 109 // redundant_pic_cnt: ue(v) 110 slice_reader.ReadExponentialGolomb(); 111 } 112 if (slice_type == H264::SliceType::kB) { 113 // direct_spatial_mv_pred_flag: u(1) 114 slice_reader.ConsumeBits(1); 115 } 116 switch (slice_type) { 117 case H264::SliceType::kP: 118 case H264::SliceType::kB: 119 case H264::SliceType::kSp: 120 // num_ref_idx_active_override_flag: u(1) 121 if (slice_reader.Read<bool>()) { 122 // num_ref_idx_l0_active_minus1: ue(v) 123 num_ref_idx_l0_active_minus1 = slice_reader.ReadExponentialGolomb(); 124 if (!slice_reader.Ok() || 125 num_ref_idx_l0_active_minus1 > H264::kMaxReferenceIndex) { 126 return kInvalidStream; 127 } 128 if (slice_type == H264::SliceType::kB) { 129 // num_ref_idx_l1_active_minus1: ue(v) 130 num_ref_idx_l1_active_minus1 = slice_reader.ReadExponentialGolomb(); 131 if (!slice_reader.Ok() || 132 num_ref_idx_l1_active_minus1 > H264::kMaxReferenceIndex) { 133 return kInvalidStream; 134 } 135 } 136 } 137 break; 138 default: 139 break; 140 } 141 if (!slice_reader.Ok()) { 142 return kInvalidStream; 143 } 144 // assume nal_unit_type != 20 && nal_unit_type != 21: 145 if (nalu_type == 20 || nalu_type == 21) { 146 RTC_LOG(LS_ERROR) << "Unsupported nal unit type."; 147 return kUnsupportedStream; 148 } 149 // if (nal_unit_type == 20 || nal_unit_type == 21) 150 // ref_pic_list_mvc_modification() 151 // else 152 { 153 // ref_pic_list_modification(): 154 // `slice_type` checks here don't use named constants as they aren't named 155 // in the spec for this segment. Keeping them consistent makes it easier to 156 // verify that they are both the same. 157 if (slice_type % 5 != 2 && slice_type % 5 != 4) { 158 // ref_pic_list_modification_flag_l0: u(1) 159 if (slice_reader.Read<bool>()) { 160 uint32_t modification_of_pic_nums_idc; 161 do { 162 // modification_of_pic_nums_idc: ue(v) 163 modification_of_pic_nums_idc = slice_reader.ReadExponentialGolomb(); 164 if (modification_of_pic_nums_idc == 0 || 165 modification_of_pic_nums_idc == 1) { 166 // abs_diff_pic_num_minus1: ue(v) 167 slice_reader.ReadExponentialGolomb(); 168 } else if (modification_of_pic_nums_idc == 2) { 169 // long_term_pic_num: ue(v) 170 slice_reader.ReadExponentialGolomb(); 171 } 172 } while (modification_of_pic_nums_idc != 3 && slice_reader.Ok()); 173 } 174 } 175 if (slice_type % 5 == 1) { 176 // ref_pic_list_modification_flag_l1: u(1) 177 if (slice_reader.Read<bool>()) { 178 uint32_t modification_of_pic_nums_idc; 179 do { 180 // modification_of_pic_nums_idc: ue(v) 181 modification_of_pic_nums_idc = slice_reader.ReadExponentialGolomb(); 182 if (modification_of_pic_nums_idc == 0 || 183 modification_of_pic_nums_idc == 1) { 184 // abs_diff_pic_num_minus1: ue(v) 185 slice_reader.ReadExponentialGolomb(); 186 } else if (modification_of_pic_nums_idc == 2) { 187 // long_term_pic_num: ue(v) 188 slice_reader.ReadExponentialGolomb(); 189 } 190 } while (modification_of_pic_nums_idc != 3 && slice_reader.Ok()); 191 } 192 } 193 } 194 if (!slice_reader.Ok()) { 195 return kInvalidStream; 196 } 197 if ((pps_->weighted_pred_flag && (slice_type == H264::SliceType::kP || 198 slice_type == H264::SliceType::kSp)) || 199 (pps_->weighted_bipred_idc == 1 && slice_type == H264::SliceType::kB)) { 200 // pred_weight_table() 201 // luma_log2_weight_denom: ue(v) 202 slice_reader.ReadExponentialGolomb(); 203 204 // If separate_colour_plane_flag is equal to 0, ChromaArrayType is set equal 205 // to chroma_format_idc. Otherwise(separate_colour_plane_flag is equal to 206 // 1), ChromaArrayType is set equal to 0. 207 uint8_t chroma_array_type = 208 sps_->separate_colour_plane_flag == 0 ? sps_->chroma_format_idc : 0; 209 210 if (chroma_array_type != 0) { 211 // chroma_log2_weight_denom: ue(v) 212 slice_reader.ReadExponentialGolomb(); 213 } 214 215 for (uint32_t i = 0; i <= num_ref_idx_l0_active_minus1; i++) { 216 // luma_weight_l0_flag 2 u(1) 217 if (slice_reader.Read<bool>()) { 218 // luma_weight_l0[i] 2 se(v) 219 slice_reader.ReadExponentialGolomb(); 220 // luma_offset_l0[i] 2 se(v) 221 slice_reader.ReadExponentialGolomb(); 222 } 223 if (chroma_array_type != 0) { 224 // chroma_weight_l0_flag: u(1) 225 if (slice_reader.Read<bool>()) { 226 for (uint8_t j = 0; j < 2; j++) { 227 // chroma_weight_l0[i][j] 2 se(v) 228 slice_reader.ReadExponentialGolomb(); 229 // chroma_offset_l0[i][j] 2 se(v) 230 slice_reader.ReadExponentialGolomb(); 231 } 232 } 233 } 234 } 235 if (slice_type % 5 == 1) { 236 for (uint32_t i = 0; i <= num_ref_idx_l1_active_minus1; i++) { 237 // luma_weight_l1_flag: u(1) 238 if (slice_reader.Read<bool>()) { 239 // luma_weight_l1[i] 2 se(v) 240 slice_reader.ReadExponentialGolomb(); 241 // luma_offset_l1[i] 2 se(v) 242 slice_reader.ReadExponentialGolomb(); 243 } 244 if (chroma_array_type != 0) { 245 // chroma_weight_l1_flag: u(1) 246 if (slice_reader.Read<bool>()) { 247 for (uint8_t j = 0; j < 2; j++) { 248 // chroma_weight_l1[i][j] 2 se(v) 249 slice_reader.ReadExponentialGolomb(); 250 // chroma_offset_l1[i][j] 2 se(v) 251 slice_reader.ReadExponentialGolomb(); 252 } 253 } 254 } 255 } 256 } 257 } 258 if (nal_ref_idc != 0) { 259 // dec_ref_pic_marking(): 260 if (is_idr) { 261 // no_output_of_prior_pics_flag: u(1) 262 // long_term_reference_flag: u(1) 263 slice_reader.ConsumeBits(2); 264 } else { 265 // adaptive_ref_pic_marking_mode_flag: u(1) 266 if (slice_reader.Read<bool>()) { 267 uint32_t memory_management_control_operation; 268 do { 269 // memory_management_control_operation: ue(v) 270 memory_management_control_operation = 271 slice_reader.ReadExponentialGolomb(); 272 if (memory_management_control_operation == 1 || 273 memory_management_control_operation == 3) { 274 // difference_of_pic_nums_minus1: ue(v) 275 slice_reader.ReadExponentialGolomb(); 276 } 277 if (memory_management_control_operation == 2) { 278 // long_term_pic_num: ue(v) 279 slice_reader.ReadExponentialGolomb(); 280 } 281 if (memory_management_control_operation == 3 || 282 memory_management_control_operation == 6) { 283 // long_term_frame_idx: ue(v) 284 slice_reader.ReadExponentialGolomb(); 285 } 286 if (memory_management_control_operation == 4) { 287 // max_long_term_frame_idx_plus1: ue(v) 288 slice_reader.ReadExponentialGolomb(); 289 } 290 } while (memory_management_control_operation != 0 && slice_reader.Ok()); 291 } 292 } 293 } 294 if (pps_->entropy_coding_mode_flag && slice_type != H264::SliceType::kI && 295 slice_type != H264::SliceType::kSi) { 296 // cabac_init_idc: ue(v) 297 slice_reader.ReadExponentialGolomb(); 298 } 299 300 int last_slice_qp_delta = slice_reader.ReadSignedExponentialGolomb(); 301 if (!slice_reader.Ok()) { 302 return kInvalidStream; 303 } 304 if (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue) { 305 // Something has gone wrong, and the parsed value is invalid. 306 RTC_LOG(LS_WARNING) << "Parsed QP value out of range."; 307 return kInvalidStream; 308 } 309 310 last_slice_qp_delta_ = last_slice_qp_delta; 311 return kOk; 312 } 313 314 void H264BitstreamParser::ParseSlice(ArrayView<const uint8_t> slice) { 315 if (slice.empty()) { 316 return; 317 } 318 H264::NaluType nalu_type = H264::ParseNaluType(slice[0]); 319 switch (nalu_type) { 320 case H264::NaluType::kSps: { 321 sps_ = SpsParser::ParseSps(slice.subview(H264::kNaluTypeSize)); 322 if (!sps_) 323 RTC_DLOG(LS_WARNING) << "Unable to parse SPS from H264 bitstream."; 324 break; 325 } 326 case H264::NaluType::kPps: { 327 pps_ = PpsParser::ParsePps(slice.subview(H264::kNaluTypeSize)); 328 if (!pps_) 329 RTC_DLOG(LS_WARNING) << "Unable to parse PPS from H264 bitstream."; 330 break; 331 } 332 case H264::NaluType::kAud: 333 case H264::NaluType::kFiller: 334 case H264::NaluType::kSei: 335 case H264::NaluType::kPrefix: 336 break; // Ignore these nalus, as we don't care about their contents. 337 default: 338 Result res = ParseNonParameterSetNalu(slice, nalu_type); 339 if (res != kOk) 340 RTC_DLOG(LS_INFO) << "Failed to parse bitstream. NAL type " 341 << static_cast<int>(nalu_type) << ", error: " << res; 342 break; 343 } 344 } 345 346 void H264BitstreamParser::ParseBitstream(ArrayView<const uint8_t> bitstream) { 347 std::vector<H264::NaluIndex> nalu_indices = H264::FindNaluIndices(bitstream); 348 for (const H264::NaluIndex& index : nalu_indices) 349 ParseSlice( 350 bitstream.subview(index.payload_start_offset, index.payload_size)); 351 } 352 353 std::optional<int> H264BitstreamParser::GetLastSliceQp() const { 354 if (!last_slice_qp_delta_ || !pps_) 355 return std::nullopt; 356 const int qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_; 357 if (qp < kMinQpValue || qp > kMaxQpValue) { 358 RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream."; 359 return std::nullopt; 360 } 361 return qp; 362 } 363 364 } // namespace webrtc