H265.h (14500B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #ifndef DOM_MEDIA_PLATFORMS_AGNOSTIC_BYTESTREAMS_H265_H_ 6 #define DOM_MEDIA_PLATFORMS_AGNOSTIC_BYTESTREAMS_H265_H_ 7 8 #include <stdint.h> 9 10 #include "mozilla/CheckedInt.h" 11 #include "mozilla/Maybe.h" 12 #include "mozilla/Result.h" 13 #include "mozilla/Span.h" 14 #include "mozilla/gfx/Point.h" 15 #include "nsStringFwd.h" 16 #include "nsTArray.h" 17 18 namespace mozilla { 19 20 class BitReader; 21 class MediaByteBuffer; 22 class MediaRawData; 23 24 // Most classes in this file are implemented according to the H265 spec 25 // (https://www.itu.int/rec/T-REC-H.265-202108-I/en), except the HVCCConfig, 26 // which is in the ISO/IEC 14496-15. To make it easier to read the 27 // implementation with the spec, the naming style in this file follows the spec 28 // instead of our usual style. 29 30 enum { 31 kMaxLongTermRefPicSets = 32, // See num_long_term_ref_pics_sps 32 kMaxShortTermRefPicSets = 64, // See num_short_term_ref_pic_sets 33 kMaxSubLayers = 7, // See [v/s]ps_max_sub_layers_minus1 34 }; 35 36 // H265NALU represents NALU data (Spec 7.3.1 NAL unit syntax) for convenient 37 // access. In addition, this class does not own the raw RBSP data. Ensure that 38 // the original data source remains valid when accessing `mNALU`. 39 class H265NALU final { 40 public: 41 H265NALU(const uint8_t* aData, uint32_t aByteSize); 42 H265NALU() = default; 43 44 // Table 7-1 45 enum NAL_TYPES { 46 TRAIL_N = 0, 47 TRAIL_R = 1, 48 TSA_N = 2, 49 TSA_R = 3, 50 STSA_N = 4, 51 STSA_R = 5, 52 RADL_N = 6, 53 RADL_R = 7, 54 RASL_N = 8, 55 RASL_R = 9, 56 RSV_VCL_N10 = 10, 57 RSV_VCL_R11 = 11, 58 RSV_VCL_N12 = 12, 59 RSV_VCL_R13 = 13, 60 RSV_VCL_N14 = 14, 61 RSV_VCL_R15 = 15, 62 BLA_W_LP = 16, 63 BLA_W_RADL = 17, 64 BLA_N_LP = 18, 65 IDR_W_RADL = 19, 66 IDR_N_LP = 20, 67 CRA_NUT = 21, 68 RSV_IRAP_VCL22 = 22, 69 RSV_IRAP_VCL23 = 23, 70 RSV_VCL24 = 24, 71 RSV_VCL25 = 25, 72 RSV_VCL26 = 26, 73 RSV_VCL27 = 27, 74 RSV_VCL28 = 28, 75 RSV_VCL29 = 29, 76 RSV_VCL30 = 30, 77 RSV_VCL31 = 31, 78 VPS_NUT = 32, 79 SPS_NUT = 33, 80 PPS_NUT = 34, 81 AUD_NUT = 35, 82 EOS_NUT = 36, 83 EOB_NUT = 37, 84 FD_NUT = 38, 85 PREFIX_SEI_NUT = 39, 86 SUFFIX_SEI_NUT = 40, 87 RSV_NVCL41 = 41, 88 RSV_NVCL42 = 42, 89 RSV_NVCL43 = 43, 90 RSV_NVCL44 = 44, 91 RSV_NVCL45 = 45, 92 RSV_NVCL46 = 46, 93 RSV_NVCL47 = 47, 94 UNSPEC48 = 48, 95 UNSPEC49 = 49, 96 UNSPEC50 = 50, 97 UNSPEC51 = 51, 98 UNSPEC52 = 52, 99 UNSPEC53 = 53, 100 UNSPEC54 = 54, 101 UNSPEC55 = 55, 102 UNSPEC56 = 56, 103 UNSPEC57 = 57, 104 UNSPEC58 = 58, 105 UNSPEC59 = 59, 106 UNSPEC60 = 60, 107 UNSPEC61 = 61, 108 UNSPEC62 = 62, 109 UNSPEC63 = 63, 110 }; 111 112 bool IsIframe() const { 113 return mNalUnitType == NAL_TYPES::IDR_W_RADL || 114 mNalUnitType == NAL_TYPES::IDR_N_LP; 115 } 116 117 bool IsSPS() const { return mNalUnitType == NAL_TYPES::SPS_NUT; } 118 bool IsVPS() const { return mNalUnitType == NAL_TYPES::VPS_NUT; } 119 bool IsPPS() const { return mNalUnitType == NAL_TYPES::PPS_NUT; } 120 bool IsSEI() const { 121 return mNalUnitType == NAL_TYPES::PREFIX_SEI_NUT || 122 mNalUnitType == NAL_TYPES::SUFFIX_SEI_NUT; 123 } 124 125 uint8_t mNalUnitType; 126 uint8_t mNuhLayerId; 127 uint8_t mNuhTemporalIdPlus1; 128 // This contain the full content of NALU, which can be used to decode rbsp. 129 const Span<const uint8_t> mNALU; 130 }; 131 132 // H265 spec, 7.3.3 Profile, tier and level syntax 133 struct H265ProfileTierLevel final { 134 H265ProfileTierLevel() = default; 135 136 bool operator==(const H265ProfileTierLevel& aOther) const; 137 138 enum H265ProfileIdc { 139 kProfileIdcMain = 1, 140 kProfileIdcMain10 = 2, 141 kProfileIdcMainStill = 3, 142 kProfileIdcRangeExtensions = 4, 143 kProfileIdcHighThroughput = 5, 144 kProfileIdcMultiviewMain = 6, 145 kProfileIdcScalableMain = 7, 146 kProfileIdc3dMain = 8, 147 kProfileIdcScreenContentCoding = 9, 148 kProfileIdcScalableRangeExtensions = 10, 149 kProfileIdcHighThroughputScreenContentCoding = 11, 150 }; 151 152 // From Table A.8 - General tier and level limits. 153 uint32_t GetMaxLumaPs() const; 154 155 // From A.4.2 - Profile-specific level limits for the video profiles. 156 uint32_t GetDpbMaxPicBuf() const; 157 158 // Syntax elements. 159 uint8_t general_profile_space = {}; 160 bool general_tier_flag = {}; 161 uint8_t general_profile_idc = {}; 162 uint32_t general_profile_compatibility_flags = {}; 163 bool general_progressive_source_flag = {}; 164 bool general_interlaced_source_flag = {}; 165 bool general_non_packed_constraint_flag = {}; 166 bool general_frame_only_constraint_flag = {}; 167 uint8_t general_level_idc = {}; 168 }; 169 170 // H265 spec, 7.3.7 Short-term reference picture set syntax 171 struct H265StRefPicSet final { 172 H265StRefPicSet() = default; 173 174 bool operator==(const H265StRefPicSet& aOther) const; 175 176 // Syntax elements. 177 uint32_t num_negative_pics = {}; 178 uint32_t num_positive_pics = {}; 179 180 // Calculated fields 181 // From the H265 spec 7.4.8 182 bool usedByCurrPicS0[kMaxShortTermRefPicSets] = {}; // (7-65) 183 bool usedByCurrPicS1[kMaxShortTermRefPicSets] = {}; // (7-66) 184 uint32_t deltaPocS0[kMaxShortTermRefPicSets] = {}; // (7-67) + (7-69) 185 uint32_t deltaPocS1[kMaxShortTermRefPicSets] = {}; // (7-68) + (7-70) 186 uint32_t numDeltaPocs = {}; // (7-72) 187 }; 188 189 // H265 spec, E.2.1 VUI parameters syntax 190 struct H265VUIParameters { 191 H265VUIParameters() = default; 192 193 bool operator==(const H265VUIParameters& aOther) const; 194 195 bool HasValidAspectRatio() const; 196 197 // This should only be called when VUI has a valid aspect ratio. 198 double GetPixelAspectRatio() const; 199 200 // Syntax elements. 201 bool aspect_ratio_info_present_flag = false; 202 uint32_t sar_width = {}; 203 uint32_t sar_height = {}; 204 bool video_full_range_flag = {}; 205 Maybe<uint8_t> colour_primaries; 206 Maybe<uint8_t> transfer_characteristics; 207 Maybe<uint8_t> matrix_coeffs; 208 209 // Not spec element. 210 bool mIsSARValid = false; 211 }; 212 213 // H265 spec, 7.3.2.2 Sequence parameter set RBSP syntax 214 struct H265SPS final { 215 H265SPS() = default; 216 217 bool operator==(const H265SPS& aOther) const; 218 bool operator!=(const H265SPS& aOther) const; 219 220 // Syntax elements. 221 uint8_t sps_video_parameter_set_id = {}; 222 uint8_t sps_max_sub_layers_minus1 = {}; 223 bool sps_temporal_id_nesting_flag = {}; 224 H265ProfileTierLevel profile_tier_level = {}; 225 uint32_t sps_seq_parameter_set_id = {}; 226 uint32_t chroma_format_idc = {}; 227 bool separate_colour_plane_flag = {}; 228 uint32_t pic_width_in_luma_samples = {}; 229 uint32_t pic_height_in_luma_samples = {}; 230 231 bool conformance_window_flag = {}; 232 uint32_t conf_win_left_offset = {}; 233 uint32_t conf_win_right_offset = {}; 234 uint32_t conf_win_top_offset = {}; 235 uint32_t conf_win_bottom_offset = {}; 236 237 uint32_t bit_depth_luma_minus8 = {}; 238 uint32_t bit_depth_chroma_minus8 = {}; 239 uint32_t log2_max_pic_order_cnt_lsb_minus4 = {}; 240 bool sps_sub_layer_ordering_info_present_flag = {}; 241 uint32_t sps_max_dec_pic_buffering_minus1[kMaxSubLayers] = {}; 242 uint32_t sps_max_num_reorder_pics[kMaxSubLayers] = {}; 243 uint32_t sps_max_latency_increase_plus1[kMaxSubLayers] = {}; 244 uint32_t log2_min_luma_coding_block_size_minus3 = {}; 245 uint32_t log2_diff_max_min_luma_coding_block_size = {}; 246 uint32_t log2_min_luma_transform_block_size_minus2 = {}; 247 uint32_t log2_diff_max_min_luma_transform_block_size = {}; 248 uint32_t max_transform_hierarchy_depth_inter = {}; 249 uint32_t max_transform_hierarchy_depth_intra = {}; 250 251 bool pcm_enabled_flag = {}; 252 uint8_t pcm_sample_bit_depth_luma_minus1 = {}; 253 uint8_t pcm_sample_bit_depth_chroma_minus1 = {}; 254 uint32_t log2_min_pcm_luma_coding_block_size_minus3 = {}; 255 uint32_t log2_diff_max_min_pcm_luma_coding_block_size = {}; 256 bool pcm_loop_filter_disabled_flag = {}; 257 258 uint32_t num_short_term_ref_pic_sets = {}; 259 H265StRefPicSet st_ref_pic_set[kMaxShortTermRefPicSets] = {}; 260 261 bool sps_temporal_mvp_enabled_flag = {}; 262 bool strong_intra_smoothing_enabled_flag = {}; 263 Maybe<H265VUIParameters> vui_parameters; 264 265 // Calculated fields 266 uint32_t subWidthC = {}; // From Table 6-1. 267 uint32_t subHeightC = {}; // From Table 6-1. 268 Maybe<uint32_t> mCroppedWidth; // Calculated by conformance_window_flag 269 Maybe<uint32_t> mCroppedHeight; // Calculated by conformance_window_flag 270 CheckedUint32 mDisplayWidth; // Per (E-68) + (E-69) 271 CheckedUint32 mDisplayHeight; // Per (E-70) + (E-71) 272 uint32_t maxDpbSize = {}; 273 274 // Often used information 275 uint32_t BitDepthLuma() const { return bit_depth_luma_minus8 + 8; } 276 uint32_t BitDepthChroma() const { return bit_depth_chroma_minus8 + 8; } 277 gfx::IntSize GetImageSize() const; 278 gfx::IntSize GetDisplaySize() const; 279 gfx::ColorDepth ColorDepth() const; 280 gfx::YUVColorSpace ColorSpace() const; 281 bool IsFullColorRange() const; 282 uint8_t ColorPrimaries() const; 283 uint8_t TransferFunction() const; 284 }; 285 286 // ISO/IEC 14496-15 : hvcC. 287 struct HVCCConfig final { 288 public: 289 static Result<HVCCConfig, nsresult> Parse( 290 const mozilla::MediaRawData* aSample); 291 static Result<HVCCConfig, nsresult> Parse( 292 const mozilla::MediaByteBuffer* aExtraData); 293 294 uint8_t NALUSize() const { return lengthSizeMinusOne + 1; } 295 uint32_t NumSPS() const; 296 bool HasSPS() const; 297 nsCString ToString() const; 298 299 // Returns the first available NALU of the specified type, or nothing if no 300 // such NALU is found. 301 Maybe<H265NALU> GetFirstAvaiableNALU(H265NALU::NAL_TYPES aType) const; 302 303 uint8_t configurationVersion; 304 uint8_t general_profile_space; 305 bool general_tier_flag; 306 uint8_t general_profile_idc; 307 uint32_t general_profile_compatibility_flags; 308 uint64_t general_constraint_indicator_flags; 309 uint8_t general_level_idc; 310 uint16_t min_spatial_segmentation_idc; 311 uint8_t parallelismType; 312 uint8_t chroma_format_idc; 313 uint8_t bit_depth_luma_minus8; 314 uint8_t bit_depth_chroma_minus8; 315 uint16_t avgFrameRate; 316 uint8_t constantFrameRate; 317 uint8_t numTemporalLayers; 318 bool temporalIdNested; 319 uint8_t lengthSizeMinusOne; 320 321 nsTArray<H265NALU> mNALUs; 322 323 // Keep the orginal buffer alive in order to let H265NALU always access to 324 // valid data if there is any NALU. 325 RefPtr<const MediaByteBuffer> mByteBuffer; 326 327 private: 328 HVCCConfig() = default; 329 }; 330 331 class SPSIterator final { 332 public: 333 explicit SPSIterator(const HVCCConfig& aConfig) 334 : mCurrentIdx(0), mConfig(aConfig) { 335 FindSPS(); 336 } 337 338 SPSIterator& operator++() { 339 mCurrentIdx++; 340 FindSPS(); 341 return *this; 342 } 343 344 explicit operator bool() const { return IsValid(); } 345 346 const H265NALU* operator*() const { 347 if (!IsValid()) { 348 return nullptr; 349 } 350 if (!mConfig.mNALUs[mCurrentIdx].IsSPS()) { 351 return nullptr; 352 } 353 return &mConfig.mNALUs[mCurrentIdx]; 354 } 355 356 private: 357 void FindSPS() { 358 Maybe<size_t> spsIdx; 359 for (auto idx = mCurrentIdx; idx < mConfig.mNALUs.Length(); idx++) { 360 if (mConfig.mNALUs[idx].IsSPS()) { 361 spsIdx = Some(idx); 362 break; 363 } 364 } 365 if (spsIdx) { 366 mCurrentIdx = *spsIdx; 367 } 368 } 369 370 bool IsValid() const { 371 return mCurrentIdx < mConfig.mNALUs.Length() && 372 mConfig.mNALUs[mCurrentIdx].IsSPS(); 373 } 374 375 size_t mCurrentIdx; 376 const HVCCConfig& mConfig; 377 }; 378 379 class H265 final { 380 public: 381 static Result<H265SPS, nsresult> DecodeSPSFromHVCCExtraData( 382 const mozilla::MediaByteBuffer* aExtraData); 383 static Result<H265SPS, nsresult> DecodeSPSFromSPSNALU( 384 const H265NALU& aSPSNALU); 385 386 // Extract SPS and PPS NALs from aSample by looking into each NALs. 387 static already_AddRefed<mozilla::MediaByteBuffer> ExtractHVCCExtraData( 388 const mozilla::MediaRawData* aSample); 389 390 // Return true if both extradata are equal. 391 static bool CompareExtraData(const mozilla::MediaByteBuffer* aExtraData1, 392 const mozilla::MediaByteBuffer* aExtraData2); 393 394 // Return the value of sps_max_dec_pic_buffering_minus1[0] + 1 from a valid 395 // SPS in the extradata, otherwise return 0. 396 static uint32_t ComputeMaxRefFrames( 397 const mozilla::MediaByteBuffer* aExtraData); 398 399 // Create a dummy extradata, useful to create a decoder and test the 400 // capabilities of the decoder. 401 static already_AddRefed<mozilla::MediaByteBuffer> CreateFakeExtraData(); 402 403 // Create new extradata with the essential information from the given 404 // HVCCConfig, excluding its original NALUs. The NALUs will be replaced by the 405 // given NALUS, which are usually SPS, PPS, VPS and SEI. 406 static already_AddRefed<mozilla::MediaByteBuffer> CreateNewExtraData( 407 const HVCCConfig& aConfig, const nsTArray<H265NALU>& aNALUs); 408 409 // Return true if the given sample is a keyframe. Return error if we can't 410 // determine the result. 411 static Result<bool, nsresult> IsKeyFrame( 412 const mozilla::MediaRawData* aSample); 413 414 private: 415 // Return RAW BYTE SEQUENCE PAYLOAD (rbsp) from NAL content. 416 static already_AddRefed<mozilla::MediaByteBuffer> DecodeNALUnit( 417 const Span<const uint8_t>& aNALU); 418 419 // Parse the profile level based on the H265 spec, 7.3.3. MUST use a bit 420 // reader which starts from the position of the first bit of the data. 421 static Result<Ok, nsresult> ParseProfileTierLevel( 422 BitReader& aReader, bool aProfilePresentFlag, 423 uint8_t aMaxNumSubLayersMinus1, H265ProfileTierLevel& aProfile); 424 425 // Parse the short-term reference picture set based on the H265 spec, 7.3.7. 426 // MUST use a bit reader which starts from the position of the first bit of 427 // the data. 428 static Result<Ok, nsresult> ParseStRefPicSet(BitReader& aReader, 429 uint32_t aStRpsIdx, 430 H265SPS& aSPS); 431 432 // Parse the VUI parameters based on the H265 spec, E.2.1. MUST use a bit 433 // reader which starts from the position of the first bit of the data. 434 static Result<Ok, nsresult> ParseVuiParameters(BitReader& aReader, 435 H265SPS& aSPS); 436 437 // Parse and ignore the structure. MUST use a bitreader which starts from the 438 // position of the first bit of the data. 439 static Result<Ok, nsresult> ParseAndIgnoreScalingListData(BitReader& aReader); 440 static Result<Ok, nsresult> ParseAndIgnoreHrdParameters( 441 BitReader& aReader, bool aCommonInfPresentFlag, 442 int aMaxNumSubLayersMinus1); 443 static Result<Ok, nsresult> ParseAndIgnoreSubLayerHrdParameters( 444 BitReader& aReader, int aCpbCnt, bool aSubPicHrdParamsPresentFlag); 445 }; 446 447 } // namespace mozilla 448 449 #endif // DOM_MEDIA_PLATFORMS_AGNOSTIC_BYTESTREAMS_H265_H_