DecoderData.cpp (14910B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "DecoderData.h" 6 7 #include "Adts.h" 8 #include "AnnexB.h" 9 #include "BufferReader.h" 10 #include "MP4Metadata.h" 11 #include "VideoUtils.h" 12 #include "mozilla/EndianUtils.h" 13 #include "mozilla/Logging.h" 14 #include "mozilla/glean/DomMediaMp4Metrics.h" 15 #include "mp4parse.h" 16 17 #define LOG(...) \ 18 MOZ_LOG(gMP4MetadataLog, mozilla::LogLevel::Debug, (__VA_ARGS__)) 19 20 using mozilla::media::TimeUnit; 21 22 namespace mozilla { 23 24 mozilla::Result<mozilla::Ok, nsresult> CryptoFile::DoUpdate( 25 const uint8_t* aData, size_t aLength) { 26 BufferReader reader(aData, aLength); 27 while (reader.Remaining()) { 28 PsshInfo psshInfo; 29 if (!reader.ReadArray(psshInfo.uuid, 16)) { 30 return mozilla::Err(NS_ERROR_FAILURE); 31 } 32 33 if (!reader.CanReadType<uint32_t>()) { 34 return mozilla::Err(NS_ERROR_FAILURE); 35 } 36 auto length = reader.ReadType<uint32_t>(); 37 38 if (!reader.ReadArray(psshInfo.data, length)) { 39 return mozilla::Err(NS_ERROR_FAILURE); 40 } 41 pssh.AppendElement(std::move(psshInfo)); 42 } 43 return mozilla::Ok(); 44 } 45 46 static MediaResult UpdateTrackProtectedInfo(mozilla::TrackInfo& aConfig, 47 const Mp4parseSinfInfo& aSinf) { 48 if (aSinf.is_encrypted != 0) { 49 if (aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CENC) { 50 aConfig.mCrypto.mCryptoScheme = CryptoScheme::Cenc; 51 } else if (aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CBCS) { 52 aConfig.mCrypto.mCryptoScheme = CryptoScheme::Cbcs; 53 } else { 54 // Unsupported encryption type; 55 return MediaResult( 56 NS_ERROR_DOM_MEDIA_METADATA_ERR, 57 RESULT_DETAIL( 58 "Unsupported encryption scheme encountered aSinf.scheme_type=%d", 59 static_cast<int>(aSinf.scheme_type))); 60 } 61 aConfig.mCrypto.mIVSize = aSinf.iv_size; 62 aConfig.mCrypto.mKeyId.AppendElements(aSinf.kid.data, aSinf.kid.length); 63 aConfig.mCrypto.mCryptByteBlock = aSinf.crypt_byte_block; 64 aConfig.mCrypto.mSkipByteBlock = aSinf.skip_byte_block; 65 aConfig.mCrypto.mConstantIV.AppendElements(aSinf.constant_iv.data, 66 aSinf.constant_iv.length); 67 } 68 return NS_OK; 69 } 70 71 // Verify various information shared by Mp4ParseTrackAudioInfo and 72 // Mp4ParseTrackVideoInfo and record telemetry on that info. Returns an 73 // appropriate MediaResult indicating if the info is valid or not. 74 // This verifies: 75 // - That we have a sample_info_count > 0 (valid tracks should have at least one 76 // sample description entry) 77 // - That only a single codec is used across all sample infos, as we don't 78 // handle multiple. 79 // - If more than one sample information structures contain crypto info. This 80 // case is not fatal (we don't return an error), but does record telemetry 81 // to help judge if we need more handling in gecko for multiple crypto. 82 // 83 // Telemetry is also recorded on the above. As of writing, the 84 // telemetry is recorded to give us early warning if MP4s exist that we're not 85 // handling. Note, if adding new checks and telemetry to this function, 86 // telemetry should be recorded before returning to ensure it is gathered. 87 template <typename Mp4ParseTrackAudioOrVideoInfo> 88 static MediaResult VerifyAudioOrVideoInfoAndRecordTelemetry( 89 Mp4ParseTrackAudioOrVideoInfo* audioOrVideoInfo) { 90 glean::media_mp4_parse::num_sample_description_entries.AccumulateSingleSample( 91 audioOrVideoInfo->sample_info_count); 92 93 bool hasMultipleCodecs = false; 94 uint32_t cryptoCount = 0; 95 Mp4parseCodec codecType = audioOrVideoInfo->sample_info[0].codec_type; 96 for (uint32_t i = 0; i < audioOrVideoInfo->sample_info_count; i++) { 97 if (audioOrVideoInfo->sample_info[0].codec_type != codecType) { 98 hasMultipleCodecs = true; 99 } 100 101 // Update our encryption info if any is present on the sample info. 102 if (audioOrVideoInfo->sample_info[i].protected_data.is_encrypted) { 103 cryptoCount += 1; 104 } 105 } 106 107 glean::media_mp4_parse::sample_description_entries_have_multiple_codecs 108 .EnumGet(static_cast<glean::media_mp4_parse:: 109 SampleDescriptionEntriesHaveMultipleCodecsLabel>( 110 hasMultipleCodecs)) 111 .Add(); 112 113 // Accumulate if we have multiple (2 or more) crypto entries. 114 // TODO(1715283): rework this to count number of crypto entries + gather 115 // richer data. 116 glean::media_mp4_parse::sample_description_entries_have_multiple_crypto 117 .EnumGet(static_cast<glean::media_mp4_parse:: 118 SampleDescriptionEntriesHaveMultipleCryptoLabel>( 119 cryptoCount >= 2)) 120 .Add(); 121 122 if (audioOrVideoInfo->sample_info_count == 0) { 123 return MediaResult( 124 NS_ERROR_DOM_MEDIA_METADATA_ERR, 125 RESULT_DETAIL("Got 0 sample info while verifying track.")); 126 } 127 128 if (hasMultipleCodecs) { 129 // Different codecs in a single track. We don't handle this. 130 return MediaResult( 131 NS_ERROR_DOM_MEDIA_METADATA_ERR, 132 RESULT_DETAIL("Multiple codecs encountered while verifying track.")); 133 } 134 135 return NS_OK; 136 } 137 138 MediaResult MP4AudioInfo::Update(const Mp4parseTrackInfo* aTrack, 139 const Mp4parseTrackAudioInfo* aAudio, 140 const IndiceWrapper* aIndices) { 141 auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(aAudio); 142 NS_ENSURE_SUCCESS(rv, rv); 143 144 Mp4parseCodec codecType = aAudio->sample_info[0].codec_type; 145 for (uint32_t i = 0; i < aAudio->sample_info_count; i++) { 146 if (aAudio->sample_info[i].protected_data.is_encrypted) { 147 auto rv = UpdateTrackProtectedInfo(*this, 148 aAudio->sample_info[i].protected_data); 149 NS_ENSURE_SUCCESS(rv, rv); 150 break; 151 } 152 } 153 154 // We assume that the members of the first sample info are representative of 155 // the entire track. This code will need to be updated should this assumption 156 // ever not hold. E.g. if we need to handle different codecs in a single 157 // track, or if we have different numbers or channels in a single track. 158 Mp4parseByteData mp4ParseSampleCodecSpecific = 159 aAudio->sample_info[0].codec_specific_config; 160 Mp4parseByteData extraData = aAudio->sample_info[0].extra_data; 161 MOZ_ASSERT(mCodecSpecificConfig.is<NoCodecSpecificData>(), 162 "Should have no codec specific data yet"); 163 if (codecType == MP4PARSE_CODEC_OPUS) { 164 mMimeType = "audio/opus"_ns; 165 OpusCodecSpecificData opusCodecSpecificData{}; 166 // The Opus decoder expects the container's codec delay or 167 // pre-skip value, in microseconds, as a 64-bit int at the 168 // start of the codec-specific config blob. 169 if (mp4ParseSampleCodecSpecific.data && 170 mp4ParseSampleCodecSpecific.length >= 12) { 171 uint16_t preskip = mozilla::LittleEndian::readUint16( 172 mp4ParseSampleCodecSpecific.data + 10); 173 opusCodecSpecificData.mContainerCodecDelayFrames = preskip; 174 LOG("Opus stream in MP4 container, %" PRId64 175 " microseconds of encoder delay (%" PRIu16 ").", 176 opusCodecSpecificData.mContainerCodecDelayFrames, preskip); 177 } else { 178 // This file will error later as it will be rejected by the opus decoder. 179 opusCodecSpecificData.mContainerCodecDelayFrames = 0; 180 } 181 opusCodecSpecificData.mHeadersBinaryBlob->AppendElements( 182 mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); 183 mCodecSpecificConfig = 184 AudioCodecSpecificVariant{std::move(opusCodecSpecificData)}; 185 } else if (codecType == MP4PARSE_CODEC_AAC || 186 codecType == MP4PARSE_CODEC_XHEAAC) { 187 mMimeType = "audio/mp4a-latm"_ns; 188 int64_t codecDelayUS = aTrack->media_time; 189 double USECS_PER_S = 1e6; 190 // We can't use mozilla::UsecsToFrames here because we need to round, and it 191 // floors. 192 uint32_t encoderDelayFrameCount = 0; 193 if (codecDelayUS > 0) { 194 encoderDelayFrameCount = static_cast<uint32_t>( 195 std::lround(static_cast<double>(codecDelayUS) * 196 aAudio->sample_info->sample_rate / USECS_PER_S)); 197 LOG("AAC stream in MP4 container, %" PRIu32 " frames of encoder delay.", 198 encoderDelayFrameCount); 199 } 200 201 uint64_t mediaFrameCount = 0; 202 // Pass the padding number, in frames, to the AAC decoder as well. 203 if (aIndices) { 204 MP4SampleIndex::Indice firstIndice = {0}; 205 MP4SampleIndex::Indice lastIndice = {0}; 206 bool rv = aIndices->GetIndice(0, firstIndice); 207 rv |= aIndices->GetIndice(aIndices->Length() - 1, lastIndice); 208 if (rv) { 209 if (firstIndice.start_composition > lastIndice.end_composition) { 210 return MediaResult( 211 NS_ERROR_DOM_MEDIA_METADATA_ERR, 212 RESULT_DETAIL("Inconsistent start and end time in index")); 213 } 214 // The `end_composition` member of the very last index member is the 215 // duration of the media in microseconds, excluding decoder delay and 216 // padding. Convert to frames and give to the decoder so that trimming 217 // can be done properly. 218 mediaFrameCount = 219 lastIndice.end_composition - firstIndice.start_composition; 220 LOG("AAC stream in MP4 container, total media duration is %" PRIu64 221 " frames", 222 mediaFrameCount); 223 } else { 224 LOG("AAC stream in MP4 container, couldn't determine total media time"); 225 } 226 } 227 228 AacCodecSpecificData aacCodecSpecificData{}; 229 230 aacCodecSpecificData.mEncoderDelayFrames = encoderDelayFrameCount; 231 aacCodecSpecificData.mMediaFrameCount = mediaFrameCount; 232 233 // codec specific data is used to store the DecoderConfigDescriptor. 234 aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->AppendElements( 235 mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); 236 // extra data stores the ES_Descriptor. 237 aacCodecSpecificData.mEsDescriptorBinaryBlob->AppendElements( 238 extraData.data, extraData.length); 239 mCodecSpecificConfig = 240 AudioCodecSpecificVariant{std::move(aacCodecSpecificData)}; 241 } else if (codecType == MP4PARSE_CODEC_FLAC) { 242 MOZ_ASSERT(extraData.length == 0, 243 "FLAC doesn't expect extra data so doesn't handle it!"); 244 mMimeType = "audio/flac"_ns; 245 FlacCodecSpecificData flacCodecSpecificData{}; 246 flacCodecSpecificData.mStreamInfoBinaryBlob->AppendElements( 247 mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); 248 mCodecSpecificConfig = 249 AudioCodecSpecificVariant{std::move(flacCodecSpecificData)}; 250 } else if (codecType == MP4PARSE_CODEC_MP3) { 251 // mp3 in mp4 can contain ES_Descriptor info (it also has a flash in mp4 252 // specific box, which the rust parser recognizes). However, we don't 253 // handle any such data here. 254 mMimeType = "audio/mpeg"_ns; 255 // TODO(bug 1705812): parse the encoder delay values from the mp4. 256 mCodecSpecificConfig = AudioCodecSpecificVariant{Mp3CodecSpecificData{}}; 257 } 258 259 mRate = aAudio->sample_info[0].sample_rate; 260 mChannels = aAudio->sample_info[0].channels; 261 mBitDepth = aAudio->sample_info[0].bit_depth; 262 mExtendedProfile = 263 AssertedCast<int8_t>(aAudio->sample_info[0].extended_profile); 264 if (aTrack->duration > TimeUnit::MaxTicks()) { 265 mDuration = TimeUnit::FromInfinity(); 266 } else { 267 mDuration = 268 TimeUnit(AssertedCast<int64_t>(aTrack->duration), aTrack->time_scale); 269 } 270 mMediaTime = TimeUnit(aTrack->media_time, aTrack->time_scale); 271 mTrackId = aTrack->track_id; 272 273 // In stagefright, mProfile is kKeyAACProfile, mExtendedProfile is kKeyAACAOT. 274 if (aAudio->sample_info[0].profile <= 4) { 275 mProfile = AssertedCast<int8_t>(aAudio->sample_info[0].profile); 276 } 277 278 if (mCodecSpecificConfig.is<NoCodecSpecificData>()) { 279 // Handle codecs that are not explicitly handled above. 280 MOZ_ASSERT( 281 extraData.length == 0, 282 "Codecs that use extra data should be explicitly handled already"); 283 AudioCodecSpecificBinaryBlob codecSpecificBinaryBlob; 284 // No codec specific metadata set, use the generic form. 285 codecSpecificBinaryBlob.mBinaryBlob->AppendElements( 286 mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); 287 mCodecSpecificConfig = 288 AudioCodecSpecificVariant{std::move(codecSpecificBinaryBlob)}; 289 } 290 291 return NS_OK; 292 } 293 294 bool MP4AudioInfo::IsValid() const { 295 return mChannels > 0 && mRate > 0 && 296 // Accept any mime type here, but if it's aac, validate the profile. 297 (!mMimeType.EqualsLiteral("audio/mp4a-latm") || mProfile > 0 || 298 mExtendedProfile > 0); 299 } 300 301 MediaResult MP4VideoInfo::Update(const Mp4parseTrackInfo* track, 302 const Mp4parseTrackVideoInfo* video) { 303 auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(video); 304 NS_ENSURE_SUCCESS(rv, rv); 305 306 Mp4parseCodec codecType = video->sample_info[0].codec_type; 307 for (uint32_t i = 0; i < video->sample_info_count; i++) { 308 if (video->sample_info[i].protected_data.is_encrypted) { 309 auto rv = 310 UpdateTrackProtectedInfo(*this, video->sample_info[i].protected_data); 311 NS_ENSURE_SUCCESS(rv, rv); 312 break; 313 } 314 } 315 316 // We assume that the members of the first sample info are representative of 317 // the entire track. This code will need to be updated should this assumption 318 // ever not hold. E.g. if we need to handle different codecs in a single 319 // track, or if we have different numbers or channels in a single track. 320 if (codecType == MP4PARSE_CODEC_AVC) { 321 mMimeType = "video/avc"_ns; 322 } else if (codecType == MP4PARSE_CODEC_VP9) { 323 mMimeType = "video/vp9"_ns; 324 } else if (codecType == MP4PARSE_CODEC_AV1) { 325 mMimeType = "video/av1"_ns; 326 } else if (codecType == MP4PARSE_CODEC_MP4V) { 327 mMimeType = "video/mp4v-es"_ns; 328 } else if (codecType == MP4PARSE_CODEC_HEVC) { 329 mMimeType = "video/hevc"_ns; 330 } 331 mTrackId = track->track_id; 332 if (track->duration > TimeUnit::MaxTicks()) { 333 mDuration = TimeUnit::FromInfinity(); 334 } else { 335 mDuration = 336 TimeUnit(AssertedCast<int64_t>(track->duration), track->time_scale); 337 } 338 mMediaTime = TimeUnit(track->media_time, track->time_scale); 339 mDisplay.width = AssertedCast<int32_t>(video->display_width); 340 mDisplay.height = AssertedCast<int32_t>(video->display_height); 341 mImage.width = video->sample_info[0].image_width; 342 mImage.height = video->sample_info[0].image_height; 343 mRotation = ToSupportedRotation(video->rotation); 344 Mp4parseByteData extraData = video->sample_info[0].extra_data; 345 // If length is 0 we append nothing 346 mExtraData->AppendElements(extraData.data, extraData.length); 347 return NS_OK; 348 } 349 350 bool MP4VideoInfo::IsValid() const { 351 return (mDisplay.width > 0 && mDisplay.height > 0) || 352 (mImage.width > 0 && mImage.height > 0); 353 } 354 355 } // namespace mozilla 356 357 #undef LOG