FFmpegVideoEncoder.cpp (37394B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "FFmpegVideoEncoder.h" 8 9 #include <aom/aomcx.h> 10 11 #include <algorithm> 12 13 #include "AnnexB.h" 14 #include "BufferReader.h" 15 #include "EncoderConfig.h" 16 #include "FFmpegEncoderModule.h" 17 #include "FFmpegLog.h" 18 #include "FFmpegRuntimeLinker.h" 19 #include "FFmpegUtils.h" 20 #include "H264.h" 21 #include "ImageContainer.h" 22 #include "ImageConversion.h" 23 #include "libavutil/error.h" 24 #include "libavutil/pixfmt.h" 25 #include "libyuv.h" 26 #include "mozilla/StaticPrefs_media.h" 27 #include "mozilla/dom/ImageBitmapBinding.h" 28 #include "mozilla/dom/ImageUtils.h" 29 #include "mozilla/dom/VideoFrameBinding.h" 30 #ifdef MOZ_WIDGET_ANDROID 31 # include "mozilla/gfx/gfxVars.h" 32 #endif 33 #include "nsPrintfCString.h" 34 35 // The ffmpeg namespace is introduced to avoid the PixelFormat's name conflicts 36 // with MediaDataEncoder::PixelFormat in MediaDataEncoder class scope. 37 namespace ffmpeg { 38 39 // TODO: WebCodecs' I420A should map to MediaDataEncoder::PixelFormat and then 40 // to AV_PIX_FMT_YUVA420P here. 
// Map the per-version libavcodec pixel-format enum (plain `PixelFormat`
// before lavc 54, `AVPixelFormat` afterwards) onto one set of
// FFMPEG_PIX_FMT_* aliases so the rest of this file is version-agnostic.
#if LIBAVCODEC_VERSION_MAJOR < 54
using FFmpegPixelFormat = enum PixelFormat;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NONE = FFmpegPixelFormat::PIX_FMT_NONE;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGBA = FFmpegPixelFormat::PIX_FMT_RGBA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGRA = FFmpegPixelFormat::PIX_FMT_BGRA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGB24 = FFmpegPixelFormat::PIX_FMT_RGB24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGR24 = FFmpegPixelFormat::PIX_FMT_BGR24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV444P =
    FFmpegPixelFormat::PIX_FMT_YUV444P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV422P =
    FFmpegPixelFormat::PIX_FMT_YUV422P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV420P =
    FFmpegPixelFormat::PIX_FMT_YUV420P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV12 = FFmpegPixelFormat::PIX_FMT_NV12;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV21 = FFmpegPixelFormat::PIX_FMT_NV21;
#else
using FFmpegPixelFormat = enum AVPixelFormat;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NONE =
    FFmpegPixelFormat::AV_PIX_FMT_NONE;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGBA =
    FFmpegPixelFormat::AV_PIX_FMT_RGBA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGRA =
    FFmpegPixelFormat::AV_PIX_FMT_BGRA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGB24 =
    FFmpegPixelFormat::AV_PIX_FMT_RGB24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGR24 =
    FFmpegPixelFormat::AV_PIX_FMT_BGR24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV444P =
    FFmpegPixelFormat::AV_PIX_FMT_YUV444P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV422P =
    FFmpegPixelFormat::AV_PIX_FMT_YUV422P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV420P =
    FFmpegPixelFormat::AV_PIX_FMT_YUV420P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV12 =
    FFmpegPixelFormat::AV_PIX_FMT_NV12;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV21 =
    FFmpegPixelFormat::AV_PIX_FMT_NV21;
#endif

// Human-readable description of aFormat, for logging only. Asserts (and
// returns "unsupported") for formats outside the alias set above.
static const char* GetPixelFormatString(FFmpegPixelFormat aFormat) {
  switch (aFormat) {
    case FFMPEG_PIX_FMT_NONE:
      return "none";
    case FFMPEG_PIX_FMT_RGBA:
      return "packed RGBA 8:8:8:8 (32bpp, RGBARGBA...)";
    case FFMPEG_PIX_FMT_BGRA:
      return "packed BGRA 8:8:8:8 (32bpp, BGRABGRA...)";
    case FFMPEG_PIX_FMT_RGB24:
      return "packed RGB 8:8:8 (24bpp, RGBRGB...)";
    case FFMPEG_PIX_FMT_BGR24:
      return "packed RGB 8:8:8 (24bpp, BGRBGR...)";
    case FFMPEG_PIX_FMT_YUV444P:
      return "planar YUV 4:4:4 (24bpp, 1 Cr & Cb sample per 1x1 Y samples)";
    case FFMPEG_PIX_FMT_YUV422P:
      return "planar YUV 4:2:2 (16bpp, 1 Cr & Cb sample per 2x1 Y samples)";
    case FFMPEG_PIX_FMT_YUV420P:
      return "planar YUV 4:2:0 (12bpp, 1 Cr & Cb sample per 2x2 Y samples)";
    case FFMPEG_PIX_FMT_NV12:
      return "planar YUV 4:2:0 (12bpp, 1 interleaved UV components per 1x1 Y "
             "samples)";
    case FFMPEG_PIX_FMT_NV21:
      return "planar YUV 4:2:0 (12bpp, 1 interleaved VU components per 1x1 Y "
             "samples)";
    default:
      break;
  }
  MOZ_ASSERT_UNREACHABLE("Unsupported pixel format");
  return "unsupported";
}

};  // namespace ffmpeg

namespace mozilla {

// One H.264 option expressed both as the numeric value stored on the
// AVCodecContext and as the string form passed to av_opt_set.
struct H264Setting {
  int mValue;
  nsCString mString;
};

// constexpr-friendly variant of H264Setting (nsLiteralCString is literal);
// get() materializes the runtime H264Setting.
struct H264LiteralSetting {
  int mValue;
  nsLiteralCString mString;
  H264Setting get() const { return {mValue, mString.AsString()}; }
};

// FF_PROFILE_* constants were renamed AV_PROFILE_* in lavc 62.
// Order matters: GetH264Profile indexes this array by position
// (0=baseline, 1=main, 2=extended, 3=high). EXTENDED has no string form.
#if LIBAVCODEC_VERSION_MAJOR < 62
static constexpr H264LiteralSetting H264Profiles[]{
    {FF_PROFILE_H264_BASELINE, "baseline"_ns},
    {FF_PROFILE_H264_MAIN, "main"_ns},
    {FF_PROFILE_H264_EXTENDED, ""_ns},
    {FF_PROFILE_H264_HIGH, "high"_ns}};
#else
static constexpr H264LiteralSetting H264Profiles[]{
    {AV_PROFILE_H264_BASELINE, "baseline"_ns},
    {AV_PROFILE_H264_MAIN, "main"_ns},
    {AV_PROFILE_H264_EXTENDED, ""_ns},
    {AV_PROFILE_H264_HIGH, "high"_ns}};
#endif

// Translate our H264_PROFILE enum into the matching H264Profiles entry, or
// Nothing() for an unknown profile.
static Maybe<H264Setting> GetH264Profile(const H264_PROFILE& aProfile) {
  switch (aProfile) {
    case H264_PROFILE::H264_PROFILE_UNKNOWN:
      return Nothing();
    case H264_PROFILE::H264_PROFILE_BASE:
      return Some(H264Profiles[0].get());
    case H264_PROFILE::H264_PROFILE_MAIN:
      return Some(H264Profiles[1].get());
    case H264_PROFILE::H264_PROFILE_EXTENDED:
      return Some(H264Profiles[2].get());
    case H264_PROFILE::H264_PROFILE_HIGH:
      return Some(H264Profiles[3].get());
    default:
      break;
  }
  MOZ_ASSERT_UNREACHABLE("undefined profile");
  return Nothing();
}

// Build the level setting: numeric enum value plus a dotted string form
// made by inserting '.' after the first digit (e.g. 31 -> "3.1").
// NOTE(review): assumes the level value is two digits — a one-digit value
// would yield e.g. "9." — verify H264_LEVEL's value range at the caller.
static Maybe<H264Setting> GetH264Level(const H264_LEVEL& aLevel) {
  int val = static_cast<int>(aLevel);
  nsPrintfCString str("%d", val);
  str.Insert('.', 1);
  return Some(H264Setting{val, str});
}

// libvpx-specific SVC data with no libaom equivalent (ts_layering_mode).
struct VPXSVCAppendix {
  uint8_t mLayeringMode;
};

// Codec-independent description of a temporal-SVC configuration, later
// serialized into a codec-private option string by GetSVCSettings().
struct SVCLayerSettings {
  using CodecAppendix = Variant<VPXSVCAppendix, aom_svc_params_t>;
  size_t mNumberSpatialLayers;
  size_t mNumberTemporalLayers;
  uint8_t mPeriodicity;
  nsTArray<uint8_t> mLayerIds;
  // libvpx: ts_rate_decimator, libaom: framerate_factor
  nsTArray<uint8_t> mRateDecimators;
  nsTArray<uint32_t> mTargetBitrates;
  Maybe<CodecAppendix> mCodecAppendix;
};

// Compute layer ids, rate decimators and per-layer target bitrates for the
// L1T2/L1T3 scalability modes. The bitrates are cumulative per layer (each
// entry includes all lower layers), ending at the full aBitPerSec target.
static SVCLayerSettings GetSVCLayerSettings(CodecType aCodec,
                                            const ScalabilityMode& aMode,
                                            uint32_t aBitPerSec) {
  // TODO: Apply more sophisticated bitrate allocation, like SvcRateAllocator:
  // https://searchfox.org/mozilla-central/rev/3bd65516eb9b3a9568806d846ba8c81a9402a885/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.h#26

  size_t layers = 0;
  const uint32_t kbps = aBitPerSec / 1000;  // ts_target_bitrate requires kbps.

  uint8_t periodicity;
  nsTArray<uint8_t> layerIds;
  nsTArray<uint8_t> rateDecimators;
  nsTArray<uint32_t> bitrates;

  Maybe<SVCLayerSettings::CodecAppendix> appendix;

  if (aMode == ScalabilityMode::L1T2) {
    // Two temporal layers. 0-1...
    //
    // Frame pattern:
    // Layer 0: |0| |2| |4| |6| |8|
    // Layer 1: | |1| |3| |5| |7| |

    layers = 2;

    // 2 frames per period.
    periodicity = 2;

    // Assign layer ids.
    layerIds.AppendElement(0);
    layerIds.AppendElement(1);

    // Set rate decimators.
    rateDecimators.AppendElement(2);
    rateDecimators.AppendElement(1);

    // Bitrate allocation: L0 - 60%, L1 - 40% (cumulative: 60%, 100%).
    bitrates.AppendElement(kbps * 3 / 5);
    bitrates.AppendElement(kbps);

    if (aCodec == CodecType::VP8 || aCodec == CodecType::VP9) {
      appendix.emplace(VPXSVCAppendix{
          .mLayeringMode = 2 /* VP9E_TEMPORAL_LAYERING_MODE_0101 */
      });
    }
  } else {
    MOZ_ASSERT(aMode == ScalabilityMode::L1T3);
    // Three temporal layers. 0-2-1-2...
    //
    // Frame pattern:
    // Layer 0: |0| | | |4| | | |8| | | |12|
    // Layer 1: | | |2| | | |6| | | |10| | |
    // Layer 2: | |1| |3| |5| |7| |9| |11| |

    layers = 3;

    // 4 frames per period
    periodicity = 4;

    // Assign layer ids.
    layerIds.AppendElement(0);
    layerIds.AppendElement(2);
    layerIds.AppendElement(1);
    layerIds.AppendElement(2);

    // Set rate decimators.
    rateDecimators.AppendElement(4);
    rateDecimators.AppendElement(2);
    rateDecimators.AppendElement(1);

    // Bitrate allocation: L0 - 50%, L1 - 20%, L2 - 30%
    // (cumulative: 50%, 70%, 100%).
    bitrates.AppendElement(kbps / 2);
    bitrates.AppendElement(kbps * 7 / 10);
    bitrates.AppendElement(kbps);

    if (aCodec == CodecType::VP8 || aCodec == CodecType::VP9) {
      appendix.emplace(VPXSVCAppendix{
          .mLayeringMode = 3 /* VP9E_TEMPORAL_LAYERING_MODE_0212 */
      });
    }
  }

  MOZ_ASSERT(layers == bitrates.Length(),
             "Bitrate must be assigned to each layer");
  return SVCLayerSettings{1,
                          layers,
                          periodicity,
                          std::move(layerIds),
                          std::move(rateDecimators),
                          std::move(bitrates),
                          appendix};
}

// Advance to the next entry of the repeating temporal-layer-id pattern.
void FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::UpdateTemporalLayerId() {
  MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
  mCurrentIndex = (mCurrentIndex + 1) % mTemporalLayerIds.Length();
}

// Temporal layer id assigned to the frame currently being processed.
uint8_t FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::CurrentTemporalLayerId() {
  MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
  return mTemporalLayerIds[mCurrentIndex];
}

// Restart the layer-id pattern from the beginning (done on key frames).
void FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::ResetTemporalLayerId() {
  MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
  mCurrentIndex = 0;
}

FFmpegVideoEncoder<LIBAV_VER>::FFmpegVideoEncoder(
    const FFmpegLibWrapper* aLib, AVCodecID aCodecID,
    const RefPtr<TaskQueue>& aTaskQueue, const EncoderConfig& aConfig)
    : FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {}

// Asynchronously initialize the encoder on the task queue. Resolves with
// true when InitEncoder() succeeds; rejects with its MediaResult otherwise.
RefPtr<MediaDataEncoder::InitPromise> FFmpegVideoEncoder<LIBAV_VER>::Init() {
  FFMPEGV_LOG("Init");
  return InvokeAsync(mTaskQueue, __func__, [self = RefPtr(this)]() {
    MediaResult r = self->InitEncoder();
    if (NS_FAILED(r.Code())) {
      FFMPEGV_LOG("%s", r.Description().get());
      return InitPromise::CreateAndReject(r, __func__);
    }
    return InitPromise::CreateAndResolve(true, __func__);
  });
}

// Describes which ffmpeg/ffvpx library backs this encoder, for diagnostics.
nsCString FFmpegVideoEncoder<LIBAV_VER>::GetDescriptionName() const {
#ifdef USING_MOZFFVPX
  return "ffvpx video encoder"_ns;
#else
  const char* lib =
# if defined(MOZ_FFMPEG)
      FFmpegRuntimeLinker::LinkStatusLibraryName();
# else
      "no library: ffmpeg disabled during build";
# endif
  return nsPrintfCString("ffmpeg video encoder (%s)", lib);
#endif
}

// True when the config asks for any scalability (SVC) mode.
bool FFmpegVideoEncoder<LIBAV_VER>::SvcEnabled() const {
  return mConfig.mScalabilityMode != ScalabilityMode::None;
}

// Decide whether a hardware-backed encoder should be attempted first.
bool FFmpegVideoEncoder<LIBAV_VER>::ShouldTryHardware() const {
#ifdef MOZ_WIDGET_ANDROID
  // On Android, the MediaCodec encoders are the only ones available to us,
  // which may be implemented in hardware or software.
  if (mCodecID == AV_CODEC_ID_H264 || mCodecID == AV_CODEC_ID_HEVC) {
    return StaticPrefs::media_ffvpx_hw_enabled();
  }
#endif

  if (mConfig.mHardwarePreference == HardwarePreference::RequireSoftware) {
    return false;
  }

  RefPtr<PlatformEncoderModule> pem =
      FFmpegEncoderModule<LIBAV_VER>::Create(mLib);
  return pem->SupportsCodec(mConfig.mCodec)
      .contains(media::EncodeSupport::HardwareEncode);
}

// Try hardware first when allowed, then fall back to software on failure.
MediaResult FFmpegVideoEncoder<LIBAV_VER>::InitEncoder() {
  MediaResult result(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR);
  if (ShouldTryHardware()) {
    result = InitEncoderInternal(/* aHardware */ true);
  }
  // TODO(aosmond): We should be checking here for RequireHardware, but we fail
  // encoding tests if we don't allow fallback to software on Linux in CI.
  if (NS_FAILED(result.Code())) {
    result = InitEncoderInternal(/* aHardware */ false);
  }
  return result;
}

// Allocate and configure the AVCodecContext (pixel format, dimensions,
// threading, rate control, codec-private options, SVC, H.264 specifics),
// then open the codec. Runs on the task queue only.
MediaResult FFmpegVideoEncoder<LIBAV_VER>::InitEncoderInternal(bool aHardware) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());

  FFMPEGV_LOG("FFmpegVideoEncoder::InitEncoder");

  // Initialize the common members of the encoder instance
  auto r = AllocateCodecContext(aHardware);
  if (r.isErr()) {
    return r.inspectErr();
  }
  mCodecContext = r.unwrap();
  mCodecName = mCodecContext->codec->name;

#ifdef MOZ_WIDGET_ANDROID
  // We need to create a MediaCodec encoder for H264/HEVC but it may or may not
  // be backed by actual hardware.
  switch (mCodecID) {
    case AV_CODEC_ID_H264:
      mIsHardwareAccelerated = aHardware && gfx::gfxVars::UseH264HwEncode();
      break;
    case AV_CODEC_ID_HEVC:
      mIsHardwareAccelerated = aHardware && gfx::gfxVars::UseHEVCHwEncode();
      break;
    default:
      mIsHardwareAccelerated = aHardware;
      break;
  }
  mUseDurationMap = aHardware;
#else
# ifdef MOZ_FFMPEG_ENCODER_USE_DURATION_MAP
  mUseDurationMap = true;
# endif
  mIsHardwareAccelerated = aHardware;
#endif

  // And now the video-specific part
#ifdef MOZ_WIDGET_ANDROID
  // COLOR_FormatYUV420SemiPlanar(NV12) is the most widely supported
  // format by the Android hardware encoders.
  mCodecContext->pix_fmt =
      aHardware ? ffmpeg::FFMPEG_PIX_FMT_NV12 : ffmpeg::FFMPEG_PIX_FMT_YUV420P;
#else
  mCodecContext->pix_fmt = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
  // // TODO: do this properly, based on the colorspace of the frame. Setting
  // this like that crashes encoders. if (mConfig.mCodec != CodecType::AV1) {
  //   if (mConfig.mPixelFormat == dom::ImageBitmapFormat::RGBA32 ||
  //       mConfig.mPixelFormat == dom::ImageBitmapFormat::BGRA32) {
  //     mCodecContext->color_primaries = AVCOL_PRI_BT709;
  //     mCodecContext->colorspace = AVCOL_SPC_RGB;
  // #ifdef FFVPX_VERSION
  //     mCodecContext->color_trc = AVCOL_TRC_IEC61966_2_1;
  // #endif
  //   } else {
  //     mCodecContext->color_primaries = AVCOL_PRI_BT709;
  //     mCodecContext->colorspace = AVCOL_SPC_BT709;
  //     mCodecContext->color_trc = AVCOL_TRC_BT709;
  //   }
  // }
#endif

  mCodecContext->width = static_cast<int>(mConfig.mSize.width);
  mCodecContext->height = static_cast<int>(mConfig.mSize.height);
  // Reasonable default for the quantization range.
  mCodecContext->qmin =
      static_cast<int>(StaticPrefs::media_ffmpeg_encoder_quantizer_min());
  mCodecContext->qmax =
      static_cast<int>(StaticPrefs::media_ffmpeg_encoder_quantizer_max());
  if (mConfig.mUsage == Usage::Realtime) {
    mCodecContext->thread_count = 1;
  } else {
    // NOTE(review): the int*int product is evaluated in 32-bit before being
    // widened to int64_t; fine for realistic frame sizes, but a cast on one
    // operand would make the widening explicit.
    int64_t pixels = mCodecContext->width * mCodecContext->height;
    int threads = 1;
    // Select a thread count that depends on the frame size, and cap to the
    // number of available threads minus one
    if (pixels >= 3840 * 2160) {
      threads = 16;
    } else if (pixels >= 1920 * 1080) {
      threads = 8;
    } else if (pixels >= 1280 * 720) {
      threads = 4;
    } else if (pixels >= 640 * 480) {
      threads = 2;
    }
    mCodecContext->thread_count =
        std::clamp<int>(threads, 1, GetNumberOfProcessors() - 1);
  }
  // TODO(bug 1869560): The recommended time_base is the reciprocal of the frame
  // rate, but we set it to microsecond for now.
  mCodecContext->time_base =
      AVRational{.num = 1, .den = static_cast<int>(USECS_PER_S)};
#if LIBAVCODEC_VERSION_MAJOR >= 57
  // Note that sometimes framerate can be zero (from webcodecs).
  mCodecContext->framerate =
      AVRational{.num = static_cast<int>(mConfig.mFramerate), .den = 1};
#endif

#if LIBAVCODEC_VERSION_MAJOR >= 60
  mCodecContext->flags |= AV_CODEC_FLAG_FRAME_DURATION;
#endif

  // Setting 0 here disables inter-frames: all frames are keyframes
  mCodecContext->gop_size = mConfig.mKeyframeInterval
                                ? static_cast<int>(mConfig.mKeyframeInterval)
                                : 10000;
  mCodecContext->keyint_min = 0;

  // When either real-time or SVC is enabled via config, the general settings of
  // the encoder are set to be more appropriate for real-time usage
  if (mConfig.mUsage == Usage::Realtime || SvcEnabled()) {
    if (mConfig.mUsage != Usage::Realtime) {
      FFMPEGV_LOG(
          "SVC enabled but low latency encoding mode not enabled, forcing low "
          "latency mode");
    }
    mLib->av_opt_set(mCodecContext->priv_data, "deadline", "realtime", 0);
    // Explicitly ask the encoder to keep no frames in flight at any one time
    // for lookahead purposes.
    mLib->av_opt_set(mCodecContext->priv_data, "lag-in-frames", "0", 0);

    if (mConfig.mCodec == CodecType::VP8 || mConfig.mCodec == CodecType::VP9) {
      mLib->av_opt_set(mCodecContext->priv_data, "error-resilient", "1", 0);
    }
    if (mConfig.mCodec == CodecType::AV1) {
      mLib->av_opt_set(mCodecContext->priv_data, "error-resilience", "1", 0);
      // This sets usage to AOM_USAGE_REALTIME
      mLib->av_opt_set(mCodecContext->priv_data, "usage", "1", 0);
      // Allow the bitrate to swing 50% up and down the target
      mLib->av_opt_set(mCodecContext->priv_data, "rc_undershoot_percent", "50",
                       0);
      mLib->av_opt_set(mCodecContext->priv_data, "rc_overshoot_percent", "50",
                       0);
      // Row multithreading -- note that we do single threaded encoding for now,
      // so this doesn't do much
      mLib->av_opt_set(mCodecContext->priv_data, "row_mt", "1", 0);
      // Cyclic refresh adaptive quantization
      mLib->av_opt_set(mCodecContext->priv_data, "aq-mode", "3", 0);
      // optimized for real-time, 7 for regular, lower: more cpu use -> higher
      // compression ratio
      mLib->av_opt_set(mCodecContext->priv_data, "cpu-used", "9", 0);
      // disable, this is to handle camera motion, unlikely for our use case
      mLib->av_opt_set(mCodecContext->priv_data, "enable-global-motion", "0",
                       0);
      mLib->av_opt_set(mCodecContext->priv_data, "enable-cfl-intra", "0", 0);
      // TODO: Set a number of tiles appropriate for the number of threads used
      // -- disable tiling if using a single thread.
      mLib->av_opt_set(mCodecContext->priv_data, "tile-columns", "0", 0);
      mLib->av_opt_set(mCodecContext->priv_data, "tile-rows", "0", 0);
    }
  } else {
    if (mConfig.mCodec == CodecType::AV1) {
      mLib->av_opt_set_int(
          mCodecContext->priv_data, "cpu-used",
          static_cast<int>(StaticPrefs::media_ffmpeg_encoder_cpu_used()), 0);
    }
  }

  if (SvcEnabled()) {
    if (Maybe<SVCSettings> settings = GetSVCSettings()) {
      if (mCodecName == "libaom-av1") {
        if (mConfig.mBitrateMode != BitrateMode::Constant) {
          return MediaResult(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR,
                             "AV1 with SVC only supports constant bitrate"_ns);
        }
      }

      SVCSettings s = settings.extract();
      FFMPEGV_LOG("SVC options string: %s=%s", s.mSettingKeyValue.first.get(),
                  s.mSettingKeyValue.second.get());
      mLib->av_opt_set(mCodecContext->priv_data, s.mSettingKeyValue.first.get(),
                       s.mSettingKeyValue.second.get(), 0);

      // FFmpegVideoEncoder is reset after Drain(), so mSVCInfo should be
      // reset() before emplace().
      mSVCInfo.reset();
      mSVCInfo.emplace(std::move(s.mTemporalLayerIds));

      // TODO: layer settings should be changed dynamically when the frame's
      // color space changed.
    }
  }

#ifdef MOZ_WIDGET_ANDROID
  // If we are using MediaCodec, we can set more options.
  if (aHardware) {
    if (mConfig.mBitrateMode == BitrateMode::Constant) {
      mLib->av_opt_set(mCodecContext->priv_data, "bitrate_mode", "cbr", 0);
    } else {
      mLib->av_opt_set(mCodecContext->priv_data, "bitrate_mode", "vbr", 0);
    }
  }
#endif

  nsAutoCString h264Log;
  if (mConfig.mCodecSpecific.is<H264Specific>()) {
    const H264Specific& h264Specific =
        mConfig.mCodecSpecific.as<H264Specific>();
    H264Settings s = GetH264Settings(h264Specific);
    mCodecContext->profile = s.mProfile;
    mCodecContext->level = s.mLevel;
    for (const auto& pair : s.mSettingKeyValuePairs) {
      mLib->av_opt_set(mCodecContext->priv_data, pair.first.get(),
                       pair.second.get(), 0);
    }

    // Log the settings.
    const char* formatStr =
        h264Specific.mFormat == H264BitStreamFormat::AVC ? "AVCC" : "AnnexB";
    if (mCodecName.Equals("libx264"_ns)) {
      // When using profile other than EXTENDED, the profile string is in the
      // first element of mSettingKeyValuePairs, while EXTENDED profile has no
      // profile string.

      MOZ_ASSERT_IF(
          s.mSettingKeyValuePairs.Length() != 3,
          h264Specific.mProfile == H264_PROFILE::H264_PROFILE_EXTENDED);
      const char* profileStr = s.mSettingKeyValuePairs.Length() == 3
                                   ? s.mSettingKeyValuePairs[0].second.get()
                                   : "extended";
      const char* levelStr = s.mSettingKeyValuePairs.Length() == 3
                                 ? s.mSettingKeyValuePairs[1].second.get()
                                 : s.mSettingKeyValuePairs[0].second.get();
      h264Log.AppendPrintf(", H264: profile - %d (%s), level %d (%s), %s",
                           mCodecContext->profile, profileStr,
                           mCodecContext->level, levelStr, formatStr);
    } else {
      h264Log.AppendPrintf(", H264: profile - %d, level %d, %s",
                           mCodecContext->profile, mCodecContext->level,
                           formatStr);
    }
  }

  // - if mConfig.mDenoising is set: av_opt_set_int(mCodecContext->priv_data,
  //   "noise_sensitivity", x, 0), where the x is from 0(disabled) to 6.
  // - if mConfig.mAdaptiveQp is set: av_opt_set_int(mCodecContext->priv_data,
  //   "aq_mode", x, 0), where x is from 0 to 3: 0 - Disabled, 1 - Variance
  //   AQ(default), 2 - Complexity AQ, 3 - Cycle AQ.

  // Our old version of libaom-av1 is considered experimental by the recent
  // ffmpeg we use. Allow experimental codecs for now until we decide on an AV1
  // encoder.
  mCodecContext->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

  SetContextBitrate();

  AVDictionary* options = nullptr;
  if (int ret = OpenCodecContext(mCodecContext->codec, &options); ret < 0) {
    return MediaResult(
        NS_ERROR_DOM_MEDIA_FATAL_ERR,
        RESULT_DETAIL("failed to open %s avcodec: %s", mCodecName.get(),
                      MakeErrorString(mLib, ret).get()));
  }
  mLib->av_dict_free(&options);

  FFMPEGV_LOG(
      "%s has been initialized with format: %s, bitrate: %" PRIi64
      ", width: %d, height: %d, quantizer: [%d, %d], time_base: %d/%d%s",
      mCodecName.get(), ffmpeg::GetPixelFormatString(mCodecContext->pix_fmt),
      static_cast<int64_t>(mCodecContext->bit_rate), mCodecContext->width,
      mCodecContext->height, mCodecContext->qmin, mCodecContext->qmax,
      mCodecContext->time_base.num, mCodecContext->time_base.den,
      h264Log.IsEmpty() ? "" : h264Log.get());

  return NS_OK;
}

// avcodec_send_frame and avcodec_receive_packet were introduced in version 58.
#if LIBAVCODEC_VERSION_MAJOR >= 58
// Convert one VideoData sample into an AVFrame (converting the image to the
// context's pixel format), stamp pts/duration and SVC metadata, then hand it
// to the shared FFmpegDataEncoder encode path.
Result<MediaDataEncoder::EncodedData, MediaResult> FFmpegVideoEncoder<
    LIBAV_VER>::EncodeInputWithModernAPIs(RefPtr<const MediaData> aSample) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(mCodecContext);
  MOZ_ASSERT(aSample);

  RefPtr<const VideoData> sample(aSample->As<VideoData>());

  // Validate input.
  if (!sample->mImage) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "No image"_ns));
  }
  if (sample->mImage->GetSize().IsEmpty()) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           "image width or height is invalid"_ns));
  }

  // Allocate AVFrame.
  if (!PrepareFrame()) {
    return Err(
        MediaResult(NS_ERROR_OUT_OF_MEMORY, "failed to allocate frame"_ns));
  }

  // Set AVFrame properties for its internal data allocation. For now, we always
  // convert into ffmpeg's buffer.
  mFrame->format = mCodecContext->pix_fmt;
  mFrame->width = static_cast<int>(mConfig.mSize.width);
  mFrame->height = static_cast<int>(mConfig.mSize.height);
  // Force an intra frame when the caller requested a keyframe; otherwise let
  // the encoder pick the picture type.
  mFrame->pict_type =
      sample->mKeyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_NONE;

  // Allocate AVFrame data.
  if (int ret = mLib->av_frame_get_buffer(mFrame, 0); ret < 0) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           RESULT_DETAIL("failed to allocate frame data: %s",
                                         MakeErrorString(mLib, ret).get())));
  }

  // Make sure AVFrame is writable.
  if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           RESULT_DETAIL("failed to make frame writable: %s",
                                         MakeErrorString(mLib, ret).get())));
  }

  // Convert the source image into the frame's planar buffers; only the two
  // formats this encoder ever configures (I420/NV12) are handled.
  nsresult rv;
  switch (mFrame->format) {
    case ffmpeg::FFMPEG_PIX_FMT_YUV420P:
      rv = ConvertToI420(sample->mImage, mFrame->data[0], mFrame->linesize[0],
                         mFrame->data[1], mFrame->linesize[1], mFrame->data[2],
                         mFrame->linesize[2], mConfig.mSize);
      break;
    case ffmpeg::FFMPEG_PIX_FMT_NV12:
      rv = ConvertToNV12(sample->mImage, mFrame->data[0], mFrame->linesize[0],
                         mFrame->data[1], mFrame->linesize[1], mConfig.mSize);
      break;
    default:
      MOZ_ASSERT_UNREACHABLE("unhandled ffmpeg format!");
      rv = NS_ERROR_DOM_MEDIA_FATAL_ERR;
      break;
  }
  if (NS_FAILED(rv)) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           "failed to convert format to ffmpeg format"_ns));
  }

  // Set presentation timestamp and duration of the AVFrame. The unit of pts is
  // time_base.
  // TODO(bug 1869560): The recommended time_base is the reciprocal of the frame
  // rate, but we set it to microsecond for now.
# if LIBAVCODEC_VERSION_MAJOR >= 59
  mFrame->time_base =
      AVRational{.num = 1, .den = static_cast<int>(USECS_PER_S)};
# endif
  // Provide fake pts, see header file.
  // For AV1 a monotonically increasing fake pts is fed to the encoder and the
  // real sample time is remembered in mPtsMap, to be restored in
  // ToMediaRawData().
  if (mConfig.mCodec == CodecType::AV1) {
    mFrame->pts = mFakePts;
    mPtsMap.Insert(mFakePts, aSample->mTime.ToMicroseconds());
    mFakePts += aSample->mDuration.ToMicroseconds();
    mCurrentFramePts = aSample->mTime.ToMicroseconds();
  } else {
    mFrame->pts = aSample->mTime.ToMicroseconds();
  }
# ifdef MOZ_FFMPEG_ENCODER_USE_DURATION_MAP
  if (mUseDurationMap) {
    // Save duration in the time_base unit.
    mDurationMap.Insert(mFrame->pts, aSample->mDuration.ToMicroseconds());
  }
# else
  mFrame->duration = aSample->mDuration.ToMicroseconds();
# endif
  Duration(mFrame) = aSample->mDuration.ToMicroseconds();

  AVDictionary* dict = nullptr;
  // VP8/VP9 use a mode that handles the temporal layer id sequence internally,
  // and don't require explicitly setting the metadata. Other codecs such as
  // AV1 via libaom however require manual frame tagging.
  if (SvcEnabled() && mConfig.mCodec != CodecType::VP8 &&
      mConfig.mCodec != CodecType::VP9) {
    if (aSample->mKeyframe) {
      FFMPEGV_LOG("Key frame requested, reseting temporal layer id");
      mSVCInfo->ResetTemporalLayerId();
    }
    nsPrintfCString str("%d", mSVCInfo->CurrentTemporalLayerId());
    mLib->av_dict_set(&dict, "temporal_id", str.get(), 0);
    mFrame->metadata = dict;
  }

  // Now send the AVFrame to ffmpeg for encoding, same code for audio and video.
  return FFmpegDataEncoder<LIBAV_VER>::EncodeWithModernAPIs();
}
#endif  // if LIBAVCODEC_VERSION_MAJOR >= 58

// Wrap an encoded AVPacket into a MediaRawData: keyframe flag, extra data,
// optional AnnexB->AVCC conversion, timestamps/durations, and the temporal
// layer id when SVC is active.
Result<RefPtr<MediaRawData>, MediaResult>
FFmpegVideoEncoder<LIBAV_VER>::ToMediaRawData(AVPacket* aPacket) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(aPacket);

  auto creationResult = CreateMediaRawData(aPacket);
  if (creationResult.isErr()) {
    return Err(creationResult.unwrapErr());
  }

  RefPtr<MediaRawData> data = creationResult.unwrap();

  data->mKeyframe = (aPacket->flags & AV_PKT_FLAG_KEY) != 0;

  // Missing extra data (NS_ERROR_NOT_AVAILABLE / NS_ERROR_NOT_IMPLEMENTED)
  // is tolerated; any other failure is fatal.
  auto extradataResult = GetExtraData(aPacket);
  if (extradataResult.isOk()) {
    data->mExtraData = extradataResult.unwrap();
  } else if (extradataResult.isErr()) {
    MediaResult e = extradataResult.unwrapErr();
    if (e.Code() != NS_ERROR_NOT_AVAILABLE &&
        e.Code() != NS_ERROR_NOT_IMPLEMENTED) {
      return Err(e);
    }
    FFMPEGV_LOG("GetExtraData failed with %s, but we can ignore it for now",
                e.Description().get());
  }

  // When the caller asked for AVCC but the (non-libx264) encoder produced
  // AnnexB, rewrap the sample in place, remembering the latest extra data.
  if (mCodecID == AV_CODEC_ID_H264 &&
      mConfig.mCodecSpecific.is<H264Specific>() &&
      mConfig.mCodecSpecific.as<H264Specific>().mFormat ==
          H264BitStreamFormat::AVC &&
      !mCodecName.Equals("libx264"_ns) && AnnexB::IsAnnexB(*data)) {
    if (data->mExtraData) {
      mLastExtraData = std::move(data->mExtraData);
    }
    if (!AnnexB::ConvertSampleToAVCC(data, mLastExtraData)) {
      return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                             "Failed to convert to AVCC"_ns));
    }
  }

  // TODO(bug 1869560): The unit of pts, dts, and duration is time_base, which
  // is recommended to be the reciprocal of the frame rate, but we set it to
  // microsecond for now.
  data->mTime = media::TimeUnit::FromMicroseconds(aPacket->pts);
#ifdef MOZ_FFMPEG_ENCODER_USE_DURATION_MAP
  int64_t duration;
  if (mUseDurationMap && mDurationMap.Find(aPacket->pts, duration)) {
    data->mDuration = media::TimeUnit::FromMicroseconds(duration);
  } else
#endif
  {
    data->mDuration = media::TimeUnit::FromMicroseconds(aPacket->duration);
  }
  data->mTimecode = media::TimeUnit::FromMicroseconds(aPacket->dts);

  // AV1 was fed fake pts values (see EncodeInputWithModernAPIs); map the
  // packet's fake pts back to the real sample time.
  if (mConfig.mCodec == CodecType::AV1) {
    auto found = mPtsMap.Take(aPacket->pts);
    data->mTime = media::TimeUnit::FromMicroseconds(found.value());
  }

  if (mSVCInfo) {
    if (data->mKeyframe) {
      FFMPEGV_LOG(
          "Encoded packet is key frame, reseting temporal layer id sequence");
      mSVCInfo->ResetTemporalLayerId();
    }
    uint8_t temporalLayerId = mSVCInfo->CurrentTemporalLayerId();
    data->mTemporalLayerId.emplace(temporalLayerId);
    mSVCInfo->UpdateTemporalLayerId();
  }

  return data;
}

// Extract the H.264 AVCC extra data (SPS/PPS) for aPacket. Returns
// NS_ERROR_NOT_AVAILABLE / NS_ERROR_NOT_IMPLEMENTED for the benign "nothing
// to extract" cases, which the caller tolerates.
Result<already_AddRefed<MediaByteBuffer>, MediaResult>
FFmpegVideoEncoder<LIBAV_VER>::GetExtraData(AVPacket* aPacket) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(aPacket);

  // We only extract the extra data when encoding into AVCC format.
  if (mCodecID != AV_CODEC_ID_H264 ||
      !mConfig.mCodecSpecific.is<H264Specific>() ||
      mConfig.mCodecSpecific.as<H264Specific>().mFormat !=
          H264BitStreamFormat::AVC) {
    return Err(
        MediaResult(NS_ERROR_NOT_AVAILABLE, "Extra data unnecessary"_ns));
  }

  // Non-libx264 encoders emitting AnnexB: pull the parameter sets straight
  // out of the bitstream.
  Span<const uint8_t> packetBuf(aPacket->data,
                                static_cast<size_t>(aPacket->size));
  if (!mCodecName.Equals("libx264"_ns) && AnnexB::IsAnnexB(packetBuf)) {
    auto extraData = AnnexB::ExtractExtraDataForAVCC(packetBuf);
    if (!extraData) {
      return Err(MediaResult(NS_ERROR_NOT_AVAILABLE,
                             "Extra data missing from packet"_ns));
    }
    return extraData.forget();
  }

  if (!(aPacket->flags & AV_PKT_FLAG_KEY)) {
    return Err(MediaResult(NS_ERROR_NOT_AVAILABLE,
                           "Extra data only comes with key frame"_ns));
  }

  if (!mCodecName.Equals("libx264"_ns)) {
    return Err(MediaResult(
        NS_ERROR_NOT_IMPLEMENTED,
        RESULT_DETAIL(
            "Get extra data from codec %s has not been implemented yet",
            mCodecName.get())));
  }

  // libx264 path: the SPS/PPS live either in the context's global header or
  // in the key-frame packet itself.
  bool useGlobalHeader =
#if LIBAVCODEC_VERSION_MAJOR >= 57
      mCodecContext->flags & AV_CODEC_FLAG_GLOBAL_HEADER;
#else
      false;
#endif

  Span<const uint8_t> buf;
  if (useGlobalHeader) {
    buf =
        Span<const uint8_t>(mCodecContext->extradata,
                            static_cast<size_t>(mCodecContext->extradata_size));
  } else {
    buf = packetBuf;
  }
  if (buf.empty()) {
    return Err(MediaResult(NS_ERROR_UNEXPECTED,
                           "fail to get H264 AVCC header in key frame!"_ns));
  }

  BufferReader reader(buf);

  // The first part is sps.
  uint32_t spsSize = MOZ_TRY(reader.ReadU32());
  Span<const uint8_t> spsData =
      MOZ_TRY(reader.ReadSpan<const uint8_t>(static_cast<size_t>(spsSize)));

  // The second part is pps.
  uint32_t ppsSize = MOZ_TRY(reader.ReadU32());
  Span<const uint8_t> ppsData =
      MOZ_TRY(reader.ReadSpan<const uint8_t>(static_cast<size_t>(ppsSize)));

  // Ensure we have profile, constraints and level needed to create the extra
  // data.
  if (spsData.Length() < 4) {
    return Err(MediaResult(NS_ERROR_UNEXPECTED, "spsData is too short"_ns));
  }

  FFMPEGV_LOG(
      "Generate extra data: profile - %u, constraints: %u, level: %u for pts @ "
      "%" PRId64,
      spsData[1], spsData[2], spsData[3], aPacket->pts);

  // Create extra data.
  auto extraData = MakeRefPtr<MediaByteBuffer>();
  H264::WriteExtraData(extraData, spsData[1], spsData[2], spsData[3], spsData,
                       ppsData);
  MOZ_ASSERT(extraData);
  return extraData.forget();
}

// Translate the configured scalability mode into the codec-private option
// key/value string expected by libvpx ("ts-parameters") or libaom
// ("svc-parameters"). Returns Nothing() for codecs without SVC support here.
Maybe<FFmpegVideoEncoder<LIBAV_VER>::SVCSettings>
FFmpegVideoEncoder<LIBAV_VER>::GetSVCSettings() {
  MOZ_ASSERT(!mCodecName.IsEmpty());
  MOZ_ASSERT(SvcEnabled());

  CodecType codecType = CodecType::Unknown;
  if (mCodecName == "libvpx") {
    codecType = CodecType::VP8;
  } else if (mCodecName == "libvpx-vp9") {
    codecType = CodecType::VP9;
  } else if (mCodecName == "libaom-av1") {
    codecType = CodecType::AV1;
  }

  if (codecType == CodecType::Unknown) {
    FFMPEGV_LOG("SVC setting is not implemented for %s codec",
                mCodecName.get());
    return Nothing();
  }

  SVCLayerSettings svc = GetSVCLayerSettings(
      codecType, mConfig.mScalabilityMode, mConfig.mBitrate);

  nsAutoCString name;
  nsAutoCString parameters;

  if (codecType == CodecType::VP8 || codecType == CodecType::VP9) {
    // Check if the number of temporal layers in codec specific settings
    // matches
    // the number of layers for the given scalability mode.
931 if (mConfig.mCodecSpecific.is<VP8Specific>()) { 932 MOZ_ASSERT(mConfig.mCodecSpecific.as<VP8Specific>().mNumTemporalLayers == 933 svc.mNumberTemporalLayers); 934 } else if (mConfig.mCodecSpecific.is<VP9Specific>()) { 935 MOZ_ASSERT(mConfig.mCodecSpecific.as<VP9Specific>().mNumTemporalLayers == 936 svc.mNumberTemporalLayers); 937 } 938 939 // Form an SVC setting string for libvpx. 940 name = "ts-parameters"_ns; 941 parameters.Append("ts_target_bitrate="); 942 for (size_t i = 0; i < svc.mTargetBitrates.Length(); ++i) { 943 if (i > 0) { 944 parameters.Append(","); 945 } 946 parameters.AppendPrintf("%d", svc.mTargetBitrates[i]); 947 } 948 parameters.AppendPrintf( 949 ":ts_layering_mode=%u", 950 svc.mCodecAppendix->as<VPXSVCAppendix>().mLayeringMode); 951 } 952 953 if (codecType == CodecType::AV1) { 954 // Form an SVC setting string for libaom. 955 name = "svc-parameters"_ns; 956 parameters.AppendPrintf("number_spatial_layers=%zu", 957 svc.mNumberSpatialLayers); 958 parameters.AppendPrintf(":number_temporal_layers=%zu", 959 svc.mNumberTemporalLayers); 960 parameters.Append(":framerate_factor="); 961 for (size_t i = 0; i < svc.mRateDecimators.Length(); ++i) { 962 if (i > 0) { 963 parameters.Append(","); 964 } 965 parameters.AppendPrintf("%d", svc.mRateDecimators[i]); 966 } 967 parameters.Append(":layer_target_bitrate="); 968 for (size_t i = 0; i < svc.mTargetBitrates.Length(); ++i) { 969 if (i > 0) { 970 parameters.Append(","); 971 } 972 parameters.AppendPrintf("%d", svc.mTargetBitrates[i]); 973 } 974 } 975 976 return Some( 977 SVCSettings{std::move(svc.mLayerIds), 978 std::make_pair(std::move(name), std::move(parameters))}); 979 } 980 981 FFmpegVideoEncoder<LIBAV_VER>::H264Settings FFmpegVideoEncoder< 982 LIBAV_VER>::GetH264Settings(const H264Specific& aH264Specific) { 983 nsTArray<std::pair<nsCString, nsCString>> keyValuePairs; 984 985 Maybe<H264Setting> profile = GetH264Profile(aH264Specific.mProfile); 986 MOZ_RELEASE_ASSERT(profile.isSome()); 987 
MOZ_RELEASE_ASSERT(!profile->mString.IsEmpty() || 988 aH264Specific.mProfile == 989 H264_PROFILE::H264_PROFILE_EXTENDED); 990 991 Maybe<H264Setting> level = GetH264Level(aH264Specific.mLevel); 992 MOZ_RELEASE_ASSERT(level.isSome()); 993 MOZ_RELEASE_ASSERT(!level->mString.IsEmpty()); 994 995 if (!profile->mString.IsEmpty()) { 996 keyValuePairs.AppendElement(std::make_pair("profile"_ns, profile->mString)); 997 } 998 keyValuePairs.AppendElement(std::make_pair("level"_ns, level->mString)); 999 1000 if (mCodecName.Equals("libx264"_ns)) { 1001 // Set format: libx264's default format is annexb. 1002 if (aH264Specific.mFormat == H264BitStreamFormat::AVC) { 1003 keyValuePairs.AppendElement(std::make_pair("x264-params"_ns, "annexb=0")); 1004 // mCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER 1005 // if we don't want to append SPS/PPS data in all keyframe 1006 // (LIBAVCODEC_VERSION_MAJOR >= 57 only). 1007 } else { 1008 // Set annexb explicitly even if it's default format. 1009 keyValuePairs.AppendElement(std::make_pair("x264-params"_ns, "annexb=1")); 1010 } 1011 } 1012 1013 return H264Settings{.mProfile = profile->mValue, 1014 .mLevel = level->mValue, 1015 .mSettingKeyValuePairs = std::move(keyValuePairs)}; 1016 } 1017 1018 } // namespace mozilla