tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 85a6431fdfb8ec5ac1245a36503c771549ebfb8f
parent 2f890b64baff0a71cfe5a9b98caffaf0fcf30a52
Author: Updatebot <updatebot@mozilla.com>
Date:   Tue, 21 Oct 2025 22:18:36 +0000

Bug 1994050 - Update libvpx to f32182fc9455d7979236dffca35c8baf232a74ec r=chunmin

Differential Revision: https://phabricator.services.mozilla.com/D268445

Diffstat:
M media/libvpx/libvpx/args.c | 21 ++++++++++++++-------
M media/libvpx/libvpx/build/make/Android.mk | 2 +-
M media/libvpx/libvpx/build/make/configure.sh | 11 ++++++-----
M media/libvpx/libvpx/build/make/iosbuild.sh | 10 +++-------
M media/libvpx/libvpx/examples/vp9_spatial_svc_encoder.c | 9 ++++++---
M media/libvpx/libvpx/libs.mk | 4 ++++
M media/libvpx/libvpx/test/convolve_test.cc | 37 ++++++++++++++++++++++++++++++++++++-
M media/libvpx/libvpx/test/encode_api_test.cc | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M media/libvpx/libvpx/vp8/vp8_cx_iface.c | 17 ++++-------------
M media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl | 6 +++---
M media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_highbd_temporal_filter_neon.c | 204 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M media/libvpx/libvpx/vp9/encoder/vp9_ratectrl.c | 8 ++++++--
M media/libvpx/libvpx/vp9/vp9_cx_iface.c | 19 +++++--------------
M media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M media/libvpx/libvpx/vpx_ports/aarch64_cpudetect.c | 4 ++--
M media/libvpx/moz.yaml | 4 ++--
16 files changed, 418 insertions(+), 60 deletions(-)

diff --git a/media/libvpx/libvpx/args.c b/media/libvpx/libvpx/args.c @@ -91,24 +91,31 @@ char **argv_dup(int argc, const char **argv) { } void arg_show_usage(FILE *fp, const struct arg_def *const *defs) { - char option_text[40] = { 0 }; - for (; *defs; defs++) { const struct arg_def *def = *defs; char *short_val = def->has_val ? " <arg>" : ""; char *long_val = def->has_val ? "=<arg>" : ""; + int n = 0; + // Short options are indented with two spaces. Long options are indented + // with 12 spaces. if (def->short_name && def->long_name) { char *comma = def->has_val ? "," : ", "; - snprintf(option_text, 37, "-%s%s%s --%s%6s", def->short_name, short_val, - comma, def->long_name, long_val); + n = fprintf(fp, " -%s%s%s --%s%s", def->short_name, short_val, comma, + def->long_name, long_val); } else if (def->short_name) - snprintf(option_text, 37, "-%s%s", def->short_name, short_val); + n = fprintf(fp, " -%s%s", def->short_name, short_val); else if (def->long_name) - snprintf(option_text, 37, " --%s%s", def->long_name, long_val); + n = fprintf(fp, " --%s%s", def->long_name, long_val); - fprintf(fp, " %-37s\t%s\n", option_text, def->desc); + // Descriptions are indented with 40 spaces. If an option is 40 characters + // or longer, its description starts on the next line. + if (n < 40) + for (int i = 0; i < 40 - n; i++) fputc(' ', fp); + else + fputs("\n ", fp); + fprintf(fp, "%s\n", def->desc); if (def->enums) { const struct arg_enum_list *listptr; diff --git a/media/libvpx/libvpx/build/make/Android.mk b/media/libvpx/libvpx/build/make/Android.mk @@ -25,7 +25,7 @@ ifdef NDK_ROOT # Android.mk file in the libvpx directory: # LOCAL_PATH := $(call my-dir) # include $(CLEAR_VARS) -# include jni/libvpx/build/make/Android.mk +# include libvpx/build/make/Android.mk # # By default libvpx will use the 'cpufeatures' module from the NDK. 
This allows # the library to be built with all available optimizations (SSE2->AVX512 for diff --git a/media/libvpx/libvpx/build/make/configure.sh b/media/libvpx/libvpx/build/make/configure.sh @@ -1229,8 +1229,8 @@ EOF ;; esac - if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 \ - && [ "$(show_xcode_version)" -lt 16 ]; then + if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ] \ + && [ "$(show_xcode_version | cut -d. -f1)" -lt 16 ]; then check_add_cflags -fembed-bitcode check_add_asflags -fembed-bitcode check_add_ldflags -fembed-bitcode @@ -1381,6 +1381,10 @@ EOF AS=${AS:-nasm} add_ldflags -Zhigh-mem ;; + darwin*) + enabled x86 && darwin_arch="-arch i386" || darwin_arch="-arch x86_64" + add_cflags ${darwin_arch} + add_ldflags ${darwin_arch} esac AS="${alt_as:-${AS:-auto}}" @@ -1503,9 +1507,6 @@ EOF ;; darwin*) add_asflags -f macho${bits} - enabled x86 && darwin_arch="-arch i386" || darwin_arch="-arch x86_64" - add_cflags ${darwin_arch} - add_ldflags ${darwin_arch} # -mdynamic-no-pic is still a bit of voodoo -- it was required at # one time, but does not seem to be now, and it breaks some of the # code that still relies on inline assembly. 
diff --git a/media/libvpx/libvpx/build/make/iosbuild.sh b/media/libvpx/libvpx/build/make/iosbuild.sh @@ -30,13 +30,9 @@ SCRIPT_DIR=$(dirname "$0") LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd) LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo) ORIG_PWD="$(pwd)" -ARM_TARGETS="arm64-darwin-gcc - armv7-darwin-gcc - armv7s-darwin-gcc" -SIM_TARGETS="x86-iphonesimulator-gcc - x86_64-iphonesimulator-gcc" -OSX_TARGETS="x86-darwin16-gcc - x86_64-darwin16-gcc" +ARM_TARGETS="arm64-darwin-gcc" +SIM_TARGETS="x86_64-iphonesimulator-gcc" +OSX_TARGETS="x86_64-darwin16-gcc" TARGETS="${ARM_TARGETS} ${SIM_TARGETS}" # Configures for the target specified by $1, and invokes make with the dist diff --git a/media/libvpx/libvpx/examples/vp9_spatial_svc_encoder.c b/media/libvpx/libvpx/examples/vp9_spatial_svc_encoder.c @@ -14,6 +14,8 @@ * that benefit from a scalable bitstream. */ +#include <assert.h> +#include <limits.h> #include <math.h> #include <stdarg.h> #include <stdio.h> @@ -25,12 +27,13 @@ #include "../tools_common.h" #include "../video_writer.h" +#include "../vpx_ports/bitops.h" #include "../vpx_ports/vpx_timer.h" #include "./svc_context.h" #include "vpx/vp8cx.h" +#include "vpx/vpx_decoder.h" #include "vpx/vpx_encoder.h" #include "../vpxstats.h" -#include "vp9/encoder/vp9_encoder.h" #include "./y4minput.h" #define OUTPUT_FRAME_STATS 0 @@ -783,8 +786,8 @@ static void svc_output_rc_stats( int count = 0; double sum_bitrate = 0.0; double sum_bitrate2 = 0.0; - vp9_zero(sizes); - vp9_zero(sizes_parsed); + memset(sizes, 0, sizeof(sizes)); + memset(sizes_parsed, 0, sizeof(sizes_parsed)); vpx_codec_control(codec, VP9E_GET_SVC_LAYER_ID, layer_id); parse_superframe_index(cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, sizes_parsed, &count); diff --git a/media/libvpx/libvpx/libs.mk b/media/libvpx/libvpx/libs.mk @@ -182,14 +182,18 @@ INSTALL-LIBS-$(CONFIG_ENCODERS) += include/vpx/vpx_tpl.h ifeq ($(CONFIG_EXTERNAL_BUILD),yes) ifeq ($(CONFIG_MSVS),yes) INSTALL-LIBS-yes += $(foreach 
p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/$(CODEC_LIB).lib) +ifeq ($(CONFIG_STATIC),yes) INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/$(CODEC_LIB)d.lib) +endif INSTALL-LIBS-$(CONFIG_SHARED) += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/vpx.dll) INSTALL-LIBS-$(CONFIG_SHARED) += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/vpx.exp) endif else INSTALL-LIBS-$(CONFIG_STATIC) += $(LIBSUBDIR)/libvpx.a +ifeq ($(CONFIG_STATIC),yes) INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(LIBSUBDIR)/libvpx_g.a endif +endif CODEC_SRCS=$(call enabled,CODEC_SRCS) diff --git a/media/libvpx/libvpx/test/convolve_test.cc b/media/libvpx/libvpx/test/convolve_test.cc @@ -1804,6 +1804,18 @@ WRAP12TAP(convolve12_vert_ssse3, 12) WRAP12TAP(convolve12_ssse3, 12) #endif // HAVE_SSSE3 +#if HAVE_NEON +WRAP12TAP(convolve12_horiz_neon, 8) +WRAP12TAP(convolve12_vert_neon, 8) +WRAP12TAP(convolve12_neon, 8) +WRAP12TAP(convolve12_horiz_neon, 10) +WRAP12TAP(convolve12_vert_neon, 10) +WRAP12TAP(convolve12_neon, 10) +WRAP12TAP(convolve12_horiz_neon, 12) +WRAP12TAP(convolve12_vert_neon, 12) +WRAP12TAP(convolve12_neon, 12) +#endif // HAVE_NEON + WRAP12TAP(convolve12_horiz_c, 8) WRAP12TAP(convolve12_vert_c, 8) WRAP12TAP(convolve12_c, 8) @@ -2045,14 +2057,37 @@ INSTANTIATE_TEST_SUITE_P(NEON, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_neon)); #if !CONFIG_REALTIME_ONLY && CONFIG_VP9_ENCODER +#if CONFIG_VP9_HIGHBITDEPTH +const ConvolveFunctions12Tap convolve12tap_8bit_neon( + wrap_convolve12_horiz_neon_8, wrap_convolve12_vert_neon_8, + wrap_convolve12_neon_8, 8); + +const ConvolveFunctions12Tap convolve12tap_10bit_neon( + wrap_convolve12_horiz_neon_10, wrap_convolve12_vert_neon_10, + wrap_convolve12_neon_10, 10); + +const ConvolveFunctions12Tap convolve12tap_12bit_neon( + wrap_convolve12_horiz_neon_12, wrap_convolve12_vert_neon_12, + wrap_convolve12_neon_12, 12); + +const Convolve12TapParam kArrayConvolve12Tap_neon[] = { + ALL_SIZES_12TAP(convolve12tap_8bit_neon), + 
ALL_SIZES_12TAP(convolve12tap_10bit_neon), + ALL_SIZES_12TAP(convolve12tap_12bit_neon) +}; + +#else + const ConvolveFunctions12Tap convolve12Tap_neon(vpx_convolve12_horiz_neon, vpx_convolve12_vert_neon, vpx_convolve12_neon, 0); const Convolve12TapParam kArrayConvolve12Tap_neon[] = { ALL_SIZES_12TAP( convolve12Tap_neon) }; +#endif // CONFIG_VP9_HIGHBITDEPTH + INSTANTIATE_TEST_SUITE_P(NEON, ConvolveTest12Tap, ::testing::ValuesIn(kArrayConvolve12Tap_neon)); -#endif +#endif // !CONFIG_REALTIME_ONLY && CONFIG_VP9_ENCODER #endif // HAVE_NEON #if HAVE_NEON_DOTPROD diff --git a/media/libvpx/libvpx/test/encode_api_test.cc b/media/libvpx/libvpx/test/encode_api_test.cc @@ -1784,6 +1784,69 @@ TEST(EncodeAPI, Buganizer441668134) { vpx_codec_destroy(&ctx); } +// Encode a few frames, with realtime mode and tile_rows set to 1, +// with row-mt enabled. This triggers an assertion in vp9_bitstream.c (in +// function write_modes()), as in the issue:42105459. In this test it happens on +// very first encoded frame since lag_in_frames = 0. Issue is due to enabling +// TILE_ROWS: passes if tile_rows is disabled (set to 0), or if height is above +// 64 (so both row-tiles are non-empty). 
+TEST(EncodeAPI, DISABLED_Buganizer442105459) { + // Initialize VP9 encoder interface + vpx_codec_iface_t *iface = vpx_codec_vp9_cx(); + // Get default encoder configuration + vpx_codec_enc_cfg_t cfg; + ASSERT_EQ(vpx_codec_enc_config_default(iface, &cfg, 0), VPX_CODEC_OK); + // Configure encoder + cfg.g_w = 946u; + cfg.g_h = 64u; + cfg.g_threads = 1; + cfg.g_profile = 0; + cfg.g_bit_depth = VPX_BITS_8; + // Rate control targeting deeper encoding paths + cfg.rc_target_bitrate = 100; + cfg.rc_min_quantizer = 0; + cfg.rc_max_quantizer = 0; + cfg.rc_end_usage = VPX_VBR; + cfg.ss_number_layers = 1; + cfg.g_lag_in_frames = 0; + // Initialize encoder context + vpx_codec_ctx_t ctx; + ASSERT_EQ(vpx_codec_enc_init(&ctx, iface, &cfg, 0), VPX_CODEC_OK); + // Set control parameters + vpx_codec_control_(&ctx, VP8E_SET_CPUUSED, -5); + vpx_codec_control_(&ctx, VP9E_SET_TILE_ROWS, 1); + vpx_codec_control_(&ctx, VP9E_SET_ROW_MT, 1); + // Image format selection + vpx_img_fmt_t img_fmt = VPX_IMG_FMT_I420; + // Allocate image with varied alignment + vpx_image_t *img = vpx_img_alloc(nullptr, img_fmt, cfg.g_w, cfg.g_h, 1); + // Encode with dynamic configuration changes + int num_frames = 2; + // Per-frame constants captured from the original run (indices consumed per + // frame) + const unsigned long frame_pts_mul[] = { 33333UL, 33333UL }; + const unsigned long frame_durations[] = { 33333UL, 33333UL }; + const vpx_enc_deadline_t frame_deadlines[] = { VPX_DL_REALTIME, + VPX_DL_REALTIME }; + for (int frame = 0; frame < num_frames; frame++) { + // Encode frame + vpx_codec_pts_t pts = frame * frame_pts_mul[frame]; + unsigned long duration = frame_durations[frame]; + vpx_enc_deadline_t deadline = frame_deadlines[frame]; + ASSERT_EQ(vpx_codec_encode(&ctx, img, pts, duration, /*flags*/ 0, deadline), + VPX_CODEC_OK); + } + // Flush encoder. 
+ ASSERT_EQ(vpx_codec_encode(&ctx, NULL, 0, 0, 0, VPX_DL_REALTIME), 0); + // Get remaining data + vpx_codec_iter_t iter = NULL; + while (vpx_codec_get_cx_data(&ctx, &iter) != NULL) { + // Process remaining packets + } + vpx_img_free(img); + vpx_codec_destroy(&ctx); +} + #if CONFIG_VP9_HIGHBITDEPTH TEST(EncodeAPI, Buganizer329674887RowMT0BitDepth12) { VP9Encoder encoder(8, 0, VPX_BITS_12, VPX_IMG_FMT_I444); diff --git a/media/libvpx/libvpx/vp8/vp8_cx_iface.c b/media/libvpx/libvpx/vp8/vp8_cx_iface.c @@ -1022,19 +1022,10 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, res = image2yuvconfig(img, &sd); - if (sd.y_width != ctx->cfg.g_w || sd.y_height != ctx->cfg.g_h) { - /* from vpx_encoder.h for g_w/g_h: - "Note that the frames passed as input to the encoder must have this - resolution" - */ - ctx->base.err_detail = "Invalid input frame resolution"; - res = VPX_CODEC_INVALID_PARAM; - } else { - if (vp8_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags, - &sd, dst_time_stamp, dst_end_time_stamp)) { - VP8_COMP *cpi = (VP8_COMP *)ctx->cpi; - res = update_error_state(ctx, &cpi->common.error); - } + if (vp8_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags, &sd, + dst_time_stamp, dst_end_time_stamp)) { + VP8_COMP *cpi = (VP8_COMP *)ctx->cpi; + res = update_error_state(ctx, &cpi->common.error); } /* reset for next frame */ diff --git a/media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl b/media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -206,13 +206,13 @@ if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") { if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_convolve12_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel12 *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd"; - specialize qw/vpx_highbd_convolve12_vert ssse3 avx2/; + specialize qw/vpx_highbd_convolve12_vert ssse3 avx2 neon/; add_proto qw/void 
vpx_highbd_convolve12_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel12 *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd"; - specialize qw/vpx_highbd_convolve12_horiz ssse3 avx2/; + specialize qw/vpx_highbd_convolve12_horiz ssse3 avx2 neon/; add_proto qw/void vpx_highbd_convolve12/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel12 *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd"; - specialize qw/vpx_highbd_convolve12 ssse3 avx2/; + specialize qw/vpx_highbd_convolve12 ssse3 avx2 neon/; } } diff --git a/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_highbd_temporal_filter_neon.c b/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_highbd_temporal_filter_neon.c @@ -14,6 +14,8 @@ #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vpx/vpx_integer.h" +#include "vpx_dsp/arm/mem_neon.h" +#include "vpx_dsp/arm/transpose_neon.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_temporal_filter.h" #include "vp9/encoder/vp9_temporal_filter_constants.h" @@ -870,3 +872,205 @@ void vp9_highbd_apply_temporal_filter_neon( strength, blk_fw, use_whole_blk, u_accum, u_count, v_accum, v_count, y_dist_ptr, u_dist_ptr, v_dist_ptr); } + +static INLINE uint16x8_t highbd_convolve12_8( + const int16x8_t s0, const int16x8_t s1, const int16x8_t s2, + const int16x8_t s3, const int16x8_t s4, const int16x8_t s5, + const int16x8_t s6, const int16x8_t s7, const int16x8_t s8, + const int16x8_t s9, const int16x8_t sA, const int16x8_t sB, + const int16x8_t filter_0_7, const int16x4_t filter_8_11, uint16x8_t max) { + const int16x4_t filter_0_3 = vget_low_s16(filter_0_7); + const int16x4_t filter_4_7 = vget_high_s16(filter_0_7); + + int32x4_t sum_lo = vmull_lane_s16(vget_low_s16(s0), filter_0_3, 0); + sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(s1), filter_0_3, 1); + sum_lo = vmlal_lane_s16(sum_lo, 
vget_low_s16(s2), filter_0_3, 2); + sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(s3), filter_0_3, 3); + sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(s4), filter_4_7, 0); + sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(s5), filter_4_7, 1); + sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(s6), filter_4_7, 2); + sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(s7), filter_4_7, 3); + sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(s8), filter_8_11, 0); + sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(s9), filter_8_11, 1); + sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(sA), filter_8_11, 2); + sum_lo = vmlal_lane_s16(sum_lo, vget_low_s16(sB), filter_8_11, 3); + + int32x4_t sum_hi = vmull_lane_s16(vget_high_s16(s0), filter_0_3, 0); + sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(s1), filter_0_3, 1); + sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(s2), filter_0_3, 2); + sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(s3), filter_0_3, 3); + sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(s4), filter_4_7, 0); + sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(s5), filter_4_7, 1); + sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(s6), filter_4_7, 2); + sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(s7), filter_4_7, 3); + sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(s8), filter_8_11, 0); + sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(s9), filter_8_11, 1); + sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(sA), filter_8_11, 2); + sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(sB), filter_8_11, 3); + + uint16x4_t sum_lo_s16 = vqrshrun_n_s32(sum_lo, FILTER_BITS); + uint16x4_t sum_hi_s16 = vqrshrun_n_s32(sum_hi, FILTER_BITS); + + uint16x8_t sum = vcombine_u16(sum_lo_s16, sum_hi_s16); + return vminq_u16(sum, max); +} + +void vpx_highbd_convolve12_horiz_neon(const uint16_t *src, ptrdiff_t src_stride, + uint16_t *dst, ptrdiff_t dst_stride, + const InterpKernel12 *filter, int x0_q4, + int x_step_q4, int y0_q4, int y_step_q4, + int w, int h, int bd) { + // Scaling not supported by Neon implementation. 
+ if (x_step_q4 != 16) { + vpx_highbd_convolve12_horiz_c(src, src_stride, dst, dst_stride, filter, + x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); + return; + } + + assert(w == 32 || w == 16 || w == 8); + assert(h % 4 == 0); + + const int16x8_t filter_0_7 = vld1q_s16(filter[x0_q4]); + const int16x4_t filter_8_11 = vld1_s16(filter[x0_q4] + 8); + const uint16x8_t max = vdupq_n_u16((1 << bd) - 1); + + src -= MAX_FILTER_TAP / 2 - 1; + + do { + const int16_t *s = (const int16_t *)src; + uint16_t *d = dst; + int width = w; + + do { + int16x8_t s0[12], s1[12]; + load_s16_8x12(s + 0 * src_stride, 1, &s0[0], &s0[1], &s0[2], &s0[3], + &s0[4], &s0[5], &s0[6], &s0[7], &s0[8], &s0[9], &s0[10], + &s0[11]); + load_s16_8x12(s + 1 * src_stride, 1, &s1[0], &s1[1], &s1[2], &s1[3], + &s1[4], &s1[5], &s1[6], &s1[7], &s1[8], &s1[9], &s1[10], + &s1[11]); + + uint16x8_t d0 = highbd_convolve12_8( + s0[0], s0[1], s0[2], s0[3], s0[4], s0[5], s0[6], s0[7], s0[8], s0[9], + s0[10], s0[11], filter_0_7, filter_8_11, max); + uint16x8_t d1 = highbd_convolve12_8( + s1[0], s1[1], s1[2], s1[3], s1[4], s1[5], s1[6], s1[7], s1[8], s1[9], + s1[10], s1[11], filter_0_7, filter_8_11, max); + + vst1q_u16(d + 0 * dst_stride, d0); + vst1q_u16(d + 1 * dst_stride, d1); + + s += 8; + d += 8; + width -= 8; + } while (width != 0); + src += 2 * src_stride; + dst += 2 * dst_stride; + h -= 2; + } while (h != 0); +} + +void vpx_highbd_convolve12_vert_neon(const uint16_t *src, ptrdiff_t src_stride, + uint16_t *dst, ptrdiff_t dst_stride, + const InterpKernel12 *filter, int x0_q4, + int x_step_q4, int y0_q4, int y_step_q4, + int w, int h, int bd) { + // Scaling not supported by Neon implementation. 
+ if (y_step_q4 != 16) { + vpx_highbd_convolve12_vert_c(src, src_stride, dst, dst_stride, filter, + x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); + return; + } + + assert(w == 32 || w == 16 || w == 8); + assert(h == 32 || h == 16 || h == 8); + + const int16x8_t filter_0_7 = vld1q_s16(filter[y0_q4]); + const int16x4_t filter_8_11 = vld1_s16(filter[y0_q4] + 8); + const uint16x8_t max = vdupq_n_u16((1 << bd) - 1); + + src -= src_stride * (MAX_FILTER_TAP / 2 - 1); + + do { + const int16_t *s = (const int16_t *)src; + uint16_t *d = dst; + int height = h; + + int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA; + load_s16_8x11(s, src_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7, &s8, + &s9, &sA); + s += 11 * src_stride; + + do { + int16x8_t sB, sC, sD, sE; + load_s16_8x4(s, src_stride, &sB, &sC, &sD, &sE); + + uint16x8_t d0 = + highbd_convolve12_8(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, + filter_0_7, filter_8_11, max); + uint16x8_t d1 = + highbd_convolve12_8(s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, + filter_0_7, filter_8_11, max); + uint16x8_t d2 = + highbd_convolve12_8(s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, + filter_0_7, filter_8_11, max); + uint16x8_t d3 = + highbd_convolve12_8(s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, + filter_0_7, filter_8_11, max); + + store_u16_8x4(d, dst_stride, d0, d1, d2, d3); + + s0 = s4; + s1 = s5; + s2 = s6; + s3 = s7; + s4 = s8; + s5 = s9; + s6 = sA; + s7 = sB; + s8 = sC; + s9 = sD; + sA = sE; + + s += 4 * src_stride; + d += 4 * dst_stride; + height -= 4; + } while (height != 0); + src += 8; + dst += 8; + w -= 8; + } while (w != 0); +} + +void vpx_highbd_convolve12_neon(const uint16_t *src, ptrdiff_t src_stride, + uint16_t *dst, ptrdiff_t dst_stride, + const InterpKernel12 *filter, int x0_q4, + int x_step_q4, int y0_q4, int y_step_q4, int w, + int h, int bd) { + // Scaling not supported by Neon implementation. 
+ if (x_step_q4 != 16 || y_step_q4 != 16) { + vpx_highbd_convolve12_c(src, src_stride, dst, dst_stride, filter, x0_q4, + x_step_q4, y0_q4, y_step_q4, w, h, bd); + return; + } + + assert(w == 32 || w == 16 || w == 8); + assert(h == 32 || h == 16 || h == 8); + + DECLARE_ALIGNED(32, uint16_t, im_block[BW * (BH + MAX_FILTER_TAP)]); + + const int im_stride = BW; + // Account for the vertical pass needing MAX_FILTER_TAP / 2 - 1 lines prior + // and MAX_FILTER_TAP / 2 lines post. (+1 to make total divisible by 2.) + const int im_height = h + MAX_FILTER_TAP; + const ptrdiff_t border_offset = MAX_FILTER_TAP / 2 - 1; + + // Filter starting border_offset rows up. + vpx_highbd_convolve12_horiz_neon( + src - src_stride * border_offset, src_stride, im_block, im_stride, filter, + x0_q4, x_step_q4, y0_q4, y_step_q4, w, im_height, bd); + + vpx_highbd_convolve12_vert_neon(im_block + im_stride * border_offset, + im_stride, dst, dst_stride, filter, x0_q4, + x_step_q4, y0_q4, y_step_q4, w, h, bd); +} diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_ratectrl.c b/media/libvpx/libvpx/vp9/encoder/vp9_ratectrl.c @@ -856,15 +856,19 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, frame_type, i, correction_factor, cm->bit_depth); } + int diff_bits = (int)VPXMIN( + VPXMAX(((int64_t)target_bits_per_mb - (int64_t)bits_per_mb_at_this_q), + -INT_MAX), + INT_MAX); if (bits_per_mb_at_this_q <= target_bits_per_mb) { - if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error) + if (diff_bits <= last_error) q = i; else q = i - 1; break; } else { - last_error = bits_per_mb_at_this_q - target_bits_per_mb; + last_error = -diff_bits; } } while (++i <= active_worst_quality); diff --git a/media/libvpx/libvpx/vp9/vp9_cx_iface.c b/media/libvpx/libvpx/vp9/vp9_cx_iface.c @@ -1478,22 +1478,13 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, timebase_units_to_ticks(timebase_in_ts, pts_end); res = image2yuvconfig(img, &sd); - if (sd.y_width != ctx->cfg.g_w || 
sd.y_height != ctx->cfg.g_h) { - /* from vpx_encoder.h for g_w/g_h: - "Note that the frames passed as input to the encoder must have this - resolution" - */ - ctx->base.err_detail = "Invalid input frame resolution"; - res = VPX_CODEC_INVALID_PARAM; - } else { - // Store the original flags in to the frame buffer. Will extract the - // key frame flag when we actually encode this frame. - if (vp9_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd, + // Store the original flags in to the frame buffer. Will extract the + // key frame flag when we actually encode this frame. + if (vp9_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd, dst_time_stamp, dst_end_time_stamp)) { - res = update_error_state(ctx, &cpi->common.error); - } - ctx->next_frame_flags = 0; + res = update_error_state(ctx, &cpi->common.error); } + ctx->next_frame_flags = 0; } cx_data = ctx->cx_data; diff --git a/media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h b/media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h @@ -646,6 +646,65 @@ static INLINE void load_s16_4x8(const int16_t *s, const ptrdiff_t p, *s7 = vld1_s16(s); } +static INLINE void load_s16_8x11(const int16_t *s, const ptrdiff_t p, + int16x8_t *s0, int16x8_t *s1, int16x8_t *s2, + int16x8_t *s3, int16x8_t *s4, int16x8_t *s5, + int16x8_t *s6, int16x8_t *s7, int16x8_t *s8, + int16x8_t *s9, int16x8_t *s10) { + *s0 = vld1q_s16(s); + s += p; + *s1 = vld1q_s16(s); + s += p; + *s2 = vld1q_s16(s); + s += p; + *s3 = vld1q_s16(s); + s += p; + *s4 = vld1q_s16(s); + s += p; + *s5 = vld1q_s16(s); + s += p; + *s6 = vld1q_s16(s); + s += p; + *s7 = vld1q_s16(s); + s += p; + *s8 = vld1q_s16(s); + s += p; + *s9 = vld1q_s16(s); + s += p; + *s10 = vld1q_s16(s); +} + +static INLINE void load_s16_8x12(const int16_t *s, const ptrdiff_t p, + int16x8_t *s0, int16x8_t *s1, int16x8_t *s2, + int16x8_t *s3, int16x8_t *s4, int16x8_t *s5, + int16x8_t *s6, int16x8_t *s7, int16x8_t *s8, + int16x8_t *s9, int16x8_t *s10, + int16x8_t *s11) { + *s0 = vld1q_s16(s); + s += p; + *s1 = 
vld1q_s16(s); + s += p; + *s2 = vld1q_s16(s); + s += p; + *s3 = vld1q_s16(s); + s += p; + *s4 = vld1q_s16(s); + s += p; + *s5 = vld1q_s16(s); + s += p; + *s6 = vld1q_s16(s); + s += p; + *s7 = vld1q_s16(s); + s += p; + *s8 = vld1q_s16(s); + s += p; + *s9 = vld1q_s16(s); + s += p; + *s10 = vld1q_s16(s); + s += p; + *s11 = vld1q_s16(s); +} + static INLINE void load_s16_8x8(const int16_t *s, const ptrdiff_t p, int16x8_t *s0, int16x8_t *s1, int16x8_t *s2, int16x8_t *s3, int16x8_t *s4, int16x8_t *s5, diff --git a/media/libvpx/libvpx/vpx_ports/aarch64_cpudetect.c b/media/libvpx/libvpx/vpx_ports/aarch64_cpudetect.c @@ -16,7 +16,7 @@ #include <sys/sysctl.h> #endif -#if !CONFIG_RUNTIME_CPU_DETECT || defined(__OpenBSD__) +#if !CONFIG_RUNTIME_CPU_DETECT static int arm_get_cpu_caps(void) { // This function should actually be a no-op. There is no way to adjust any of @@ -29,7 +29,7 @@ static int arm_get_cpu_caps(void) { return flags; } -#elif defined(__APPLE__) // end !CONFIG_RUNTIME_CPU_DETECT || defined(__OpenBSD__) +#elif defined(__APPLE__) // end !CONFIG_RUNTIME_CPU_DETECT // sysctlbyname() parameter documentation for instruction set characteristics: // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics diff --git a/media/libvpx/moz.yaml b/media/libvpx/moz.yaml @@ -20,11 +20,11 @@ origin: # Human-readable identifier for this version/release # Generally "version NNN", "tag SSS", "bookmark SSS" - release: 84e8b23bd7d27b4d2cd6135744986037cd529a9d (Thu Sep 11 17:33:18 2025). + release: f32182fc9455d7979236dffca35c8baf232a74ec (Mon Oct 06 20:24:46 2025). # Revision to pull in # Must be a long or short commit SHA (long preferred) - revision: 84e8b23bd7d27b4d2cd6135744986037cd529a9d + revision: f32182fc9455d7979236dffca35c8baf232a74ec # The package's license, where possible using the mnemonic from # https://spdx.org/licenses/