[ tor-browser ].git.dasho

commit 5ce1cf127034ed8ac82ae2910cd2865aae2aedcc
parent eb8e7f929baf5979ed6defb9986b3f45c4a9e747
Author: Updatebot <updatebot@mozilla.com>
Date:   Fri, 21 Nov 2025 21:28:59 +0000

Bug 1999339 - Update libvpx to 9a7674e1a83d1261a49776c8794b87c9bccc85d7 r=chunmin

Differential Revision: https://phabricator.services.mozilla.com/D272057

Diffstat:
M media/libvpx/libvpx/examples/vpx_dec_fuzzer.cc  | 43 ++++++++++++++++++++++++++++++++++++-------
M media/libvpx/libvpx/test/convolve_test.cc  | 35 +++++++++++++++++++++++++++++++++++
M media/libvpx/libvpx/test/encode_api_test.cc  | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
A media/libvpx/libvpx/third_party/nalloc/LICENSE  | 21 +++++++++++++++++++++
A media/libvpx/libvpx/third_party/nalloc/README.libvpx  | 11 +++++++++++
A media/libvpx/libvpx/third_party/nalloc/nalloc.h  | 330 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M media/libvpx/libvpx/vp8/vp8_cx_iface.c  | 17 ++++-------------
M media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl  | 6 +++---
A media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_highbd_temporal_filter_sve2.c  | 285 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_temporal_filter_neon_dotprod.c  | 45 ++++++++++-----------------------------------
M media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_temporal_filter_neon_i8mm.c  | 45 ++++++++++-----------------------------------
M media/libvpx/libvpx/vp9/encoder/vp9_encoder.c  | 9 ++++++++-
M media/libvpx/libvpx/vp9/vp9_cx_iface.c  | 19 +++++--------------
M media/libvpx/libvpx/vp9/vp9cx.mk  | 2 ++
M media/libvpx/libvpx/vpx_dsp/arm/highbd_vpx_convolve8_sve2.c  | 116 +++++++++++++++++++++----------------------------------------------------------
M media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h  | 32 ++++++++++++++++++++++++++++++--
M media/libvpx/libvpx/vpx_dsp/arm/transpose_neon.h  | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_dotprod.c  | 87 ++++++++++++++++++-------------------------------------------------------------
M media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_i8mm.c  | 88 ++++++++++++++++++-------------------------------------------------------------
M media/libvpx/libvpx/vpx_dsp/arm/vpx_neon_sve2_bridge.h  | 14 ++++++++++++++
M media/libvpx/libvpx/vpx_dsp/arm/vpx_neon_sve_bridge.h  | 5 +++++
M media/libvpx/libvpx/vpx_ports/aarch64_cpudetect.c  | 4 ++--
M media/libvpx/moz.yaml  | 4 ++--

23 files changed, 1130 insertions(+), 345 deletions(-)
diff --git a/media/libvpx/libvpx/examples/vpx_dec_fuzzer.cc b/media/libvpx/libvpx/examples/vpx_dec_fuzzer.cc
@@ -69,6 +69,7 @@
 #include <algorithm>
 #include <memory>
 
+#include "third_party/nalloc/nalloc.h"
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"
 #include "vpx_ports/mem_ops.h"
@@ -85,25 +86,40 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   if (size <= IVF_FILE_HDR_SZ) {
     return 0;
   }
+  nalloc_init(nullptr);
 
   vpx_codec_ctx_t codec;
   // Set thread count in the range [1, 64].
   const unsigned int threads = (data[IVF_FILE_HDR_SZ] & 0x3f) + 1;
   vpx_codec_dec_cfg_t cfg = { threads, 0, 0 };
-  if (vpx_codec_dec_init(&codec, VPXD_INTERFACE(DECODER), &cfg, 0)) {
+  vpx_codec_flags_t flags = 0;
+  if ((data[IVF_FILE_HDR_SZ] & 0x40) != 0) {
+    flags |= VPX_CODEC_USE_POSTPROC;
+  }
+  vpx_codec_err_t err =
+      vpx_codec_dec_init(&codec, VPXD_INTERFACE(DECODER), &cfg, flags);
+  if (err == VPX_CODEC_INCAPABLE) {
+    // vpx_codec_dec_init may fail with VPX_CODEC_USE_POSTPROC
+    // if the library is configured with --disable-postproc.
+    flags = 0;
+    if (vpx_codec_dec_init(&codec, VPXD_INTERFACE(DECODER), &cfg, flags)) {
+      return 0;
+    }
+  } else if (err != 0) {
     return 0;
   }
 
+  nalloc_start(data, size);
+
   if (threads > 1) {
     const int enable = (data[IVF_FILE_HDR_SZ] & 0xa0) != 0;
-    const vpx_codec_err_t err =
-        vpx_codec_control(&codec, VP9D_SET_LOOP_FILTER_OPT, enable);
-    static_cast<void>(err);
+    err = vpx_codec_control(&codec, VP9D_SET_LOOP_FILTER_OPT, enable);
   }
 
   data += IVF_FILE_HDR_SZ;
   size -= IVF_FILE_HDR_SZ;
 
+  int frame_cnt = 0;
   while (size > IVF_FRAME_HDR_SZ) {
     size_t frame_size = mem_get_le32(data);
     size -= IVF_FRAME_HDR_SZ;
@@ -112,9 +128,20 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
 
     vpx_codec_stream_info_t stream_info;
     stream_info.sz = sizeof(stream_info);
-    vpx_codec_err_t err = vpx_codec_peek_stream_info(VPXD_INTERFACE(DECODER),
-                                                     data, size, &stream_info);
-    static_cast<void>(err);
+    err = vpx_codec_peek_stream_info(VPXD_INTERFACE(DECODER), data, size,
+                                     &stream_info);
+
+    ++frame_cnt;
+    if (flags & VPX_CODEC_USE_POSTPROC) {
+      if (frame_cnt % 16 == 4) {
+        vp8_postproc_cfg_t pp = { 0, 0, 0 };
+        if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp)) goto fail;
+      } else if (frame_cnt % 16 == 12) {
+        vp8_postproc_cfg_t pp = { VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE, 4,
+                                  0 };
+        if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp)) goto fail;
+      }
+    }
 
     err = vpx_codec_decode(&codec, data, frame_size, nullptr, 0);
     static_cast<void>(err);
@@ -125,6 +152,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
     data += frame_size;
     size -= frame_size;
   }
+fail:
   vpx_codec_destroy(&codec);
+  nalloc_end();
   return 0;
 }
diff --git a/media/libvpx/libvpx/test/convolve_test.cc b/media/libvpx/libvpx/test/convolve_test.cc
@@ -1816,6 +1816,18 @@ WRAP12TAP(convolve12_vert_neon, 12)
 WRAP12TAP(convolve12_neon, 12)
 #endif  // HAVE_NEON
 
+#if HAVE_SVE2
+WRAP12TAP(convolve12_horiz_sve2, 8)
+WRAP12TAP(convolve12_vert_sve2, 8)
+WRAP12TAP(convolve12_sve2, 8)
+WRAP12TAP(convolve12_horiz_sve2, 10)
+WRAP12TAP(convolve12_vert_sve2, 10)
+WRAP12TAP(convolve12_sve2, 10)
+WRAP12TAP(convolve12_horiz_sve2, 12)
+WRAP12TAP(convolve12_vert_sve2, 12)
+WRAP12TAP(convolve12_sve2, 12)
+#endif  // HAVE_SVE2
+
 WRAP12TAP(convolve12_horiz_c, 8)
 WRAP12TAP(convolve12_vert_c, 8)
 WRAP12TAP(convolve12_c, 8)
@@ -2180,6 +2192,29 @@ const ConvolveParam kArrayConvolve_sve2[] = { ALL_SIZES(convolve8_sve2),
                                               ALL_SIZES(convolve12_sve2) };
 INSTANTIATE_TEST_SUITE_P(SVE2, ConvolveTest,
                          ::testing::ValuesIn(kArrayConvolve_sve2));
+
+#if !CONFIG_REALTIME_ONLY && CONFIG_VP9_ENCODER
+const ConvolveFunctions12Tap convolve12tap_8bit_sve2(
+    wrap_convolve12_horiz_sve2_8, wrap_convolve12_vert_sve2_8,
+    wrap_convolve12_sve2_8, 8);
+
+const ConvolveFunctions12Tap convolve12tap_10bit_sve2(
+    wrap_convolve12_horiz_sve2_10, wrap_convolve12_vert_sve2_10,
+    wrap_convolve12_sve2_10, 10);
+
+const ConvolveFunctions12Tap convolve12tap_12bit_sve2(
+    wrap_convolve12_horiz_sve2_12, wrap_convolve12_vert_sve2_12,
+    wrap_convolve12_sve2_12, 12);
+
+const Convolve12TapParam kArrayConvolve12Tap_sve2[] = {
+  ALL_SIZES_12TAP(convolve12tap_8bit_sve2),
+  ALL_SIZES_12TAP(convolve12tap_10bit_sve2),
+  ALL_SIZES_12TAP(convolve12tap_12bit_sve2)
+};
+
+INSTANTIATE_TEST_SUITE_P(SVE2, ConvolveTest12Tap,
+                         ::testing::ValuesIn(kArrayConvolve12Tap_sve2));
+#endif  // !CONFIG_REALTIME_ONLY && CONFIG_VP9_ENCODER
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SVE2
 
diff --git a/media/libvpx/libvpx/test/encode_api_test.cc b/media/libvpx/libvpx/test/encode_api_test.cc
@@ -1786,11 +1786,11 @@ TEST(EncodeAPI, Buganizer441668134) {
 
 // Encode a few frames, with realtime mode and tile_rows set to 1,
 // with row-mt enabled. This triggers an assertion in vp9_bitstream.c (in
-// function write_modes()), as in the issue:42105459. In this test it happens on
-// very first encoded frame since lag_in_frames = 0. Issue is due to enabling
-// TILE_ROWS: passes if tile_rows is disabled (set to 0), or if height is above
-// 64 (so both row-tiles are non-empty).
-TEST(EncodeAPI, DISABLED_Buganizer442105459) {
+// function write_modes()), as in the issue:442105459. In this test it happens
+// on very first encoded frame since lag_in_frames = 0. Issue is due to enabling
+// TILE_ROWS, with number of tile_rows more than the number of superblocks.
+// This test sets 2 tile_rows with height corresponding to 1 superblock (sb).
+TEST(EncodeAPI, Buganizer442105459_2RowTiles) {
   // Initialize VP9 encoder interface
   vpx_codec_iface_t *iface = vpx_codec_vp9_cx();
   // Get default encoder configuration
@@ -1798,7 +1798,7 @@ TEST(EncodeAPI, DISABLED_Buganizer442105459) {
   ASSERT_EQ(vpx_codec_enc_config_default(iface, &cfg, 0), VPX_CODEC_OK);
   // Configure encoder
   cfg.g_w = 946u;
-  cfg.g_h = 64u;
+  cfg.g_h = 64u;  // 1 sb row, 2 tile_rows set below.
   cfg.g_threads = 1;
   cfg.g_profile = 0;
   cfg.g_bit_depth = VPX_BITS_8;
@@ -1815,16 +1815,99 @@ TEST(EncodeAPI, DISABLED_Buganizer442105459) {
   // Set control parameters
   vpx_codec_control_(&ctx, VP8E_SET_CPUUSED, -5);
   vpx_codec_control_(&ctx, VP9E_SET_TILE_ROWS, 1);
+  vpx_codec_control_(&ctx, VP9E_SET_TILE_COLUMNS, 1);
   vpx_codec_control_(&ctx, VP9E_SET_ROW_MT, 1);
   // Image format selection
   vpx_img_fmt_t img_fmt = VPX_IMG_FMT_I420;
   // Allocate image with varied alignment
   vpx_image_t *img = vpx_img_alloc(nullptr, img_fmt, cfg.g_w, cfg.g_h, 1);
+  for (unsigned int y = 0; y < img->d_h; y++) {
+    for (unsigned int x = 0; x < img->d_w; x++) {
+      img->planes[0][y * img->stride[0] + x] = ((x ^ y) * 127) & 0xFF;
+    }
+  }
+  const unsigned int uv_height = (img->d_h + 1) >> 1;
+  for (int i : { VPX_PLANE_U, VPX_PLANE_V }) {
+    memset(img->planes[i], 0, img->stride[i] * uv_height);
+  }
+  // Encode with dynamic configuration changes
+  int num_frames = 2;
+  // Per-frame constants captured from the original run (indices consumed per
+  // frame)
+  const vpx_codec_pts_t frame_pts_mul[] = { 33333UL, 33333UL };
+  const unsigned long frame_durations[] = { 33333UL, 33333UL };
+  const vpx_enc_deadline_t frame_deadlines[] = { VPX_DL_REALTIME,
+                                                 VPX_DL_REALTIME };
+  for (int frame = 0; frame < num_frames; frame++) {
+    // Encode frame
+    vpx_codec_pts_t pts = frame * frame_pts_mul[frame];
+    unsigned long duration = frame_durations[frame];
+    vpx_enc_deadline_t deadline = frame_deadlines[frame];
+    ASSERT_EQ(vpx_codec_encode(&ctx, img, pts, duration, /*flags*/ 0, deadline),
+              VPX_CODEC_OK);
+  }
+  // Flush encoder.
+  ASSERT_EQ(vpx_codec_encode(&ctx, nullptr, 0, 0, 0, VPX_DL_REALTIME), 0);
+  // Get remaining data
+  vpx_codec_iter_t iter = nullptr;
+  while (vpx_codec_get_cx_data(&ctx, &iter) != nullptr) {
+    // Process remaining packets
+  }
+  vpx_img_free(img);
+  vpx_codec_destroy(&ctx);
+}
+
+// Encode a few frames, with realtime mode and tile_rows set to 1,
+// with row-mt enabled. This triggers an assertion in vp9_bitstream.c (in
+// function write_modes()), as in the issue:442105459. In this test it happens
+// on very first encoded frame since lag_in_frames = 0. Issue is due to enabling
+// TILE_ROWS, with number of tile_rows more than the number of superblocks.
+// This test sets 4 tile_rows with height corresponding to 3 superblocks.
+TEST(EncodeAPI, Buganizer442105459_4RowTiles) {
+  // Initialize VP9 encoder interface
+  vpx_codec_iface_t *iface = vpx_codec_vp9_cx();
+  // Get default encoder configuration
+  vpx_codec_enc_cfg_t cfg;
+  ASSERT_EQ(vpx_codec_enc_config_default(iface, &cfg, 0), VPX_CODEC_OK);
+  // Configure encoder
+  cfg.g_w = 946u;
+  cfg.g_h = 192u;  // 3 sb rows, 4 tile_rows set below.
+  cfg.g_threads = 1;
+  cfg.g_profile = 0;
+  cfg.g_bit_depth = VPX_BITS_8;
+  // Rate control targeting deeper encoding paths
+  cfg.rc_target_bitrate = 100;
+  cfg.rc_min_quantizer = 0;
+  cfg.rc_max_quantizer = 0;
+  cfg.rc_end_usage = VPX_VBR;
+  cfg.ss_number_layers = 1;
+  cfg.g_lag_in_frames = 0;
+  // Initialize encoder context
+  vpx_codec_ctx_t ctx;
+  ASSERT_EQ(vpx_codec_enc_init(&ctx, iface, &cfg, 0), VPX_CODEC_OK);
+  // Set control parameters
+  vpx_codec_control_(&ctx, VP8E_SET_CPUUSED, -5);
+  vpx_codec_control_(&ctx, VP9E_SET_TILE_ROWS, 2);
+  vpx_codec_control_(&ctx, VP9E_SET_TILE_COLUMNS, 1);
+  vpx_codec_control_(&ctx, VP9E_SET_ROW_MT, 1);
+  // Image format selection
+  vpx_img_fmt_t img_fmt = VPX_IMG_FMT_I420;
+  // Allocate image with varied alignment
+  vpx_image_t *img = vpx_img_alloc(nullptr, img_fmt, cfg.g_w, cfg.g_h, 1);
+  for (unsigned int y = 0; y < img->d_h; y++) {
+    for (unsigned int x = 0; x < img->d_w; x++) {
+      img->planes[0][y * img->stride[0] + x] = ((x ^ y) * 127) & 0xFF;
+    }
+  }
+  const unsigned int uv_height = (img->d_h + 1) >> 1;
+  for (int i : { VPX_PLANE_U, VPX_PLANE_V }) {
+    memset(img->planes[i], 0, img->stride[i] * uv_height);
+  }
   // Encode with dynamic configuration changes
   int num_frames = 2;
   // Per-frame constants captured from the original run (indices consumed per
   // frame)
-  const unsigned long frame_pts_mul[] = { 33333UL, 33333UL };
+  const vpx_codec_pts_t frame_pts_mul[] = { 33333UL, 33333UL };
   const unsigned long frame_durations[] = { 33333UL, 33333UL };
   const vpx_enc_deadline_t frame_deadlines[] = { VPX_DL_REALTIME,
                                                  VPX_DL_REALTIME };
@@ -1837,10 +1920,10 @@ TEST(EncodeAPI, DISABLED_Buganizer442105459) {
               VPX_CODEC_OK);
   }
   // Flush encoder.
-  ASSERT_EQ(vpx_codec_encode(&ctx, NULL, 0, 0, 0, VPX_DL_REALTIME), 0);
+  ASSERT_EQ(vpx_codec_encode(&ctx, nullptr, 0, 0, 0, VPX_DL_REALTIME), 0);
   // Get remaining data
-  vpx_codec_iter_t iter = NULL;
-  while (vpx_codec_get_cx_data(&ctx, &iter) != NULL) {
+  vpx_codec_iter_t iter = nullptr;
+  while (vpx_codec_get_cx_data(&ctx, &iter) != nullptr) {
     // Process remaining packets
   }
   vpx_img_free(img);
diff --git a/media/libvpx/libvpx/third_party/nalloc/LICENSE b/media/libvpx/libvpx/third_party/nalloc/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Catena cyber
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/media/libvpx/libvpx/third_party/nalloc/README.libvpx b/media/libvpx/libvpx/third_party/nalloc/README.libvpx
@@ -0,0 +1,11 @@
+Name: nalloc
+URL: https://github.com/catenacyber/nallocfuzz
+Version: dc351a94bbded5ede5b7550d6d08e78e0cc6dcef
+License: MIT
+License File: LICENSE
+
+Description:
+Nalloc is a tool to inject allocation failures while fuzzing.
+
+Local Modifications:
+None
diff --git a/media/libvpx/libvpx/third_party/nalloc/nalloc.h b/media/libvpx/libvpx/third_party/nalloc/nalloc.h
@@ -0,0 +1,330 @@
+/*
+ MIT License
+
+ Copyright (c) 2025 Catena cyber
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+#ifndef NALLOC_H_
+#define NALLOC_H_
+
+#if defined(__clang__) && defined(__has_feature)
+#if __has_feature(address_sanitizer)
+#define NALLOC_ASAN 1
+#endif
+#endif
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static const uint32_t nalloc_crc32_table[] = {
+  0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b,
+  0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
+  0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 0x4c11db70, 0x48d0c6c7,
+  0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
+  0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3,
+  0x709f7b7a, 0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
+  0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef,
+  0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
+  0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb,
+  0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
+  0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0,
+  0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
+  0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4,
+  0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde,
+  0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08,
+  0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
+  0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc,
+  0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6,
+  0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050,
+  0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
+  0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34,
+  0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
+  0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1,
+  0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53,
+  0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5,
+  0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
+  0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e, 0xf5ee4bb9,
+  0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
+  0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd,
+  0xcda1f604, 0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
+  0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71,
+  0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3,
+  0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2,
+  0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
+  0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e,
+  0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
+  0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a,
+  0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0,
+  0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, 0xe760d676,
+  0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
+  0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662,
+  0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668,
+  0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
+};
+
+// Nallocfuzz data to take a decision
+uint32_t nalloc_random_state = 0;
+__thread unsigned int nalloc_running = 0;
+bool nalloc_initialized = false;
+uint32_t nalloc_runs = 0;
+
+// Nalloc fuzz parameters
+uint32_t nalloc_bitmask = 0xFF;
+bool nalloc_random_bitmask = true;
+uint32_t nalloc_magic = 0x294cee63;
+bool nalloc_verbose = false;
+
+#ifdef NALLOC_ASAN
+extern void __sanitizer_print_stack_trace(void);
+#endif
+
+// Generic init, using env variables to get parameters
+void nalloc_init(const char *prog) {
+  if (nalloc_initialized) {
+    return;
+  }
+  nalloc_initialized = true;
+  char *bitmask = getenv("NALLOC_FREQ");
+  if (bitmask) {
+    int shift = atoi(bitmask);
+    if (shift > 0 && shift < 31) {
+      nalloc_bitmask = 1 << shift;
+      nalloc_random_bitmask = false;
+    } else if (shift == 0) {
+      nalloc_random_bitmask = false;
+      nalloc_bitmask = 0;
+    }
+  } else if (prog == NULL || strstr(prog, "nalloc") == NULL) {
+    nalloc_random_bitmask = false;
+    nalloc_bitmask = 0;
+    return;
+  }
+
+  char *magic = getenv("NALLOC_MAGIC");
+  if (magic) {
+    nalloc_magic = (uint32_t)strtol(magic, NULL, 0);
+  }
+
+  char *verbose = getenv("NALLOC_VERBOSE");
+  if (verbose) {
+    nalloc_verbose = true;
+  }
+}
+
+// add one byte to the CRC
+static inline void nalloc_random_update(uint8_t b) {
+  nalloc_random_state =
+      ((uint32_t)((uint32_t)nalloc_random_state << 8)) ^
+      nalloc_crc32_table[((nalloc_random_state >> 24) ^ b) & 0xFF];
+}
+
+// Start the failure injections, using a buffer as seed
+static int nalloc_start(const uint8_t *data, size_t size) {
+  if (nalloc_random_bitmask) {
+    if (nalloc_random_state & 0x10) {
+      nalloc_bitmask = 0xFFFFFFFF;
+    } else {
+      nalloc_bitmask = 1 << (5 + (nalloc_random_state & 0xF));
+    }
+  } else if (nalloc_bitmask == 0) {
+    // nalloc disabled
+    return 0;
+  }
+  nalloc_random_state = 0;
+  for (size_t i = 0; i < size; i++) {
+    nalloc_random_update(data[i]);
+  }
+  if (__sync_fetch_and_add(&nalloc_running, 1)) {
+    __sync_fetch_and_sub(&nalloc_running, 1);
+    return 0;
+  }
+  nalloc_runs++;
+  return 1;
+}
+
+// Stop the failure injections
+static void nalloc_end() { __sync_fetch_and_sub(&nalloc_running, 1); }
+
+static bool nalloc_backtrace_exclude(size_t size, const char *op) {
+  if (nalloc_verbose) {
+    fprintf(stderr, "failed %s(%zu) \n", op, size);
+#ifdef NALLOC_ASAN
+    __sanitizer_print_stack_trace();
+#endif
+  }
+
+  return false;
+}
+
+//
+static bool nalloc_fail(size_t size, const char *op) {
+  // do not fail before thread init
+  if (nalloc_runs == 0) {
+    return false;
+  }
+  if (__sync_fetch_and_add(&nalloc_running, 1) != 1) {
+    // do not fail allocations outside of fuzzer input
+    // and do not fail inside of this function
+    __sync_fetch_and_sub(&nalloc_running, 1);
+    return false;
+  }
+  nalloc_random_update((uint8_t)size);
+  if (size >= 0x100) {
+    nalloc_random_update((uint8_t)(size >> 8));
+    if (size >= 0x10000) {
+      nalloc_random_update((uint8_t)(size >> 16));
+      // bigger may already fail or oom
+    }
+  }
+  if (((nalloc_random_state ^ nalloc_magic) & nalloc_bitmask) == 0) {
+    if (nalloc_backtrace_exclude(size, op)) {
+      __sync_fetch_and_sub(&nalloc_running, 1);
+      return false;
+    }
+    __sync_fetch_and_sub(&nalloc_running, 1);
+    return true;
+  }
+  __sync_fetch_and_sub(&nalloc_running, 1);
+  return false;
+}
+
+// ASAN interceptor for libc routines
+#ifdef NALLOC_ASAN
+extern void *__interceptor_malloc(size_t);
+extern void *__interceptor_calloc(size_t, size_t);
+extern void *__interceptor_realloc(void *, size_t);
+extern void *__interceptor_reallocarray(void *, size_t, size_t);
+
+extern ssize_t __interceptor_read(int, void *, size_t);
+extern ssize_t __interceptor_write(int, const void *, size_t);
+extern ssize_t __interceptor_recv(int, void *, size_t, int);
+extern ssize_t __interceptor_send(int, const void *, size_t, int);
+
+#define nalloc_malloc(s) __interceptor_malloc(s)
+#define nalloc_calloc(s, n) __interceptor_calloc(s, n)
+#define nalloc_realloc(p, s) __interceptor_realloc(p, s)
+#define nalloc_reallocarray(p, s, n) __interceptor_reallocarray(p, s, n)
+
+#define nalloc_read(f, b, s) __interceptor_read(f, b, s)
+#define nalloc_write(f, b, s) __interceptor_write(f, b, s)
+#define nalloc_recv(f, b, s, x) __interceptor_recv(f, b, s, x)
+#define nalloc_send(f, b, s, x) __interceptor_send(f, b, s, x)
+
+#else
+extern void *__libc_malloc(size_t);
+extern void *__libc_calloc(size_t, size_t);
+extern void *__libc_realloc(void *, size_t);
+extern void *__libc_reallocarray(void *, size_t, size_t);
+
+extern ssize_t __read(int, void *, size_t);
+extern ssize_t __write(int, const void *, size_t);
+extern ssize_t __recv(int, void *, size_t, int);
+extern ssize_t __send(int, const void *, size_t, int);
+
+#define nalloc_malloc(s) __libc_malloc(s)
+#define nalloc_calloc(s, n) __libc_calloc(s, n)
+#define nalloc_realloc(p, s) __libc_realloc(p, s)
+#define nalloc_reallocarray(p, s, n) __libc_reallocarray(p, s, n)
+
+#define nalloc_read(f, b, s) __read(f, b, s)
+#define nalloc_write(f, b, s) __write(f, b, s)
+#define nalloc_recv(f, b, s, x) __recv(f, b, s, x)
+#define nalloc_send(f, b, s, x) __send(f, b, s, x)
+#endif
+
+// nalloc standard function overwrites with pseudo-random failures
+ssize_t read(int fd, void *buf, size_t count) {
+  if (nalloc_fail(count, "read")) {
+    errno = EIO;
+    return -1;
+  }
+  return nalloc_read(fd, buf, count);
+}
+
+ssize_t write(int fd, const void *buf, size_t count) {
+  if (nalloc_fail(count, "write")) {
+    errno = EIO;
+    return -1;
+  }
+  return nalloc_write(fd, buf, count);
+}
+
+ssize_t recv(int fd, void *buf, size_t count, int flags) {
+  if (nalloc_fail(count, "recv")) {
+    errno = EIO;
+    return -1;
+  }
+  return nalloc_recv(fd, buf, count, flags);
+}
+
+ssize_t send(int fd, const void *buf, size_t count, int flags) {
+  if (nalloc_fail(count, "send")) {
+    errno = EIO;
+    return -1;
+  }
+  return nalloc_send(fd, buf, count, flags);
+}
+
+void *calloc(size_t nmemb, size_t size) {
+  if (nalloc_fail(size, "calloc")) {
+    errno = ENOMEM;
+    return NULL;
+  }
+  return nalloc_calloc(nmemb, size);
+}
+
+void *malloc(size_t size) {
+  if (nalloc_fail(size, "malloc")) {
+    errno = ENOMEM;
+    return NULL;
+  }
+  return nalloc_malloc(size);
+}
+
+void *realloc(void *ptr, size_t size) {
+  if (nalloc_fail(size, "realloc")) {
+    errno = ENOMEM;
+    return NULL;
+  }
+  return nalloc_realloc(ptr, size);
+}
+
+void *reallocarray(void *ptr, size_t nmemb, size_t size) {
+  if (nalloc_fail(size, "reallocarray")) {
+    errno = ENOMEM;
+    return NULL;
+  }
+  return nalloc_reallocarray(ptr, nmemb, size);
+}
+
+#ifdef __cplusplus
+}  // extern "C" {
+#endif
+
+#endif  // NALLOC_H_
diff --git a/media/libvpx/libvpx/vp8/vp8_cx_iface.c b/media/libvpx/libvpx/vp8/vp8_cx_iface.c
@@ -1022,19 +1022,10 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
 
       res = image2yuvconfig(img, &sd);
 
-      if (sd.y_width != ctx->cfg.g_w || sd.y_height != ctx->cfg.g_h) {
-        /* from vpx_encoder.h for g_w/g_h:
-           "Note that the frames passed as input to the encoder must have this
-           resolution"
-        */
-        ctx->base.err_detail = "Invalid input frame resolution";
-        res = VPX_CODEC_INVALID_PARAM;
-      } else {
-        if (vp8_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags,
-                                  &sd, dst_time_stamp, dst_end_time_stamp)) {
-          VP8_COMP *cpi = (VP8_COMP *)ctx->cpi;
-          res = update_error_state(ctx, &cpi->common.error);
-        }
+      if (vp8_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags, &sd,
+                                dst_time_stamp, dst_end_time_stamp)) {
+        VP8_COMP *cpi = (VP8_COMP *)ctx->cpi;
+        res = update_error_state(ctx, &cpi->common.error);
       }
 
       /* reset for next frame */
diff --git a/media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl b/media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -206,13 +206,13 @@ if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
 
   if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
     add_proto qw/void vpx_highbd_convolve12_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel12 *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
-    specialize qw/vpx_highbd_convolve12_vert ssse3 avx2 neon/;
+    specialize qw/vpx_highbd_convolve12_vert ssse3 avx2 neon sve2/;
 
     add_proto qw/void vpx_highbd_convolve12_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel12 *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
-    specialize qw/vpx_highbd_convolve12_horiz ssse3 avx2 neon/;
+    specialize qw/vpx_highbd_convolve12_horiz ssse3 avx2 neon sve2/;
 
     add_proto qw/void vpx_highbd_convolve12/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel12 *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
-    specialize qw/vpx_highbd_convolve12 ssse3 avx2 neon/;
+    specialize qw/vpx_highbd_convolve12 ssse3 avx2 neon sve2/;
   }
 }
 
diff --git a/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_highbd_temporal_filter_sve2.c b/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_highbd_temporal_filter_sve2.c
@@ -0,0 +1,285 @@
+/*
+ *  Copyright (c) 2025 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <arm_neon_sve_bridge.h>
+#include <arm_sve.h>
+#include <assert.h>
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/arm/mem_neon.h"
+#include "vpx_dsp/arm/transpose_neon.h"
+#include "vp9/encoder/vp9_temporal_filter.h"
+#include "vpx_dsp/arm/vpx_neon_sve_bridge.h"
+#include "vpx_dsp/arm/vpx_neon_sve2_bridge.h"
+
+DECLARE_ALIGNED(16, static const uint16_t, kDotProdPermuteTbl[32]) = {
+  // clang-format off
+  0,  1,  2,  3,  1,  2,  3,  4,
+  2,  3,  4,  5,  3,  4,  5,  6,
+  4,  5,  6,  7,  5,  6,  7,  0,
+  6,  7,  0,  1,  7,  0,  1,  2,
+  // clang-format on
+};
+
+static INLINE uint16x8_t highbd_convolve12_8_h(
+    const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
+    const int16x8_t filter_0_7, const int16x8_t filter_4_11,
+    const uint16x8x4_t perm_tbl, const uint16x8_t max) {
+  int16x8_t perm_samples[8];
+
+  perm_samples[0] = vpx_tbl_s16(s0, perm_tbl.val[0]);
+  perm_samples[1] = vpx_tbl_s16(s0, perm_tbl.val[1]);
+  perm_samples[2] = vpx_tbl2_s16(s0, s1, perm_tbl.val[2]);
+  perm_samples[3] = vpx_tbl2_s16(s0, s1, perm_tbl.val[3]);
+  perm_samples[4] = vpx_tbl_s16(s1, perm_tbl.val[0]);
+  perm_samples[5] = vpx_tbl_s16(s1, perm_tbl.val[1]);
+  perm_samples[6] = vpx_tbl2_s16(s1, s2, perm_tbl.val[2]);
+  perm_samples[7] = vpx_tbl2_s16(s1, s2, perm_tbl.val[3]);
+
+  int64x2_t sum01 =
+      vpx_dotq_lane_s16(vdupq_n_s64(0), perm_samples[0], filter_0_7, 0);
+  sum01 = vpx_dotq_lane_s16(sum01, perm_samples[2], filter_0_7, 1);
+  sum01 = vpx_dotq_lane_s16(sum01, perm_samples[4], filter_4_11, 1);
+
+  int64x2_t sum23 =
+      vpx_dotq_lane_s16(vdupq_n_s64(0), perm_samples[1], filter_0_7, 0);
+  sum23 = vpx_dotq_lane_s16(sum23, perm_samples[3], filter_0_7, 1);
+  sum23 = vpx_dotq_lane_s16(sum23, perm_samples[5], filter_4_11, 1);
+
+  int64x2_t sum45 =
+      vpx_dotq_lane_s16(vdupq_n_s64(0), perm_samples[2], filter_0_7, 0);
+  sum45 = vpx_dotq_lane_s16(sum45, perm_samples[4], filter_0_7, 1);
+  sum45 = vpx_dotq_lane_s16(sum45, perm_samples[6], filter_4_11, 1);
+
+  int64x2_t sum67 =
+      vpx_dotq_lane_s16(vdupq_n_s64(0), perm_samples[3], filter_0_7, 0);
+  sum67 = vpx_dotq_lane_s16(sum67, perm_samples[5], filter_0_7, 1);
+  sum67 = vpx_dotq_lane_s16(sum67, perm_samples[7], filter_4_11, 1);
+
+  int32x4_t sum0123 = vcombine_s32(vmovn_s64(sum01), vmovn_s64(sum23));
+  int32x4_t sum4567 = vcombine_s32(vmovn_s64(sum45), vmovn_s64(sum67));
+
+  uint16x8_t res = vcombine_u16(vqrshrun_n_s32(sum0123, FILTER_BITS),
+                                vqrshrun_n_s32(sum4567, FILTER_BITS));
+  return vminq_u16(res, max);
+}
+
+void vpx_highbd_convolve12_horiz_sve2(const uint16_t *src, ptrdiff_t src_stride,
+                                      uint16_t *dst, ptrdiff_t dst_stride,
+                                      const InterpKernel12 *filter, int x0_q4,
+                                      int x_step_q4, int y0_q4, int y_step_q4,
+                                      int w, int h, int bd) {
+  // Scaling not supported by SVE2 implementation.
+  if (x_step_q4 != 16) {
+    vpx_highbd_convolve12_horiz_c(src, src_stride, dst, dst_stride, filter,
+                                  x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd);
+    return;
+  }
+
+  assert(w == 32 || w == 16 || w == 8);
+  assert(h % 4 == 0);
+
+  const int16x8_t filter_0_7 = vld1q_s16(filter[x0_q4]);
+  const int16x8_t filter_4_11 = vld1q_s16(filter[x0_q4] + 4);
+  const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
+  uint16x8x4_t permute_tbl = vld1q_u16_x4(kDotProdPermuteTbl);
+
+  // Scale indices by size of the true vector length to avoid reading from an
+  // 'undefined' portion of a vector on a system with SVE vectors > 128-bit.
+  permute_tbl.val[2] = vsetq_lane_u16(svcnth(), permute_tbl.val[2], 7);
+  permute_tbl.val[3] = vsetq_lane_u16(svcnth(), permute_tbl.val[3], 5);
+  uint16x8_t permute_tbl_3_offsets =
+      vreinterpretq_u16_u64(vdupq_n_u64(svcnth() * 0x0001000100000000ULL));
+  permute_tbl.val[3] =
+      vaddq_u16(permute_tbl.val[3], permute_tbl_3_offsets);  // 2, 3, 6, 7
+
+  src -= MAX_FILTER_TAP / 2 - 1;
+
+  do {
+    const int16_t *s = (const int16_t *)src;
+    uint16_t *d = dst;
+    int width = w;
+
+    do {
+      int16x8_t s0[3], s1[3];
+
+      load_s16_8x3(s + 0 * src_stride, 8, &s0[0], &s0[1], &s0[2]);
+      load_s16_8x3(s + 1 * src_stride, 8, &s1[0], &s1[1], &s1[2]);
+
+      uint16x8_t d0 = highbd_convolve12_8_h(s0[0], s0[1], s0[2], filter_0_7,
+                                            filter_4_11, permute_tbl, max);
+      uint16x8_t d1 = highbd_convolve12_8_h(s1[0], s1[1], s1[2], filter_0_7,
+                                            filter_4_11, permute_tbl, max);
+
+      vst1q_u16(d + 0 * dst_stride, d0);
+      vst1q_u16(d + 1 * dst_stride, d1);
+
+      s += 8;
+      d += 8;
+      width -= 8;
+    } while (width != 0);
+    src += 2 * src_stride;
+    dst += 2 * dst_stride;
+    h -= 2;
+  } while (h != 0);
+}
+
+static INLINE uint16x4_t highbd_convolve12_4_v(const int16x8_t s0[2],
+                                               const int16x8_t s1[2],
+                                               const int16x8_t s2[2],
+                                               const int16x8_t filter_0_7,
+                                               const int16x8_t filter_4_11,
+                                               const uint16x4_t max) {
+  int64x2_t sum01 = vpx_dotq_lane_s16(vdupq_n_s64(0), s0[0], filter_0_7, 0);
+  sum01 = vpx_dotq_lane_s16(sum01, s1[0], filter_0_7, 1);
+  sum01 = vpx_dotq_lane_s16(sum01, s2[0], filter_4_11, 1);
+
+  int64x2_t sum23 = vpx_dotq_lane_s16(vdupq_n_s64(0), s0[1], filter_0_7, 0);
+  sum23 = vpx_dotq_lane_s16(sum23, s1[1], filter_0_7, 1);
+  sum23 = vpx_dotq_lane_s16(sum23, s2[1], filter_4_11, 1);
+
+  int32x4_t sum0123 = vcombine_s32(vmovn_s64(sum01), vmovn_s64(sum23));
+
+  uint16x4_t res = vqrshrun_n_s32(sum0123, FILTER_BITS);
+
+  return vmin_u16(res, max);
+}
+
+void vpx_highbd_convolve12_vert_sve2(const uint16_t *src, ptrdiff_t src_stride,
+                                     uint16_t *dst, ptrdiff_t dst_stride,
+                                     const InterpKernel12 *filter, int x0_q4,
+                                     int x_step_q4, int y0_q4, int y_step_q4,
+                                     int w, int h, int bd) {
+  // Scaling not supported by SVE2 implementation.
+  if (y_step_q4 != 16) {
+    vpx_highbd_convolve12_vert_c(src, src_stride, dst, dst_stride, filter,
+                                 x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd);
+    return;
+  }
+  assert(w == 32 || w == 16 || w == 8);
+  assert(h % 4 == 0);
+
+  const int16x8_t filter_0_7 = vld1q_s16(filter[y0_q4]);
+  const int16x8_t filter_4_11 = vld1q_s16(filter[y0_q4] + 4);
+
+  const uint16x4_t max = vdup_n_u16((1 << bd) - 1);
+
+  src -= src_stride * (MAX_FILTER_TAP / 2 - 1);
+
+  do {
+    const int16_t *s = (const int16_t *)src;
+    uint16_t *d = dst;
+    int height = h;
+
+    int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA;
+    load_s16_4x11(s, src_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7, &s8,
+                  &s9, &sA);
+    s += 11 * src_stride;
+
+    int16x8_t s0123[2], s1234[2], s2345[2], s3456[2], s4567[2], s5678[2],
+        s6789[2], s789A[2];
+    transpose_concat_s16_4x4(s0, s1, s2, s3, &s0123[0], &s0123[1]);
+    transpose_concat_s16_4x4(s1, s2, s3, s4, &s1234[0], &s1234[1]);
+    transpose_concat_s16_4x4(s2, s3, s4, s5, &s2345[0], &s2345[1]);
+    transpose_concat_s16_4x4(s3, s4, s5, s6, &s3456[0], &s3456[1]);
+    transpose_concat_s16_4x4(s4, s5, s6, s7, &s4567[0], &s4567[1]);
+    transpose_concat_s16_4x4(s5, s6, s7, s8, &s5678[0], &s5678[1]);
+    transpose_concat_s16_4x4(s6, s7, s8, s9, &s6789[0], &s6789[1]);
+    transpose_concat_s16_4x4(s7, s8, s9, sA, &s789A[0], &s789A[1]);
+
+    do {
+      int16x4_t sB, sC, sD, sE;
+      load_s16_4x4(s, src_stride, &sB, &sC, &sD, &sE);
+
+      int16x8_t s89AB[2], s9ABC[2], sABCD[2], sBCDE[2];
+      transpose_concat_s16_4x4(s8, s9, sA, sB, &s89AB[0], &s89AB[1]);
+      transpose_concat_s16_4x4(s9, sA, sB, sC, &s9ABC[0], &s9ABC[1]);
+      transpose_concat_s16_4x4(sA, sB, sC, sD, &sABCD[0], &sABCD[1]);
+      transpose_concat_s16_4x4(sB, sC, sD, sE, &sBCDE[0], &sBCDE[1]);
+
+      uint16x4_t d0 = highbd_convolve12_4_v(s0123, s4567, s89AB, filter_0_7,
+                                            filter_4_11, max);
+      uint16x4_t d1 = highbd_convolve12_4_v(s1234, s5678, s9ABC, filter_0_7,
+                                            filter_4_11, max);
+      uint16x4_t d2 = highbd_convolve12_4_v(s2345, s6789, sABCD, filter_0_7,
+                                            filter_4_11, max);
+      uint16x4_t d3 = highbd_convolve12_4_v(s3456, s789A, sBCDE, filter_0_7,
+                                            filter_4_11, max);
+
+      store_u16_4x4(d, dst_stride, d0, d1, d2, d3);
+
+      // Prepare block for next iteration - reusing as much as possible.
+      // Shuffle everything up four rows.
+      s0123[0] = s4567[0];
+      s0123[1] = s4567[1];
+      s1234[0] = s5678[0];
+      s1234[1] = s5678[1];
+      s2345[0] = s6789[0];
+      s2345[1] = s6789[1];
+      s3456[0] = s789A[0];
+      s3456[1] = s789A[1];
+      s4567[0] = s89AB[0];
+      s4567[1] = s89AB[1];
+      s5678[0] = s9ABC[0];
+      s5678[1] = s9ABC[1];
+      s6789[0] = sABCD[0];
+      s6789[1] = sABCD[1];
+      s789A[0] = sBCDE[0];
+      s789A[1] = sBCDE[1];
+
+      s8 = sC;
+      s9 = sD;
+      sA = sE;
+
+      s += 4 * src_stride;
+      d += 4 * dst_stride;
+      height -= 4;
+    } while (height != 0);
+    src += 4;
+    dst += 4;
+    w -= 4;
+  } while (w != 0);
+}
+
+void vpx_highbd_convolve12_sve2(const uint16_t *src, ptrdiff_t src_stride,
+                                uint16_t *dst, ptrdiff_t dst_stride,
+                                const InterpKernel12 *filter, int x0_q4,
+                                int x_step_q4, int y0_q4, int y_step_q4, int w,
+                                int h, int bd) {
+  // Scaling not supported by SVE2 implementation.
+  if (x_step_q4 != 16 || y_step_q4 != 16) {
+    vpx_highbd_convolve12_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+                            x_step_q4, y0_q4, y_step_q4, w, h, bd);
+    return;
+  }
+
+  assert(w == 32 || w == 16 || w == 8);
+  assert(h == 32 || h == 16 || h == 8);
+
+  DECLARE_ALIGNED(32, uint16_t, im_block[BW * (BH + MAX_FILTER_TAP)]);
+
+  const int im_stride = BW;
+  // Account for the vertical pass needing MAX_FILTER_TAP / 2 - 1 lines prior
+  // and MAX_FILTER_TAP / 2 lines post. (+1 to make total divisible by 4.)
+  const int im_height = h + MAX_FILTER_TAP;
+  const ptrdiff_t border_offset = MAX_FILTER_TAP / 2 - 1;
+
+  // Filter starting border_offset rows up.
+  vpx_highbd_convolve12_horiz_sve2(
+      src - src_stride * border_offset, src_stride, im_block, im_stride, filter,
+      x0_q4, x_step_q4, y0_q4, y_step_q4, w, im_height, bd);
+
+  vpx_highbd_convolve12_vert_sve2(im_block + im_stride * border_offset,
+                                  im_stride, dst, dst_stride, filter, x0_q4,
+                                  x_step_q4, y0_q4, y_step_q4, w, h, bd);
+}
diff --git a/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_temporal_filter_neon_dotprod.c b/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_temporal_filter_neon_dotprod.c
@@ -15,6 +15,7 @@
 #include "./vpx_config.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_dsp/arm/mem_neon.h"
+#include "vpx_dsp/arm/transpose_neon.h"
 #include "vp9/encoder/vp9_temporal_filter.h"
 
 DECLARE_ALIGNED(16, static const uint8_t, kDotProdPermuteTbl[48]) = {
@@ -145,32 +146,6 @@ static INLINE uint8x8_t convolve12_8_v(
   return vqrshrun_n_s16(sum, FILTER_BITS);
 }
 
-static INLINE void transpose_concat_8x4(int8x8_t a0, int8x8_t a1, int8x8_t a2,
-                                        int8x8_t a3, int8x16_t *b0,
-                                        int8x16_t *b1) {
-  // Transpose 8-bit elements and concatenate result rows as follows:
-  // a0: 00, 01, 02, 03, 04, 05, 06, 07
-  // a1: 10, 11, 12, 13, 14, 15, 16, 17
-  // a2: 20, 21, 22, 23, 24, 25, 26, 27
-  // a3: 30, 31, 32, 33, 34, 35, 36, 37
-  //
-  // b0: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
-  // b1: 04, 14, 24, 34, 05, 15, 25, 35, 06, 16, 26, 36, 07, 17, 27, 37
-
-  int8x16_t a0q = vcombine_s8(a0, vdup_n_s8(0));
-  int8x16_t a1q = vcombine_s8(a1, vdup_n_s8(0));
-  int8x16_t a2q = vcombine_s8(a2, vdup_n_s8(0));
-  int8x16_t a3q = vcombine_s8(a3, vdup_n_s8(0));
-
-  int8x16_t a02 = vzipq_s8(a0q, a2q).val[0];
-  int8x16_t a13 = vzipq_s8(a1q, a3q).val[0];
-
-  int8x16x2_t a0123 = vzipq_s8(a02, a13);
-
-  *b0 = a0123.val[0];
-  *b1 = a0123.val[1];
-}
-
 void vpx_convolve12_vert_neon_dotprod(const uint8_t *src, ptrdiff_t src_stride,
                                       uint8_t *dst, ptrdiff_t dst_stride,
                                       const InterpKernel12 *filter, int x0_q4,
@@ -221,14 +196,14 @@ void vpx_convolve12_vert_neon_dotprod(const uint8_t *src, ptrdiff_t src_stride,
     int8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi,
         s3456_lo, s3456_hi, s4567_lo, s4567_hi, s5678_lo, s5678_hi, s6789_lo,
         s6789_hi, s789A_lo, s789A_hi;
-    transpose_concat_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
-    transpose_concat_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
-    transpose_concat_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
-    transpose_concat_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
-    transpose_concat_8x4(s4, s5, s6, s7, &s4567_lo, &s4567_hi);
-    transpose_concat_8x4(s5, s6, s7, s8, &s5678_lo, &s5678_hi);
-    transpose_concat_8x4(s6, s7, s8, s9, &s6789_lo, &s6789_hi);
-    transpose_concat_8x4(s7, s8, s9, sA, &s789A_lo, &s789A_hi);
+    transpose_concat_s8_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
+    transpose_concat_s8_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
+    transpose_concat_s8_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
+    transpose_concat_s8_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
+    transpose_concat_s8_8x4(s4, s5, s6, s7, &s4567_lo, &s4567_hi);
+    transpose_concat_s8_8x4(s5, s6, s7, s8, &s5678_lo, &s5678_hi);
+    transpose_concat_s8_8x4(s6, s7, s8, s9, &s6789_lo, &s6789_hi);
+    transpose_concat_s8_8x4(s7, s8, s9, sA, &s789A_lo, &s789A_hi);
 
     do {
       uint8x8_t tB, tC, tD, tE;
@@ -241,7 +216,7 @@ void vpx_convolve12_vert_neon_dotprod(const uint8_t *src, ptrdiff_t src_stride,
 
       int8x16_t s89AB_lo, s89AB_hi, s9ABC_lo, s9ABC_hi, sABCD_lo, sABCD_hi,
           sBCDE_lo, sBCDE_hi;
-      transpose_concat_8x4(sB, sC, sD, sE, &sBCDE_lo, &sBCDE_hi);
+      transpose_concat_s8_8x4(sB, sC, sD, sE, &sBCDE_lo, &sBCDE_hi);
 
       // Merge new data into block from previous iteration.
       int8x16x2_t samples_LUT_lo = { { s789A_lo, sBCDE_lo } };
diff --git a/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_temporal_filter_neon_i8mm.c b/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_temporal_filter_neon_i8mm.c
@@ -15,6 +15,7 @@
 #include "./vpx_config.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_dsp/arm/mem_neon.h"
+#include "vpx_dsp/arm/transpose_neon.h"
 #include "vp9/encoder/vp9_temporal_filter.h"
 
 DECLARE_ALIGNED(16, static const uint8_t, kMatMulPermuteTbl[32]) = {
@@ -139,32 +140,6 @@ static INLINE uint8x8_t convolve12_8_v(
   return vqrshrun_n_s16(sum, FILTER_BITS);
 }
 
-static INLINE void transpose_concat_8x4(uint8x8_t a0, uint8x8_t a1,
-                                        uint8x8_t a2, uint8x8_t a3,
-                                        uint8x16_t *b0, uint8x16_t *b1) {
-  // Transpose 8-bit elements and concatenate result rows as follows:
-  // a0: 00, 01, 02, 03, 04, 05, 06, 07
-  // a1: 10, 11, 12, 13, 14, 15, 16, 17
-  // a2: 20, 21, 22, 23, 24, 25, 26, 27
-  // a3: 30, 31, 32, 33, 34, 35, 36, 37
-  //
-  // b0: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
-  // b1: 04, 14, 24, 34, 05, 15, 25, 35, 06, 16, 26, 36, 07, 17, 27, 37
-
-  uint8x16_t a0q = vcombine_u8(a0, vdup_n_u8(0));
-  uint8x16_t a1q = vcombine_u8(a1, vdup_n_u8(0));
-  uint8x16_t a2q = vcombine_u8(a2, vdup_n_u8(0));
-  uint8x16_t a3q = vcombine_u8(a3, vdup_n_u8(0));
-
-  uint8x16_t a02 = vzipq_u8(a0q, a2q).val[0];
-  uint8x16_t a13 = vzipq_u8(a1q, a3q).val[0];
-
-  uint8x16x2_t a0123 = vzipq_u8(a02, a13);
-
-  *b0 = a0123.val[0];
-  *b1 = a0123.val[1];
-}
-
 void vpx_convolve12_vert_neon_i8mm(const uint8_t *src, ptrdiff_t src_stride,
                                    uint8_t *dst, ptrdiff_t dst_stride,
                                    const InterpKernel12 *filter, int x0_q4,
@@ -202,14 +177,14 @@ void vpx_convolve12_vert_neon_i8mm(const uint8_t *src, ptrdiff_t src_stride,
     uint8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi,
         s3456_lo, s3456_hi, s4567_lo, s4567_hi, s5678_lo, s5678_hi, s6789_lo,
         s6789_hi, s789A_lo, s789A_hi;
-    transpose_concat_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
-    transpose_concat_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
-    transpose_concat_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
-    transpose_concat_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
-    transpose_concat_8x4(s4, s5, s6, s7, &s4567_lo, &s4567_hi);
-    transpose_concat_8x4(s5, s6, s7, s8, &s5678_lo, &s5678_hi);
-    transpose_concat_8x4(s6, s7, s8, s9, &s6789_lo, &s6789_hi);
-    transpose_concat_8x4(s7, s8, s9, sA, &s789A_lo, &s789A_hi);
+    transpose_concat_u8_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
+    transpose_concat_u8_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
+    transpose_concat_u8_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
+    transpose_concat_u8_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
+    transpose_concat_u8_8x4(s4, s5, s6, s7, &s4567_lo, &s4567_hi);
+    transpose_concat_u8_8x4(s5, s6, s7, s8, &s5678_lo, &s5678_hi);
+    transpose_concat_u8_8x4(s6, s7, s8, s9, &s6789_lo, &s6789_hi);
+    transpose_concat_u8_8x4(s7, s8, s9, sA, &s789A_lo, &s789A_hi);
 
     do {
       uint8x8_t sB, sC, sD, sE;
@@ -217,7 +192,7 @@ void vpx_convolve12_vert_neon_i8mm(const uint8_t *src, ptrdiff_t src_stride,
 
       uint8x16_t s89AB_lo, s89AB_hi, s9ABC_lo, s9ABC_hi, sABCD_lo, sABCD_hi,
           sBCDE_lo, sBCDE_hi;
-      transpose_concat_8x4(sB, sC, sD, sE, &sBCDE_lo, &sBCDE_hi);
+      transpose_concat_u8_8x4(sB, sC, sD, sE, &sBCDE_lo, &sBCDE_hi);
 
       // Merge new data into block from previous iteration.
       uint8x16x2_t samples_LUT_lo = { { s789A_lo, sBCDE_lo } };
diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_encoder.c b/media/libvpx/libvpx/vp9/encoder/vp9_encoder.c
@@ -1420,7 +1420,14 @@ static void set_tile_limits(VP9_COMP *cpi) {
 
   cm->log2_tile_cols =
       clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
-  cm->log2_tile_rows = cpi->oxcf.tile_rows;
+
+  // Max allowed number of tile_rows is 4 (so log2_tile_rows = 2), and each
+  // tile_row contains a multiple of superblocks.
+  const int sb64_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> 3;
+  const int max_log2_tile_rows = (sb64_rows >= 4)   ? 2
+                                 : (sb64_rows >= 2) ? 1
+                                                    : 0;
+  cm->log2_tile_rows = VPXMIN(cpi->oxcf.tile_rows, max_log2_tile_rows);
 
   if (cpi->oxcf.target_level == LEVEL_AUTO) {
     const int level_tile_cols =
diff --git a/media/libvpx/libvpx/vp9/vp9_cx_iface.c b/media/libvpx/libvpx/vp9/vp9_cx_iface.c
@@ -1478,22 +1478,13 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
           timebase_units_to_ticks(timebase_in_ts, pts_end);
       res = image2yuvconfig(img, &sd);
 
-      if (sd.y_width != ctx->cfg.g_w || sd.y_height != ctx->cfg.g_h) {
-        /* from vpx_encoder.h for g_w/g_h:
-           "Note that the frames passed as input to the encoder must have this
-           resolution"
-        */
-        ctx->base.err_detail = "Invalid input frame resolution";
-        res = VPX_CODEC_INVALID_PARAM;
-      } else {
-        // Store the original flags in to the frame buffer. Will extract the
-        // key frame flag when we actually encode this frame.
-        if (vp9_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd,
+      // Store the original flags in to the frame buffer. Will extract the
+      // key frame flag when we actually encode this frame.
+      if (vp9_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd,
                                 dst_time_stamp, dst_end_time_stamp)) {
-          res = update_error_state(ctx, &cpi->common.error);
-        }
-        ctx->next_frame_flags = 0;
+        res = update_error_state(ctx, &cpi->common.error);
       }
+      ctx->next_frame_flags = 0;
     }
 
     cx_data = ctx->cx_data;
diff --git a/media/libvpx/libvpx/vp9/vp9cx.mk b/media/libvpx/libvpx/vp9/vp9cx.mk
@@ -129,6 +129,7 @@ VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/highbd_temporal_filter_ssse3.c
 VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_temporal_filter_sse4.c
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/highbd_temporal_filter_avx2.c
 VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_highbd_temporal_filter_neon.c
+VP9_CX_SRCS-$(HAVE_SVE2) += encoder/arm/neon/vp9_highbd_temporal_filter_sve2.c
 endif
 
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm
@@ -179,6 +180,7 @@ VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/arm/neon/vp9_temporal_filt
 VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/arm/neon/vp9_temporal_filter_neon_dotprod.c
 VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/arm/neon/vp9_temporal_filter_neon_i8mm.c
 VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/arm/neon/vp9_highbd_temporal_filter_neon.c
+VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/arm/neon/vp9_highbd_temporal_filter_sve2.c
 VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_alt_ref_aq.h
 VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_alt_ref_aq.c
 VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_variance.c
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/highbd_vpx_convolve8_sve2.c b/media/libvpx/libvpx/vpx_dsp/arm/highbd_vpx_convolve8_sve2.c
@@ -36,72 +36,6 @@ DECLARE_ALIGNED(16, static const uint16_t, kDotProdMergeBlockTbl[24]) = {
 DECLARE_ALIGNED(16, static const uint16_t, kTblConv4_8[8]) = { 0, 2, 4, 6,
                                                                1, 3, 5, 7 };
 
-static INLINE void transpose_concat_4x4(const int16x4_t s0, const int16x4_t s1,
-                                        const int16x4_t s2, const int16x4_t s3,
-                                        int16x8_t res[2]) {
-  // Transpose 16-bit elements:
-  // s0: 00, 01, 02, 03
-  // s1: 10, 11, 12, 13
-  // s2: 20, 21, 22, 23
-  // s3: 30, 31, 32, 33
-  //
-  // res[0]: 00 10 20 30 01 11 21 31
-  // res[1]: 02 12 22 32 03 13 23 33
-
-  int16x8_t s0q = vcombine_s16(s0, vdup_n_s16(0));
-  int16x8_t s1q = vcombine_s16(s1, vdup_n_s16(0));
-  int16x8_t s2q = vcombine_s16(s2, vdup_n_s16(0));
-  int16x8_t s3q = vcombine_s16(s3, vdup_n_s16(0));
-
-  int16x8_t s02 = vzip1q_s16(s0q, s2q);
-  int16x8_t s13 = vzip1q_s16(s1q, s3q);
-
-  int16x8x2_t s0123 = vzipq_s16(s02, s13);
-
-  res[0] = s0123.val[0];
-  res[1] = s0123.val[1];
-}
-
-static INLINE void transpose_concat_8x4(const int16x8_t s0, const int16x8_t s1,
-                                        const int16x8_t s2, const int16x8_t s3,
-                                        int16x8_t res[4]) {
-  // Transpose 16-bit elements:
-  // s0: 00, 01, 02, 03, 04, 05, 06, 07
-  // s1: 10, 11, 12, 13, 14, 15, 16, 17
-  // s2: 20, 21, 22, 23, 24, 25, 26, 27
-  // s3: 30, 31, 32, 33, 34, 35, 36, 37
-  //
-  // res[0]: 00 10 20 30 01 11 21 31
-  // res[1]: 02 12 22 32 03 13 23 33
-  // res[2]: 04 14 24 34 05 15 25 35
-  // res[3]: 06 16 26 36 07 17 27 37
-
-  int16x8x2_t s02 = vzipq_s16(s0, s2);
-  int16x8x2_t s13 = vzipq_s16(s1, s3);
-
-  int16x8x2_t s0123_lo = vzipq_s16(s02.val[0], s13.val[0]);
-  int16x8x2_t s0123_hi = vzipq_s16(s02.val[1], s13.val[1]);
-
-  res[0] = s0123_lo.val[0];
-  res[1] = s0123_lo.val[1];
-  res[2] = s0123_hi.val[0];
-  res[3] = s0123_hi.val[1];
-}
-
-static INLINE void vpx_tbl2x4_s16(int16x8_t s0[4], int16x8_t s1[4],
-                                  int16x8_t res[4], uint16x8_t idx) {
-  res[0] = vpx_tbl2_s16(s0[0], s1[0], idx);
-  res[1] = vpx_tbl2_s16(s0[1], s1[1], idx);
-  res[2] = vpx_tbl2_s16(s0[2], s1[2], idx);
-  res[3] = vpx_tbl2_s16(s0[3], s1[3], idx);
-}
-
-static INLINE void vpx_tbl2x2_s16(int16x8_t s0[2], int16x8_t s1[2],
-                                  int16x8_t res[2], uint16x8_t idx) {
-  res[0] = vpx_tbl2_s16(s0[0], s1[0], idx);
-  res[1] = vpx_tbl2_s16(s0[1], s1[1], idx);
-}
-
 static INLINE uint16x4_t highbd_convolve8_4_v(int16x8_t s_lo[2],
                                               int16x8_t s_hi[2],
                                               int16x8_t filter,
@@ -169,10 +103,10 @@ static INLINE void highbd_convolve8_8tap_vert_sve2(
     s += 7 * src_stride;
 
     int16x8_t s0123[2], s1234[2], s2345[2], s3456[2];
-    transpose_concat_4x4(s0, s1, s2, s3, s0123);
-    transpose_concat_4x4(s1, s2, s3, s4, s1234);
-    transpose_concat_4x4(s2, s3, s4, s5, s2345);
-    transpose_concat_4x4(s3, s4, s5, s6, s3456);
+    transpose_concat_s16_4x4(s0, s1, s2, s3, &s0123[0], &s0123[1]);
+    transpose_concat_s16_4x4(s1, s2, s3, s4, &s1234[0], &s1234[1]);
+    transpose_concat_s16_4x4(s2, s3, s4, s5, &s2345[0], &s2345[1]);
+    transpose_concat_s16_4x4(s3, s4, s5, s6, &s3456[0], &s3456[1]);
 
     do {
       int16x4_t s7, s8, s9, sA;
@@ -180,7 +114,7 @@ static INLINE void highbd_convolve8_8tap_vert_sve2(
       load_s16_4x4(s, src_stride, &s7, &s8, &s9, &sA);
 
       int16x8_t s4567[2], s5678[2], s6789[2], s789A[2];
-      transpose_concat_4x4(s7, s8, s9, sA, s789A);
+      transpose_concat_s16_4x4(s7, s8, s9, sA, &s789A[0], &s789A[1]);
 
       vpx_tbl2x2_s16(s3456, s789A, s4567, merge_tbl_idx.val[0]);
       vpx_tbl2x2_s16(s3456, s789A, s5678, merge_tbl_idx.val[1]);
@@ -219,17 +153,22 @@ static INLINE void highbd_convolve8_8tap_vert_sve2(
       s += 7 * src_stride;
 
       int16x8_t s0123[4], s1234[4], s2345[4], s3456[4];
-      transpose_concat_8x4(s0, s1, s2, s3, s0123);
-      transpose_concat_8x4(s1, s2, s3, s4, s1234);
-      transpose_concat_8x4(s2, s3, s4, s5, s2345);
-      transpose_concat_8x4(s3, s4, s5, s6, s3456);
+      transpose_concat_s16_8x4(s0, s1, s2, s3, &s0123[0], &s0123[1], &s0123[2],
+                               &s0123[3]);
+      transpose_concat_s16_8x4(s1, s2, s3, s4, &s1234[0], &s1234[1], &s1234[2],
+                               &s1234[3]);
+      transpose_concat_s16_8x4(s2, s3, s4, s5, &s2345[0], &s2345[1], &s2345[2],
+                               &s2345[3]);
+      transpose_concat_s16_8x4(s3, s4, s5, s6, &s3456[0], &s3456[1], &s3456[2],
+                               &s3456[3]);
 
       do {
         int16x8_t s7, s8, s9, sA;
         load_s16_8x4(s, src_stride, &s7, &s8, &s9, &sA);
 
         int16x8_t s4567[4], s5678[5], s6789[4], s789A[4];
-        transpose_concat_8x4(s7, s8, s9, sA, s789A);
+        transpose_concat_s16_8x4(s7, s8, s9, sA, &s789A[0], &s789A[1],
+                                 &s789A[2], &s789A[3]);
 
         vpx_tbl2x4_s16(s3456, s789A, s4567, merge_tbl_idx.val[0]);
         vpx_tbl2x4_s16(s3456, s789A, s5678, merge_tbl_idx.val[1]);
@@ -343,10 +282,10 @@ void vpx_highbd_convolve8_avg_vert_sve2(const uint16_t *src,
     s += 7 * src_stride;
 
     int16x8_t s0123[2], s1234[2], s2345[2], s3456[2];
-    transpose_concat_4x4(s0, s1, s2, s3, s0123);
-    transpose_concat_4x4(s1, s2, s3, s4, s1234);
-    transpose_concat_4x4(s2, s3, s4, s5, s2345);
-    transpose_concat_4x4(s3, s4, s5, s6, s3456);
+    transpose_concat_s16_4x4(s0, s1, s2, s3, &s0123[0], &s0123[1]);
+    transpose_concat_s16_4x4(s1, s2, s3, s4, &s1234[0], &s1234[1]);
+    transpose_concat_s16_4x4(s2, s3, s4, s5, &s2345[0], &s2345[1]);
+    transpose_concat_s16_4x4(s3, s4, s5, s6, &s3456[0], &s3456[1]);
 
     do {
       int16x4_t s7, s8, s9, sA;
@@ -354,7 +293,7 @@ void vpx_highbd_convolve8_avg_vert_sve2(const uint16_t *src,
       load_s16_4x4(s, src_stride, &s7, &s8, &s9, &sA);
 
       int16x8_t s4567[2], s5678[2], s6789[2], s789A[2];
-      transpose_concat_4x4(s7, s8, s9, sA, s789A);
+      transpose_concat_s16_4x4(s7, s8, s9, sA, &s789A[0], &s789A[1]);
 
       vpx_tbl2x2_s16(s3456, s789A, s4567, merge_tbl_idx.val[0]);
       vpx_tbl2x2_s16(s3456, s789A, s5678, merge_tbl_idx.val[1]);
@@ -398,17 +337,22 @@ void vpx_highbd_convolve8_avg_vert_sve2(const uint16_t *src,
       s += 7 * src_stride;
 
       int16x8_t s0123[4], s1234[4], s2345[4], s3456[4];
-      transpose_concat_8x4(s0, s1, s2, s3, s0123);
-      transpose_concat_8x4(s1, s2, s3, s4, s1234);
-      transpose_concat_8x4(s2, s3, s4, s5, s2345);
-      transpose_concat_8x4(s3, s4, s5, s6, s3456);
+      transpose_concat_s16_8x4(s0, s1, s2, s3, &s0123[0], &s0123[1], &s0123[2],
+                               &s0123[3]);
+      transpose_concat_s16_8x4(s1, s2, s3, s4, &s1234[0], &s1234[1], &s1234[2],
+                               &s1234[3]);
+      transpose_concat_s16_8x4(s2, s3, s4, s5, &s2345[0], &s2345[1], &s2345[2],
+                               &s2345[3]);
+      transpose_concat_s16_8x4(s3, s4, s5, s6, &s3456[0], &s3456[1], &s3456[2],
+                               &s3456[3]);
 
       do {
         int16x8_t s7, s8, s9, sA;
         load_s16_8x4(s, src_stride, &s7, &s8, &s9, &sA);
 
         int16x8_t s4567[4], s5678[5], s6789[4], s789A[4];
-        transpose_concat_8x4(s7, s8, s9, sA, s789A);
+        transpose_concat_s16_8x4(s7, s8, s9, sA, &s789A[0], &s789A[1],
+                                 &s789A[2], &s789A[3]);
 
         vpx_tbl2x4_s16(s3456, s789A, s4567, merge_tbl_idx.val[0]);
         vpx_tbl2x4_s16(s3456, s789A, s5678, merge_tbl_idx.val[1]);
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h b/media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h
@@ -40,11 +40,11 @@ static INLINE uint8x16x3_t vld1q_u8_x3(uint8_t const *ptr) {
 static INLINE int16x4_t create_s16x4_neon(const int16_t c0, const int16_t c1,
                                           const int16_t c2, const int16_t c3) {
   return vcreate_s16((uint16_t)c0 | ((uint32_t)c1 << 16) |
-                     ((int64_t)(uint16_t)c2 << 32) | ((int64_t)c3 << 48));
+                     ((uint64_t)(uint16_t)c2 << 32) | ((uint64_t)c3 << 48));
 }
 
 static INLINE int32x2_t create_s32x2_neon(const int32_t c0, const int32_t c1) {
-  return vcreate_s32((uint32_t)c0 | ((int64_t)(uint32_t)c1 << 32));
+  return vcreate_s32((uint32_t)c0 | ((uint64_t)(uint32_t)c1 << 32));
 }
 
 static INLINE int32x4_t create_s32x4_neon(const int32_t c0, const int32_t c1,
@@ -499,6 +499,34 @@ static INLINE void load_s16_4x4(const int16_t *s, const ptrdiff_t p,
   *s3 = vld1_s16(s);
 }
 
+static INLINE void load_s16_4x11(const int16_t *s, const ptrdiff_t p,
+                                 int16x4_t *s0, int16x4_t *s1, int16x4_t *s2,
+                                 int16x4_t *s3, int16x4_t *s4, int16x4_t *s5,
+                                 int16x4_t *s6, int16x4_t *s7, int16x4_t *s8,
+                                 int16x4_t *s9, int16x4_t *s10) {
+  *s0 = vld1_s16(s);
+  s += p;
+  *s1 = vld1_s16(s);
+  s += p;
+  *s2 = vld1_s16(s);
+  s += p;
+  *s3 = vld1_s16(s);
+  s += p;
+  *s4 = vld1_s16(s);
+  s += p;
+  *s5 = vld1_s16(s);
+  s += p;
+  *s6 = vld1_s16(s);
+  s += p;
+  *s7 = vld1_s16(s);
+  s += p;
+  *s8 = vld1_s16(s);
+  s += p;
+  *s9 = vld1_s16(s);
+  s += p;
+  *s10 = vld1_s16(s);
+}
+
 static INLINE void store_u16_4x4(uint16_t *s, const ptrdiff_t p,
                                  const uint16x4_t s0, const uint16x4_t s1,
                                  const uint16x4_t s2, const uint16x4_t s3) {
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/transpose_neon.h b/media/libvpx/libvpx/vpx_dsp/arm/transpose_neon.h
@@ -1549,4 +1549,158 @@ static INLINE void load_and_transpose_s32_8x8(
 
   transpose_s32_8x8(a0, a1, a2, a3, a4, a5, a6, a7);
 }
+
+static INLINE void transpose_concat_s16_4x4(const int16x4_t a0,
+                                            const int16x4_t a1,
+                                            const int16x4_t a2,
+                                            const int16x4_t a3, int16x8_t *b0,
+                                            int16x8_t *b1) {
+  // Transpose 16-bit elements:
+  // a0: 00, 01, 02, 03
+  // a1: 10, 11, 12, 13
+  // a2: 20, 21, 22, 23
+  // a3: 30, 31, 32, 33
+  //
+  // b0: 00 10 20 30 01 11 21 31
+  // b1: 02 12 22 32 03 13 23 33
+
+  int16x8_t a0q = vcombine_s16(a0, vdup_n_s16(0));
+  int16x8_t a1q = vcombine_s16(a1, vdup_n_s16(0));
+  int16x8_t a2q = vcombine_s16(a2, vdup_n_s16(0));
+  int16x8_t a3q = vcombine_s16(a3, vdup_n_s16(0));
+
+  int16x8_t a02 = vzipq_s16(a0q, a2q).val[0];
+  int16x8_t a13 = vzipq_s16(a1q, a3q).val[0];
+
+  int16x8x2_t a0123 = vzipq_s16(a02, a13);
+
+  *b0 = a0123.val[0];
+  *b1 = a0123.val[1];
+}
+
+static INLINE void transpose_concat_s16_8x4(const int16x8_t a0,
+                                            const int16x8_t a1,
+                                            const int16x8_t a2,
+                                            const int16x8_t a3, int16x8_t *b0,
+                                            int16x8_t *b1, int16x8_t *b2,
+                                            int16x8_t *b3) {
+  // Transpose 16-bit elements:
+  // a0: 00, 01, 02, 03, 04, 05, 06, 07
+  // a1: 10, 11, 12, 13, 14, 15, 16, 17
+  // a2: 20, 21, 22, 23, 24, 25, 26, 27
+  // a3: 30, 31, 32, 33, 34, 35, 36, 37
+  //
+  // b0: 00 10 20 30 01 11 21 31
+  // b1: 02 12 22 32 03 13 23 33
+  // b2: 04 14 24 34 05 15 25 35
+  // b3: 06 16 26 36 07 17 27 37
+
+  int16x8x2_t a02 = vzipq_s16(a0, a2);
+  int16x8x2_t a13 = vzipq_s16(a1, a3);
+
+  int16x8x2_t a0123_lo = vzipq_s16(a02.val[0], a13.val[0]);
+  int16x8x2_t a0123_hi = vzipq_s16(a02.val[1], a13.val[1]);
+
+  *b0 = a0123_lo.val[0];
+  *b1 = a0123_lo.val[1];
+  *b2 = a0123_hi.val[0];
+  *b3 = a0123_hi.val[1];
+}
+
+static INLINE void transpose_concat_s8_8x4(int8x8_t a0, int8x8_t a1,
+                                           int8x8_t a2, int8x8_t a3,
+                                           int8x16_t *b0, int8x16_t *b1) {
+  // Transpose 8-bit elements and concatenate result rows as follows:
+  // a0: 00, 01, 02, 03, 04, 05, 06, 07
+  // a1: 10, 11, 12, 13, 14, 15, 16, 17
+  // a2: 20, 21, 22, 23, 24, 25, 26, 27
+  // a3: 30, 31, 32, 33, 34, 35, 36, 37
+  //
+  // b0: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
+  // b1: 04, 14, 24, 34, 05, 15, 25, 35, 06, 16, 26, 36, 07, 17, 27, 37
+
+  int8x16_t a0q = vcombine_s8(a0, vdup_n_s8(0));
+  int8x16_t a1q = vcombine_s8(a1, vdup_n_s8(0));
+  int8x16_t a2q = vcombine_s8(a2, vdup_n_s8(0));
+  int8x16_t a3q = vcombine_s8(a3, vdup_n_s8(0));
+
+  int8x16_t a02 = vzipq_s8(a0q, a2q).val[0];
+  int8x16_t a13 = vzipq_s8(a1q, a3q).val[0];
+
+  int8x16x2_t a0123 = vzipq_s8(a02, a13);
+
+  *b0 = a0123.val[0];
+  *b1 = a0123.val[1];
+}
+
+static INLINE void transpose_concat_u8_8x4(uint8x8_t a0, uint8x8_t a1,
+                                           uint8x8_t a2, uint8x8_t a3,
+                                           uint8x16_t *b0, uint8x16_t *b1) {
+  // Transpose 8-bit elements and concatenate result rows as follows:
+  // a0: 00, 01, 02, 03, 04, 05, 06, 07
+  // a1: 10, 11, 12, 13, 14, 15, 16, 17
+  // a2: 20, 21, 22, 23, 24, 25, 26, 27
+  // a3: 30, 31, 32, 33, 34, 35, 36, 37
+  //
+  // b0: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
+  // b1: 04, 14, 24, 34, 05, 15, 25, 35, 06, 16, 26, 36, 07, 17, 27, 37
+
+  uint8x16_t a0q = vcombine_u8(a0, vdup_n_u8(0));
+  uint8x16_t a1q = vcombine_u8(a1, vdup_n_u8(0));
+  uint8x16_t a2q = vcombine_u8(a2, vdup_n_u8(0));
+  uint8x16_t a3q = vcombine_u8(a3, vdup_n_u8(0));
+
+  uint8x16_t a02 = vzipq_u8(a0q, a2q).val[0];
+  uint8x16_t a13 = vzipq_u8(a1q, a3q).val[0];
+
+  uint8x16x2_t a0123 = vzipq_u8(a02, a13);
+
+  *b0 = a0123.val[0];
+  *b1 = a0123.val[1];
+}
+
+static INLINE void transpose_concat_s8_4x4(int8x8_t a0, int8x8_t a1,
+                                           int8x8_t a2, int8x8_t a3,
+                                           int8x16_t *b) {
+  // Transpose 8-bit elements and concatenate result rows as follows:
+  // a0: 00, 01, 02, 03, XX, XX, XX, XX
+  // a1: 10, 11, 12, 13, XX, XX, XX, XX
+  // a2: 20, 21, 22, 23, XX, XX, XX, XX
+  // a3: 30, 31, 32, 33, XX, XX, XX, XX
+  //
+  // b: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
+
+  int8x16_t a0q = vcombine_s8(a0, vdup_n_s8(0));
+  int8x16_t a1q = vcombine_s8(a1, vdup_n_s8(0));
+  int8x16_t a2q = vcombine_s8(a2, vdup_n_s8(0));
+  int8x16_t a3q = vcombine_s8(a3, vdup_n_s8(0));
+
+  int8x16_t a02 = vzipq_s8(a0q, a2q).val[0];
+  int8x16_t a13 = vzipq_s8(a1q, a3q).val[0];
+
+  *b = vzipq_s8(a02, a13).val[0];
+}
+
+static INLINE void transpose_concat_u8_4x4(uint8x8_t a0, uint8x8_t a1,
+                                           uint8x8_t a2, uint8x8_t a3,
+                                           uint8x16_t *b) {
+  // Transpose 8-bit elements and concatenate result rows as follows:
+  // a0: 00, 01, 02, 03, XX, XX, XX, XX
+  // a1: 10, 11, 12, 13, XX, XX, XX, XX
+  // a2: 20, 21, 22, 23, XX, XX, XX, XX
+  // a3: 30, 31, 32, 33, XX, XX, XX, XX
+  //
+  // b: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
+
+  uint8x16_t a0q = vcombine_u8(a0, vdup_n_u8(0));
+  uint8x16_t a1q = vcombine_u8(a1, vdup_n_u8(0));
+  uint8x16_t a2q = vcombine_u8(a2, vdup_n_u8(0));
+  uint8x16_t a3q = vcombine_u8(a3, vdup_n_u8(0));
+
+  uint8x16_t a02 = vzipq_u8(a0q, a2q).val[0];
+  uint8x16_t a13 = vzipq_u8(a1q, a3q).val[0];
+
+  *b = vzipq_u8(a02, a13).val[0];
+}
+
 #endif  // VPX_VPX_DSP_ARM_TRANSPOSE_NEON_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_dotprod.c b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_dotprod.c
@@ -360,53 +360,6 @@ void vpx_convolve8_avg_horiz_neon_dotprod(const uint8_t *src,
   }
 }
 
-static INLINE void transpose_concat_4x4(int8x8_t a0, int8x8_t a1, int8x8_t a2,
-                                        int8x8_t a3, int8x16_t *b) {
-  // Transpose 8-bit elements and concatenate result rows as follows:
-  // a0: 00, 01, 02, 03, XX, XX, XX, XX
-  // a1: 10, 11, 12, 13, XX, XX, XX, XX
-  // a2: 20, 21, 22, 23, XX, XX, XX, XX
-  // a3: 30, 31, 32, 33, XX, XX, XX, XX
-  //
-  // b: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
-
-  int8x16_t a0q = vcombine_s8(a0, vdup_n_s8(0));
-  int8x16_t a1q = vcombine_s8(a1, vdup_n_s8(0));
-  int8x16_t a2q = vcombine_s8(a2, vdup_n_s8(0));
-  int8x16_t a3q = vcombine_s8(a3, vdup_n_s8(0));
-
-  int8x16_t a02 = vzipq_s8(a0q, a2q).val[0];
-  int8x16_t a13 = vzipq_s8(a1q, a3q).val[0];
-
-  *b = vzipq_s8(a02, a13).val[0];
-}
-
-static INLINE void transpose_concat_8x4(int8x8_t a0, int8x8_t a1, int8x8_t a2,
-                                        int8x8_t a3, int8x16_t *b0,
-                                        int8x16_t *b1) {
-  // Transpose 8-bit elements and concatenate result rows as follows:
-  // a0: 00, 01, 02, 03, 04, 05, 06, 07
-  // a1: 10, 11, 12, 13, 14, 15, 16, 17
-  // a2: 20, 21, 22, 23, 24, 25, 26, 27
-  // a3: 30, 31, 32, 33, 34, 35, 36, 37
-  //
-  // b0: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
-  // b1: 04, 14, 24, 34, 05, 15, 25, 35, 06, 16, 26, 36, 07, 17, 27, 37
-
-  int8x16_t a0q = vcombine_s8(a0, vdup_n_s8(0));
-  int8x16_t a1q = vcombine_s8(a1, vdup_n_s8(0));
-  int8x16_t a2q = vcombine_s8(a2, vdup_n_s8(0));
-  int8x16_t a3q = vcombine_s8(a3, vdup_n_s8(0));
-
-  int8x16_t a02 = vzipq_s8(a0q, a2q).val[0];
-  int8x16_t a13 = vzipq_s8(a1q, a3q).val[0];
-
-  int8x16x2_t a0123 = vzipq_s8(a02, a13);
-
-  *b0 = a0123.val[0];
-  *b1 = a0123.val[1];
-}
-
 static INLINE int16x4_t convolve8_4_v(const int8x16_t samples_lo,
                                       const int8x16_t samples_hi,
                                       const int8x8_t filters) {
@@ -464,10 +417,10 @@ static INLINE void convolve_8tap_vert_neon_dotprod(
     // This operation combines a conventional transpose and the sample permute
     // (see horizontal case) required before computing the dot product.
     int8x16_t s0123, s1234, s2345, s3456;
-    transpose_concat_4x4(s0, s1, s2, s3, &s0123);
-    transpose_concat_4x4(s1, s2, s3, s4, &s1234);
-    transpose_concat_4x4(s2, s3, s4, s5, &s2345);
-    transpose_concat_4x4(s3, s4, s5, s6, &s3456);
+    transpose_concat_s8_4x4(s0, s1, s2, s3, &s0123);
+    transpose_concat_s8_4x4(s1, s2, s3, s4, &s1234);
+    transpose_concat_s8_4x4(s2, s3, s4, s5, &s2345);
+    transpose_concat_s8_4x4(s3, s4, s5, s6, &s3456);
 
     do {
       uint8x8_t t7, t8, t9, t10;
@@ -479,7 +432,7 @@ static INLINE void convolve_8tap_vert_neon_dotprod(
       int8x8_t s10 = vreinterpret_s8_u8(vsub_u8(t10, vdup_n_u8(128)));
 
       int8x16_t s78910;
-      transpose_concat_4x4(s7, s8, s9, s10, &s78910);
+      transpose_concat_s8_4x4(s7, s8, s9, s10, &s78910);
 
       // Merge new data into block from previous iteration.
       int8x16x2_t samples_LUT = { { s3456, s78910 } };
@@ -531,10 +484,10 @@ static INLINE void convolve_8tap_vert_neon_dotprod(
       // (see horizontal case) required before computing the dot product.
       int8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi,
           s3456_lo, s3456_hi;
-      transpose_concat_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
-      transpose_concat_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
-      transpose_concat_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
-      transpose_concat_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
+      transpose_concat_s8_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
+      transpose_concat_s8_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
+      transpose_concat_s8_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
+      transpose_concat_s8_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
 
       do {
         uint8x8_t t7, t8, t9, t10;
@@ -546,7 +499,7 @@ static INLINE void convolve_8tap_vert_neon_dotprod(
         int8x8_t s10 = vreinterpret_s8_u8(vsub_u8(t10, vdup_n_u8(128)));
 
         int8x16_t s78910_lo, s78910_hi;
-        transpose_concat_8x4(s7, s8, s9, s10, &s78910_lo, &s78910_hi);
+        transpose_concat_s8_8x4(s7, s8, s9, s10, &s78910_lo, &s78910_hi);
 
         // Merge new data into block from previous iteration.
         int8x16x2_t samples_LUT = { { s3456_lo, s78910_lo } };
@@ -655,10 +608,10 @@ void vpx_convolve8_avg_vert_neon_dotprod(const uint8_t *src,
     // This operation combines a conventional transpose and the sample permute
     // (see horizontal case) required before computing the dot product.
     int8x16_t s0123, s1234, s2345, s3456;
-    transpose_concat_4x4(s0, s1, s2, s3, &s0123);
-    transpose_concat_4x4(s1, s2, s3, s4, &s1234);
-    transpose_concat_4x4(s2, s3, s4, s5, &s2345);
-    transpose_concat_4x4(s3, s4, s5, s6, &s3456);
+    transpose_concat_s8_4x4(s0, s1, s2, s3, &s0123);
+    transpose_concat_s8_4x4(s1, s2, s3, s4, &s1234);
+    transpose_concat_s8_4x4(s2, s3, s4, s5, &s2345);
+    transpose_concat_s8_4x4(s3, s4, s5, s6, &s3456);
 
     do {
       uint8x8_t t7, t8, t9, t10;
@@ -670,7 +623,7 @@ void vpx_convolve8_avg_vert_neon_dotprod(const uint8_t *src,
       int8x8_t s10 = vreinterpret_s8_u8(vsub_u8(t10, vdup_n_u8(128)));
 
       int8x16_t s78910;
-      transpose_concat_4x4(s7, s8, s9, s10, &s78910);
+      transpose_concat_s8_4x4(s7, s8, s9, s10, &s78910);
 
       // Merge new data into block from previous iteration.
       int8x16x2_t samples_LUT = { { s3456, s78910 } };
@@ -728,10 +681,10 @@ void vpx_convolve8_avg_vert_neon_dotprod(const uint8_t *src,
       // (see horizontal case) required before computing the dot product.
       int8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi,
           s3456_lo, s3456_hi;
-      transpose_concat_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
-      transpose_concat_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
-      transpose_concat_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
-      transpose_concat_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
+      transpose_concat_s8_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
+      transpose_concat_s8_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
+      transpose_concat_s8_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
+      transpose_concat_s8_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
 
       do {
         uint8x8_t t7, t8, t9, t10;
@@ -743,7 +696,7 @@ void vpx_convolve8_avg_vert_neon_dotprod(const uint8_t *src,
         int8x8_t s10 = vreinterpret_s8_u8(vsub_u8(t10, vdup_n_u8(128)));
 
         int8x16_t s78910_lo, s78910_hi;
-        transpose_concat_8x4(s7, s8, s9, s10, &s78910_lo, &s78910_hi);
+        transpose_concat_s8_8x4(s7, s8, s9, s10, &s78910_lo, &s78910_hi);
 
         // Merge new data into block from previous iteration.
         int8x16x2_t samples_LUT = { { s3456_lo, s78910_lo } };
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_i8mm.c b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_i8mm.c
@@ -340,54 +340,6 @@ void vpx_convolve8_avg_horiz_neon_i8mm(const uint8_t *src, ptrdiff_t src_stride,
   }
 }
 
-static INLINE void transpose_concat_4x4(uint8x8_t a0, uint8x8_t a1,
-                                        uint8x8_t a2, uint8x8_t a3,
-                                        uint8x16_t *b) {
-  // Transpose 8-bit elements and concatenate result rows as follows:
-  // a0: 00, 01, 02, 03, XX, XX, XX, XX
-  // a1: 10, 11, 12, 13, XX, XX, XX, XX
-  // a2: 20, 21, 22, 23, XX, XX, XX, XX
-  // a3: 30, 31, 32, 33, XX, XX, XX, XX
-  //
-  // b: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
-
-  uint8x16_t a0q = vcombine_u8(a0, vdup_n_u8(0));
-  uint8x16_t a1q = vcombine_u8(a1, vdup_n_u8(0));
-  uint8x16_t a2q = vcombine_u8(a2, vdup_n_u8(0));
-  uint8x16_t a3q = vcombine_u8(a3, vdup_n_u8(0));
-
-  uint8x16_t a02 = vzipq_u8(a0q, a2q).val[0];
-  uint8x16_t a13 = vzipq_u8(a1q, a3q).val[0];
-
-  *b = vzipq_u8(a02, a13).val[0];
-}
-
-static INLINE void transpose_concat_8x4(uint8x8_t a0, uint8x8_t a1,
-                                        uint8x8_t a2, uint8x8_t a3,
-                                        uint8x16_t *b0, uint8x16_t *b1) {
-  // Transpose 8-bit elements and concatenate result rows as follows:
-  // a0: 00, 01, 02, 03, 04, 05, 06, 07
-  // a1: 10, 11, 12, 13, 14, 15, 16, 17
-  // a2: 20, 21, 22, 23, 24, 25, 26, 27
-  // a3: 30, 31, 32, 33, 34, 35, 36, 37
-  //
-  // b0: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
-  // b1: 04, 14, 24, 34, 05, 15, 25, 35, 06, 16, 26, 36, 07, 17, 27, 37
-
-  uint8x16_t a0q = vcombine_u8(a0, vdup_n_u8(0));
-  uint8x16_t a1q = vcombine_u8(a1, vdup_n_u8(0));
-  uint8x16_t a2q = vcombine_u8(a2, vdup_n_u8(0));
-  uint8x16_t a3q = vcombine_u8(a3, vdup_n_u8(0));
-
-  uint8x16_t a02 = vzipq_u8(a0q, a2q).val[0];
-  uint8x16_t a13 = vzipq_u8(a1q, a3q).val[0];
-
-  uint8x16x2_t a0123 = vzipq_u8(a02, a13);
-
-  *b0 = a0123.val[0];
-  *b1 = a0123.val[1];
-}
-
 static INLINE int16x4_t convolve8_4_v(const uint8x16_t samples_lo,
                                       const uint8x16_t samples_hi,
                                       const int8x8_t filters) {
@@ -432,17 +384,17 @@ static INLINE void convolve_8tap_vert_neon_i8mm(const uint8_t *src,
     // This operation combines a conventional transpose and the sample permute
     // (see horizontal case) required before computing the dot product.
     uint8x16_t s0123, s1234, s2345, s3456;
-    transpose_concat_4x4(s0, s1, s2, s3, &s0123);
-    transpose_concat_4x4(s1, s2, s3, s4, &s1234);
-    transpose_concat_4x4(s2, s3, s4, s5, &s2345);
-    transpose_concat_4x4(s3, s4, s5, s6, &s3456);
+    transpose_concat_u8_4x4(s0, s1, s2, s3, &s0123);
+    transpose_concat_u8_4x4(s1, s2, s3, s4, &s1234);
+    transpose_concat_u8_4x4(s2, s3, s4, s5, &s2345);
+    transpose_concat_u8_4x4(s3, s4, s5, s6, &s3456);
 
     do {
       uint8x8_t s7, s8, s9, s10;
       load_u8_8x4(src, src_stride, &s7, &s8, &s9, &s10);
 
       uint8x16_t s78910;
-      transpose_concat_4x4(s7, s8, s9, s10, &s78910);
+      transpose_concat_u8_4x4(s7, s8, s9, s10, &s78910);
 
       // Merge new data into block from previous iteration.
       uint8x16x2_t samples_LUT = { { s3456, s78910 } };
@@ -485,17 +437,17 @@ static INLINE void convolve_8tap_vert_neon_i8mm(const uint8_t *src,
       // (see horizontal case) required before computing the dot product.
       uint8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi,
           s3456_lo, s3456_hi;
-      transpose_concat_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
-      transpose_concat_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
-      transpose_concat_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
-      transpose_concat_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
+      transpose_concat_u8_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
+      transpose_concat_u8_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
+      transpose_concat_u8_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
+      transpose_concat_u8_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
 
       do {
         uint8x8_t s7, s8, s9, s10;
         load_u8_8x4(s, src_stride, &s7, &s8, &s9, &s10);
 
         uint8x16_t s78910_lo, s78910_hi;
-        transpose_concat_8x4(s7, s8, s9, s10, &s78910_lo, &s78910_hi);
+        transpose_concat_u8_8x4(s7, s8, s9, s10, &s78910_lo, &s78910_hi);
 
         // Merge new data into block from previous iteration.
         uint8x16x2_t samples_LUT = { { s3456_lo, s78910_lo } };
@@ -594,17 +546,17 @@ void vpx_convolve8_avg_vert_neon_i8mm(const uint8_t *src, ptrdiff_t src_stride,
     // This operation combines a conventional transpose and the sample permute
     // (see horizontal case) required before computing the dot product.
     uint8x16_t s0123, s1234, s2345, s3456;
-    transpose_concat_4x4(s0, s1, s2, s3, &s0123);
-    transpose_concat_4x4(s1, s2, s3, s4, &s1234);
-    transpose_concat_4x4(s2, s3, s4, s5, &s2345);
-    transpose_concat_4x4(s3, s4, s5, s6, &s3456);
+    transpose_concat_u8_4x4(s0, s1, s2, s3, &s0123);
+    transpose_concat_u8_4x4(s1, s2, s3, s4, &s1234);
+    transpose_concat_u8_4x4(s2, s3, s4, s5, &s2345);
+    transpose_concat_u8_4x4(s3, s4, s5, s6, &s3456);
 
     do {
       uint8x8_t s7, s8, s9, s10;
       load_u8_8x4(src, src_stride, &s7, &s8, &s9, &s10);
 
       uint8x16_t s78910;
-      transpose_concat_4x4(s7, s8, s9, s10, &s78910);
+      transpose_concat_u8_4x4(s7, s8, s9, s10, &s78910);
 
       // Merge new data into block from previous iteration.
       uint8x16x2_t samples_LUT = { { s3456, s78910 } };
@@ -653,17 +605,17 @@ void vpx_convolve8_avg_vert_neon_i8mm(const uint8_t *src, ptrdiff_t src_stride,
       // (see horizontal case) required before computing the dot product.
       uint8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi,
           s3456_lo, s3456_hi;
-      transpose_concat_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
-      transpose_concat_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
-      transpose_concat_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
-      transpose_concat_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
+      transpose_concat_u8_8x4(s0, s1, s2, s3, &s0123_lo, &s0123_hi);
+      transpose_concat_u8_8x4(s1, s2, s3, s4, &s1234_lo, &s1234_hi);
+      transpose_concat_u8_8x4(s2, s3, s4, s5, &s2345_lo, &s2345_hi);
+      transpose_concat_u8_8x4(s3, s4, s5, s6, &s3456_lo, &s3456_hi);
 
       do {
         uint8x8_t s7, s8, s9, s10;
         load_u8_8x4(s, src_stride, &s7, &s8, &s9, &s10);
 
         uint8x16_t s78910_lo, s78910_hi;
-        transpose_concat_8x4(s7, s8, s9, s10, &s78910_lo, &s78910_hi);
+        transpose_concat_u8_8x4(s7, s8, s9, s10, &s78910_lo, &s78910_hi);
 
         // Merge new data into block from previous iteration.
         uint8x16x2_t samples_LUT = { { s3456_lo, s78910_lo } };
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_neon_sve2_bridge.h b/media/libvpx/libvpx/vpx_dsp/arm/vpx_neon_sve2_bridge.h
@@ -29,4 +29,18 @@ static INLINE int16x8_t vpx_tbl2_s16(int16x8_t s0, int16x8_t s1,
       svtbl2_s16(samples, svset_neonq_u16(svundef_u16(), tbl)));
 }
 
+static INLINE void vpx_tbl2x4_s16(int16x8_t s0[4], int16x8_t s1[4],
+                                  int16x8_t res[4], uint16x8_t idx) {
+  res[0] = vpx_tbl2_s16(s0[0], s1[0], idx);
+  res[1] = vpx_tbl2_s16(s0[1], s1[1], idx);
+  res[2] = vpx_tbl2_s16(s0[2], s1[2], idx);
+  res[3] = vpx_tbl2_s16(s0[3], s1[3], idx);
+}
+
+static INLINE void vpx_tbl2x2_s16(int16x8_t s0[2], int16x8_t s1[2],
+                                  int16x8_t res[2], uint16x8_t idx) {
+  res[0] = vpx_tbl2_s16(s0[0], s1[0], idx);
+  res[1] = vpx_tbl2_s16(s0[1], s1[1], idx);
+}
+
 #endif  // VPX_VPX_DSP_ARM_VPX_NEON_SVE2_BRIDGE_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_neon_sve_bridge.h b/media/libvpx/libvpx/vpx_dsp/arm/vpx_neon_sve_bridge.h
@@ -48,4 +48,9 @@ static INLINE uint16x8_t vpx_tbl_u16(uint16x8_t data, uint16x8_t indices) {
                                    svset_neonq_u16(svundef_u16(), indices)));
 }
 
+static INLINE int16x8_t vpx_tbl_s16(int16x8_t data, uint16x8_t indices) {
+  return svget_neonq_s16(svtbl_s16(svset_neonq_s16(svundef_s16(), data),
+                                   svset_neonq_u16(svundef_u16(), indices)));
+}
+
 #endif  // VPX_VPX_DSP_ARM_VPX_NEON_SVE_BRIDGE_H_
diff --git a/media/libvpx/libvpx/vpx_ports/aarch64_cpudetect.c b/media/libvpx/libvpx/vpx_ports/aarch64_cpudetect.c
@@ -16,7 +16,7 @@
 #include <sys/sysctl.h>
 #endif
 
-#if !CONFIG_RUNTIME_CPU_DETECT || defined(__OpenBSD__)
+#if !CONFIG_RUNTIME_CPU_DETECT
 
 static int arm_get_cpu_caps(void) {
   // This function should actually be a no-op. There is no way to adjust any of
@@ -29,7 +29,7 @@ static int arm_get_cpu_caps(void) {
   return flags;
 }
 
-#elif defined(__APPLE__)  // end !CONFIG_RUNTIME_CPU_DETECT || defined(__OpenBSD__)
+#elif defined(__APPLE__)  // end !CONFIG_RUNTIME_CPU_DETECT
 
 // sysctlbyname() parameter documentation for instruction set characteristics:
 // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
diff --git a/media/libvpx/moz.yaml b/media/libvpx/moz.yaml
@@ -20,11 +20,11 @@ origin:
 
   # Human-readable identifier for this version/release
   # Generally "version NNN", "tag SSS", "bookmark SSS"
-  release: f32182fc9455d7979236dffca35c8baf232a74ec (Mon Oct 06 20:24:46 2025).
+  release: 9a7674e1a83d1261a49776c8794b87c9bccc85d7 (Tue Nov 04 19:55:43 2025).
 
   # Revision to pull in
   # Must be a long or short commit SHA (long preferred)
-  revision: f32182fc9455d7979236dffca35c8baf232a74ec
+  revision: 9a7674e1a83d1261a49776c8794b87c9bccc85d7
 
   # The package's license, where possible using the mnemonic from
   # https://spdx.org/licenses/

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	media/libvpx/libvpx/examples/vpx_dec_fuzzer.cc	\|	43	++++++++++++++++++++++++++++++++++++-------
M	media/libvpx/libvpx/test/convolve_test.cc	\|	35	+++++++++++++++++++++++++++++++++++
M	media/libvpx/libvpx/test/encode_api_test.cc	\|	103	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
A	media/libvpx/libvpx/third_party/nalloc/LICENSE	\|	21	+++++++++++++++++++++
A	media/libvpx/libvpx/third_party/nalloc/README.libvpx	\|	11	+++++++++++
A	media/libvpx/libvpx/third_party/nalloc/nalloc.h	\|	330	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	media/libvpx/libvpx/vp8/vp8_cx_iface.c	\|	17	++++-------------
M	media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl	\|	6	+++---
A	media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_highbd_temporal_filter_sve2.c	\|	285	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_temporal_filter_neon_dotprod.c	\|	45	++++++++++-----------------------------------
M	media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_temporal_filter_neon_i8mm.c	\|	45	++++++++++-----------------------------------
M	media/libvpx/libvpx/vp9/encoder/vp9_encoder.c	\|	9	++++++++-
M	media/libvpx/libvpx/vp9/vp9_cx_iface.c	\|	19	+++++--------------
M	media/libvpx/libvpx/vp9/vp9cx.mk	\|	2	++
M	media/libvpx/libvpx/vpx_dsp/arm/highbd_vpx_convolve8_sve2.c	\|	116	+++++++++++++++++++++----------------------------------------------------------
M	media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h	\|	32	++++++++++++++++++++++++++++++--
M	media/libvpx/libvpx/vpx_dsp/arm/transpose_neon.h	\|	154	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_dotprod.c	\|	87	++++++++++++++++++-------------------------------------------------------------
M	media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_i8mm.c	\|	88	++++++++++++++++++-------------------------------------------------------------
M	media/libvpx/libvpx/vpx_dsp/arm/vpx_neon_sve2_bridge.h	\|	14	++++++++++++++
M	media/libvpx/libvpx/vpx_dsp/arm/vpx_neon_sve_bridge.h	\|	5	+++++
M	media/libvpx/libvpx/vpx_ports/aarch64_cpudetect.c	\|	4	++--
M	media/libvpx/moz.yaml	\|	4	++--