commit 55d1695f3ef83922447a9a2721c407ee6ec54dab
parent 6d8397e7f60224e104d83e38b0f6b095870b52b5
Author: Updatebot <updatebot@mozilla.com>
Date: Mon, 17 Nov 2025 20:43:36 +0000
Bug 1999338 - Update dav1d to 6deac59d1ea499c0f486ab823588f53a2c742aa4 r=chunmin
Differential Revision: https://phabricator.services.mozilla.com/D272055
Diffstat:
10 files changed, 503 insertions(+), 16 deletions(-)
diff --git a/media/libdav1d/moz.yaml b/media/libdav1d/moz.yaml
@@ -20,11 +20,11 @@ origin:
# Human-readable identifier for this version/release
# Generally "version NNN", "tag SSS", "bookmark SSS"
- release: 0bc6bd93417179cd0c30fac40d2fd11aa29c8523 (2025-10-05T14:03:24.000+01:00).
+ release: 6deac59d1ea499c0f486ab823588f53a2c742aa4 (2025-11-07T00:51:38.000+09:00).
# Revision to pull in
# Must be a long or short commit SHA (long preferred)
- revision: 0bc6bd93417179cd0c30fac40d2fd11aa29c8523
+ revision: 6deac59d1ea499c0f486ab823588f53a2c742aa4
# The package's license, where possible using the mnemonic from
# https://spdx.org/licenses/
diff --git a/media/libdav1d/vcs_version.h b/media/libdav1d/vcs_version.h
@@ -1,2 +1,2 @@
/* auto-generated, do not edit */
-#define DAV1D_VERSION "0bc6bd93417179cd0c30fac40d2fd11aa29c8523"
+#define DAV1D_VERSION "6deac59d1ea499c0f486ab823588f53a2c742aa4"
diff --git a/third_party/dav1d/NEWS b/third_party/dav1d/NEWS
@@ -7,6 +7,7 @@ Changes for 1.5.2 'Sonic':
- mark C globals with small code model
- reduce the code size of the frame header parsing (OBU)
- minor fixes on tools and CI
+ - fix compilation with nasm 3.00
Changes for 1.5.1 'Sonic':
diff --git a/third_party/dav1d/src/internal.h b/third_party/dav1d/src/internal.h
@@ -302,7 +302,7 @@ struct Dav1dFrameContext {
int cdef_buf_sbh;
int lr_buf_plane_sz[2]; /* (stride*sbh*4) << sb128 if n_tc > 1, else stride*4 */
int re_sz /* h */;
- ALIGN(Av1FilterLUT lim_lut, 16);
+ Av1FilterLUT lim_lut;
ALIGN(uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */], 16);
int last_sharpness;
uint8_t *tx_lpf_right_edge[2];
diff --git a/third_party/dav1d/src/lf_mask.h b/third_party/dav1d/src/lf_mask.h
@@ -34,9 +34,9 @@
#include "src/levels.h"
typedef struct Av1FilterLUT {
- uint8_t e[64];
- uint8_t i[64];
- uint64_t sharp[2];
+ ALIGN(uint8_t e[64], 16);
+ ALIGN(uint8_t i[64], 16);
+ ALIGN(uint64_t sharp[2], 16);
} Av1FilterLUT;
typedef struct Av1RestorationUnit {
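The internal.h and lf_mask.h hunks move the 16-byte alignment from the single
lim_lut field in Dav1dFrameContext onto the Av1FilterLUT members themselves, so
any instance of the struct is suitably aligned for SIMD loads, not just that one
field. A minimal sketch of the effect, using standard C11 alignas rather than
dav1d's ALIGN() helper:

    #include <stdalign.h>
    #include <stdint.h>

    /* Illustration only: putting the alignment on the members raises the
     * alignment of the whole struct, so stack-local or embedded copies are
     * as safe for aligned vector loads as the one field that previously
     * carried the ALIGN() annotation. */
    typedef struct ExampleLUT {
        alignas(16) uint8_t  e[64];
        alignas(16) uint8_t  i[64];
        alignas(16) uint64_t sharp[2];
    } ExampleLUT;

    _Static_assert(alignof(ExampleLUT) == 16, "members force struct alignment");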
diff --git a/third_party/dav1d/src/mc_tmpl.c b/third_party/dav1d/src/mc_tmpl.c
@@ -736,16 +736,16 @@ static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride,
const int mask_rnd = 1 << (mask_sh - 5);
do {
for (int x = 0; x < w; x++) {
- const int m = imin(38 + ((abs(tmp1[x] - tmp2[x]) + mask_rnd) >> mask_sh), 64);
- dst[x] = iclip_pixel((tmp1[x] * m +
- tmp2[x] * (64 - m) + rnd) >> sh);
+ const int tmpdiff = tmp1[x] - tmp2[x];
+ const int m = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64);
+ dst[x] = iclip_pixel((tmpdiff * m + tmp2[x] * 64 + rnd) >> sh);
if (ss_hor) {
x++;
- const int n = imin(38 + ((abs(tmp1[x] - tmp2[x]) + mask_rnd) >> mask_sh), 64);
- dst[x] = iclip_pixel((tmp1[x] * n +
- tmp2[x] * (64 - n) + rnd) >> sh);
+ const int tmpdiff = tmp1[x] - tmp2[x];
+ const int n = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64);
+ dst[x] = iclip_pixel((tmpdiff * n + tmp2[x] * 64 + rnd) >> sh);
if (h & ss_ver) {
mask[x >> 1] = (m + n + mask[x >> 1] + 2 - sign) >> 2;
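The w_mask_c change rewrites the blend tmp1[x]*m + tmp2[x]*(64 - m) as
(tmp1[x] - tmp2[x])*m + 64*tmp2[x]. The two forms are algebraically identical,
but the difference tmp1[x] - tmp2[x] is already needed for the mask weight, and
the new form maps onto one widening multiply plus one multiply-accumulate, which
is how the RVV code further down computes it (vwmul.vx / vwmacc.vv). A small
self-contained check of the identity, as a sketch:

    #include <assert.h>

    /* Sketch: verify  t1*m + t2*(64 - m) == (t1 - t2)*m + 64*t2  over a
     * range of intermediate values and all mask weights 0..64. */
    static int blend_old(int t1, int t2, int m) { return t1 * m + t2 * (64 - m); }
    static int blend_new(int t1, int t2, int m) { return (t1 - t2) * m + 64 * t2; }

    int main(void) {
        for (int t1 = -4096; t1 <= 4096; t1 += 31)
            for (int t2 = -4096; t2 <= 4096; t2 += 31)
                for (int m = 0; m <= 64; m++)
                    assert(blend_old(t1, t2, m) == blend_new(t1, t2, m));
        return 0;
    }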
diff --git a/third_party/dav1d/src/riscv/64/ipred.S b/third_party/dav1d/src/riscv/64/ipred.S
@@ -27,6 +27,53 @@
#include "src/riscv/asm.S"
+// void ipred_v_8bpc_rvv(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height)
+function ipred_v_8bpc_rvv, export=1, ext="v,zba"
+ csrw vxrm, zero
+ addi a2, a2, 1
+ vsetvli t0, a3, e8, m1, ta, ma
+ bne t0, a3, 3f // Go to slow path - whole row doesn't fit in register
+1:
+ // Fast path - row fits in register
+ add t1, a0, a1
+ vle8.v v4, (a2)
+2:
+ vse8.v v4, (a0)
+ sh1add a0, a1, a0
+ vse8.v v4, (t1)
+ sh1add t1, a1, t1
+ addi a4, a4, -2
+ bnez a4, 2b
+ ret
+
+ // Row doesn't fit in register.
+3:
+ vsetvli t0, a3, e8, m2, ta, ma // Try using 2 registers at once (LMUL=2)
+ beq t0, a3, 1b // Back to fast path - now it fits
+
+4:
+ // No need for another vsetvli, since both width and VLEN are powers of 2, so there is no tail.
+ vle8.v v4, (a2)
+ mv t2, a0
+ mv t1, a4
+5:
+ vse8.v v4, (t2)
+ add t2, t2, a1
+ addi t1, t1, -1
+ bnez t1, 5b // Loop over rows.
+
+ sub a3, a3, t0
+ add a2, a2, t0
+ add a0, a0, t0
+ bnez a3, 4b // Loop over columns
+
+ ret
+endfunc
+
+
function dc_gen_8bpc_rvv, export=1, ext="v,zbb"
.variant_cc dav1d_dc_gen_8bpc_rvv
add t1, a1, a2
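The new ipred_v_8bpc_rvv in ipred.S implements vertical intra prediction: the
row directly above the block (topleft + 1) is loaded once and stored to every
row of dst, with a fast path when the whole row fits in a single vector register
(or an LMUL=2 group) and a column-strip loop otherwise. Its scalar behaviour
corresponds roughly to this sketch (an illustration, not dav1d's C fallback):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    typedef uint8_t pixel;

    /* Rough scalar equivalent: replicate the row above the block
     * (topleft[1..width]) into every row of the destination. */
    static void ipred_v_ref(pixel *dst, ptrdiff_t stride,
                            const pixel *topleft, int width, int height)
    {
        const pixel *top = topleft + 1;
        for (int y = 0; y < height; y++, dst += stride)
            memcpy(dst, top, (size_t)width);
    }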
diff --git a/third_party/dav1d/src/riscv/64/mc.S b/third_party/dav1d/src/riscv/64/mc.S
@@ -1,6 +1,7 @@
/******************************************************************************
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2024, Nathan Egge, Niklas Haas, Bogdan Gligorijevic
+ * Copyright © 2025, Sungjoon Moon
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -532,3 +533,421 @@ function warp_8x8t_8bpc_rvv, export=1, ext="v,zba"
ret
endfunc
+
+function emu_edge_8bpc_rvv, export=1, ext="v,zbb"
+ ld t0, 0(sp)
+ ld t1, 8(sp)
+
+ // int cx = iclip((int) x, 0, (int) iw - 1);
+ max t2, a4, zero
+ addi t4, a2, -1
+ min t2, t2, t4
+
+ // int cy = iclip((int) y, 0, (int) ih - 1);
+ max t3, a5, zero
+ addi t5, a3, -1
+ min t3, t3, t5
+
+ // ref += cy*PXSTRIDE(ref_stride) + cx
+ mul t3, t3, t1
+ add t3, t3, t2
+
+ add t0, t0, t3
+
+ addi t4, a0, -1
+
+ neg t2, a4
+ add t3, a4, a0
+ sub t3, t3, a2
+
+ // int left_ext = iclip((int) -x, 0, (int) bw - 1);
+ max t2, t2, zero
+ min a2, t2, t4 # a2 = left_ext
+
+ // int right_ext = iclip((int) (x + bw - iw), 0, (int) bw - 1);
+ max t3, t3, zero
+ min a4, t3, t4 # a4 = right_ext
+
+ addi t6, a1, -1
+
+ neg t4, a5
+ add t5, a5, a1
+ sub t5, t5, a3
+
+ // int top_ext = iclip((int) -y, 0, (int) bh - 1);
+ max t4, t4, zero
+ min a3, t4, t6 # a3 = top_ext
+
+ // int bottom_ext = iclip((int) (x + bh - ih), 0, (int) bh - 1);
+ max t5, t5, zero
+ min a5, t5, t6 # a5 = bottom_ext
+
+ sub t4, a1, a3
+ sub t4, t4, a5 # t4 = center_h
+
+ mul t5, a3, a7
+ add a1, a6, t5 # blk = dst + top_ext * dst_stride
+
+ sub t3, a0, a2
+ sub t3, t3, a4 # t3 = center_w = bw - left_ext - right_ext
+
+.macro v_loop need_left, need_right
+9:
+ # pixel_copy()
+ add t5, a1, a2 # t5 = blk + left_ext
+ mv t2, t0 # ref
+0:
+ vsetvli t6, t3, e8, m1, ta, ma
+ vle8.v v8, (t2)
+ add t2, t2, t6
+
+ vse8.v v8, (t5)
+ sub t3, t3, t6
+ add t5, t5, t6
+ bnez t3, 0b
+
+ sub t3, a0, a2
+ sub t3, t3, a4 # t3 = center_w = bw - left_ext - right_ext
+
+.if \need_left
+ lb t2, (t0) # ref[0]
+ # pixel_set()
+ vsetvli t6, a2, e8, m1, ta, ma
+ vmv.v.x v8, t2
+ mv t2, a2 # left_ext
+ mv t5, a1 # blk
+0:
+ vse8.v v8, (t5)
+ sub t2, t2, t6 # left_ext -= t6
+ add t5, t5, t6 # blk += t6
+ vsetvli t6, t2, e8, m1, ta, ma
+ bnez t2, 0b
+.endif
+
+.if \need_right
+ add t5, a1, a2 # t5 = blk + left_ext
+ add t5, t5, t3 # t5 = blk + left_ext + center_w
+ lb t2, -1(t5) # blk[left_ext + center_w - 1]
+ # pixel_set()
+ vsetvli t6, a4, e8, m1, ta, ma
+ vmv.v.x v8, t2
+ mv t2, a4 # right_ext
+0:
+ vse8.v v8, (t5)
+ sub t2, t2, t6
+ add t5, t5, t6
+ vsetvli t6, t2, e8, m1, ta, ma
+ bnez t2, 0b
+.endif
+
+ add t0, t0, t1 # ref += ref_stride
+ add a1, a1, a7 # blk += dst_stride
+ addi t4, t4, -1 # center_h--
+ bnez t4, 9b
+.endm
+
+L(emu_edge_center):
+ blez t4, L(emu_edge_bottom)
+
+ beqz a2, 1f # if (left_ext)
+ beqz a4, 2f # if (right_ext)
+ v_loop 1, 1
+ j L(emu_edge_bottom)
+
+1:
+ beqz a4, 3f
+ v_loop 0, 1
+ j L(emu_edge_bottom)
+
+2:
+ v_loop 1, 0
+ j L(emu_edge_bottom)
+
+3:
+ v_loop 0, 0
+
+L(emu_edge_bottom): # copy bottom
+ blez a5, L(emu_edge_top)
+ mv t2, a0 # bw
+2:
+ mv t5, a5 # bottom_ext
+ mv t1, a1 # dst
+
+ vsetvli t6, t2, e8, m1, ta, ma
+ sub t0, t1, a7 # dst - dst_stride
+ vle8.v v8, (t0)
+0:
+ vse8.v v8, (t1)
+ add t1, t1, a7
+ addi t5, t5, -1
+ bnez t5, 0b
+
+ sub t2, t2, t6
+ add a1, a1, t6
+ bnez t2, 2b
+
+L(emu_edge_top): # copy top
+ blez a3, L(emu_edge_end)
+ mul t5, a3, a7
+ add t1, a6, t5 # blk = dst + top_ext * PXSTRIDE(dst_stride)
+ # a6 = dst
+1:
+ mv t0, a3 # top_ext
+ mv t4, a6 # dst
+
+ vsetvli t6, a0, e8, m1, ta, ma
+ vle8.v v8, (t1)
+0:
+ vse8.v v8, (t4)
+ add t4, t4, a7
+ vse8.v v8, (t4)
+ add t4, t4, a7
+ addi t0, t0, -2
+ bgtz t0, 0b
+
+ sub a0, a0, t6
+ add t1, t1, t6
+ add a6, a6, t6
+
+ bnez a0, 1b
+
+L(emu_edge_end):
+ ret
+endfunc
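emu_edge_8bpc_rvv mirrors the structure of the C emu_edge: clamp the requested
(x, y) into the reference frame, derive how far the block overhangs each side
(left/right/top/bottom extensions), copy the in-frame center with vector
loads/stores, splat the edge pixel across the left/right extensions, and finally
replicate whole rows into the bottom and top extensions. The clipping referenced
in the comments is the usual iclip; for example (a sketch, not dav1d's header):

    /* iclip as used in the comments above. */
    static inline int iclip(int v, int lo, int hi)
    {
        return v < lo ? lo : v > hi ? hi : v;
    }

    /* Example: a 16-wide block requested at x = -3 in a 12-wide frame:    */
    /*   left_ext  = iclip(-x,          0, bw - 1) = 3                     */
    /*   right_ext = iclip(x + bw - iw, 0, bw - 1) = 1                     */
    /*   center_w  = bw - left_ext - right_ext     = 12 (== iw)            */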
+
+.macro w_mask_fn type vlen
+function w_mask_\type\()_\vlen\()8bpc_rvv, export=1, ext="v,zba,zbb"
+ csrw vxrm, zero
+ li t1, 38*256+8
+.ifc \vlen, vl256_
+ addi t0, zero, 64
+ bgt a4, t0, 2f
+ li t2, 0xCAC9C8CFCE0000
+ li t3, 0xC1C0C7C6C50000
+.else
+ addi t0, zero, 32
+ bgt a4, t0, 2f
+ li t2, 0xCAC9C8CF0000
+ li t3, 0xC1C0C7C60000
+.endif
+ ctz t4, a4
+ slli t4, t4, 3
+ srl t2, t2, t4
+ andi t2, t2, 0xFF
+ srl t3, t3, t4
+ andi t3, t3, 0xFF
+
+1:
+.if \type == 444
+w_mask_body 444 narrow
+
+ sh1add a0, a1, a0 # dst += dst_stride
+ add a6, a6, a4 # mask += w
+.elseif \type == 422
+w_mask_body 422 narrow
+
+ sh1add a0, a1, a0 # dst += dst_stride
+ srli t4, a4, 1
+ add a6, a6, t4 # mask += w >> 1
+.elseif \type == 420
+w_mask_body 420 narrow
+
+ sh1add a0, a1, a0 # dst += dst_stride
+.endif
+
+ sh1add a2, a4, a2
+ sh1add a3, a4, a3
+
+ addi a5, a5, -2
+ bnez a5, 1b
+
+ ret
+
+2:
+ li t2, 0xca
+ li t3, 0xc1
+
+3:
+ mv t5, zero
+
+.if \type == 444
+w_mask_body 444 wide # VLEN>=256
+.elseif \type == 422
+w_mask_body 422 wide # VLEN>=256
+.elseif \type == 420
+w_mask_body 420 wide # VLEN>=256
+.endif
+
+ add t5, t5, t6
+ bne t5, a4, 4b
+
+ sh1add a0, a1, a0 # dst += dst_stride
+.if \type == 444
+ add a6, a6, a4 # mask += w
+.elseif \type == 422
+ srli t4, a4, 1
+ add a6, a6, t4 # mask += w >> 1
+.elseif \type == 420
+.endif
+
+ sh1add a2, a4, a2
+ sh1add a3, a4, a3
+
+ addi a5, a5, -2
+ bnez a5, 3b
+
+ ret
+
+endfunc
+.endm
+
+.macro w_mask_body type size
+ mv t0, a0 # dst
+
+4:
+ vsetvl t6, a4, t2
+
+ # load tmp1 and tmp2
+ vle16.v v0, (a2) # tmp1[x]
+
+ sh1add t4, a4, a2 # tmp1
+ vle16.v v16, (t4) # tmp1[x]
+ sh1add a2, t6, a2 # tmp1 += w / k
+
+ vle16.v v4, (a3) # tmp2[x]
+
+ sh1add t4, a4, a3 # tmp2
+ vle16.v v20, (t4) # tmp2[x]
+ sh1add a3, t6, a3 # tmp2 += w / k
+
+ # v12 = abs(tmp1[x] - tmp2[x])
+ vsub.vv v12, v0, v4 # tmp1[x] - tmp2[x]
+ vsub.vv v8, v4, v0 # tmp2[x] - tmp1[x]
+ vmax.vv v8, v12, v8
+
+ vsub.vv v28, v16, v20 # tmp1[x] - tmp2[x]
+ vsub.vv v24, v20, v16 # tmp2[x] - tmp1[x]
+ vmax.vv v24, v28, v24
+
+ li t4, 64
+
+ # min(38 + ((v12 + 8) >> 8), 64) -> min((v12 + 38*256 + 8) >> 8, 64)
+ vadd.vx v8, v8, t1
+ vsra.vi v8, v8, 8
+ vmin.vx v8, v8, t4
+
+ vadd.vx v24, v24, t1
+ vsra.vi v24, v24, 8
+ vmin.vx v24, v24, t4
+
+ # dst[x] = (tmp1[x] - tmp2[x]) * m + 64 * tmp2[x];
+ # v12, v28 = tmp1[x] - tmp2[x]
+ # v8, v24 = {m,n}
+ vwmul.vx v0, v4, t4
+ vwmacc.vv v0, v8, v12
+ vnclipu.wi v0, v0, 10
+ vmax.vx v0, v0, zero
+
+ vwmul.vx v16, v20, t4
+ vwmacc.vv v16, v24, v28
+ vnclipu.wi v16, v16, 10
+ vmax.vx v16, v16, zero
+
+.if \type == 444
+ vsetvl zero, zero, t3
+
+ vnclipu.wi v0, v0, 0
+ vnclipu.wi v16, v16, 0
+
+ vse8.v v0, (t0) # dst[x] =
+ add t4, t0, a1
+ vse8.v v16, (t4) # dst[x] =
+ add t0, t0, t6
+
+ vnsrl.wi v8, v8, 0
+ vnsrl.wi v24, v24, 0
+
+ vse8.v v8, (a6) # mask[x] = m
+ add t4, a6, a4
+ vse8.v v24, (t4) # mask[x] = m
+ add a6, a6, t6
+
+.elseif \type == 422
+ # v4, v20 = m
+ # v12, v28 = n
+ vnsrl.wi v4, v8, 0
+ vnsrl.wi v8, v8, 16
+
+ vnsrl.wi v20, v24, 0
+ vnsrl.wi v24, v24, 16
+
+ # v8, v24 = m + n - sign
+ vadd.vv v8, v4, v8
+ vsub.vx v8, v8, a7
+
+ vadd.vv v24, v20, v24
+ vsub.vx v24, v24, a7
+
+ vsetvl zero, zero, t3
+
+ vnclipu.wi v0, v0, 0
+ vnclipu.wi v16, v16, 0
+
+ vse8.v v0, (t0) # dst[x] =
+ add t4, t0, a1
+ vse8.v v16, (t4) # dst[x] =
+ add t0, t0, t6
+
+ vnclipu.wi v8, v8, 1
+ vnclipu.wi v24, v24, 1
+
+.ifc \size, wide
+ srli t4, t6, 1
+ vsetvl zero, t4, t3
+.endif
+
+ vse8.v v8, (a6) # mask[x] = m + n + 1 - sign
+ srli t4, a4, 1
+ add t4, a6, t4
+ vse8.v v24, (t4) # mask[x] = m + n + 1 - sign
+ srli t4, t6, 1
+ add a6, a6, t4
+.elseif \type == 420
+ # v4, v20 = m
+ # v12, v28 = n
+ vnsrl.wi v4, v8, 0
+ vnsrl.wi v8, v8, 16
+
+ vnsrl.wi v20, v24, 0
+ vnsrl.wi v24, v24, 16
+
+ # v8 = m + n + mask[x >> 1]
+ vadd.vv v8, v4, v8
+ vadd.vv v24, v20, v24
+ vadd.vv v8, v8, v24
+ vsub.vx v8, v8, a7
+
+ vsetvl zero, zero, t3
+
+ vnclipu.wi v0, v0, 0
+ vnclipu.wi v16, v16, 0
+
+ vse8.v v0, (t0) # dst[x] =
+ add t4, t0, a1
+ vse8.v v16, (t4) # dst[x] =
+ add t0, t0, t6
+
+ vnclipu.wi v8, v8, 2
+
+ vse8.v v8, (a6) # mask[x] = (m + n + mask[x >> 1] + 2 - sign) >> 2;
+ srli t4, t6, 1
+ add a6, a6, t4
+.endif
+.endm
+
+w_mask_fn 444
+w_mask_fn 444 vl256_
+w_mask_fn 422
+w_mask_fn 422 vl256_
+w_mask_fn 420
+w_mask_fn 420 vl256_
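The li t1, 38*256+8 constant in w_mask_fn folds the weight computation into a
single add/shift/min: the vsra.vi ..., 8 and the "+8" correspond to mask_sh = 8
and mask_rnd = 8 at 8 bpc, i.e. m = imin(38 + ((abs(diff) + 8) >> 8), 64), and
because 38*256 is a multiple of 256 the "+38" can be moved inside the value that
is shifted right by 8. A small check of that folding, as a sketch:

    #include <assert.h>

    static int imin(int a, int b) { return a < b ? a : b; }

    /* Sketch: the C form and the folded form used by the assembly agree
     * for all non-negative differences. */
    int main(void) {
        for (int d = 0; d < 1 << 15; d++) {
            int c_form   = imin(38 + ((d + 8) >> 8), 64);
            int asm_form = imin((d + 38 * 256 + 8) >> 8, 64);
            assert(c_form == asm_form);
        }
        return 0;
    }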
diff --git a/third_party/dav1d/src/riscv/ipred.h b/third_party/dav1d/src/riscv/ipred.h
@@ -33,6 +33,7 @@ decl_cfl_pred_fn(BF(dav1d_ipred_cfl_128, rvv));
decl_cfl_pred_fn(BF(dav1d_ipred_cfl_top, rvv));
decl_cfl_pred_fn(BF(dav1d_ipred_cfl_left, rvv));
+decl_angular_ipred_fn(BF(dav1d_ipred_v, rvv));
decl_angular_ipred_fn(BF(dav1d_ipred_paeth, rvv));
decl_angular_ipred_fn(BF(dav1d_ipred_smooth, rvv));
decl_angular_ipred_fn(BF(dav1d_ipred_smooth_v, rvv));
@@ -52,6 +53,7 @@ static ALWAYS_INLINE void intra_pred_dsp_init_riscv(Dav1dIntraPredDSPContext *co
c->cfl_pred[TOP_DC_PRED ] = dav1d_ipred_cfl_top_8bpc_rvv;
c->cfl_pred[LEFT_DC_PRED] = dav1d_ipred_cfl_left_8bpc_rvv;
+ c->intra_pred[VERT_PRED ] = dav1d_ipred_v_8bpc_rvv;
c->intra_pred[PAETH_PRED ] = dav1d_ipred_paeth_8bpc_rvv;
c->intra_pred[SMOOTH_PRED ] = dav1d_ipred_smooth_8bpc_rvv;
c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_8bpc_rvv;
diff --git a/third_party/dav1d/src/riscv/mc.h b/third_party/dav1d/src/riscv/mc.h
@@ -40,8 +40,17 @@ decl_avg_fn(BF(dav1d_avg, rvv));
decl_w_avg_fn(BF(dav1d_w_avg, rvv));
decl_mask_fn(BF(dav1d_mask, rvv));
+decl_w_mask_fn(BF(dav1d_w_mask_444, rvv));
+decl_w_mask_fn(BF(dav1d_w_mask_422, rvv));
+decl_w_mask_fn(BF(dav1d_w_mask_420, rvv));
+
+decl_w_mask_fn(BF(dav1d_w_mask_444_vl256, rvv));
+decl_w_mask_fn(BF(dav1d_w_mask_422_vl256, rvv));
+decl_w_mask_fn(BF(dav1d_w_mask_420_vl256, rvv));
+
decl_warp8x8_fn(BF(dav1d_warp_8x8, rvv));
decl_warp8x8t_fn(BF(dav1d_warp_8x8t, rvv));
+decl_emu_edge_fn(BF(dav1d_emu_edge, rvv));
static ALWAYS_INLINE void mc_dsp_init_riscv(Dav1dMCDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
@@ -58,10 +67,11 @@ static ALWAYS_INLINE void mc_dsp_init_riscv(Dav1dMCDSPContext *const c) {
#if BITDEPTH == 8
c->blend_h = BF(dav1d_blend_h, rvv);
+ c->emu_edge = BF(dav1d_emu_edge, rvv);
- if (dav1d_get_vlen() >= 256) {
- c->blend_h = BF(dav1d_blend_h_vl256, rvv);
- }
+ c->w_mask[0] = BF(dav1d_w_mask_444, rvv);
+ c->w_mask[1] = BF(dav1d_w_mask_422, rvv);
+ c->w_mask[2] = BF(dav1d_w_mask_420, rvv);
c->avg = BF(dav1d_avg, rvv);
c->w_avg = BF(dav1d_w_avg, rvv);
@@ -69,5 +79,13 @@ static ALWAYS_INLINE void mc_dsp_init_riscv(Dav1dMCDSPContext *const c) {
c->warp8x8 = BF(dav1d_warp_8x8, rvv);
c->warp8x8t = BF(dav1d_warp_8x8t, rvv);
+
+ if (dav1d_get_vlen() >= 256) {
+ c->blend_h = BF(dav1d_blend_h_vl256, rvv);
+
+ c->w_mask[0] = BF(dav1d_w_mask_444_vl256, rvv);
+ c->w_mask[1] = BF(dav1d_w_mask_422_vl256, rvv);
+ c->w_mask[2] = BF(dav1d_w_mask_420_vl256, rvv);
+ }
#endif
}