tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git

commit 55d1695f3ef83922447a9a2721c407ee6ec54dab
parent 6d8397e7f60224e104d83e38b0f6b095870b52b5
Author: Updatebot <updatebot@mozilla.com>
Date:   Mon, 17 Nov 2025 20:43:36 +0000

Bug 1999338 - Update dav1d to 6deac59d1ea499c0f486ab823588f53a2c742aa4 r=chunmin

Differential Revision: https://phabricator.services.mozilla.com/D272055

Diffstat:
M media/libdav1d/moz.yaml                |   4 ++--
M media/libdav1d/vcs_version.h           |   2 +-
M third_party/dav1d/NEWS                 |   1 +
M third_party/dav1d/src/internal.h       |   2 +-
M third_party/dav1d/src/lf_mask.h        |   6 +++---
M third_party/dav1d/src/mc_tmpl.c        |  12 ++++++------
M third_party/dav1d/src/riscv/64/ipred.S |  47 +++++++++++++++++++++++++++++++++++++++++++++++
M third_party/dav1d/src/riscv/64/mc.S    | 419 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M third_party/dav1d/src/riscv/ipred.h    |   2 ++
M third_party/dav1d/src/riscv/mc.h       |  24 +++++++++++++++++++++---
10 files changed, 503 insertions(+), 16 deletions(-)

diff --git a/media/libdav1d/moz.yaml b/media/libdav1d/moz.yaml
@@ -20,11 +20,11 @@ origin:
 
   # Human-readable identifier for this version/release
   # Generally "version NNN", "tag SSS", "bookmark SSS"
-  release: 0bc6bd93417179cd0c30fac40d2fd11aa29c8523 (2025-10-05T14:03:24.000+01:00).
+  release: 6deac59d1ea499c0f486ab823588f53a2c742aa4 (2025-11-07T00:51:38.000+09:00).
 
   # Revision to pull in
   # Must be a long or short commit SHA (long preferred)
-  revision: 0bc6bd93417179cd0c30fac40d2fd11aa29c8523
+  revision: 6deac59d1ea499c0f486ab823588f53a2c742aa4
 
   # The package's license, where possible using the mnemonic from
   # https://spdx.org/licenses/
diff --git a/media/libdav1d/vcs_version.h b/media/libdav1d/vcs_version.h
@@ -1,2 +1,2 @@
 /* auto-generated, do not edit */
-#define DAV1D_VERSION "0bc6bd93417179cd0c30fac40d2fd11aa29c8523"
+#define DAV1D_VERSION "6deac59d1ea499c0f486ab823588f53a2c742aa4"
diff --git a/third_party/dav1d/NEWS b/third_party/dav1d/NEWS
@@ -7,6 +7,7 @@ Changes for 1.5.2 'Sonic':
  - mark C globals with small code model
  - reduce the code size of the frame header parsing (OBU)
  - minor fixes on tools and CI
+ - fix compilation with nasm 3.00
 
 Changes for 1.5.1 'Sonic':
diff --git a/third_party/dav1d/src/internal.h b/third_party/dav1d/src/internal.h
@@ -302,7 +302,7 @@ struct Dav1dFrameContext {
         int cdef_buf_sbh;
         int lr_buf_plane_sz[2]; /* (stride*sbh*4) << sb128 if n_tc > 1, else stride*4 */
         int re_sz /* h */;
-        ALIGN(Av1FilterLUT lim_lut, 16);
+        Av1FilterLUT lim_lut;
         ALIGN(uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */], 16);
         int last_sharpness;
         uint8_t *tx_lpf_right_edge[2];
diff --git a/third_party/dav1d/src/lf_mask.h b/third_party/dav1d/src/lf_mask.h
@@ -34,9 +34,9 @@
 #include "src/levels.h"
 
 typedef struct Av1FilterLUT {
-    uint8_t e[64];
-    uint8_t i[64];
-    uint64_t sharp[2];
+    ALIGN(uint8_t e[64], 16);
+    ALIGN(uint8_t i[64], 16);
+    ALIGN(uint64_t sharp[2], 16);
 } Av1FilterLUT;
 
 typedef struct Av1RestorationUnit {
diff --git a/third_party/dav1d/src/mc_tmpl.c b/third_party/dav1d/src/mc_tmpl.c
@@ -736,16 +736,16 @@ static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride,
     const int mask_rnd = 1 << (mask_sh - 5);
     do {
         for (int x = 0; x < w; x++) {
-            const int m = imin(38 + ((abs(tmp1[x] - tmp2[x]) + mask_rnd) >> mask_sh), 64);
-            dst[x] = iclip_pixel((tmp1[x] * m +
-                                  tmp2[x] * (64 - m) + rnd) >> sh);
+            const int tmpdiff = tmp1[x] - tmp2[x];
+            const int m = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64);
+            dst[x] = iclip_pixel((tmpdiff * m + tmp2[x] * 64 + rnd) >> sh);
 
             if (ss_hor) {
                 x++;
-                const int n = imin(38 + ((abs(tmp1[x] - tmp2[x]) + mask_rnd) >> mask_sh), 64);
-                dst[x] = iclip_pixel((tmp1[x] * n +
-                                      tmp2[x] * (64 - n) + rnd) >> sh);
+                const int tmpdiff = tmp1[x] - tmp2[x];
+                const int n = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64);
+                dst[x] = iclip_pixel((tmpdiff * n + tmp2[x] * 64 + rnd) >> sh);
 
                 if (h & ss_ver) {
                     mask[x >> 1] = (m + n + mask[x >> 1] + 2 - sign) >> 2;
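
The mc_tmpl.c hunk is a pure refactor: tmp1[x] * m + tmp2[x] * (64 - m) is identical to (tmp1[x] - tmp2[x]) * m + tmp2[x] * 64, so factoring out tmpdiff removes one multiply per pixel and puts the C reference in the same shape the new RISC-V w_mask kernels compute with vwmul/vwmacc (see mc.S below). A standalone check of the identity, using hypothetical shift and rounding constants rather than dav1d's real template parameters:

    /* Verifies the blend refactor in w_mask_c: old and new expressions are
     * bit-identical for all inputs, since
     *   tmp1*m + tmp2*(64 - m) == (tmp1 - tmp2)*m + tmp2*64.
     * mask_sh/sh/rnd are made-up 8bpc-style values for illustration. */
    #include <assert.h>
    #include <stdlib.h>

    int main(void) {
        const int mask_sh = 8, mask_rnd = 1 << (mask_sh - 5);
        const int sh = 10, rnd = 1 << (sh - 1);
        for (int tmp1 = -1024; tmp1 <= 1024; tmp1 += 3)
            for (int tmp2 = -1024; tmp2 <= 1024; tmp2 += 3) {
                int m = 38 + ((abs(tmp1 - tmp2) + mask_rnd) >> mask_sh);
                if (m > 64) m = 64;
                const int old_val = (tmp1 * m + tmp2 * (64 - m) + rnd) >> sh;
                const int tmpdiff = tmp1 - tmp2;
                const int new_val = (tmpdiff * m + tmp2 * 64 + rnd) >> sh;
                assert(old_val == new_val); /* same result, one multiply saved */
            }
        return 0;
    }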
diff --git a/third_party/dav1d/src/riscv/64/ipred.S b/third_party/dav1d/src/riscv/64/ipred.S
@@ -27,6 +27,53 @@
 
 #include "src/riscv/asm.S"
 
+// void ipred_v_8bpc_rvv(pixel *dst, const ptrdiff_t stride,
+//                       const pixel *const topleft,
+//                       const int width, const int height, const int a,
+//                       const int max_width, const int max_height)
+function ipred_v_8bpc_rvv, export=1, ext="v,zba"
+    csrw vxrm, zero
+    addi a2, a2, 1
+    vsetvli t0, a3, e8, m1, ta, ma
+    bne t0, a3, 3f // Go to slow path - whole row doesn't fit in register
+1:
+    // Fast path - row fits in register
+    add t1, a0, a1
+    vle8.v v4, (a2)
+2:
+    vse8.v v4, (a0)
+    sh1add a0, a1, a0
+    vse8.v v4, (t1)
+    sh1add t1, a1, t1
+    addi a4, a4, -2
+    bnez a4, 2b
+    ret
+
+    // Row doesn't fit in register.
+3:
+    vsetvli t0, a3, e8, m2, ta, ma // Try using 2 registers at once (LMUL=2)
+    beq t0, a3, 1b // Back to fast path - now it fits
+
+4:
+    // No need for more vsetvli, since both width and VLEN are powers of 2, so there is no tail.
+    vle8.v v4, (a2)
+    mv t2, a0
+    mv t1, a4
+5:
+    vse8.v v4, (t2)
+    add t2, t2, a1
+    addi t1, t1, -1
+    bnez t1, 5b // Loop over rows.
+
+    sub a3, a3, t0
+    add a2, a2, t0
+    add a0, a0, t0
+    bnez a3, 4b // Loop over columns
+
+    ret
+endfunc
+
+
 function dc_gen_8bpc_rvv, export=1, ext="v,zbb"
 .variant_cc dav1d_dc_gen_8bpc_rvv
     add t1, a1, a2
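
ipred_v is the simplest intra mode: every row of the predicted block is a copy of the row of reconstructed pixels directly above it (topleft + 1). A scalar sketch of that behaviour (in the spirit of dav1d's generic C template, not the exact code) is below; the RVV function above performs the same copy with vle8.v/vse8.v, emitting two rows per iteration on the fast path and strip-mining across columns when a row is wider than a vector register:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Vertical intra prediction, scalar reference: replicate the top row. */
    static void ipred_v_sketch(uint8_t *dst, const ptrdiff_t stride,
                               const uint8_t *const topleft,
                               const int width, const int height) {
        for (int y = 0; y < height; y++) {
            memcpy(dst, topleft + 1, width);   /* row above the block */
            dst += stride;
        }
    }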
diff --git a/third_party/dav1d/src/riscv/64/mc.S b/third_party/dav1d/src/riscv/64/mc.S
@@ -1,6 +1,7 @@
 /******************************************************************************
  * Copyright © 2018, VideoLAN and dav1d authors
  * Copyright © 2024, Nathan Egge, Niklas Haas, Bogdan Gligorijevic
+ * Copyright © 2025, Sungjoon Moon
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -532,3 +533,421 @@ function warp_8x8t_8bpc_rvv, export=1, ext="v,zba"
 
     ret
 endfunc
+
+function emu_edge_8bpc_rvv, export=1, ext="v,zbb"
+    ld t0, 0(sp)
+    ld t1, 8(sp)
+
+    // int cx = iclip((int) x, 0, (int) iw - 1);
+    max t2, a4, zero
+    addi t4, a2, -1
+    min t2, t2, t4
+
+    // int cy = iclip((int) y, 0, (int) ih - 1);
+    max t3, a5, zero
+    addi t5, a3, -1
+    min t3, t3, t5
+
+    // ref += cy*PXSTRIDE(ref_stride) + cx
+    mul t3, t3, t1
+    add t3, t3, t2
+
+    add t0, t0, t3
+
+    addi t4, a0, -1
+
+    neg t2, a4
+    add t3, a4, a0
+    sub t3, t3, a2
+
+    // int left_ext = iclip((int) -x, 0, (int) bw - 1);
+    max t2, t2, zero
+    min a2, t2, t4 # a2 = left_ext
+
+    // int right_ext = iclip((int) (x + bw - iw), 0, (int) bw - 1);
+    max t3, t3, zero
+    min a4, t3, t4 # a4 = right_ext
+
+    addi t6, a1, -1
+
+    neg t4, a5
+    add t5, a5, a1
+    sub t5, t5, a3
+
+    // int top_ext = iclip((int) -y, 0, (int) bh - 1);
+    max t4, t4, zero
+    min a3, t4, t6 # a3 = top_ext
+
+    // int bottom_ext = iclip((int) (x + bh - ih), 0, (int) bh - 1);
+    max t5, t5, zero
+    min a5, t5, t6 # a5 = bottom_ext
+
+    sub t4, a1, a3
+    sub t4, t4, a5 # t4 = center_h
+
+    mul t5, a3, a7
+    add a1, a6, t5 # blk = dst + top_ext * dst_stride
+
+    sub t3, a0, a2
+    sub t3, t3, a4 # t3 = center_w = bw - left_ext - right_ext
+
+.macro v_loop need_left, need_right
+9:
+    # pixel_copy()
+    add t5, a1, a2 # t5 = blk + left_ext
+    mv t2, t0 # ref
+0:
+    vsetvli t6, t3, e8, m1, ta, ma
+    vle8.v v8, (t2)
+    add t2, t2, t6
+
+    vse8.v v8, (t5)
+    sub t3, t3, t6
+    add t5, t5, t6
+    bnez t3, 0b
+
+    sub t3, a0, a2
+    sub t3, t3, a4 # t3 = center_w = bw - left_ext - right_ext
+
+.if \need_left
+    lb t2, (t0) # ref[0]
+    # pixel_set()
+    vsetvli t6, a2, e8, m1, ta, ma
+    vmv.v.x v8, t2
+    mv t2, a2 # left_ext
+    mv t5, a1 # blk
+0:
+    vse8.v v8, (t5)
+    sub t2, t2, t6 # left_ext -= t6
+    add t5, t5, t6 # blk += t6
+    vsetvli t6, t2, e8, m1, ta, ma
+    bnez t2, 0b
+.endif
+
+.if \need_right
+    add t5, a1, a2 # t5 = blk + left_ext
+    add t5, t5, t3 # t5 = blk + left_ext + center_w
+    lb t2, -1(t5) # blk[left_ext + center_w - 1]
+    # pixel_set()
+    vsetvli t6, a4, e8, m1, ta, ma
+    vmv.v.x v8, t2
+    mv t2, a4 # right_ext
+0:
+    vse8.v v8, (t5)
+    sub t2, t2, t6
+    add t5, t5, t6
+    vsetvli t6, t2, e8, m1, ta, ma
+    bnez t2, 0b
+.endif
+
+    add t0, t0, t1 # ref += ref_stride
+    add a1, a1, a7 # blk += dst_stride
+    addi t4, t4, -1 # center_h--
+    bnez t4, 9b
+.endm
+
+L(emu_edge_center):
+    blez t4, L(emu_edge_bottom)
+
+    beqz a2, 1f # if (left_ext)
+    beqz a4, 2f # if (right_ext)
+    v_loop 1, 1
+    j L(emu_edge_bottom)
+
+1:
+    beqz a4, 3f
+    v_loop 0, 1
+    j L(emu_edge_bottom)
+
+2:
+    v_loop 1, 0
+    j L(emu_edge_bottom)
+
+3:
+    v_loop 0, 0
+
+L(emu_edge_bottom): # copy bottom
+    blez a5, L(emu_edge_top)
+    mv t2, a0 # bw
+2:
+    mv t5, a5 # bottom_ext
+    mv t1, a1 # dst
+
+    vsetvli t6, t2, e8, m1, ta, ma
+    sub t0, t1, a7 # dst - dst_stride
+    vle8.v v8, (t0)
+0:
+    vse8.v v8, (t1)
+    add t1, t1, a7
+    addi t5, t5, -1
+    bnez t5, 0b
+
+    sub t2, t2, t6
+    add a1, a1, t6
+    bnez t2, 2b
+
+L(emu_edge_top): # copy top
+    blez a3, L(emu_edge_end)
+    mul t5, a3, a7
+    add t1, a6, t5 # blk = dst + top_ext * PXSTRIDE(dst_stride)
+    # a6 = dst
+1:
+    mv t0, a3 # top_ext
+    mv t4, a6 # dst
+
+    vsetvli t6, a0, e8, m1, ta, ma
+    vle8.v v8, (t1)
+0:
+    vse8.v v8, (t4)
+    add t4, t4, a7
+    vse8.v v8, (t4)
+    add t4, t4, a7
+    addi t0, t0, -2
+    bgtz t0, 0b
+
+    sub a0, a0, t6
+    add t1, t1, t6
+    add a6, a6, t6
+
+    bnez a0, 1b
+
+L(emu_edge_end):
+    ret
+endfunc
+
+.macro w_mask_fn type vlen
+function w_mask_\type\()_\vlen\()8bpc_rvv, export=1, ext="v,zba,zbb"
+    csrw vxrm, zero
+    li t1, 38*256+8
+.ifc \vlen, vl256_
+    addi t0, zero, 64
+    bgt a4, t0, 2f
+    li t2, 0xCAC9C8CFCE0000
+    li t3, 0xC1C0C7C6C50000
+.else
+    addi t0, zero, 32
+    bgt a4, t0, 2f
+    li t2, 0xCAC9C8CF0000
+    li t3, 0xC1C0C7C60000
+.endif
+    ctz t4, a4
+    slli t4, t4, 3
+    srl t2, t2, t4
+    andi t2, t2, 0xFF
+    srl t3, t3, t4
+    andi t3, t3, 0xFF
+
+1:
+.if \type == 444
+w_mask_body 444 narrow
+
+    sh1add a0, a1, a0 # dst += dst_stride
+    add a6, a6, a4 # mask += w
+.elseif \type == 422
+w_mask_body 422 narrow
+
+    sh1add a0, a1, a0 # dst += dst_stride
+    srli t4, a4, 1
+    add a6, a6, t4 # mask += w >> 1
+.elseif \type == 420
+w_mask_body 420 narrow
+
+    sh1add a0, a1, a0 # dst += dst_stride
+.endif
+
+    sh1add a2, a4, a2
+    sh1add a3, a4, a3
+
+    addi a5, a5, -2
+    bnez a5, 1b
+
+    ret
+
+2:
+    li t2, 0xca
+    li t3, 0xc1
+
+3:
+    mv t5, zero
+
+.if \type == 444
+w_mask_body 444 wide # VLEN>=256
+.elseif \type == 422
+w_mask_body 422 wide # VLEN>=256
+.elseif \type == 420
+w_mask_body 420 wide # VLEN>=256
+.endif
+
+    add t5, t5, t6
+    bne t5, a4, 4b
+
+    sh1add a0, a1, a0 # dst += dst_stride
+.if \type == 444
+    add a6, a6, a4 # mask += w
+.elseif \type == 422
+    srli t4, a4, 1
+    add a6, a6, t4 # mask += w >> 1
+.elseif \type == 420
+.endif
+
+    sh1add a2, a4, a2
+    sh1add a3, a4, a3
+
+    addi a5, a5, -2
+    bnez a5, 3b
+
+    ret
+
+endfunc
+.endm
+
+.macro w_mask_body type size
+    mv t0, a0 # dst
+
+4:
+    vsetvl t6, a4, t2
+
+    # load tmp1 and tmp2
+    vle16.v v0, (a2) # tmp1[x]
+
+    sh1add t4, a4, a2 # tmp1
+    vle16.v v16, (t4) # tmp1[x]
+    sh1add a2, t6, a2 # tmp1 += w / k
+
+    vle16.v v4, (a3) # tmp2[x]
+
+    sh1add t4, a4, a3 # tmp2
+    vle16.v v20, (t4) # tmp2[x]
+    sh1add a3, t6, a3 # tmp2 += w / k
+
+    # v12 = abs(tmp1[x] - tmp2[x])
+    vsub.vv v12, v0, v4 # tmp1[x] - tmp2[x]
+    vsub.vv v8, v4, v0 # tmp2[x] - tmp1[x]
+    vmax.vv v8, v12, v8
+
+    vsub.vv v28, v16, v20 # tmp1[x] - tmp2[x]
+    vsub.vv v24, v20, v16 # tmp2[x] - tmp1[x]
+    vmax.vv v24, v28, v24
+
+    li t4, 64
+
+    # min(38 + (v12 + 8) >> 8, 64) -> min((v12 + 38*256 + 8) >> 8, 64)
+    vadd.vx v8, v8, t1
+    vsra.vi v8, v8, 8
+    vmin.vx v8, v8, t4
+
+    vadd.vx v24, v24, t1
+    vsra.vi v24, v24, 8
+    vmin.vx v24, v24, t4
+
+    # dst[x] = (tmp1[x] - tmp2[x]) * m + 64 * tmp2[x];
+    # v12, v28 = tmp1[x] - tmp2[x]
+    # v8, v24 = {m,n}
+    vwmul.vx v0, v4, t4
+    vwmacc.vv v0, v8, v12
+    vnclipu.wi v0, v0, 10
+    vmax.vx v0, v0, zero
+
+    vwmul.vx v16, v20, t4
+    vwmacc.vv v16, v24, v28
+    vnclipu.wi v16, v16, 10
+    vmax.vx v16, v16, zero
+
+.if \type == 444
+    vsetvl zero, zero, t3
+
+    vnclipu.wi v0, v0, 0
+    vnclipu.wi v16, v16, 0
+
+    vse8.v v0, (t0) # dst[x] =
+    add t4, t0, a1
+    vse8.v v16, (t4) # dst[x] =
+    add t0, t0, t6
+
+    vnsrl.wi v8, v8, 0
+    vnsrl.wi v24, v24, 0
+
+    vse8.v v8, (a6) # mask[x] = m
+    add t4, a6, a4
+    vse8.v v24, (t4) # mask[x] = m
+    add a6, a6, t6
+
+.elseif \type == 422
+    # v4, v20 = m
+    # v12, v28 = n
+    vnsrl.wi v4, v8, 0
+    vnsrl.wi v8, v8, 16
+
+    vnsrl.wi v20, v24, 0
+    vnsrl.wi v24, v24, 16
+
+    # v8, v24 = m + n - sign
+    vadd.vv v8, v4, v8
+    vsub.vx v8, v8, a7
+
+    vadd.vv v24, v20, v24
+    vsub.vx v24, v24, a7
+
+    vsetvl zero, zero, t3
+
+    vnclipu.wi v0, v0, 0
+    vnclipu.wi v16, v16, 0
+
+    vse8.v v0, (t0) # dst[x] =
+    add t4, t0, a1
+    vse8.v v16, (t4) # dst[x] =
+    add t0, t0, t6
+
+    vnclipu.wi v8, v8, 1
+    vnclipu.wi v24, v24, 1
+
+.ifc \size, wide
+    srli t4, t6, 1
+    vsetvl zero, t4, t3
+.endif
+
+    vse8.v v8, (a6) # mask[x] = m + n + 1 - sign
+    srli t4, a4, 1
+    add t4, a6, t4
+    vse8.v v24, (t4) # mask[x] = m + n + 1 - sign
+    srli t4, t6, 1
+    add a6, a6, t4
+.elseif \type == 420
+    # v4, v20 = m
+    # v12, v28 = n
+    vnsrl.wi v4, v8, 0
+    vnsrl.wi v8, v8, 16
+
+    vnsrl.wi v20, v24, 0
+    vnsrl.wi v24, v24, 16
+
+    # v8 = m + n + mask[x >> 1]
+    vadd.vv v8, v4, v8
+    vadd.vv v24, v20, v24
+    vadd.vv v8, v8, v24
+    vsub.vx v8, v8, a7
+
+    vsetvl zero, zero, t3
+
+    vnclipu.wi v0, v0, 0
+    vnclipu.wi v16, v16, 0
+
+    vse8.v v0, (t0) # dst[x] =
+    add t4, t0, a1
+    vse8.v v16, (t4) # dst[x] =
+    add t0, t0, t6
+
+    vnclipu.wi v8, v8, 2
+
+    vse8.v v8, (a6) # mask[x] = (m + n + mask[x >> 1] + 2 - sign) >> 2;
+    srli t4, t6, 1
+    add a6, a6, t4
+.endif
+.endm
+
+w_mask_fn 444
+w_mask_fn 444 vl256_
+w_mask_fn 422
+w_mask_fn 422 vl256_
+w_mask_fn 420
+w_mask_fn 420 vl256_
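
Two notes on the new mc.S code. First, emu_edge builds a bw x bh source block for motion compensation when the requested block at (x, y) overlaps the edge of the iw x ih reference frame: the in-bounds center is copied, and border pixels are replicated into the left/right/top/bottom extensions, which is what the v_loop macro and the emu_edge_bottom/emu_edge_top tails implement. The clip arithmetic at the top of the function corresponds to this C, mirroring the // comments embedded in the assembly (names illustrative):

    #include <stdint.h>

    static inline int iclip(const int v, const int min, const int max) {
        return v < min ? min : v > max ? max : v;
    }

    typedef struct { int left, right, top, bottom, center_w, center_h; } edge_ext;

    /* How many columns/rows must be replicated on each side of a bw x bh
     * block at (x, y), given an iw x ih image; the remaining center is a
     * straight copy from the reference frame. */
    static edge_ext emu_edge_extents(const intptr_t bw, const intptr_t bh,
                                     const intptr_t iw, const intptr_t ih,
                                     const intptr_t x, const intptr_t y) {
        edge_ext e;
        e.left     = iclip((int) -x,            0, (int) bw - 1);
        e.right    = iclip((int) (x + bw - iw), 0, (int) bw - 1);
        e.top      = iclip((int) -y,            0, (int) bh - 1);
        e.bottom   = iclip((int) (x + bh - ih), 0, (int) bh - 1);
        e.center_w = (int) bw - e.left - e.right;
        e.center_h = (int) bh - e.top - e.bottom;
        return e;
    }

Second, the w_mask_fn naming: with the vlen argument left empty, "w_mask_fn 444" defines dav1d_w_mask_444_8bpc_rvv, while "w_mask_fn 444 vl256_" defines dav1d_w_mask_444_vl256_8bpc_rvv through the \type\()_\vlen\() concatenation, matching the declarations added to mc.h below.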
diff --git a/third_party/dav1d/src/riscv/ipred.h b/third_party/dav1d/src/riscv/ipred.h
@@ -33,6 +33,7 @@
 decl_cfl_pred_fn(BF(dav1d_ipred_cfl_128, rvv));
 decl_cfl_pred_fn(BF(dav1d_ipred_cfl_top, rvv));
 decl_cfl_pred_fn(BF(dav1d_ipred_cfl_left, rvv));
+decl_angular_ipred_fn(BF(dav1d_ipred_v, rvv));
 decl_angular_ipred_fn(BF(dav1d_ipred_paeth, rvv));
 decl_angular_ipred_fn(BF(dav1d_ipred_smooth, rvv));
 decl_angular_ipred_fn(BF(dav1d_ipred_smooth_v, rvv));
@@ -52,6 +53,7 @@ static ALWAYS_INLINE void intra_pred_dsp_init_riscv(Dav1dIntraPredDSPContext *co
 
     c->cfl_pred[TOP_DC_PRED ] = dav1d_ipred_cfl_top_8bpc_rvv;
     c->cfl_pred[LEFT_DC_PRED] = dav1d_ipred_cfl_left_8bpc_rvv;
+    c->intra_pred[VERT_PRED ] = dav1d_ipred_v_8bpc_rvv;
     c->intra_pred[PAETH_PRED ] = dav1d_ipred_paeth_8bpc_rvv;
     c->intra_pred[SMOOTH_PRED ] = dav1d_ipred_smooth_8bpc_rvv;
     c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_8bpc_rvv;
diff --git a/third_party/dav1d/src/riscv/mc.h b/third_party/dav1d/src/riscv/mc.h
@@ -40,8 +40,17 @@
 decl_avg_fn(BF(dav1d_avg, rvv));
 decl_w_avg_fn(BF(dav1d_w_avg, rvv));
 decl_mask_fn(BF(dav1d_mask, rvv));
+decl_w_mask_fn(BF(dav1d_w_mask_444, rvv));
+decl_w_mask_fn(BF(dav1d_w_mask_422, rvv));
+decl_w_mask_fn(BF(dav1d_w_mask_420, rvv));
+
+decl_w_mask_fn(BF(dav1d_w_mask_444_vl256, rvv));
+decl_w_mask_fn(BF(dav1d_w_mask_422_vl256, rvv));
+decl_w_mask_fn(BF(dav1d_w_mask_420_vl256, rvv));
+
 decl_warp8x8_fn(BF(dav1d_warp_8x8, rvv));
 decl_warp8x8t_fn(BF(dav1d_warp_8x8t, rvv));
+decl_emu_edge_fn(BF(dav1d_emu_edge, rvv));
 
 static ALWAYS_INLINE void mc_dsp_init_riscv(Dav1dMCDSPContext *const c) {
     const unsigned flags = dav1d_get_cpu_flags();
@@ -58,10 +67,11 @@ static ALWAYS_INLINE void mc_dsp_init_riscv(Dav1dMCDSPContext *const c) {
 
 #if BITDEPTH == 8
     c->blend_h = BF(dav1d_blend_h, rvv);
+    c->emu_edge = BF(dav1d_emu_edge, rvv);
 
-    if (dav1d_get_vlen() >= 256) {
-        c->blend_h = BF(dav1d_blend_h_vl256, rvv);
-    }
+    c->w_mask[0] = BF(dav1d_w_mask_444, rvv);
+    c->w_mask[1] = BF(dav1d_w_mask_422, rvv);
+    c->w_mask[2] = BF(dav1d_w_mask_420, rvv);
 
     c->avg = BF(dav1d_avg, rvv);
     c->w_avg = BF(dav1d_w_avg, rvv);
@@ -69,5 +79,13 @@ static ALWAYS_INLINE void mc_dsp_init_riscv(Dav1dMCDSPContext *const c) {
 
     c->warp8x8 = BF(dav1d_warp_8x8, rvv);
     c->warp8x8t = BF(dav1d_warp_8x8t, rvv);
+
+    if (dav1d_get_vlen() >= 256) {
+        c->blend_h = BF(dav1d_blend_h_vl256, rvv);
+
+        c->w_mask[0] = BF(dav1d_w_mask_444_vl256, rvv);
+        c->w_mask[1] = BF(dav1d_w_mask_422_vl256, rvv);
+        c->w_mask[2] = BF(dav1d_w_mask_420_vl256, rvv);
+    }
 #endif
 }
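
The init hunks keep the vector-length choice at runtime: the baseline RVV w_mask and emu_edge pointers are installed unconditionally for any usable VLEN, and the vl256_ variants then overwrite the w_mask and blend_h slots when dav1d_get_vlen() reports at least 256 bits. A minimal sketch of this dispatch pattern, with stand-in types in place of Dav1dMCDSPContext and the BF() naming macro:

    /* Runtime selection between baseline RVV kernels and wider-register
     * variants, shaped like mc_dsp_init_riscv() above. */
    typedef void (*w_mask_fn)(void);            /* stand-in signature */
    typedef struct { w_mask_fn w_mask[3]; } mc_ctx;

    unsigned dav1d_get_vlen(void);              /* probed VLEN in bits */

    static void init_w_mask(mc_ctx *const c, w_mask_fn base[3],
                            w_mask_fn vl256[3]) {
        for (int i = 0; i < 3; i++)
            c->w_mask[i] = base[i];             /* 444 / 422 / 420 */
        if (dav1d_get_vlen() >= 256)
            for (int i = 0; i < 3; i++)
                c->w_mask[i] = vl256[i];        /* fewer strips per row */
    }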