tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mc16.S (3319B)


      1 /******************************************************************************
      2 * Copyright © 2018, VideoLAN and dav1d authors
      3 * Copyright © 2024, Nathan Egge
      4 * All rights reserved.
      5 *
      6 * Redistribution and use in source and binary forms, with or without
      7 * modification, are permitted provided that the following conditions are met:
      8 *
      9 * 1. Redistributions of source code must retain the above copyright notice, this
     10 *    list of conditions and the following disclaimer.
     11 *
     12 * 2. Redistributions in binary form must reproduce the above copyright notice,
     13 *    this list of conditions and the following disclaimer in the documentation
     14 *    and/or other materials provided with the distribution.
     15 *
     16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 *****************************************************************************/
     27 
     28 #include "src/riscv/asm.S"
     29 
     /*
      * blend_vl256_16bpc_rvv: alternate entry point for the 16bpc blend.
      *
      * Only builds a vtype seed in t0 from a3 and then jumps to
      * L(blend_epilog), which is defined inside blend_16bpc_rvv. No vector
      * instruction is issued here, so only zbb (for ctz) is requested.
      * The seed 0xc4 is one below the 0xc5 used by blend_16bpc_rvv, i.e. the
      * vlmul field ends up one step (half the LMUL) lower.
      * Presumably meant for VLEN >= 256 (going by the name) -- confirm in the
      * C-side function selection.
      */
     30 function blend_vl256_16bpc_rvv, export=1, ext=zbb
     31  ctz t0, a3    /* t0 = log2(a3) when a3 is a power of two */
     32  addi t0, t0, 0xc4    /* 0xc0 = vma|vta; low bits become vlmul after masking */
     33  j L(blend_epilog)    /* tail-jump; the shared body executes the ret */
     34 endfunc
     35 
     /*
      * blend_16bpc_rvv: per-element weighted blend, two rows per iteration.
      *
      * With vxrm = 0 (round-to-nearest-up) and an unsigned saturating narrow,
      * each 16-bit element is updated as
      *     [a0] = ([a2] * m + [a0] * (64 - m) + 32) >> 6
      * where m is the zero-extended byte read through a5.
      *
      * Registers as used by the code (presumably dst, dst_stride, tmp, w, h,
      * mask of the C blend prototype -- confirm against the declaration):
      *   a0  16-bit rows that are read, blended and written back
      *   a1  byte distance between consecutive a0 rows
      *   a2  16-bit second source, advanced by 2 * a3 bytes per row
      *   a3  elements per row (AVL, and source of the vlmul field)
      *   a4  row counter, decremented by 2 per iteration
      *   a5  8-bit weights, advanced by a3 bytes per row
      * Scratch: t0, t1, a6 and the vector groups starting at
      * v0, v4, v8, v12, v16, v20, v24, v28.
      *
      * NOTE(review): the loop exits only when a4 reaches exactly 0, so a4 is
      * assumed to be positive and even on entry.
      * NOTE(review): at the largest LMUL the widened destination group that
      * starts at v8 also covers the v12 source group; RVV allows this only
      * because the overlap is in the upper half of the destination. Do not
      * renumber registers without re-checking that rule.
      */
     36 function blend_16bpc_rvv, export=1, ext="v,zbb"
     37  ctz t0, a3    /* t0 = log2(a3) when a3 is a power of two */
     38  addi t0, t0, 0xc5    /* 0xc0 = vma|vta; low bits become vlmul after masking */
     39 L(blend_epilog):    /* shared body; blend_vl256_16bpc_rvv jumps here with its own t0 */
     40  csrw vxrm, zero    /* fixed-point rounding mode 0, used by vnclipu below */
     41  andi t0, t0, 0xc7    /* keep vma/vta, wrap the sum into the 3-bit vlmul field */
     42  li t1, 64    /* weights are expressed out of 64 */
     43  ori t0, t0, 8    /* vsew = 001 -> SEW = 16 */
     44  add a6, a3, a3    /* a6 = 2 * a3 = byte length of one a2 row */
     45  vsetvl zero, a3, t0    /* AVL = a3; resulting vl is not needed (rd = zero) */
     46 1:
     47  addi a4, a4, -2    /* two rows are consumed per iteration */
     48  vle8.v v24, (a5)    /* weights for row 0 */
     49  add a5, a5, a3
     50  vle8.v v28, (a5)    /* weights for row 1 */
     51  add a5, a5, a3
     52  vle16.v v8, (a2)    /* second source, row 0 */
     53  add a2, a2, a6
     54  vle16.v v12, (a2)    /* second source, row 1 */
     55  add a2, a2, a6
     56  vzext.vf2 v16, v24    /* v16 = m0 widened from 8 to 16 bits */
     57  vzext.vf2 v20, v28    /* v20 = m1 widened from 8 to 16 bits */
     58  vle16.v v0, (a0)    /* first source / destination, row 0 */
     59  add t0, a0, a1    /* t0 = address of row 1 */
     60  vle16.v v4, (t0)    /* first source / destination, row 1 */
     61  vwmulu.vv v24, v8, v16    /* 32-bit acc0 = [a2] * m0 (v24 is free once widened) */
     62  vwmulu.vv v8, v12, v20    /* 32-bit acc1 = [a2] * m1, written over the v8 group */
     63  vrsub.vx v16, v16, t1    /* v16 = 64 - m0 */
     64  vrsub.vx v20, v20, t1    /* v20 = 64 - m1 */
     65  vwmaccu.vv v24, v0, v16    /* acc0 += [a0] * (64 - m0) */
     66  vwmaccu.vv v8, v4, v20    /* acc1 += [a0] * (64 - m1) */
     67  vnclipu.wi v0, v24, 6    /* (acc0 + 32) >> 6, saturated to unsigned 16 bits */
     68  vnclipu.wi v4, v8, 6    /* same for row 1 */
     69  vse16.v v0, (a0)
     70  vse16.v v4, (t0)
     71  add a0, t0, a1    /* a0 advances by two rows */
     72  bnez a4, 1b
     73  ret
     74 endfunc
     75 
     /*
      * blend_v_vl256_16bpc_rvv: alternate entry point for the 16bpc blend_v.
      *
      * Only builds a vtype seed in t0 and jumps to L(blend_v_epilog), which is
      * defined inside blend_v_16bpc_rvv. No vector instruction is issued here,
      * so only zbb (for ctz) is requested.
      * For a power-of-two a3 >= 4 the seed equals ctz(a3) + 0xc4, one vlmul
      * step below the 0xc5 seed of blend_v_16bpc_rvv.
      * Presumably meant for VLEN >= 256 (going by the name) -- confirm in the
      * C-side function selection.
      *
      * NOTE(review): for a3 < 4 the shift yields 0 and Zbb defines ctz(0) as
      * XLEN. On RV64 the sum is then 0x106, which the andi 0xc7 at the join
      * point reduces to 0x06 (vlmul field 110, vma/vta clear). Confirm this is
      * the intended handling of the narrowest width.
      */
     76 function blend_v_vl256_16bpc_rvv, export=1, ext=zbb
     77  srai t0, a3, 2    /* t0 = a3 / 4 */
     78  ctz t0, t0    /* t0 = log2(a3) - 2 for power-of-two a3 >= 4 */
     79  addi t0, t0, 0xc6    /* 0xc0 = vma|vta; low bits become vlmul after masking */
     80  j L(blend_v_epilog)    /* tail-jump; the shared body executes the ret */
     81 endfunc
     82 
     /*
      * blend_v_16bpc_rvv: weighted blend with weights that depend only on the
      * column, two rows per iteration.
      *
      * The weights m are loaded once, before the loop, from the external table
      * dav1d_obmc_masks at byte offset a3. With vxrm = 0 (round-to-nearest-up)
      * and an unsigned saturating narrow, each processed 16-bit element becomes
      *     [a0] = ([a2] * m + [a0] * (64 - m) + 32) >> 6
      * Only a3 - a3/4 elements of each row are processed (AVL of the vsetvl);
      * the a2 pointer still advances by the full 2 * a3 bytes per row.
      *
      * Registers as used by the code (presumably dst, dst_stride, tmp, w, h of
      * the C blend_v prototype -- confirm against the declaration):
      *   a0  16-bit rows that are read, blended and written back
      *   a1  byte distance between consecutive a0 rows
      *   a2  16-bit second source
      *   a3  elements per row on entry; doubled to a byte stride before the loop
      *   a4  row counter, decremented by 2 per iteration
      * Scratch: t0, t1 and the vector groups starting at
      * v0, v4, v8, v12, v16, v20, v24.
      *
      * NOTE(review): the loop exits only when a4 reaches exactly 0, so a4 is
      * assumed to be positive and even on entry.
      * NOTE(review): at the largest LMUL the widened destination group that
      * starts at v8 also covers the v12 source group; RVV allows this only
      * because the overlap is in the upper half of the destination. Do not
      * renumber registers without re-checking that rule.
      */
     83 function blend_v_16bpc_rvv, export=1, ext="v,zbb"
     84  ctz t0, a3    /* t0 = log2(a3) when a3 is a power of two */
     85  addi t0, t0, 0xc5    /* 0xc0 = vma|vta; low bits become vlmul after masking */
     86 L(blend_v_epilog):    /* shared body; blend_v_vl256_16bpc_rvv jumps here with its own t0 */
     87  andi t0, t0, 0xc7    /* keep bits 7:6 (vma/vta) and the 3-bit vlmul field */
     88  ori t0, t0, 8    /* vsew = 001 -> SEW = 16 */
     89  srai t1, a3, 2    /* t1 = a3 / 4 */
     90  sub t1, a3, t1    /* t1 = a3 - a3/4 elements to process per row */
     91  vsetvl zero, t1, t0    /* AVL = t1; resulting vl is not needed (rd = zero) */
     92  csrw vxrm, zero    /* fixed-point rounding mode 0, used by vnclipu below */
     93  la t1, dav1d_obmc_masks    /* table is defined outside this file */
     94  add t1, t1, a3    /* weights for this width start at byte offset a3 */
     95  vle8.v v20, (t1)    /* 8-bit weights m */
     96  li t0, 64    /* weights are expressed out of 64 */
     97  vzext.vf2 v16, v20    /* v16 = m widened from 8 to 16 bits */
     98  add a3, a3, a3    /* a3 = byte length of one a2 row from here on */
     99  vrsub.vx v20, v16, t0    /* v20 = 64 - m; v16 and v20 stay constant in the loop */
    100 1:
    101  addi a4, a4, -2    /* two rows are consumed per iteration */
    102  vle16.v v8, (a2)    /* second source, row 0 */
    103  add a2, a2, a3
    104  vle16.v v12, (a2)    /* second source, row 1 */
    105  add a2, a2, a3
    106  vle16.v v0, (a0)    /* first source / destination, row 0 */
    107  add t0, a0, a1    /* t0 = address of row 1 */
    108  vle16.v v4, (t0)    /* first source / destination, row 1 */
    109  vwmulu.vv v24, v8, v16    /* 32-bit acc0 = [a2] * m */
    110  vwmulu.vv v8, v12, v16    /* 32-bit acc1 = [a2] * m, written over the v8 group */
    111  vwmaccu.vv v24, v0, v20    /* acc0 += [a0] * (64 - m) */
    112  vwmaccu.vv v8, v4, v20    /* acc1 += [a0] * (64 - m) */
    113  vnclipu.wi v0, v24, 6    /* (acc0 + 32) >> 6, saturated to unsigned 16 bits */
    114  vnclipu.wi v4, v8, 6    /* same for row 1 */
    115  vse16.v v0, (a0)
    116  vse16.v v4, (t0)
    117  add a0, t0, a1    /* a0 advances by two rows */
    118  bnez a4, 1b
    119  ret
    120 endfunc