tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

cdef16.S (8007B)


      1 /*
      2 * Copyright © 2018, VideoLAN and dav1d authors
      3 * Copyright © 2020, Martin Storsjo
      4 * All rights reserved.
      5 *
      6 * Redistribution and use in source and binary forms, with or without
      7 * modification, are permitted provided that the following conditions are met:
      8 *
      9 * 1. Redistributions of source code must retain the above copyright notice, this
     10 *    list of conditions and the following disclaimer.
     11 *
     12 * 2. Redistributions in binary form must reproduce the above copyright notice,
     13 *    this list of conditions and the following disclaimer in the documentation
     14 *    and/or other materials provided with the distribution.
     15 *
     16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 */
     27 
     28 #include "src/arm/asm.S"
     29 #include "util.S"
     30 #include "cdef_tmpl.S"
     31 
     32 // r1 = d0/q0
     33 // r2 = d2/q1
        //
        // pad_top_bot_16: copy two rows of \w 16-bit pixels, read from the addresses
        // held in \s1 and \s2, to the rows at r0 and r0 + 2*\stride bytes, and write
        // 2 pixels (4 bytes) of padding on the left and right of each stored row.
        //
        //   s1, s2  registers holding the address of the first / second source row
        //   w       row width in pixels (the right edge sits at byte offset 2*\w)
        //   stride  destination row pitch in 16-bit elements (r0 steps 2*\stride bytes)
        //   r1, r2  NEON registers carrying the pixels of row one / row two
        //   align   alignment qualifier, in bits, attached to every vld1/vst1 address
        //   ret     nonzero: leave the enclosing function with pop {r4-r8,pc} once
        //           both rows are stored; zero: step r0 past the second row and
        //           continue at the local label 3 that closes the macro
        //
        // Inputs: r7 holds the edge flags (bit 0 = CDEF_HAVE_LEFT, bit 1 =
        // CDEF_HAVE_RIGHT). s12 must hold the 32-bit fill pattern that is stored
        // where an edge is missing; s12 is the low word of q3, which
        // padding_func_16 sets to 0x8000 in every 16-bit lane before expanding
        // this macro.
        // Clobbers: \r1, \r2, s8-s11 (i.e. all of q2), r0 and the condition flags.
        // The expansion defines the numeric local labels 1 (twice), 2 and 3.
     34 .macro pad_top_bot_16 s1, s2, w, stride, r1, r2, align, ret
     35        tst             r7,  #1 // CDEF_HAVE_LEFT
     36        beq             2f
     37        // CDEF_HAVE_LEFT
     38        tst             r7,  #2 // CDEF_HAVE_RIGHT
     39        beq             1f
     40        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
               // s8/s10 = the 2 pixels left of row one/two, s9/s11 = the 2 pixels
               // right of row one/two; everything is loaded before anything is stored.
     41        vldr            s8,  [\s1, #-4]
     42        vld1.16         {\r1}, [\s1, :\align]
     43        vldr            s9,  [\s1, #2*\w]
     44        vldr            s10, [\s2, #-4]
     45        vld1.16         {\r2}, [\s2, :\align]
     46        vldr            s11, [\s2, #2*\w]
     47        vstr            s8,  [r0, #-4]
     48        vst1.16         {\r1}, [r0, :\align]
     49        vstr            s9,  [r0, #2*\w]
     50        add             r0,  r0,  #2*\stride
     51        vstr            s10, [r0, #-4]
     52        vst1.16         {\r2}, [r0, :\align]
     53        vstr            s11, [r0, #2*\w]
     54 .if \ret
     55        pop             {r4-r8,pc}
     56 .else
               // Leave r0 on the row after the two rows just written.
     57        add             r0,  r0,  #2*\stride
     58        b               3f
     59 .endif
     60 
     61 1:
     62        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
               // Left pixels are copied (s8/s9); the right padding is the s12 fill.
     63        vldr            s8,  [\s1, #-4]
     64        vld1.16         {\r1}, [\s1, :\align]
     65        vldr            s9,  [\s2, #-4]
     66        vld1.16         {\r2}, [\s2, :\align]
     67        vstr            s8,  [r0, #-4]
     68        vst1.16         {\r1}, [r0, :\align]
     69        vstr            s12, [r0, #2*\w]
     70        add             r0,  r0,  #2*\stride
     71        vstr            s9,  [r0, #-4]
     72        vst1.16         {\r2}, [r0, :\align]
     73        vstr            s12, [r0, #2*\w]
     74 .if \ret
     75        pop             {r4-r8,pc}
     76 .else
     77        add             r0,  r0,  #2*\stride
     78        b               3f
     79 .endif
     80 
     81 2:
     82        // !CDEF_HAVE_LEFT
     83        tst             r7,  #2 // CDEF_HAVE_RIGHT
     84        beq             1f
     85        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
               // Right pixels are copied (s8/s9); the left padding is the s12 fill.
     86        vld1.16         {\r1}, [\s1, :\align]
     87        vldr            s8,  [\s1, #2*\w]
     88        vld1.16         {\r2}, [\s2, :\align]
     89        vldr            s9,  [\s2, #2*\w]
     90        vstr            s12, [r0, #-4]
     91        vst1.16         {\r1}, [r0, :\align]
     92        vstr            s8,  [r0, #2*\w]
     93        add             r0,  r0,  #2*\stride
     94        vstr            s12, [r0, #-4]
     95        vst1.16         {\r2}, [r0, :\align]
     96        vstr            s9,  [r0, #2*\w]
     97 .if \ret
     98        pop             {r4-r8,pc}
     99 .else
    100        add             r0,  r0,  #2*\stride
    101        b               3f
    102 .endif
    103 
    104 1:
    105        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
               // Only the \w pixels are copied; both paddings are the s12 fill.
    106        vld1.16         {\r1}, [\s1, :\align]
    107        vld1.16         {\r2}, [\s2, :\align]
    108        vstr            s12, [r0, #-4]
    109        vst1.16         {\r1}, [r0, :\align]
    110        vstr            s12, [r0, #2*\w]
    111        add             r0,  r0,  #2*\stride
    112        vstr            s12, [r0, #-4]
    113        vst1.16         {\r2}, [r0, :\align]
    114        vstr            s12, [r0, #2*\w]
    115 .if \ret
    116        pop             {r4-r8,pc}
    117 .else
               // Last variant: no branch needed, execution falls through to 3.
    118        add             r0,  r0,  #2*\stride
    119 .endif
        // Join point for the \ret == 0 paths; r0 has advanced by two rows here.
    120 3:
    121 .endm
    122 
    123 // void dav1d_cdef_paddingX_16bpc_neon(uint16_t *tmp, const pixel *src,
    124 //                                     ptrdiff_t src_stride, const pixel (*left)[2],
    125 //                                     const pixel *const top,
    126 //                                     const pixel *const bottom, int h,
    127 //                                     enum CdefEdgeFlags edges);
    128 
    129 // r1 = d0/q0
    130 // r2 = d2/q1
        //
        // padding_func_16: emits the exported function cdef_padding<w>_16bpc_neon.
        // The function copies an h-row block of \w 16-bit pixels from src into tmp
        // and surrounds it with a border of 2 pixels on each side and 2 rows above
        // and below. Border parts whose edge flag is clear are filled with 0x8000;
        // the others are copied from left / top / bottom / the source rows.
        //
        //   w       block width in pixels (the .if \w == 8 branches double the fill)
        //   stride  row pitch of tmp in 16-bit elements
        //   r1, r2  NEON registers for pixel rows (forwarded to pad_top_bot_16;
        //           \r1 also carries each row of the middle section)
        //   align   alignment qualifier, in bits, used on the row vld1/vst1
        //
        // Registers inside the function (arguments as in the prototype above):
        //   r0 = tmp write pointer     r1 = src          r2 = src_stride (added
        //   r3 = left                  r4 = top               to byte addresses)
        //   r5 = bottom                r6 = h            r7 = edges
        //   r8 = second top/bottom row                   r12 = fill write pointer
        //   q3 = 0x8000 fill (s12 is its low word), q2 = fill / scratch,
        //   d2 (s4, s5) = left / right edge pixels in the middle section
        // Edge flag bits tested: 1 = LEFT, 2 = RIGHT, 4 = TOP, 8 = BOTTOM.
        // NOTE(review): `function` / `endfunc` are not defined in this file;
        // presumably they come from src/arm/asm.S and may add a symbol prefix.
        // NOTE(review): the row loops test the counter after the first pass, so
        // h >= 1 is assumed - confirm against the callers.
    131 .macro padding_func_16 w, stride, r1, r2, align
    132 function cdef_padding\w\()_16bpc_neon, export=1
    133        push            {r4-r8,lr}
               // Six registers (24 bytes) were pushed, so the stacked arguments
               // start at sp+24: top, bottom, then h, edges.
    134        ldrd            r4,  r5,  [sp, #24]
    135        ldrd            r6,  r7,  [sp, #32]
    136        vmov.i16        q3,  #0x8000
    137        tst             r7,  #4 // CDEF_HAVE_TOP
    138        bne             1f
    139        // !CDEF_HAVE_TOP
               // Start two rows up and two pixels left of tmp and store 32 bytes
               // of fill (64 when \w == 8); with the strides this macro is
               // instantiated with, that is exactly two rows of tmp.
    140        sub             r12, r0,  #2*(2*\stride+2)
    141        vmov.i16        q2,  #0x8000
    142        vst1.16         {q2,q3}, [r12]!
    143 .if \w == 8
    144        vst1.16         {q2,q3}, [r12]!
    145 .endif
    146        b               3f
    147 1:
    148        // CDEF_HAVE_TOP
               // r8 = second top row; r0 is moved two rows up and the macro
               // (ret = 0) brings it back to the first row of the block.
    149        add             r8,  r4,  r2
    150        sub             r0,  r0,  #2*(2*\stride)
    151        pad_top_bot_16  r4,  r8,  \w, \stride, \r1, \r2, \align, 0
    152 
    153        // Middle section
               // One pass per source row: r1 post-increments by src_stride, r0
               // advances by one tmp row, r6 counts the rows down. The flags
               // set by subs are still intact at bgt because the stores and
               // the plain add in between do not write them.
    154 3:
    155        tst             r7,  #1 // CDEF_HAVE_LEFT
    156        beq             2f
    157        // CDEF_HAVE_LEFT
    158        tst             r7,  #2 // CDEF_HAVE_RIGHT
    159        beq             1f
    160        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
    161 0:
               // The 4 bytes at r3 (2 left pixels) are replicated into s4 and
               // s5, then s5 is replaced by the 2 pixels right of the row. The
               // right pixels are read before r1 is post-incremented.
    162        vld1.32         {d2[]}, [r3, :32]!
    163        vldr            s5,  [r1, #2*\w]
    164        vld1.16         {\r1}, [r1, :\align], r2
    165        subs            r6,  r6,  #1
    166        vstr            s4,  [r0, #-4]
    167        vst1.16         {\r1}, [r0, :\align]
    168        vstr            s5,  [r0, #2*\w]
    169        add             r0,  r0,  #2*\stride
    170        bgt             0b
    171        b               3f
    172 1:
    173        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
               // Left pixels come from r3 (s4); the right padding is the fill.
    174        vld1.32         {d2[]}, [r3, :32]!
    175        vld1.16         {\r1}, [r1, :\align], r2
    176        subs            r6,  r6,  #1
    177        vstr            s4,  [r0, #-4]
    178        vst1.16         {\r1}, [r0, :\align]
    179        vstr            s12, [r0, #2*\w]
    180        add             r0,  r0,  #2*\stride
    181        bgt             1b
    182        b               3f
    183 2:
    184        tst             r7,  #2 // CDEF_HAVE_RIGHT
    185        beq             1f
    186        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
    187 0:
               // Right pixels are read into s4; the left padding is the fill
               // and r3 is not touched.
    188        vldr            s4,  [r1, #2*\w]
    189        vld1.16         {\r1}, [r1, :\align], r2
    190        subs            r6,  r6,  #1
    191        vstr            s12, [r0, #-4]
    192        vst1.16         {\r1}, [r0, :\align]
    193        vstr            s4,  [r0, #2*\w]
    194        add             r0,  r0,  #2*\stride
    195        bgt             0b
    196        b               3f
    197 1:
    198        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
    199        vld1.16         {\r1}, [r1, :\align], r2
    200        subs            r6,  r6,  #1
    201        vstr            s12, [r0, #-4]
    202        vst1.16         {\r1}, [r0, :\align]
    203        vstr            s12, [r0, #2*\w]
    204        add             r0,  r0,  #2*\stride
    205        bgt             1b
    206 
               // r0 now addresses the first row below the block.
    207 3:
    208        tst             r7,  #8 // CDEF_HAVE_BOTTOM
    209        bne             1f
    210        // !CDEF_HAVE_BOTTOM
               // Fill two rows starting two pixels left of r0. q2 is set again
               // because pad_top_bot_16 overwrites s8-s11 when the top rows
               // were copied.
    211        sub             r12, r0,  #4
    212        vmov.i16        q2,  #0x8000
    213        vst1.16         {q2,q3}, [r12]!
    214 .if \w == 8
    215        vst1.16         {q2,q3}, [r12]!
    216 .endif
    217        pop             {r4-r8,pc}
    218 1:
    219        // CDEF_HAVE_BOTTOM
               // r8 = second bottom row. With ret = 1 every path of the macro
               // ends in pop {r4-r8,pc}, so nothing runs after the expansion.
    220        add             r8,  r5,  r2
    221        pad_top_bot_16  r5,  r8,  \w, \stride, \r1, \r2, \align, 1
    222 endfunc
    223 .endm
    224 
    225 padding_func_16 8, 16, q0, q1, 128 // cdef_padding8_16bpc_neon: w=8, tmp stride 16, q regs, :128 hints
    226 padding_func_16 4, 8,  d0, d2, 64 // cdef_padding4_16bpc_neon: w=4, tmp stride 8, d regs, :64 hints
    227 
        // NOTE(review): tables, filter and find_dir are not defined in this file;
        // presumably they are macros from cdef_tmpl.S (included above) and the
        // argument 16 selects their 16 bpc variants - confirm there.
    228 tables
    229 
    230 filter 8, 16
    231 filter 4, 16
    232 
    233 find_dir 16