tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

util.S (12392B)


      1 /******************************************************************************
      2 * Copyright © 2018, VideoLAN and dav1d authors
      3 * Copyright © 2015 Martin Storsjo
      4 * Copyright © 2015 Janne Grunau
      5 * All rights reserved.
      6 *
      7 * Redistribution and use in source and binary forms, with or without
      8 * modification, are permitted provided that the following conditions are met:
      9 *
     10 * 1. Redistributions of source code must retain the above copyright notice, this
     11 *    list of conditions and the following disclaimer.
     12 *
     13 * 2. Redistributions in binary form must reproduce the above copyright notice,
     14 *    this list of conditions and the following disclaimer in the documentation
     15 *    and/or other materials provided with the distribution.
     16 *
     17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     20 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 *****************************************************************************/
     28 
     29 #ifndef DAV1D_SRC_ARM_64_UTIL_S
     30 #define DAV1D_SRC_ARM_64_UTIL_S
     31 
     32 #include "config.h"
     33 #include "src/arm/asm.S"
     34 
     35 #ifndef __has_feature
        /* Fallback when the preprocessor does not provide __has_feature:
         * every __has_feature(...) query then evaluates to 0. */
     36 #define __has_feature(x) 0
     37 #endif
     38 
     39 .macro  movrel rd, val, offset=0
               // Load the address of symbol val plus the constant offset
               // (default 0) into register rd. Exactly one of the following
               // sequences is selected by the preprocessor, tested in this order:
               //   __APPLE__            adrp/add with @PAGE and @PAGEOFF
               //   PIC and _WIN32       adrp/add with :lo12:
               //   hwaddress_sanitizer  adrp/movk/add
               //   PIC                  adrp/add with :lo12:
               //   otherwise            ldr rd, =val+offset
     40 #if defined(__APPLE__)
     41  .if \offset < 0
               // Negative offset: form the address of val alone, then apply the
               // offset with a separate sub of its magnitude.
               // NOTE(review): presumably a negative addend cannot be used in the
               // page relocations on this target - confirm.
     42        adrp            \rd, \val@PAGE
     43        add             \rd, \rd, \val@PAGEOFF
     44        sub             \rd, \rd, -(\offset)
     45  .else
               // Non-negative offset: fold it into both relocation expressions.
     46        adrp            \rd, \val+(\offset)@PAGE
     47        add             \rd, \rd, \val+(\offset)@PAGEOFF
     48  .endif
     49 #elif defined(PIC) && defined(_WIN32)
     50  .if \offset < 0
               // Same split as the Apple branch: address of val first, then
               // subtract the magnitude of the negative offset.
     51        adrp            \rd, \val
     52        add             \rd, \rd, :lo12:\val
     53        sub             \rd, \rd, -(\offset)
     54  .else
     55        adrp            \rd, \val+(\offset)
     56        add             \rd, \rd, :lo12:\val+(\offset)
     57  .endif
     58 #elif __has_feature(hwaddress_sanitizer)
               // Three-instruction form. The movk operand uses val without the
               // offset, plus a fixed 0x100000000 addend.
               // NOTE(review): presumably the movk supplies the top 16 address
               // bits so that a pointer tag in the symbol address is kept -
               // confirm against the HWASan documentation.
     59        adrp            \rd, :pg_hi21_nc:\val+(\offset)
     60        movk            \rd, #:prel_g3:\val+0x100000000
     61        add             \rd, \rd, :lo12:\val+(\offset)
     62 #elif defined(PIC)
     63        adrp            \rd, \val+(\offset)
     64        add             \rd, \rd, :lo12:\val+(\offset)
     65 #else
               // Non-PIC build: load the address with the ldr = pseudo-instruction.
     66        ldr             \rd, =\val+\offset
     67 #endif
     68 .endm
     69 
     70 .macro sub_sp space
               // Lower sp by the assemble-time constant space (in bytes).
               // space is parenthesised at every use so that an expression
               // argument is evaluated as one value.
               //   _WIN32:  requests up to 4096 use a single sub. Requests above
               //            4096 first step down by 4096 into x16, load from that
               //            address, then subtract the rest; x16 is clobbered on
               //            that path. Requests above 8192 stop assembly with
               //            .error.
               //   others:  the amount is split into a multiple of 4096 and a
               //            remainder below 4096; each part gets its own sub and
               //            is skipped when it is zero.
     71 #ifdef _WIN32
     72 .if (\space) > 8192
     73        // Here, we'd need to touch two (or more) pages while decrementing
     74        // the stack pointer.
     75        .error          "sub_sp doesn't support values over 8K at the moment"
     76 .elseif (\space) > 4096
     77        sub             x16, sp,  #4096
     78        ldr             xzr, [x16]
     79        sub             sp,  x16, #((\space) - 4096)
     80 .else
     81        sub             sp,  sp,  #(\space)
     82 .endif
     83 #else
     84 .if (\space) >= 4096
     85        sub             sp,  sp,  #(\space)/4096*4096
     86 .endif
     87 .if ((\space) % 4096) != 0
     88        sub             sp,  sp,  #(\space)%4096
     89 .endif
     90 #endif
     91 .endm
     92 
     93 .macro transpose_8x8b_xtl r0, r1, r2, r3, r4, r5, r6, r7, xtl
               // Transpose an 8x8 byte matrix and widen the result to 16 bits.
               // In:  the low 8 bytes of r0-r7 hold rows a-h; only those bytes
               //      are read.
               // Out: r0-r7 hold the transposed rows as .8h, i.e. r0 is
               //      a0 b0 c0 d0 e0 f0 g0 h0 after widening, r1 is a1 ... h1, etc.
               // xtl: mnemonic of the widening instruction; both xtl and its
               //      2-suffixed form are emitted.
               //      NOTE(review): presumably uxtl or sxtl - confirm with callers.
               // Each lane comment below describes the result of the instruction
               // that follows it.
     94        // a0 b0 a1 b1 a2 b2 a3 b3 a4 b4 a5 b5 a6 b6 a7 b7
     95        zip1            \r0\().16b, \r0\().16b, \r1\().16b
     96        // c0 d0 c1 d1 c2 d2 c3 d3 c4 d4 c5 d5 c6 d6 c7 d7
     97        zip1            \r2\().16b, \r2\().16b, \r3\().16b
     98        // e0 f0 e1 f1 e2 f2 e3 f3 e4 f4 e5 f5 e6 f6 e7 f7
     99        zip1            \r4\().16b, \r4\().16b, \r5\().16b
    100        // g0 h0 g1 h1 g2 h2 g3 h3 g4 h4 g5 h5 g6 h6 g7 h7
    101        zip1            \r6\().16b, \r6\().16b, \r7\().16b
    102 
    103        // a0 b0 c0 d0 a2 b2 c2 d2 a4 b4 c4 d4 a6 b6 c6 d6
    104        trn1            \r1\().8h,  \r0\().8h,  \r2\().8h
    105        // a1 b1 c1 d1 a3 b3 c3 d3 a5 b5 c5 d5 a7 b7 c7 d7
    106        trn2            \r3\().8h,  \r0\().8h,  \r2\().8h
    107        // e0 f0 g0 h0 e2 f2 g2 h2 e4 f4 g4 h4 e6 f6 g6 h6
    108        trn1            \r5\().8h,  \r4\().8h,  \r6\().8h
    109        // e1 f1 g1 h1 e3 f3 g3 h3 e5 f5 g5 h5 e7 f7 g7 h7
    110        trn2            \r7\().8h,  \r4\().8h,  \r6\().8h
    111 
    112        // a0 b0 c0 d0 e0 f0 g0 h0 a4 b4 c4 d4 e4 f4 g4 h4
    113        trn1            \r0\().4s,  \r1\().4s,  \r5\().4s
    114        // a2 b2 c2 d2 e2 f2 g2 h2 a6 b6 c6 d6 e6 f6 g6 h6
    115        trn2            \r2\().4s,  \r1\().4s,  \r5\().4s
    116        // a1 b1 c1 d1 e1 f1 g1 h1 a5 b5 c5 d5 e5 f5 g5 h5
    117        trn1            \r1\().4s,  \r3\().4s,  \r7\().4s
    118        // a3 b3 c3 d3 e3 f3 g3 h3 a7 b7 c7 d7 e7 f7 g7 h7
    119        trn2            \r3\().4s,  \r3\().4s,  \r7\().4s
    120 
               // r0-r3 now each hold two transposed rows (n in the low half,
               // n+4 in the high half). Widen the high halves into r4-r7 first,
               // then widen the low halves in place.
    121        \xtl\()2        \r4\().8h,  \r0\().16b
    122        \xtl            \r0\().8h,  \r0\().8b
    123        \xtl\()2        \r6\().8h,  \r2\().16b
    124        \xtl            \r2\().8h,  \r2\().8b
    125        \xtl\()2        \r5\().8h,  \r1\().16b
    126        \xtl            \r1\().8h,  \r1\().8b
    127        \xtl\()2        \r7\().8h,  \r3\().16b
    128        \xtl            \r3\().8h,  \r3\().8b
    129 .endm
    130 
    131 .macro transpose_8x8h r0, r1, r2, r3, r4, r5, r6, r7, t8, t9
               // In-place transpose of an 8x8 matrix of 16-bit elements.
               // In:  r0-r7 = rows 0-7, 8 halfwords each.
               // Out: r0-r7 = columns 0-7 of the input.
               // t8 and t9 are scratch vector registers and are overwritten.
               // Registers are reused as soon as their old value is dead, so the
               // instruction order must not be changed.

               // Stage 1: interleave 16-bit lanes of the row pairs (0,1) (2,3)
               // (4,5) (6,7).
    132        trn1            \t8\().8h,  \r0\().8h,  \r1\().8h
    133        trn2            \t9\().8h,  \r0\().8h,  \r1\().8h
    134        trn1            \r1\().8h,  \r2\().8h,  \r3\().8h
    135        trn2            \r3\().8h,  \r2\().8h,  \r3\().8h
    136        trn1            \r0\().8h,  \r4\().8h,  \r5\().8h
    137        trn2            \r5\().8h,  \r4\().8h,  \r5\().8h
    138        trn1            \r2\().8h,  \r6\().8h,  \r7\().8h
    139        trn2            \r7\().8h,  \r6\().8h,  \r7\().8h
    140 
               // Stage 2: interleave 32-bit lanes of the stage 1 results.
    141        trn1            \r4\().4s,  \r0\().4s,  \r2\().4s
    142        trn2            \r2\().4s,  \r0\().4s,  \r2\().4s
    143        trn1            \r6\().4s,  \r5\().4s,  \r7\().4s
    144        trn2            \r7\().4s,  \r5\().4s,  \r7\().4s
    145        trn1            \r5\().4s,  \t9\().4s,  \r3\().4s
    146        trn2            \t9\().4s,  \t9\().4s,  \r3\().4s
    147        trn1            \r3\().4s,  \t8\().4s,  \r1\().4s
    148        trn2            \t8\().4s,  \t8\().4s,  \r1\().4s
    149 
               // Stage 3: interleave 64-bit lanes, leaving column n in rn.
    150        trn1            \r0\().2d,  \r3\().2d,  \r4\().2d
    151        trn2            \r4\().2d,  \r3\().2d,  \r4\().2d
    152        trn1            \r1\().2d,  \r5\().2d,  \r6\().2d
    153        trn2            \r5\().2d,  \r5\().2d,  \r6\().2d
    154        trn2            \r6\().2d,  \t8\().2d,  \r2\().2d
    155        trn1            \r2\().2d,  \t8\().2d,  \r2\().2d
    156        trn1            \r3\().2d,  \t9\().2d,  \r7\().2d
    157        trn2            \r7\().2d,  \t9\().2d,  \r7\().2d
    158 .endm
    159 
    160 .macro transpose_8x8h_mov r0, r1, r2, r3, r4, r5, r6, r7, t8, t9, o0, o1, o2, o3, o4, o5, o6, o7
               // Transpose an 8x8 matrix of 16-bit elements into separate output
               // registers. The first two stages are the same instructions as in
               // transpose_8x8h; only the last stage differs, writing o0-o7.
               // In:  r0-r7 = rows 0-7, 8 halfwords each.
               // Out: o0-o7 = columns 0-7 of the input.
               // r0-r7, t8 and t9 are all overwritten by the first two stages.

               // Stage 1: interleave 16-bit lanes of adjacent row pairs.
    161        trn1            \t8\().8h,  \r0\().8h,  \r1\().8h
    162        trn2            \t9\().8h,  \r0\().8h,  \r1\().8h
    163        trn1            \r1\().8h,  \r2\().8h,  \r3\().8h
    164        trn2            \r3\().8h,  \r2\().8h,  \r3\().8h
    165        trn1            \r0\().8h,  \r4\().8h,  \r5\().8h
    166        trn2            \r5\().8h,  \r4\().8h,  \r5\().8h
    167        trn1            \r2\().8h,  \r6\().8h,  \r7\().8h
    168        trn2            \r7\().8h,  \r6\().8h,  \r7\().8h
    169 
               // Stage 2: interleave 32-bit lanes of the stage 1 results.
    170        trn1            \r4\().4s,  \r0\().4s,  \r2\().4s
    171        trn2            \r2\().4s,  \r0\().4s,  \r2\().4s
    172        trn1            \r6\().4s,  \r5\().4s,  \r7\().4s
    173        trn2            \r7\().4s,  \r5\().4s,  \r7\().4s
    174        trn1            \r5\().4s,  \t9\().4s,  \r3\().4s
    175        trn2            \t9\().4s,  \t9\().4s,  \r3\().4s
    176        trn1            \r3\().4s,  \t8\().4s,  \r1\().4s
    177        trn2            \t8\().4s,  \t8\().4s,  \r1\().4s
    178 
               // Stage 3: interleave 64-bit lanes into the output registers.
               // Sources here are r2-r7, t8 and t9; an output register that
               // aliases one of them must not be written before its last read.
    179        trn1            \o0\().2d,  \r3\().2d,  \r4\().2d
    180        trn2            \o4\().2d,  \r3\().2d,  \r4\().2d
    181        trn1            \o1\().2d,  \r5\().2d,  \r6\().2d
    182        trn2            \o5\().2d,  \r5\().2d,  \r6\().2d
    183        trn2            \o6\().2d,  \t8\().2d,  \r2\().2d
    184        trn1            \o2\().2d,  \t8\().2d,  \r2\().2d
    185        trn1            \o3\().2d,  \t9\().2d,  \r7\().2d
    186        trn2            \o7\().2d,  \t9\().2d,  \r7\().2d
    187 .endm
    188 
    189 .macro transpose_8x16b r0, r1, r2, r3, r4, r5, r6, r7, t8, t9
               // In-place transpose of two 8x8 byte matrices at once.
               // In:  r0-r7 = 8 rows of 16 bytes. Bytes 0-7 of each row form one
               //      8x8 matrix, bytes 8-15 form a second one.
               // Out: rn holds column n of the input in its low 8 bytes and
               //      column n+8 in its high 8 bytes.
               // t8 and t9 are scratch vector registers and are overwritten.
               // Registers are reused as soon as their old value is dead, so the
               // instruction order must not be changed.

               // Stage 1: interleave bytes of adjacent row pairs.
    190        trn1            \t8\().16b, \r0\().16b, \r1\().16b
    191        trn2            \t9\().16b, \r0\().16b, \r1\().16b
    192        trn1            \r1\().16b, \r2\().16b, \r3\().16b
    193        trn2            \r3\().16b, \r2\().16b, \r3\().16b
    194        trn1            \r0\().16b, \r4\().16b, \r5\().16b
    195        trn2            \r5\().16b, \r4\().16b, \r5\().16b
    196        trn1            \r2\().16b, \r6\().16b, \r7\().16b
    197        trn2            \r7\().16b, \r6\().16b, \r7\().16b
    198 
               // Stage 2: interleave 16-bit lanes of the stage 1 results.
    199        trn1            \r4\().8h,  \r0\().8h,  \r2\().8h
    200        trn2            \r2\().8h,  \r0\().8h,  \r2\().8h
    201        trn1            \r6\().8h,  \r5\().8h,  \r7\().8h
    202        trn2            \r7\().8h,  \r5\().8h,  \r7\().8h
    203        trn1            \r5\().8h,  \t9\().8h,  \r3\().8h
    204        trn2            \t9\().8h,  \t9\().8h,  \r3\().8h
    205        trn1            \r3\().8h,  \t8\().8h,  \r1\().8h
    206        trn2            \t8\().8h,  \t8\().8h,  \r1\().8h
    207 
               // Stage 3: interleave 32-bit lanes, producing the final layout.
    208        trn1            \r0\().4s,  \r3\().4s,  \r4\().4s
    209        trn2            \r4\().4s,  \r3\().4s,  \r4\().4s
    210        trn1            \r1\().4s,  \r5\().4s,  \r6\().4s
    211        trn2            \r5\().4s,  \r5\().4s,  \r6\().4s
    212        trn2            \r6\().4s,  \t8\().4s,  \r2\().4s
    213        trn1            \r2\().4s,  \t8\().4s,  \r2\().4s
    214        trn1            \r3\().4s,  \t9\().4s,  \r7\().4s
    215        trn2            \r7\().4s,  \t9\().4s,  \r7\().4s
    216 .endm
    217 
    218 .macro  transpose_4x16b r0, r1, r2, r3, t4, t5, t6, t7
               // Transpose four 4x4 byte matrices at once.
               // In:  r0-r3 = 4 rows of 16 bytes; each group of 4 consecutive
               //      bytes across the rows forms one 4x4 matrix.
               // Out: rn holds input columns n, n+4, n+8 and n+12, 4 bytes each.
               // t4-t7 are scratch vector registers and are overwritten.

               // Stage 1: interleave bytes of row pairs (0,1) and (2,3).
    219        trn1            \t4\().16b, \r0\().16b, \r1\().16b
    220        trn2            \t5\().16b, \r0\().16b, \r1\().16b
    221        trn1            \t6\().16b, \r2\().16b, \r3\().16b
    222        trn2            \t7\().16b, \r2\().16b, \r3\().16b
    223 
               // Stage 2: interleave 16-bit lanes back into r0-r3.
    224        trn1            \r0\().8h,  \t4\().8h,  \t6\().8h
    225        trn2            \r2\().8h,  \t4\().8h,  \t6\().8h
    226        trn1            \r1\().8h,  \t5\().8h,  \t7\().8h
    227        trn2            \r3\().8h,  \t5\().8h,  \t7\().8h
    228 .endm
    229 
    230 .macro  transpose_4x4h  r0, r1, r2, r3, t4, t5, t6, t7
               // Transpose a 4x4 matrix of 16-bit elements using the 64-bit
               // (.4h / .2s) register views.
               // In:  r0-r3 = rows 0-3, 4 halfwords each.
               // Out: r0-r3 = columns 0-3 of the input.
               // t4-t7 are scratch vector registers and are overwritten.

               // Stage 1: interleave 16-bit lanes of row pairs (0,1) and (2,3).
    231        trn1            \t4\().4h,  \r0\().4h,  \r1\().4h
    232        trn2            \t5\().4h,  \r0\().4h,  \r1\().4h
    233        trn1            \t6\().4h,  \r2\().4h,  \r3\().4h
    234        trn2            \t7\().4h,  \r2\().4h,  \r3\().4h
    235 
               // Stage 2: interleave 32-bit lanes back into r0-r3.
    236        trn1            \r0\().2s,  \t4\().2s,  \t6\().2s
    237        trn2            \r2\().2s,  \t4\().2s,  \t6\().2s
    238        trn1            \r1\().2s,  \t5\().2s,  \t7\().2s
    239        trn2            \r3\().2s,  \t5\().2s,  \t7\().2s
    240 .endm
    241 
    242 .macro  transpose_4x4s  r0, r1, r2, r3, t4, t5, t6, t7
               // Transpose a 4x4 matrix of 32-bit elements.
               // In:  r0-r3 = rows 0-3, 4 words each.
               // Out: r0-r3 = columns 0-3 of the input.
               // t4-t7 are scratch vector registers and are overwritten.

               // Stage 1: interleave 32-bit lanes of row pairs (0,1) and (2,3).
    243        trn1            \t4\().4s,  \r0\().4s,  \r1\().4s
    244        trn2            \t5\().4s,  \r0\().4s,  \r1\().4s
    245        trn1            \t6\().4s,  \r2\().4s,  \r3\().4s
    246        trn2            \t7\().4s,  \r2\().4s,  \r3\().4s
    247 
               // Stage 2: interleave 64-bit lanes back into r0-r3.
    248        trn1            \r0\().2d,  \t4\().2d,  \t6\().2d
    249        trn2            \r2\().2d,  \t4\().2d,  \t6\().2d
    250        trn1            \r1\().2d,  \t5\().2d,  \t7\().2d
    251        trn2            \r3\().2d,  \t5\().2d,  \t7\().2d
    252 .endm
    253 
    254 .macro  transpose_4x8h  r0, r1, r2, r3, t4, t5, t6, t7
               // Transpose two 4x4 matrices of 16-bit elements at once.
               // In:  r0-r3 = 4 rows of 8 halfwords. Halfwords 0-3 of each row
               //      form one 4x4 matrix, halfwords 4-7 form a second one.
               // Out: rn holds input column n in its low 64 bits and column n+4
               //      in its high 64 bits.
               // t4-t7 are scratch vector registers and are overwritten.

               // Stage 1: interleave 16-bit lanes of row pairs (0,1) and (2,3).
    255        trn1            \t4\().8h,  \r0\().8h,  \r1\().8h
    256        trn2            \t5\().8h,  \r0\().8h,  \r1\().8h
    257        trn1            \t6\().8h,  \r2\().8h,  \r3\().8h
    258        trn2            \t7\().8h,  \r2\().8h,  \r3\().8h
    259 
               // Stage 2: interleave 32-bit lanes back into r0-r3.
    260        trn1            \r0\().4s,  \t4\().4s,  \t6\().4s
    261        trn2            \r2\().4s,  \t4\().4s,  \t6\().4s
    262        trn1            \r1\().4s,  \t5\().4s,  \t7\().4s
    263        trn2            \r3\().4s,  \t5\().4s,  \t7\().4s
    264 .endm
    265 
    266 .macro  transpose_4x8h_mov r0, r1, r2, r3, t4, t5, t6, t7, o0, o1, o2, o3
               // Transpose two 4x4 matrices of 16-bit elements into separate
               // output registers. Same instructions as transpose_4x8h except
               // that the second stage writes o0-o3.
               // In:  r0-r3 = 4 rows of 8 halfwords; r0-r3 are read but not
               //      written by this macro.
               // Out: on holds input column n in its low 64 bits and column n+4
               //      in its high 64 bits.
               // t4-t7 are scratch vector registers and are overwritten.

               // Stage 1: interleave 16-bit lanes of row pairs (0,1) and (2,3).
    267        trn1            \t4\().8h,  \r0\().8h,  \r1\().8h
    268        trn2            \t5\().8h,  \r0\().8h,  \r1\().8h
    269        trn1            \t6\().8h,  \r2\().8h,  \r3\().8h
    270        trn2            \t7\().8h,  \r2\().8h,  \r3\().8h
    271 
               // Stage 2: interleave 32-bit lanes into the output registers.
    272        trn1            \o0\().4s,  \t4\().4s,  \t6\().4s
    273        trn2            \o2\().4s,  \t4\().4s,  \t6\().4s
    274        trn1            \o1\().4s,  \t5\().4s,  \t7\().4s
    275        trn2            \o3\().4s,  \t5\().4s,  \t7\().4s
    276 .endm
    277 
    278 #endif /* DAV1D_SRC_ARM_64_UTIL_S */