tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

jsimdext.inc (15399B)


      1 ;
      2 ; jsimdext.inc - common declarations
      3 ;
      4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
      5 ; Copyright (C) 2010, 2016, 2018-2019, 2024, D. R. Commander.
      6 ; Copyright (C) 2018, Matthieu Darbois.
      7 ; Copyright (C) 2018, Matthias Räncker.
      8 ; Copyright (C) 2023, Aliaksiej Kandracienka.
      9 ;
     10 ; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
     11 ;
     12 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
     13 ;
     14 ; This software is provided 'as-is', without any express or implied
     15 ; warranty.  In no event will the authors be held liable for any damages
     16 ; arising from the use of this software.
     17 ;
     18 ; Permission is granted to anyone to use this software for any purpose,
     19 ; including commercial applications, and to alter it and redistribute it
     20 ; freely, subject to the following restrictions:
     21 ;
     22 ; 1. The origin of this software must not be misrepresented; you must not
     23 ;    claim that you wrote the original software. If you use this software
     24 ;    in a product, an acknowledgment in the product documentation would be
     25 ;    appreciated but is not required.
     26 ; 2. Altered source versions must be plainly marked as such, and must not be
     27 ;    misrepresented as being the original software.
     28 ; 3. This notice may not be removed or altered from any source distribution.
     29 
     30 ; ==========================================================================
     31 ;  System-dependent configurations
        ;
        ;  Exactly one branch of the %ifdef chain below is taken, selected by the
        ;  object-format symbol (WIN32, WIN64, OBJ32, ELF, AOUT, MACHO, or none).
        ;  Every branch defines the section declarations SEG_TEXT and SEG_CONST;
        ;  some branches also define EXTN() (symbol decoration), GOT_SYMBOL and PIC.
     32 
     33 %ifdef WIN32    ; ----(nasm -fwin32 -DWIN32 ...)--------
     34 ; * Microsoft Visual C++
     35 ; * MinGW (Minimalist GNU for Windows)
     36 ; * CygWin
     37 ; * LCC-Win32
     38 
     39 ; -- segment definition --
     40 ; With YASM (__YASM_VER__ defined) only the alignment is given; otherwise
        ; the public/use32/class attributes are added as well.
     41 %ifdef __YASM_VER__
     42 %define SEG_TEXT   .text  align=32
     43 %define SEG_CONST  .rdata align=32
     44 %else
     45 %define SEG_TEXT   .text  align=32 public use32 class=CODE
     46 %define SEG_CONST  .rdata align=32 public use32 class=CONST
     47 %endif
     48 
     49 %elifdef WIN64  ; ----(nasm -fwin64 -DWIN64 ...)--------
     50 ; * Microsoft Visual C++
     51 
     52 ; -- segment definition --
     53 ; Same YASM/non-YASM split as WIN32, with use64 instead of use32.
     54 %ifdef __YASM_VER__
     55 %define SEG_TEXT    .text  align=32
     56 %define SEG_CONST   .rdata align=32
     57 %else
     58 %define SEG_TEXT    .text  align=32 public use64 class=CODE
     59 %define SEG_CONST   .rdata align=32 public use64 class=CONST
     60 %endif
     61 %define EXTN(name)  name                ; foo() -> foo
     62 
     63 %elifdef OBJ32  ; ----(nasm -fobj -DOBJ32 ...)----------
     64 ; * Borland C++ (Win32)
     65 
     66 ; -- segment definition --
     67 ;
     68 %define SEG_TEXT   _text align=32 public use32 class=CODE
     69 %define SEG_CONST  _data align=32 public use32 class=DATA
     70 
     71 %elifdef ELF    ; ----(nasm -felf[64] -DELF ...)------------
     72 ; * Linux
     73 ; * *BSD family Unix using elf format
     74 ; * Unix System V, including Solaris x86, UnixWare and SCO Unix
     75 
     76 ; mark stack as non-executable
     77 section .note.GNU-stack noalloc noexec nowrite progbits
     78 
        ; When both __CET__ and __x86_64__ are defined, also emit a
        ; .note.gnu.property section.  The fourth dword is the string "GNU\0".
        ; NOTE(review): the remaining values are presumably the note header and an
        ; x86 feature property advertising CET (IBT/SHSTK) support -- confirm
        ; against the x86-64 psABI before changing them.
     79 %ifdef __CET__
     80 %ifdef __x86_64__
     81 section .note.gnu.property note alloc noexec align=8
     82    dd 0x00000004, 0x00000010, 0x00000005, 0x00554e47
     83    dd 0xc0000002, 0x00000004, 0x00000003, 0x00000000
     84 %endif
     85 %endif
     86 
     87 ; -- segment definition --
     88 ; The 32-bit variant spells out alloc/exec/nowrite; the 64-bit one does not.
     89 %ifdef __x86_64__
     90 %define SEG_TEXT   .text   progbits align=32
     91 %define SEG_CONST  .rodata progbits align=32
     92 %else
     93 %define SEG_TEXT   .text   progbits alloc exec   nowrite align=32
     94 %define SEG_CONST  .rodata progbits alloc noexec nowrite align=32
     95 %endif
     96 
     97 ; To make the code position-independent, append -DPIC to the commandline
     98 ;
     99 %define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_  ; ELF supports PIC
    100 %define EXTN(name)  name                   ; foo() -> foo
    101 
    102 %elifdef AOUT   ; ----(nasm -faoutb/aout -DAOUT ...)----
    103 ; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
    104 ; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)
    105 
    106 ; -- segment definition --
    107 ;
    108 %define SEG_TEXT   .text
    109 %define SEG_CONST  .data
    110 
    111 ; To make the code position-independent, append -DPIC to the commandline
    112 ;
    113 %define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_  ; BSD-style a.out supports PIC
    114 
    115 %elifdef MACHO  ; ----(nasm -fmacho -DMACHO ...)--------
    116 ; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)
    117 
    118 ; -- segment definition --
    119 ; Note that the align=32 on SEG_TEXT is commented out, so SEG_TEXT is ".text".
    120 %define SEG_TEXT   .text  ;align=32     ; nasm doesn't accept align=32. why?
    121 %define SEG_CONST  .rodata align=32
    122 
    123 ; The generation of position-independent code (PIC) is the default on Darwin.
    124 ; PIC is therefore forced on here, regardless of the command line.
    125 %define PIC
    126 %define GOT_SYMBOL  _MACHO_PIC_         ; Mach-O style code-relative addressing
    127 
    128 %else           ; ----(Other case)----------------------
    129 
    130 ; -- segment definition --
    131 ; No GOT_SYMBOL is defined in this branch.
    132 %define SEG_TEXT   .text
    133 %define SEG_CONST  .data
    134 
    135 %endif          ; ----------------------------------------------
    136 
    137 ; ==========================================================================
    138 
    139 ; --------------------------------------------------------------------------
    140 ;  Common types
    141 ;
        ;  POINTER, SIZEOF_POINTER, POINTER_BIT, resp, dp and the r??p register
        ;  aliases are pointer-sized: 64-bit when __x86_64__ is defined and the
        ;  output format is not elfx32, 32-bit in every other case.
    142 %ifdef __x86_64__
    143 %ifnidn __OUTPUT_FORMAT__, elfx32
    144 %define POINTER         qword           ; general pointer type
    145 %define SIZEOF_POINTER  SIZEOF_QWORD    ; sizeof(POINTER)
    146 %define POINTER_BIT     QWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
    147 %define resp            resq
    148 %define dp              dq
    149 %define raxp            rax
    150 %define rbxp            rbx
    151 %define rcxp            rcx
    152 %define rdxp            rdx
    153 %define rsip            rsi
    154 %define rdip            rdi
    155 %define rbpp            rbp
    156 %define rspp            rsp
    157 %define r8p             r8
    158 %define r9p             r9
    159 %define r10p            r10
    160 %define r11p            r11
    161 %define r12p            r12
    162 %define r13p            r13
    163 %define r14p            r14
    164 %define r15p            r15
    165 %endif
    166 %endif
        ; raxp doubles as the "64-bit aliases were selected" flag: if it is still
        ; undefined here, fall back to the 32-bit definitions.
    167 %ifndef raxp
    168 %define POINTER         dword           ; general pointer type
    169 %define SIZEOF_POINTER  SIZEOF_DWORD    ; sizeof(POINTER)
    170 %define POINTER_BIT     DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
    171 %define resp            resd
    172 %define dp              dd
    173 ; 32-bit pointers: __x86_64__ undefined, or the x86_64 ILP32 ABI (x32)
    174 %define raxp            eax
    175 %define rbxp            ebx
    176 %define rcxp            ecx
    177 %define rdxp            edx
    178 %define rsip            esi
    179 %define rdip            edi
    180 %define rbpp            ebp
    181 %define rspp            esp
    182 %define r8p             r8d
    183 %define r9p             r9d
    184 %define r10p            r10d
    185 %define r11p            r11d
    186 %define r12p            r12d
    187 %define r13p            r13d
    188 %define r14p            r14d
    189 %define r15p            r15d
    190 %endif
    191 
    192 %define INT             dword           ; signed integer type
    193 %define SIZEOF_INT      SIZEOF_DWORD    ; sizeof(INT)
    194 %define INT_BIT         DWORD_BIT       ; sizeof(INT)*BYTE_BIT
    195 
    196 %define FP32            dword           ; IEEE754 single
    197 %define SIZEOF_FP32     SIZEOF_DWORD    ; sizeof(FP32)
    198 %define FP32_BIT        DWORD_BIT       ; sizeof(FP32)*BYTE_BIT
    199 
    200 %define MMWORD          qword           ; int64  (MMX register)
    201 %define SIZEOF_MMWORD   SIZEOF_QWORD    ; sizeof(MMWORD)
    202 %define MMWORD_BIT      QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT
    203 
    204 ; NASM is buggy and doesn't properly handle operand sizes for SSE
    205 ; instructions, so for now we have to define XMMWORD as blank.
        ; (XMMWORD and YMMWORD therefore expand to nothing; only their SIZEOF_
        ; and _BIT companions carry a value.)
    206 %define XMMWORD                         ; int128 (SSE register)
    207 %define SIZEOF_XMMWORD  SIZEOF_OWORD    ; sizeof(XMMWORD)
    208 %define XMMWORD_BIT     OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT
    209 
    210 %define YMMWORD                         ; int256 (AVX register)
    211 %define SIZEOF_YMMWORD  SIZEOF_YWORD    ; sizeof(YMMWORD)
    212 %define YMMWORD_BIT     YWORD_BIT       ; sizeof(YMMWORD)*BYTE_BIT
    213 
    214 ; Similar hacks for when we load a dword or MMWORD into an xmm# register
    215 %define XMM_DWORD
    216 %define XMM_MMWORD
    217 
        ; Base sizes in bytes.  These are referenced by the definitions above
        ; before being defined; that works because a %define body is expanded
        ; when the macro is used, not when it is defined.
    218 %define SIZEOF_BYTE   1                 ; sizeof(byte)
    219 %define SIZEOF_WORD   2                 ; sizeof(word)
    220 %define SIZEOF_DWORD  4                 ; sizeof(dword)
    221 %define SIZEOF_QWORD  8                 ; sizeof(qword)
    222 %define SIZEOF_OWORD  16                ; sizeof(oword)
    223 %define SIZEOF_YWORD  32                ; sizeof(yword)
    224 
        ; Base sizes in bits.
    225 %define BYTE_BIT      8                 ; CHAR_BIT in C
    226 %define WORD_BIT      16                ; sizeof(word)*BYTE_BIT
    227 %define DWORD_BIT     32                ; sizeof(dword)*BYTE_BIT
    228 %define QWORD_BIT     64                ; sizeof(qword)*BYTE_BIT
    229 %define OWORD_BIT     128               ; sizeof(oword)*BYTE_BIT
    230 %define YWORD_BIT     256               ; sizeof(yword)*BYTE_BIT
    231 
    232 ; --------------------------------------------------------------------------
    233 ;  External Symbol Name
    234 ;
        ;  Default decoration: prefix the name with an underscore.  This applies
        ;  only when no format-specific branch has already defined EXTN(); the
        ;  WIN64 and ELF branches define it as the bare name.
    235 %ifndef EXTN
    236 %define EXTN(name)  _ %+ name           ; foo() -> _foo
    237 %endif
    238 
    239 ; --------------------------------------------------------------------------
    240 ;  Hidden symbols
    241 ;
        ;  GLOBAL_FUNCTION(name) and GLOBAL_DATA(name) declare EXTN(name) global.
        ;  ELF adds ":function hidden" / ":data hidden".  Mach-O adds
        ;  ":private_extern" under YASM, or under NASM when __NASM_VERSION_ID__ is
        ;  at least 0x020E0000.  Every other case falls back to a plain "global".
    242 %ifdef ELF      ; ----(nasm -felf[64] -DELF ...)--------
    243 %define GLOBAL_FUNCTION(name)  global EXTN(name):function hidden
    244 %define GLOBAL_DATA(name)      global EXTN(name):data hidden
    245 %elifdef MACHO  ; ----(nasm -fmacho -DMACHO ...)--------
    246 %ifdef __YASM_VER__
    247 %define GLOBAL_FUNCTION(name)  global EXTN(name):private_extern
    248 %define GLOBAL_DATA(name)      global EXTN(name):private_extern
    249 %else
    250 %if __NASM_VERSION_ID__ >= 0x020E0000
    251 %define GLOBAL_FUNCTION(name)  global EXTN(name):private_extern
    252 %define GLOBAL_DATA(name)      global EXTN(name):private_extern
    253 %endif
    254 %endif
    255 %endif
    256 
        ; Fallback: no visibility qualifier was selected above.
    257 %ifndef GLOBAL_FUNCTION
    258 %define GLOBAL_FUNCTION(name)  global EXTN(name)
    259 %endif
    260 %ifndef GLOBAL_DATA
    261 %define GLOBAL_DATA(name)      global EXTN(name)
    262 %endif
    263 
    264 ; --------------------------------------------------------------------------
    265 ;  Macros for position-independent code (PIC) support
    266 ;
        ;  GOTOFF(got, sym)      - address expression for sym, given base register got
        ;  GET_GOT reg           - load the PIC base address into reg
        ;  PUSHPIC/POPPIC/MOVPIC - push/pop/mov that expand to nothing without PIC
        ;
        ;  PIC is honoured only if the selected object format defined GOT_SYMBOL;
        ;  otherwise it is undefined here and the non-PIC variants are used.
    267 %ifndef GOT_SYMBOL
    268 %undef PIC
    269 %endif
    270 
    271 %ifdef PIC  ; -------------------------------------------
    272 
    273 %ifidn GOT_SYMBOL, _MACHO_PIC_  ; --------------------
    274 
    275 ; At present, nasm doesn't seem to support PIC generation for Mach-O.
    276 ; The PIC support code below is a little tricky.
        ; Instead of a GOT, addresses are formed relative to the label const_base,
        ; which is placed at the current position of the SEG_CONST section.
    277 
    278    SECTION     SEG_CONST
    279 const_base:
    280 
    281 %define GOTOFF(got, sym)  (got) + (sym) - const_base
    282 
    283 %imacro GET_GOT 1
    284    ; NOTE: this macro destroys ecx register.
        ; Result: %1 = run-time address of const_base.  ebp is saved and restored.
    285    call        %%geteip                ; ecx = address of the next instruction
    286    add         ecx, byte (%%ref - $)   ; ecx = run-time address of %%ref
    287    jmp         short %%adjust
    288 %%geteip:
    289    mov         ecx, POINTER [esp]      ; read the return address pushed by call
    290    ret
    291 %%adjust:
    292    push        ebp
    293    xor         ebp, ebp                ; ebp = 0
        ; The two db bytes plus the E9 opcode and 32-bit displacement assembled
        ; for "jmp near const_base" are executed together as a single lea, so the
        ; jmp is never executed as a jump; it only makes the assembler emit the
        ; displacement (const_base - %%ref).
    294 %ifidni %1, ebx  ; (%1 == ebx)
    295    ; db 0x8D,0x9C + jmp near const_base =
    296    ;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
    297    db          0x8D, 0x9C              ; 8D,9C
    298    jmp         near const_base         ; E9,(const_base-%%ref)
    299 %%ref:
    300 %else  ; (%1 != ebx)
    301    ; db 0x8D,0x8C + jmp near const_base =
    302    ;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
    303    db          0x8D, 0x8C              ; 8D,8C
    304    jmp         near const_base         ; E9,(const_base-%%ref)
    305 %%ref:
    306    mov         %1, ecx
    307 %endif  ; (%1 == ebx)
    308    pop         ebp
    309 %endmacro
    310 
    311 %else     ; GOT_SYMBOL != _MACHO_PIC_ ----------------
    312 
    313 %define GOTOFF(got, sym)  (got) + (sym) wrt ..gotoff
    314 
    315 %imacro GET_GOT 1
        ; Result: %1 = address of GOT_SYMBOL.  Only %1 is modified.
    316    extern      GOT_SYMBOL
    317    call        %%geteip                ; %1 = address of the next instruction
    318    add         %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
    319    jmp         short %%done
    320 %%geteip:
    321    mov         %1, POINTER [esp]       ; read the return address pushed by call
    322    ret
    323 %%done:
    324 %endmacro
    325 
    326 %endif    ; GOT_SYMBOL == _MACHO_PIC_ ----------------
    327 
        ; With PIC these are a plain push, pop and mov.
    328 %imacro PUSHPIC 1.nolist
    329    push        %1
    330 %endmacro
    331 %imacro POPPIC  1.nolist
    332    pop         %1
    333 %endmacro
    334 %imacro MOVPIC  2.nolist
    335    mov         %1, %2
    336 %endmacro
    337 
    338 %else    ; !PIC -----------------------------------------
    339 
        ; Without PIC, symbols are addressed directly and every helper macro
        ; expands to nothing.
    340 %define GOTOFF(got, sym)  (sym)
    341 
    342 %imacro GET_GOT 1.nolist
    343 %endmacro
    344 %imacro PUSHPIC 1.nolist
    345 %endmacro
    346 %imacro POPPIC  1.nolist
    347 %endmacro
    348 %imacro MOVPIC  2.nolist
    349 %endmacro
    350 
    351 %endif   ;  PIC -----------------------------------------
    352 
    353 ; --------------------------------------------------------------------------
    354 ;  Align the next instruction on {2,4,8,16,..}-byte boundary.
    355 ;  ".balign n,,m" in GNU as
    356 ;
        ;  MSKLE(x, y)    - a mask with all low-order bits set when
        ;                   (x & 0xFFFF) <= (y & 0xFFFF), and 0 otherwise.  It is
        ;                   ANDed into the "times" counts below to switch them off.
        ;  FILLB(b, n)    - number of bytes from address b up to the next n-byte
        ;                   boundary, measured from the section start ($$).
        ;  ALIGNX n [, m] - m defaults to 0xFFFF.  Padding is emitted only when
        ;                   the fill needed at the start of the macro is <= m.
        ;                   A fill of 16 bytes or more is made of single-byte
        ;                   nops; a shorter fill uses the longer no-op encodings
        ;                   first and finishes with shorter ones.
        ;  NOTE(review): the lea/mov fillers are annotated as ebx/ebp no-ops,
        ;  which holds for 32-bit code -- confirm before using ALIGNX in 64-bit
        ;  code.
    357 %define MSKLE(x, y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
    358 %define FILLB(b, n)  (($$-(b)) & ((n)-1))
    359 
    360 %imacro ALIGNX 1-2.nolist 0xFFFF
    361 %%bs: \
    362  times MSKLE(FILLB(%%bs, %1), %2) & MSKLE(16, FILLB($, %1)) & FILLB($, %1) \
    363        db 0x90                                      ; nop
    364  times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 9 \
    365        db 0x8D, 0x9C, 0x23, 0x00, 0x00, 0x00, 0x00  ; lea ebx,[ebx+0x00000000]
    366  times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 7 \
    367        db 0x8D, 0xAC, 0x25, 0x00, 0x00, 0x00, 0x00  ; lea ebp,[ebp+0x00000000]
    368  times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 6 \
    369        db 0x8D, 0xAD, 0x00, 0x00, 0x00, 0x00        ; lea ebp,[ebp+0x00000000]
    370  times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 4 \
    371        db 0x8D, 0x6C, 0x25, 0x00                    ; lea ebp,[ebp+0x00]
    372  times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 3 \
    373        db 0x8D, 0x6D, 0x00                          ; lea ebp,[ebp+0x00]
    374  times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 2 \
    375        db 0x8B, 0xED                                ; mov ebp,ebp
    376  times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 1 \
    377        db 0x90                                      ; nop
    378 %endmacro
    379 
    380 ; Align the next data on {2,4,8,16,..}-byte boundary.
    381 ; ALIGNZ n: thin wrapper around "align n" that pads with zero bytes.
    382 %imacro ALIGNZ 1.nolist
    383    align       %1, db 0                ; filling zeros
    384 %endmacro
    385 
    386 %ifdef __x86_64__
    387 
    388 %ifdef WIN64
    389 
    390 %imacro COLLECT_ARGS 1
        ; COLLECT_ARGS n (WIN64 variant): save xmm6 and xmm7 on the stack, then
        ; copy the first n arguments into r10..r15.  Arguments 1-4 come from
        ; rcx, rdx, r8, r9; arguments 5 and 6 are read from [rbp+48] and [rbp+56].
        ; r12-r15 are pushed before being overwritten (r10/r11 are not), and rsi
        ; and rdi are pushed last.  Undo with UNCOLLECT_ARGS and the same n.
        ; NOTE(review): the [rbp+...] loads assume the enclosing function has
        ; already set rbp up as its frame pointer, and movaps needs rsp to be
        ; 16-byte aligned at this point -- confirm against the callers.
    391    sub         rsp, SIZEOF_XMMWORD
    392    movaps      XMMWORD [rsp], xmm6
    393    sub         rsp, SIZEOF_XMMWORD
    394    movaps      XMMWORD [rsp], xmm7
    395    mov         r10, rcx                ; argument 1
    396 %if %1 > 1
    397    mov         r11, rdx                ; argument 2
    398 %endif
    399 %if %1 > 2
    400    push        r12
    401    mov         r12, r8                 ; argument 3
    402 %endif
    403 %if %1 > 3
    404    push        r13
    405    mov         r13, r9                 ; argument 4
    406 %endif
    407 %if %1 > 4
    408    push        r14
    409    mov         r14, [rbp+48]           ; argument 5 (read from memory)
    410 %endif
    411 %if %1 > 5
    412    push        r15
    413    mov         r15, [rbp+56]           ; argument 6 (read from memory)
    414 %endif
    415    push        rsi
    416    push        rdi
    417 %endmacro
    418 
    419 %imacro UNCOLLECT_ARGS 1
        ; UNCOLLECT_ARGS n (WIN64 variant): restore, in reverse order, everything
        ; the WIN64 COLLECT_ARGS saved -- rdi, rsi, then r15..r12 as selected by
        ; n, then xmm7 and xmm6.  n must match the value given to COLLECT_ARGS,
        ; otherwise the pops do not pair up with the pushes.
    420    pop         rdi
    421    pop         rsi
    422 %if %1 > 5
    423    pop         r15
    424 %endif
    425 %if %1 > 4
    426    pop         r14
    427 %endif
    428 %if %1 > 3
    429    pop         r13
    430 %endif
    431 %if %1 > 2
    432    pop         r12
    433 %endif
    434    movaps      xmm7, XMMWORD [rsp]
    435    add         rsp, SIZEOF_XMMWORD
    436    movaps      xmm6, XMMWORD [rsp]
    437    add         rsp, SIZEOF_XMMWORD
    438 %endmacro
    439 
    440 %imacro PUSH_XMM 1
        ; PUSH_XMM n (WIN64 variant): reserve n*SIZEOF_XMMWORD bytes of stack and
        ; store xmm8 .. xmm(7+n) there; at most xmm8-xmm11 are handled.  xmm8 is
        ; stored unconditionally, so n is expected to be at least 1.
        ; NOTE(review): movaps needs rsp to be 16-byte aligned here -- confirm
        ; against the callers.
    441    sub         rsp, %1 * SIZEOF_XMMWORD
    442    movaps      XMMWORD [rsp+0*SIZEOF_XMMWORD], xmm8
    443 %if %1 > 1
    444    movaps      XMMWORD [rsp+1*SIZEOF_XMMWORD], xmm9
    445 %endif
    446 %if %1 > 2
    447    movaps      XMMWORD [rsp+2*SIZEOF_XMMWORD], xmm10
    448 %endif
    449 %if %1 > 3
    450    movaps      XMMWORD [rsp+3*SIZEOF_XMMWORD], xmm11
    451 %endif
    452 %endmacro
    453 
    454 %imacro POP_XMM 1
        ; POP_XMM n (WIN64 variant): reload xmm8 .. xmm(7+n) from the slots
        ; written by PUSH_XMM n, then release the n*SIZEOF_XMMWORD bytes.  rsp
        ; must have the same value it had right after the matching PUSH_XMM.
    455    movaps      xmm8, XMMWORD [rsp+0*SIZEOF_XMMWORD]
    456 %if %1 > 1
    457    movaps      xmm9, XMMWORD [rsp+1*SIZEOF_XMMWORD]
    458 %endif
    459 %if %1 > 2
    460    movaps      xmm10, XMMWORD [rsp+2*SIZEOF_XMMWORD]
    461 %endif
    462 %if %1 > 3
    463    movaps      xmm11, XMMWORD [rsp+3*SIZEOF_XMMWORD]
    464 %endif
    465    add         rsp, %1 * SIZEOF_XMMWORD
    466 %endmacro
    467 
    468 %else
    469 
    470 %imacro COLLECT_ARGS 1
        ; COLLECT_ARGS n (non-WIN64 variant): copy the first n arguments from
        ; rdi, rsi, rdx, rcx, r8, r9 into r10..r15, pushing each destination
        ; register before it is overwritten.  Undo with UNCOLLECT_ARGS and the
        ; same n.
    471    push        r10
    472    mov         r10, rdi                ; argument 1
    473 %if %1 > 1
    474    push        r11
    475    mov         r11, rsi                ; argument 2
    476 %endif
    477 %if %1 > 2
    478    push        r12
    479    mov         r12, rdx                ; argument 3
    480 %endif
    481 %if %1 > 3
    482    push        r13
    483    mov         r13, rcx                ; argument 4
    484 %endif
    485 %if %1 > 4
    486    push        r14
    487    mov         r14, r8                 ; argument 5
    488 %endif
    489 %if %1 > 5
    490    push        r15
    491    mov         r15, r9                 ; argument 6
    492 %endif
    493 %endmacro
    494 
    495 %imacro UNCOLLECT_ARGS 1
        ; UNCOLLECT_ARGS n (non-WIN64 variant): pop r15..r10 in the reverse of
        ; the order COLLECT_ARGS n pushed them.  n must match the value given to
        ; COLLECT_ARGS, otherwise the pops do not pair up with the pushes.
    496 %if %1 > 5
    497    pop         r15
    498 %endif
    499 %if %1 > 4
    500    pop         r14
    501 %endif
    502 %if %1 > 3
    503    pop         r13
    504 %endif
    505 %if %1 > 2
    506    pop         r12
    507 %endif
    508 %if %1 > 1
    509    pop         r11
    510 %endif
    511    pop         r10
    512 %endmacro
    513 
    514 %imacro PUSH_XMM 1
        ; Non-WIN64 variant: takes the same single argument as the WIN64
        ; PUSH_XMM but expands to nothing.
    515 %endmacro
    516 
    517 %imacro POP_XMM 1
        ; Non-WIN64 variant: expands to nothing, mirroring PUSH_XMM above.
    518 %endmacro
    519 
    520 %endif
    521 
    522 %endif
    523 
    524 %ifdef __CET__
    525 
    526 %imacro ENDBR64 0
        ; Emits the four bytes F3 0F 1E FA (the dword below, stored
        ; little-endian), which is the encoding of the endbr64 instruction.
        ; NOTE(review): presumably written as raw data so that assemblers
        ; without the endbr64 mnemonic can still build this -- confirm.
    527    dd 0xfa1e0ff3
    528 %endmacro
    529 
    530 %else
    531 
    532 %imacro ENDBR64 0
        ; __CET__ is not defined: expands to nothing.
    533 %endmacro
    534 
    535 %endif
    536 
    537 ; --------------------------------------------------------------------------
    538 ;  Defines picked up from the C headers
    539 ;
    540 %include "jsimdcfg.inc"
    541 
    542 ; --------------------------------------------------------------------------