tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mpi_amd64_masm.asm (7781B)


      1 ; This Source Code Form is subject to the terms of the Mozilla Public
      2 ; License, v. 2.0. If a copy of the MPL was not distributed with this
      3 ; file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 ;
      6 ; This code is converted from mpi_amd64_gas.asm for MASM for x64.
      7 ;
      8 
      9 ; ------------------------------------------------------------------------
     10 ;
     11 ;  Implementation of s_mpv_mul_set_vec which exploits
     12 ;  the 64X64->128 bit  unsigned multiply instruction.
     13 ;
     14 ; ------------------------------------------------------------------------
     15 
     16 ; r = a * digit, r and a are vectors of length len
     17 ; returns the carry digit
     18 ; r and a are 64 bit aligned.
     19 ;
     20 ; uint64_t
     21 ; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
     22 ;
     23 
     24 .CODE
     25 
     s_mpv_mul_set_vec64 PROC FRAME

            ; r[i] = low64(a[i] * digit + carry) for i = 0 .. len-1;
            ; the return value is the carry left after the last word
            ; (0 when len == 0).
            ;
            ; Entry (Microsoft x64):  rcx = r, rdx = a, r8d = len, r9 = digit
            ;
            ; The body was converted from a GAS version, so the arguments are
            ; first moved into the registers that code expects:
            ;   rdi = r          rsi = a
            ;   r8  = words left rcx = digit
            ;   r9  = running carry
            ;   rdx:rax = product of each MUL
            ;   r11 = next source word, loaded ahead of the MUL that uses it
            ;
            ; rdi and rsi are non-volatile on Win64: they are saved below and
            ; described to the unwinder through FRAME / .pushreg.

            push    rdi
            .pushreg rdi
            push    rsi
            .pushreg rsi
            .endprolog

            mov     rdi, rcx                ; rdi = r
            mov     rsi, rdx                ; rsi = a
            mov     edx, r8d                ; rdx = len (32-bit write zero-extends)
            mov     rcx, r9                 ; rcx = digit

            ; Clear the carry BEFORE the length check: the exit path returns
            ; r9, which still holds the digit argument at this point.
            xor     r9, r9                  ; carry = 0
            test    rdx, rdx
            jz      set_done                ; len == 0 -> return 0
            mov     r8, rdx                 ; r8 = words left (MUL clobbers rdx)

     set_loop8:
            ; Main loop: handles 8 words per pass while at least 8 remain.
            cmp     r8, 8
            jb      set_tail

            mov     rax, [rsi]
            mov     r11, [rsi + 8]
            mul     rcx                     ; rdx:rax = a[0] * digit
            add     rax, r9
            adc     rdx, 0
            mov     [rdi], rax
            mov     r9, rdx

            mov     rax, r11
            mov     r11, [rsi + 16]
            mul     rcx                     ; a[1] * digit
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 8], rax
            mov     r9, rdx

            mov     rax, r11
            mov     r11, [rsi + 24]
            mul     rcx                     ; a[2] * digit
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 16], rax
            mov     r9, rdx

            mov     rax, r11
            mov     r11, [rsi + 32]
            mul     rcx                     ; a[3] * digit
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 24], rax
            mov     r9, rdx

            mov     rax, r11
            mov     r11, [rsi + 40]
            mul     rcx                     ; a[4] * digit
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 32], rax
            mov     r9, rdx

            mov     rax, r11
            mov     r11, [rsi + 48]
            mul     rcx                     ; a[5] * digit
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 40], rax
            mov     r9, rdx

            mov     rax, r11
            mov     r11, [rsi + 56]
            mul     rcx                     ; a[6] * digit
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 48], rax
            mov     r9, rdx

            mov     rax, r11
            mul     rcx                     ; a[7] * digit
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 56], rax
            mov     r9, rdx

            add     rsi, 64                 ; advance both vectors by 8 words
            add     rdi, 64
            sub     r8, 8
            jnz     set_loop8               ; more words: re-check for a full group
            jmp     set_done

     set_tail:
            ; Tail: 1 to 7 words remain; leave as soon as the count hits zero.
            mov     rax, [rsi]
            mul     rcx
            add     rax, r9
            adc     rdx, 0
            mov     [rdi], rax
            mov     r9, rdx
            dec     r8
            jz      set_done

            mov     rax, [rsi + 8]
            mul     rcx
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 8], rax
            mov     r9, rdx
            dec     r8
            jz      set_done

            mov     rax, [rsi + 16]
            mul     rcx
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 16], rax
            mov     r9, rdx
            dec     r8
            jz      set_done

            mov     rax, [rsi + 24]
            mul     rcx
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 24], rax
            mov     r9, rdx
            dec     r8
            jz      set_done

            mov     rax, [rsi + 32]
            mul     rcx
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 32], rax
            mov     r9, rdx
            dec     r8
            jz      set_done

            mov     rax, [rsi + 40]
            mul     rcx
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 40], rax
            mov     r9, rdx
            dec     r8
            jz      set_done

            ; Seventh and last possible tail word: falls through to the exit.
            mov     rax, [rsi + 48]
            mul     rcx
            add     rax, r9
            adc     rdx, 0
            mov     [rdi + 48], rax
            mov     r9, rdx
            dec     r8

     set_done:
            mov     rax, r9                 ; return the carry
            pop     rsi
            pop     rdi
            ret

     s_mpv_mul_set_vec64 ENDP
    175 
    176 
    177 ;------------------------------------------------------------------------
    178 ;
    179 ; Implementation of s_mpv_mul_add_vec which exploits
    180 ; the 64X64->128 bit  unsigned multiply instruction.
    181 ;
    182 ;------------------------------------------------------------------------
    183 
    184 ; r += a * digit, r and a are vectors of length len
    185 ; returns the carry digit
    186 ; r and a are 64 bit aligned.
    187 ;
    188 ; uint64_t
    189 ; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
    190 ; 
    191 
    s_mpv_mul_add_vec64 PROC FRAME

           ; r[i] = low64(r[i] + a[i] * digit + carry) for i = 0 .. len-1;
           ; the return value is the carry left after the last word
           ; (0 when len == 0).
           ;
           ; Entry (Microsoft x64):  rcx = r, rdx = a, r8d = len, r9 = digit
           ;
           ; The body was converted from a GAS version, so the arguments are
           ; first moved into the registers that code expects:
           ;   rdi = r          rsi = a
           ;   r8  = words left rcx = digit
           ;   r9  = running carry
           ;   rdx:rax = product of each MUL
           ;   r10 = current r[] word, r11 = next a[] word (both loaded ahead)
           ;
           ; Each step adds two 64-bit values (r[i] and the carry) to the
           ; 128-bit product, hence the two add/adc pairs.
           ;
           ; rdi and rsi are non-volatile on Win64: they are saved below and
           ; described to the unwinder through FRAME / .pushreg.

           push    rdi
           .pushreg rdi
           push    rsi
           .pushreg rsi
           .endprolog

           mov     rdi, rcx                ; rdi = r
           mov     rsi, rdx                ; rsi = a
           mov     edx, r8d                ; rdx = len (32-bit write zero-extends)
           mov     rcx, r9                 ; rcx = digit

           ; Clear the carry BEFORE the length check: the exit path returns
           ; r9, which still holds the digit argument at this point.
           xor     r9, r9                  ; carry = 0
           test    rdx, rdx
           jz      add_done                ; len == 0 -> return 0
           mov     r8, rdx                 ; r8 = words left (MUL clobbers rdx)

    add_loop8:
           ; Main loop: handles 8 words per pass while at least 8 remain.
           cmp     r8, 8
           jb      add_tail

           mov     rax, [rsi]
           mov     r10, [rdi]
           mov     r11, [rsi + 8]
           mul     rcx                     ; rdx:rax = a[0] * digit
           add     rax, r10                ; + r[0]
           adc     rdx, 0
           mov     r10, [rdi + 8]
           add     rax, r9                 ; + carry
           adc     rdx, 0
           mov     [rdi], rax
           mov     r9, rdx

           mov     rax, r11
           mov     r11, [rsi + 16]
           mul     rcx                     ; a[1] * digit
           add     rax, r10
           adc     rdx, 0
           mov     r10, [rdi + 16]
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 8], rax
           mov     r9, rdx

           mov     rax, r11
           mov     r11, [rsi + 24]
           mul     rcx                     ; a[2] * digit
           add     rax, r10
           adc     rdx, 0
           mov     r10, [rdi + 24]
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 16], rax
           mov     r9, rdx

           mov     rax, r11
           mov     r11, [rsi + 32]
           mul     rcx                     ; a[3] * digit
           add     rax, r10
           adc     rdx, 0
           mov     r10, [rdi + 32]
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 24], rax
           mov     r9, rdx

           mov     rax, r11
           mov     r11, [rsi + 40]
           mul     rcx                     ; a[4] * digit
           add     rax, r10
           adc     rdx, 0
           mov     r10, [rdi + 40]
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 32], rax
           mov     r9, rdx

           mov     rax, r11
           mov     r11, [rsi + 48]
           mul     rcx                     ; a[5] * digit
           add     rax, r10
           adc     rdx, 0
           mov     r10, [rdi + 48]
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 40], rax
           mov     r9, rdx

           mov     rax, r11
           mov     r11, [rsi + 56]
           mul     rcx                     ; a[6] * digit
           add     rax, r10
           adc     rdx, 0
           mov     r10, [rdi + 56]
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 48], rax
           mov     r9, rdx

           mov     rax, r11
           mul     rcx                     ; a[7] * digit
           add     rax, r10
           adc     rdx, 0
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 56], rax
           mov     r9, rdx

           add     rsi, 64                 ; advance both vectors by 8 words
           add     rdi, 64
           sub     r8, 8
           jnz     add_loop8               ; more words: re-check for a full group
           jmp     add_done

    add_tail:
           ; Tail: 1 to 7 words remain; leave as soon as the count hits zero.
           mov     rax, [rsi]
           mov     r10, [rdi]
           mul     rcx
           add     rax, r10
           adc     rdx, 0
           add     rax, r9
           adc     rdx, 0
           mov     [rdi], rax
           mov     r9, rdx
           dec     r8
           jz      add_done

           mov     rax, [rsi + 8]
           mov     r10, [rdi + 8]
           mul     rcx
           add     rax, r10
           adc     rdx, 0
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 8], rax
           mov     r9, rdx
           dec     r8
           jz      add_done

           mov     rax, [rsi + 16]
           mov     r10, [rdi + 16]
           mul     rcx
           add     rax, r10
           adc     rdx, 0
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 16], rax
           mov     r9, rdx
           dec     r8
           jz      add_done

           mov     rax, [rsi + 24]
           mov     r10, [rdi + 24]
           mul     rcx
           add     rax, r10
           adc     rdx, 0
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 24], rax
           mov     r9, rdx
           dec     r8
           jz      add_done

           mov     rax, [rsi + 32]
           mov     r10, [rdi + 32]
           mul     rcx
           add     rax, r10
           adc     rdx, 0
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 32], rax
           mov     r9, rdx
           dec     r8
           jz      add_done

           mov     rax, [rsi + 40]
           mov     r10, [rdi + 40]
           mul     rcx
           add     rax, r10
           adc     rdx, 0
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 40], rax
           mov     r9, rdx
           dec     r8
           jz      add_done

           ; Seventh and last possible tail word: falls through to the exit.
           mov     rax, [rsi + 48]
           mov     r10, [rdi + 48]
           mul     rcx
           add     rax, r10
           adc     rdx, 0
           add     rax, r9
           adc     rdx, 0
           mov     [rdi + 48], rax
           mov     r9, rdx
           dec     r8

    add_done:
           mov     rax, r9                 ; return the carry
           pop     rsi
           pop     rdi
           ret

    s_mpv_mul_add_vec64 ENDP
    387 
    388 END