tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

arcfour-amd64-masm.asm (3882B)


      1 ; This Source Code Form is subject to the terms of the Mozilla Public
      2 ; License, v. 2.0. If a copy of the MPL was not distributed with this
      3 ; file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 ; ** ARCFOUR implementation optimized for AMD64.
      6 ; **
      7 ; ** The throughput achieved by this code is about 320 MBytes/sec, on
      8 ; ** a 1.8 GHz AMD Opteron (rev C0) processor.
      9 
     10 .CODE
     11 
     12 ; extern void ARCFOUR(RC4Context *cx, unsigned long long inputLen,
     13 ;                     const unsigned char *input, unsigned char *output);
     14 ;
     15 ; XORs inputLen bytes of input with the RC4 keystream and writes the
     16 ; result to output, advancing the stream state held in *cx.
     17 ;
     18 ; Calling convention: Microsoft x64.
     19 ;   rcx = cx, rdx = inputLen, r8 = input, r9 = output
     20 ;
     21 ; Context layout as accessed by this routine:
     22 ;   [cx+0]        x   (read as 8 bytes, only the low byte is written back)
     23 ;   [cx+8]        y   (read as 8 bytes, only the low byte is written back)
     24 ;   [cx+16+8*i]   d[i], one state entry every 8 bytes
     25 ; y and the d[] entries are used as table indices without masking, so
     26 ; they are expected to already be in the range 0..255.
     27 ;
     28 ; Register roles in the body:
     29 ;   rbp = d          rcx = x          rdx = y          rax = tx = d[x]
     30 ;   rsi = in         rdi = out        r9  = in+len-8   r8  = keystream
     31 ;   rbx = scratch (ty / val)          r11 = byte counter
     32 ;
     33 ; rbp, rbx, rsi and rdi are nonvolatile in this ABI and are saved on the
     34 ; stack.  Because the function changes rsp it is declared PROC FRAME and
     35 ; describes each push with .pushreg, so the assembler emits unwind data
     36 ; for it.
     37 
     38 ARCFOUR PROC FRAME
     39 
     40        push    rbp
     41        .pushreg rbp
     42        push    rbx
     43        .pushreg rbx
     44        push    rsi
     45        .pushreg rsi
     46        push    rdi
     47        .pushreg rdi
     48        .endprolog
     49 
     50        mov     rbp, rcx                        ; key = ARG(key)
     51        mov     rbx, rdx                        ; rbx = ARG(len)
     52        mov     rsi, r8                         ; in = ARG(in)
     53        mov     rdi, r9                         ; out = ARG(out)
     54        mov     rcx, [rbp]                      ; x = key->x
     55        mov     rdx, [rbp+8]                    ; y = key->y
     56        add     rbp, 16                         ; d = key->data
     57        inc     rcx                             ; x++ (look-ahead, undone at Lfinished)
     58        and     rcx, 0ffh                       ; x &= 0xff
     59        lea     rbx, [rbx+rsi-8]                ; rbx = in+len-8
     60        mov     r9, rbx                         ; tmp = in+len-8
     61        mov     rax, [rbp+rcx*8]                ; tx = d[x] (preloaded for first round)
     62        cmp     rbx, rsi                        ; cmp in with in+len-8
     63        jl      Lend                            ; fewer than 8 bytes: skip the 8-byte loop
     64 
     65 Lstart:
     66        add     rsi, 8                          ; increment in
     67        add     rdi, 8                          ; increment out
     68 
     69        ;
     70        ; generate the next 8 bytes of the rc4 stream into r8
     71        ;
     72 
     73        mov     r11, 8                          ; byte counter
     74 
     75 Lnextbyte:
     76        add     dl, al                          ; y += tx
     77        mov     ebx, [rbp+rdx*8]                ; ty = d[y]
     78        mov     [rbp+rcx*8], ebx                ; d[x] = ty
     79        add     bl, al                          ; val = ty + tx
     80        mov     [rbp+rdx*8], eax                ; d[y] = tx
     81        inc     cl                              ; x++ (NEXT ROUND)
     82        mov     eax, [rbp+rcx*8]                ; tx = d[x] (NEXT ROUND)
     83        mov     r8b, [rbp+rbx*8]                ; val = d[val]
     84        dec     r11b                            ; sets ZF for the jnz below
     85        ror     r8, 8                           ; (ror does not change ZF); after 8
     86        jnz     Lnextbyte                       ; rounds byte k sits at bits 8k..8k+7
     87 
     88        ;
     89        ; xor 8 bytes
     90        ;
     91 
     92        xor     r8, [rsi-8]
     93        cmp     rsi, r9                         ; cmp in+len-8 with in
     94        mov     [rdi-8], r8                     ; (mov does not change flags)
     95        jle     Lstart                          ; at least 8 more bytes remain
     96 
     97 Lend:
     98        add     r9, 8                           ; tmp = in+len
     99 
    100        ;
    101        ; handle the last bytes, one by one
    102        ;
    103 
    104 Ltail:
    105        cmp     r9, rsi                         ; cmp in with in+len
    106        jle     Lfinished                       ; jump if (in+len <= in)
    107        add     dl, al                          ; y += tx
    108        mov     ebx, [rbp+rdx*8]                ; ty = d[y]
    109        mov     [rbp+rcx*8], ebx                ; d[x] = ty
    110        add     bl, al                          ; val = ty + tx
    111        mov     [rbp+rdx*8], eax                ; d[y] = tx
    112        inc     cl                              ; x++ (NEXT ROUND)
    113        mov     eax, [rbp+rcx*8]                ; tx = d[x] (NEXT ROUND)
    114        mov     r8b, [rbp+rbx*8]                ; val = d[val]
    115        xor     r8b, [rsi]                      ; xor 1 byte
    116        mov     [rdi], r8b
    117        inc     rsi                             ; in++
    118        inc     rdi                             ; out++
    119        jmp     Ltail
    120 
    121 Lfinished:
    122        dec     rcx                             ; x-- (undo the look-ahead increment)
    123        mov     [rbp-8], dl                     ; key->y = y (low byte only)
    124        mov     [rbp-16], cl                    ; key->x = x (low byte only)
    125 
    126        pop     rdi
    127        pop     rsi
    128        pop     rbx
    129        pop     rbp
    130        ret
    131 
    132 ARCFOUR ENDP
    133 
    134 END