tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

arcfour-amd64-gas.s (2478B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 # ** ARCFOUR implementation optimized for AMD64.
      6 # **
      7 # ** The throughput achieved by this code is about 320 MBytes/sec, on
      8 # ** a 1.8 GHz AMD Opteron (rev C0) processor.
      9 
     10 .text
     11 .align 16
     12 .globl ARCFOUR
     13 .type ARCFOUR,@function
        # ----------------------------------------------------------------------
        # ARCFOUR: XOR `len` bytes read from `in` with the rc4 stream generated
        # from the state in `key`, write the result to `out`, and store the
        # advanced x/y indices back into `key`.
        #
        # Syntax: AT&T (source, destination).
        #
        # Inputs as consumed below (names follow the ARG() comments):
        #   %rdi = key    %rsi = len    %rdx = in    %rcx = out
        #
        # Layout of *key as accessed by this code:
        #    0(key)  x   (read as a 64-bit word, only the low byte is written back)
        #    8(key)  y   (read as a 64-bit word, only the low byte is written back)
        #   16(key)  d[] (table indexed with an 8-byte stride)
        #
        # Register roles in the body:
        #   %rbp = d      %rcx = x       %rdx = y        %rax = tx
        #   %rbx = ty / val scratch (after first holding in+len-8)
        #   %rsi = in     %rdi = out     %r8  = generated stream bytes
        #   %r9  = end marker (in+len-8, later in+len)    %r11 = byte counter
        #
        # %rbx and %rbp are pushed on entry and popped before ret; every other
        # register listed above, and the flags, are overwritten.
        #
        # NOTE(review): the loops mix a 64-bit load, 32-bit loads/stores and
        # byte-wide index arithmetic on d[] entries and on x/y. This presumably
        # relies on every entry, and key->x / key->y, holding a value in 0..255
        # with all upper bits zero -- confirm against the key setup code.
        # NOTE(review): the buffer bounds are tested with signed condition codes
        # (jl/jle) on addresses -- confirm no caller can pass a range for which
        # signed and unsigned ordering differ.
        # ----------------------------------------------------------------------
     14 ARCFOUR:
     15 pushq	%rbp				# preserved; popped before ret
     16 pushq	%rbx				# preserved; popped before ret
     17 movq	%rdi,		%rbp	# key = ARG(key)
     18 movq	%rsi,		%rbx	# rbx = ARG(len)
     19 movq	%rdx,		%rsi	# in = ARG(in)
     20 movq	%rcx,		%rdi	# out = ARG(out)
     21 movq	(%rbp),		%rcx	# x = key->x
     22 movq	8(%rbp),	%rdx	# y = key->y
     23 addq	$16,		%rbp	# d = key->data (x and y are now at -16/-8(%rbp))
     24 incq	%rcx			# x++ (x is kept one step ahead; undone at .Lfinished)
     25 andq	$255,		%rcx	# x &= 0xff
     26 leaq	-8(%rbx,%rsi),	%rbx	# rbx = in+len-8 (last position where 8 bytes still fit)
     27 movq	%rbx,		%r9	# tmp = in+len-8 (copy kept because rbx is reused as scratch)
     28 movq	0(%rbp,%rcx,8),	%rax	# tx = d[x] (preloaded for the first round)
     29 cmpq	%rsi,		%rbx	# cmp in with in+len-8
     30 jl	.Lend			# jump if (in+len-8 < in), i.e. fewer than 8 bytes to process
     31 
        # Main loop: one iteration handles 8 bytes. Both pointers are advanced
        # first, so the data for this iteration is addressed at -8(ptr).
     32 .Lstart:
     33 addq	$8,		%rsi		# increment in
     34 addq	$8,		%rdi		# increment out
     35 
     36 # generate the next 8 bytes of the rc4 stream into %r8
        # Each round places one byte in %r8b and rotates %r8 right by 8 bits;
        # after 8 rounds the first generated byte is in the lowest byte of %r8
        # and the last one in the highest, matching the byte order of the
        # 64-bit xor and store below.
     37 movq	$8,		%r11		# byte counter
     38 1:	addb	%al,		%dl		# y += tx (byte add, wraps modulo 256)
     39 movl	0(%rbp,%rdx,8),	%ebx		# ty = d[y]
     40 movl	%ebx,		0(%rbp,%rcx,8)	# d[x] = ty
     41 addb	%al,		%bl		# val = ty + tx (byte add, wraps modulo 256)
     42 movl	%eax,		0(%rbp,%rdx,8)	# d[y] = tx
     43 incb	%cl				# x++		(NEXT ROUND) (byte increment, wraps modulo 256)
     44 movl	0(%rbp,%rcx,8),	%eax		# tx = d[x]	(NEXT ROUND)
     45 movb	0(%rbp,%rbx,8),	%r8b		# val = d[val]
     46 decb	%r11b				# one byte done; ZF is set when the counter reaches 0
     47 rorq	$8,		%r8		# (ror does not change ZF)
     48 jnz 	1b				# tests ZF from the decb above
     49 
     50 # xor 8 bytes
     51 xorq	-8(%rsi),	%r8		# stream ^= 8 input bytes
     52 cmpq	%r9,		%rsi		# cmp in+len-8 with in
     53 movq	%r8,		-8(%rdi)	# write 8 output bytes (mov leaves the cmp flags untouched)
     54 jle	.Lstart				# jump if (in <= in+len-8)
     55 
     56 .Lend:
     57 addq	$8,		%r9		# tmp = in+len
     58 
     59 # handle the last bytes, one by one
        # Same round as in the main loop, but each generated byte is xored and
        # stored immediately. Note that the numeric label 1: is reused here.
     60 1:	cmpq	%rsi,		%r9		# cmp in with in+len
     61 jle	.Lfinished			# jump if (in+len <= in)
     62 addb	%al,		%dl		# y += tx
     63 movl	0(%rbp,%rdx,8),	%ebx		# ty = d[y]
     64 movl	%ebx,		0(%rbp,%rcx,8)	# d[x] = ty
     65 addb	%al,		%bl		# val = ty + tx
     66 movl	%eax,		0(%rbp,%rdx,8)	# d[y] = tx
     67 incb	%cl				# x++		(NEXT ROUND)
     68 movl	0(%rbp,%rcx,8),	%eax		# tx = d[x]	(NEXT ROUND)
     69 movb	0(%rbp,%rbx,8),	%r8b		# val = d[val]
     70 xorb	(%rsi),		%r8b		# xor 1 byte
     71 movb	%r8b,		(%rdi)		# write 1 output byte
     72 incq	%rsi				# in++
     73 incq	%rdi				# out++
     74 jmp 1b					# back to the end-of-input test
     75 
     76 .Lfinished:
     77 decq	%rcx				# x-- (undo the look-ahead increment; only %cl is stored)
     78 movb	%dl,		-8(%rbp)	# key->y = y (low byte only)
     79 movb	%cl,		-16(%rbp)	# key->x = x (low byte only)
     80 popq	%rbx				# restore in reverse push order
     81 popq	%rbp
     82 ret
     83 .L_ARCFOUR_end:
     84 .size ARCFOUR,.L_ARCFOUR_end-ARCFOUR
     85 
     86 # An empty .note.GNU-stack section (no flags) marks this object as not needing an executable stack
     87 .section .note.GNU-stack,"",@progbits
     88 .previous	# return to the section that was active before the .section above