[ tor-browser ].git.dasho

mpi_x86.s (12676B)
      1 #
      2 # This Source Code Form is subject to the terms of the Mozilla Public
      3 # License, v. 2.0. If a copy of the MPL was not distributed with this
      4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      5 
      6 .data
      7 .align 4
      8 #
      9 # -1 means to call s_mpi_is_sse to determine if we support sse 
     10 #    instructions.
     11 #  0 means to use x86 instructions
     12 #  1 means to use sse2 instructions
     13 .type	is_sse,@object
     14 .size	is_sse,4
     15 is_sse: .long	-1 
     16 
     17 #
     18 # sigh, handle the difference between -fPIC and not PIC
     19 # default to pic, since this file seems to be exclusively
     20 # linux right now (solaris uses mpi_i86pc.s and windows uses
     21 # mpi_x86_asm.c)
     22 #
     23 .ifndef NO_PIC
     24 .macro GET   var,reg
     25    movl   \var@GOTOFF(%ebx),\reg
     26 .endm
     27 .macro PUT   reg,var
     28    movl   \reg,\var@GOTOFF(%ebx)
     29 .endm
     30 .else
     31 .macro GET   var,reg
     32    movl   \var,\reg
     33 .endm
     34 .macro PUT   reg,var
     35    movl   \reg,\var
     36 .endm
     37 .endif
     38 
     39 .text
     40 
     41 
     42 #  ebp - 36:	caller's esi
     43 #  ebp - 32:	caller's edi
     44 #  ebp - 28:	
     45 #  ebp - 24:	
     46 #  ebp - 20:	
     47 #  ebp - 16:	
     48 #  ebp - 12:	
     49 #  ebp - 8:	
     50 #  ebp - 4:	
     51 #  ebp + 0:	caller's ebp
     52 #  ebp + 4:	return address
     53 #  ebp + 8:	a	argument
     54 #  ebp + 12:	a_len	argument
     55 #  ebp + 16:	b	argument
     56 #  ebp + 20:	c	argument
     57 #  registers:
     58 # 	eax:
     59 #	ebx:	carry
     60 #	ecx:	a_len
     61 #	edx:
     62 #	esi:	a ptr
     63 #	edi:	c ptr
     64 .globl	s_mpv_mul_d
     65 .type	s_mpv_mul_d,@function
     66 s_mpv_mul_d:
     67    GET    is_sse,%eax
     68    cmp    $0,%eax
     69    je     s_mpv_mul_d_x86
     70    jg     s_mpv_mul_d_sse2
     71    call   s_mpi_is_sse2
     72    PUT    %eax,is_sse
     73    cmp    $0,%eax
     74    jg     s_mpv_mul_d_sse2
     75 s_mpv_mul_d_x86:
     76    push   %ebp
     77    mov    %esp,%ebp
     78    sub    $28,%esp
     79    push   %edi
     80    push   %esi
     81    push   %ebx
     82    movl   $0,%ebx		# carry = 0
     83    mov    12(%ebp),%ecx	# ecx = a_len
     84    mov    20(%ebp),%edi
     85    cmp    $0,%ecx
     86    je     2f			# jmp if a_len == 0
     87    mov    8(%ebp),%esi		# esi = a
     88    cld
     89 1:
     90    lodsl			# eax = [ds:esi]; esi += 4
     91    mov    16(%ebp),%edx	# edx = b
     92    mull   %edx			# edx:eax = Phi:Plo = a_i * b
     93 
     94    add    %ebx,%eax		# add carry (%ebx) to edx:eax
     95    adc    $0,%edx
     96    mov    %edx,%ebx		# high half of product becomes next carry
     97 
     98    stosl			# [es:edi] = ax; edi += 4;
     99    dec    %ecx			# --a_len
    100    jnz    1b			# jmp if a_len != 0
    101 2:
    102    mov    %ebx,0(%edi)		# *c = carry
    103    pop    %ebx
    104    pop    %esi
    105    pop    %edi
    106    leave  
    107    ret    
    108    nop
    109 s_mpv_mul_d_sse2:
    110    push   %ebp
    111    mov    %esp,%ebp
    112    push   %edi
    113    push   %esi
    114    psubq  %mm2,%mm2		# carry = 0
    115    mov    12(%ebp),%ecx	# ecx = a_len
    116    movd   16(%ebp),%mm1	# mm1 = b
    117    mov    20(%ebp),%edi
    118    cmp    $0,%ecx
    119    je     6f			# jmp if a_len == 0
    120    mov    8(%ebp),%esi		# esi = a
    121    cld
    122 5:
    123    movd   0(%esi),%mm0         # mm0 = *a++
    124    add    $4,%esi
    125    pmuludq %mm1,%mm0           # mm0 = b * *a++
    126    paddq  %mm0,%mm2            # add the carry
    127    movd   %mm2,0(%edi)         # store the 32bit result
    128    add    $4,%edi
    129    psrlq  $32, %mm2		# save the carry
    130    dec    %ecx			# --a_len
    131    jnz    5b			# jmp if a_len != 0
    132 6:
    133    movd   %mm2,0(%edi)		# *c = carry
    134    emms
    135    pop    %esi
    136    pop    %edi
    137    leave  
    138    ret    
    139    nop
    140 
    141 #  ebp - 36:	caller's esi
    142 #  ebp - 32:	caller's edi
    143 #  ebp - 28:	
    144 #  ebp - 24:	
    145 #  ebp - 20:	
    146 #  ebp - 16:	
    147 #  ebp - 12:	
    148 #  ebp - 8:	
    149 #  ebp - 4:	
    150 #  ebp + 0:	caller's ebp
    151 #  ebp + 4:	return address
    152 #  ebp + 8:	a	argument
    153 #  ebp + 12:	a_len	argument
    154 #  ebp + 16:	b	argument
    155 #  ebp + 20:	c	argument
    156 #  registers:
    157 # 	eax:
    158 #	ebx:	carry
    159 #	ecx:	a_len
    160 #	edx:
    161 #	esi:	a ptr
    162 #	edi:	c ptr
    163 .globl	s_mpv_mul_d_add
    164 .type	s_mpv_mul_d_add,@function
    165 s_mpv_mul_d_add:
    166    GET    is_sse,%eax
    167    cmp    $0,%eax
    168    je     s_mpv_mul_d_add_x86
    169    jg     s_mpv_mul_d_add_sse2
    170    call   s_mpi_is_sse2
    171    PUT    %eax,is_sse
    172    cmp    $0,%eax
    173    jg     s_mpv_mul_d_add_sse2
    174 s_mpv_mul_d_add_x86:
    175    push   %ebp
    176    mov    %esp,%ebp
    177    sub    $28,%esp
    178    push   %edi
    179    push   %esi
    180    push   %ebx
    181    movl   $0,%ebx		# carry = 0
    182    mov    12(%ebp),%ecx	# ecx = a_len
    183    mov    20(%ebp),%edi
    184    cmp    $0,%ecx
    185    je     11f			# jmp if a_len == 0
    186    mov    8(%ebp),%esi		# esi = a
    187    cld
    188 10:
    189    lodsl			# eax = [ds:esi]; esi += 4
    190    mov    16(%ebp),%edx	# edx = b
    191    mull   %edx			# edx:eax = Phi:Plo = a_i * b
    192 
    193    add    %ebx,%eax		# add carry (%ebx) to edx:eax
    194    adc    $0,%edx
    195    mov    0(%edi),%ebx		# add in current word from *c
    196    add    %ebx,%eax		
    197    adc    $0,%edx
    198    mov    %edx,%ebx		# high half of product becomes next carry
    199 
    200    stosl			# [es:edi] = ax; edi += 4;
    201    dec    %ecx			# --a_len
    202    jnz    10b			# jmp if a_len != 0
    203 11:
    204    mov    %ebx,0(%edi)		# *c = carry
    205    pop    %ebx
    206    pop    %esi
    207    pop    %edi
    208    leave  
    209    ret    
    210    nop
    211 s_mpv_mul_d_add_sse2:
    212    push   %ebp
    213    mov    %esp,%ebp
    214    push   %edi
    215    push   %esi
    216    psubq  %mm2,%mm2		# carry = 0
    217    mov    12(%ebp),%ecx	# ecx = a_len
    218    movd   16(%ebp),%mm1	# mm1 = b
    219    mov    20(%ebp),%edi
    220    cmp    $0,%ecx
    221    je     16f			# jmp if a_len == 0
    222    mov    8(%ebp),%esi		# esi = a
    223    cld
    224 15:
    225    movd   0(%esi),%mm0         # mm0 = *a++
    226    add    $4,%esi
    227    pmuludq %mm1,%mm0           # mm0 = b * *a++
    228    paddq  %mm0,%mm2            # add the carry
    229    movd   0(%edi),%mm0
    230    paddq  %mm0,%mm2            # add the carry
    231    movd   %mm2,0(%edi)         # store the 32bit result
    232    add    $4,%edi
    233    psrlq  $32, %mm2		# save the carry
    234    dec    %ecx			# --a_len
    235    jnz    15b			# jmp if a_len != 0
    236 16:
    237    movd   %mm2,0(%edi)		# *c = carry
    238    emms
    239    pop    %esi
    240    pop    %edi
    241    leave  
    242    ret    
    243    nop
    244 
    245 #  ebp - 8:	caller's esi
    246 #  ebp - 4:	caller's edi
    247 #  ebp + 0:	caller's ebp
    248 #  ebp + 4:	return address
    249 #  ebp + 8:	a	argument
    250 #  ebp + 12:	a_len	argument
    251 #  ebp + 16:	b	argument
    252 #  ebp + 20:	c	argument
    253 #  registers:
    254 # 	eax:
    255 #	ebx:	carry
    256 #	ecx:	a_len
    257 #	edx:
    258 #	esi:	a ptr
    259 #	edi:	c ptr
    260 .globl	s_mpv_mul_d_add_prop
    261 .type	s_mpv_mul_d_add_prop,@function
    262 s_mpv_mul_d_add_prop:
    263    GET    is_sse,%eax
    264    cmp    $0,%eax
    265    je     s_mpv_mul_d_add_prop_x86
    266    jg     s_mpv_mul_d_add_prop_sse2
    267    call   s_mpi_is_sse2
    268    PUT    %eax,is_sse
    269    cmp    $0,%eax
    270    jg     s_mpv_mul_d_add_prop_sse2
    271 s_mpv_mul_d_add_prop_x86:
    272    push   %ebp
    273    mov    %esp,%ebp
    274    sub    $28,%esp
    275    push   %edi
    276    push   %esi
    277    push   %ebx
    278    movl   $0,%ebx		# carry = 0
    279    mov    12(%ebp),%ecx	# ecx = a_len
    280    mov    20(%ebp),%edi
    281    cmp    $0,%ecx
    282    je     21f			# jmp if a_len == 0
    283    cld
    284    mov    8(%ebp),%esi		# esi = a
    285 20:
    286    lodsl			# eax = [ds:esi]; esi += 4
    287    mov    16(%ebp),%edx	# edx = b
    288    mull   %edx			# edx:eax = Phi:Plo = a_i * b
    289 
    290    add    %ebx,%eax		# add carry (%ebx) to edx:eax
    291    adc    $0,%edx
    292    mov    0(%edi),%ebx		# add in current word from *c
    293    add    %ebx,%eax		
    294    adc    $0,%edx
    295    mov    %edx,%ebx		# high half of product becomes next carry
    296 
    297    stosl			# [es:edi] = ax; edi += 4;
    298    dec    %ecx			# --a_len
    299    jnz    20b			# jmp if a_len != 0
    300 21:
    301    cmp    $0,%ebx		# is carry zero?
    302    jz     23f
    303    mov    0(%edi),%eax		# add in current word from *c
    304    add	   %ebx,%eax
    305    stosl			# [es:edi] = ax; edi += 4;
    306    jnc    23f
    307 22:
    308    mov    0(%edi),%eax		# add in current word from *c
    309    adc	   $0,%eax
    310    stosl			# [es:edi] = ax; edi += 4;
    311    jc     22b
    312 23:
    313    pop    %ebx
    314    pop    %esi
    315    pop    %edi
    316    leave  
    317    ret    
    318    nop
    319 s_mpv_mul_d_add_prop_sse2:
    320    push   %ebp
    321    mov    %esp,%ebp
    322    push   %edi
    323    push   %esi
    324    push   %ebx
    325    psubq  %mm2,%mm2		# carry = 0
    326    mov    12(%ebp),%ecx	# ecx = a_len
    327    movd   16(%ebp),%mm1	# mm1 = b
    328    mov    20(%ebp),%edi
    329    cmp    $0,%ecx
    330    je     26f			# jmp if a_len == 0
    331    mov    8(%ebp),%esi		# esi = a
    332    cld
    333 25:
    334    movd   0(%esi),%mm0         # mm0 = *a++
    335    movd   0(%edi),%mm3		# fetch the sum
    336    add    $4,%esi
    337    pmuludq %mm1,%mm0           # mm0 = b * *a++
    338    paddq  %mm0,%mm2            # add the carry
    339    paddq  %mm3,%mm2            # add *c++
    340    movd   %mm2,0(%edi)         # store the 32bit result
    341    add    $4,%edi
    342    psrlq  $32, %mm2		# save the carry
    343    dec    %ecx			# --a_len
    344    jnz    25b			# jmp if a_len != 0
    345 26:
    346    movd   %mm2,%ebx
    347    cmp    $0,%ebx		# is carry zero?
    348    jz     28f
    349    mov    0(%edi),%eax
    350    add    %ebx, %eax
    351    stosl
    352    jnc    28f
    353 27:
    354    mov    0(%edi),%eax		# add in current word from *c
    355    adc	   $0,%eax
    356    stosl			# [es:edi] = ax; edi += 4;
    357    jc     27b
    358 28:
    359    emms
    360    pop    %ebx
    361    pop    %esi
    362    pop    %edi
    363    leave  
    364    ret    
    365    nop
    366 
    367 
    368 #  ebp - 20:	caller's esi
    369 #  ebp - 16:	caller's edi
    370 #  ebp - 12:	
    371 #  ebp - 8:	carry
    372 #  ebp - 4:	a_len	local
    373 #  ebp + 0:	caller's ebp
    374 #  ebp + 4:	return address
    375 #  ebp + 8:	pa	argument
    376 #  ebp + 12:	a_len	argument
    377 #  ebp + 16:	ps	argument
    378 #  ebp + 20:	
    379 #  registers:
    380 # 	eax:
    381 #	ebx:	carry
    382 #	ecx:	a_len
    383 #	edx:
    384 #	esi:	a ptr
    385 #	edi:	c ptr
    386 
    387 .globl	s_mpv_sqr_add_prop
    388 .type	s_mpv_sqr_add_prop,@function
    389 s_mpv_sqr_add_prop:
    390     GET   is_sse,%eax
    391     cmp    $0,%eax
    392     je     s_mpv_sqr_add_prop_x86
    393     jg     s_mpv_sqr_add_prop_sse2
    394     call   s_mpi_is_sse2
    395     PUT    %eax,is_sse
    396     cmp    $0,%eax
    397     jg     s_mpv_sqr_add_prop_sse2
    398 s_mpv_sqr_add_prop_x86:
    399     push   %ebp
    400     mov    %esp,%ebp
    401     sub    $12,%esp
    402     push   %edi
    403     push   %esi
    404     push   %ebx
    405     movl   $0,%ebx		# carry = 0
    406     mov    12(%ebp),%ecx	# a_len
    407     mov    16(%ebp),%edi	# edi = ps
    408     cmp    $0,%ecx
    409     je     31f			# jump if a_len == 0
    410     cld
    411     mov    8(%ebp),%esi	# esi = pa
    412 30:
    413     lodsl			# %eax = [ds:si]; si += 4;
    414     mull   %eax
    415 
    416     add    %ebx,%eax		# add "carry"
    417     adc    $0,%edx
    418     mov    0(%edi),%ebx
    419     add    %ebx,%eax		# add low word from result
    420     mov    4(%edi),%ebx
    421     stosl			# [es:di] = %eax; di += 4;
    422     adc    %ebx,%edx		# add high word from result
    423     movl   $0,%ebx
    424     mov    %edx,%eax
    425     adc    $0,%ebx
    426     stosl			# [es:di] = %eax; di += 4;
    427     dec    %ecx		# --a_len
    428     jnz    30b			# jmp if a_len != 0
    429 31:
    430    cmp    $0,%ebx		# is carry zero?
    431    jz     34f
    432    mov    0(%edi),%eax		# add in current word from *c
    433    add	   %ebx,%eax
    434    stosl			# [es:edi] = ax; edi += 4;
    435    jnc    34f
    436 32:
    437    mov    0(%edi),%eax		# add in current word from *c
    438    adc	   $0,%eax
    439    stosl			# [es:edi] = ax; edi += 4;
    440    jc     32b
    441 34:
    442    pop    %ebx
    443    pop    %esi
    444    pop    %edi
    445    leave  
    446    ret    
    447    nop
    448 s_mpv_sqr_add_prop_sse2:
    449    push   %ebp
    450    mov    %esp,%ebp
    451    push   %edi
    452    push   %esi
    453    push   %ebx
    454    psubq  %mm2,%mm2		# carry = 0
    455    mov    12(%ebp),%ecx	# ecx = a_len
    456    mov    16(%ebp),%edi
    457    cmp    $0,%ecx
    458    je     36f			# jmp if a_len == 0
    459    mov    8(%ebp),%esi		# esi = a
    460    cld
    461 35:
    462    movd   0(%esi),%mm0        # mm0 = *a
    463    movd   0(%edi),%mm3	       # fetch the sum
    464    add	   $4,%esi
    465    pmuludq %mm0,%mm0          # mm0 = sqr(a)
    466    paddq  %mm0,%mm2           # add the carry
    467    paddq  %mm3,%mm2           # add the low word
    468    movd   4(%edi),%mm3
    469    movd   %mm2,0(%edi)        # store the 32bit result
    470    psrlq  $32, %mm2	
    471    paddq  %mm3,%mm2           # add the high word
    472    movd   %mm2,4(%edi)        # store the 32bit result
    473    psrlq  $32, %mm2	       # save the carry.
    474    add    $8,%edi
    475    dec    %ecx			# --a_len
    476    jnz    35b			# jmp if a_len != 0
    477 36:
    478    movd   %mm2,%ebx
    479    cmp    $0,%ebx		# is carry zero?
    480    jz     38f
    481    mov    0(%edi),%eax
    482    add    %ebx, %eax
    483    stosl
    484    jnc    38f
    485 37:
    486    mov    0(%edi),%eax		# add in current word from *c
    487    adc	   $0,%eax
    488    stosl			# [es:edi] = ax; edi += 4;
    489    jc     37b
    490 38:
    491    emms
    492    pop    %ebx
    493    pop    %esi
    494    pop    %edi
    495    leave  
    496    ret    
    497    nop
    498 
    499 #
    500 # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
    501 # so its high bit is 1.   This code is from NSPR.
    502 #
    503 # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
    504 # 		          mp_digit *qp, mp_digit *rp)
    505 
    506 #  esp +  0:   Caller's ebx
    507 #  esp +  4:	return address
    508 #  esp +  8:	Nhi	argument
    509 #  esp + 12:	Nlo	argument
    510 #  esp + 16:	divisor	argument
    511 #  esp + 20:	qp	argument
    512 #  esp + 24:   rp	argument
    513 #  registers:
    514 # 	eax:
    515 #	ebx:	carry
    516 #	ecx:	a_len
    517 #	edx:
    518 #	esi:	a ptr
    519 #	edi:	c ptr
    520 # 
    521 
    522 .globl	s_mpv_div_2dx1d
    523 .type	s_mpv_div_2dx1d,@function
    524 s_mpv_div_2dx1d:
    525       push   %ebx
    526       mov    8(%esp),%edx
    527       mov    12(%esp),%eax
    528       mov    16(%esp),%ebx
    529       div    %ebx
    530       mov    20(%esp),%ebx
    531       mov    %eax,0(%ebx)
    532       mov    24(%esp),%ebx
    533       mov    %edx,0(%ebx)
    534       xor    %eax,%eax		# return zero
    535       pop    %ebx
    536       ret    
    537       nop
    538  
    539 # Magic indicating no need for an executable stack
    540 .section .note.GNU-stack, "", @progbits
    541 .previous
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE