tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

jsimdcpu.asm (3758B)


      1 ;
      2 ; jsimdcpu.asm - SIMD instruction support check
      3 ;
      4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
      5 ; Copyright (C) 2016, D. R. Commander.
      6 ;
      7 ; Based on the x86 SIMD extension for IJG JPEG library
      8 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
      9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
     10 ;
     11 ; This file should be assembled with NASM (Netwide Assembler) or Yasm.
     12 
     13 %include "jsimdext.inc"
     14 
     15 ; --------------------------------------------------------------------------
     16    SECTION     SEG_TEXT
     17    BITS        32
     18 ;
     19 ; Check if the CPU supports SIMD instructions
     20 ;
     21 ; GLOBAL(unsigned int)
     22 ; jpeg_simd_cpu_support(void)
     23 ;
     24 
     25    align       32
     26    GLOBAL_FUNCTION(jpeg_simd_cpu_support)
     27 
     28 EXTN(jpeg_simd_cpu_support):
     29    push        ebx
     30 ;   push        ecx                     ; need not be preserved
     31 ;   push        edx                     ; need not be preserved
     32 ;   push        esi                     ; unused
     33    push        edi
     34 
     35    xor         edi, edi                ; simd support flag
     36 
     37    pushfd
     38    pop         eax
     39    mov         edx, eax
     40    xor         eax, 1<<21              ; flip ID bit in EFLAGS
     41    push        eax
     42    popfd
     43    pushfd
     44    pop         eax
     45    xor         eax, edx
     46    jz          near .return            ; CPUID is not supported
     47 
     48    ; Check whether CPUID leaf 07H is supported
     49    ; (leaf 07H is used to check for AVX2 instruction support)
     50    xor         eax, eax
     51    cpuid
     52    test        eax, eax
     53    jz          near .return
     54    cmp         eax, 7
     55    jl          short .no_avx2          ; Maximum leaf < 07H
     56 
     57    ; Check for AVX2 instruction support
     58    mov         eax, 7
     59    xor         ecx, ecx
     60    cpuid
     61    mov         eax, ebx
     62    test        eax, 1<<5               ; bit5:AVX2
     63    jz          short .no_avx2
     64 
     65    ; Check for AVX2 O/S support
     66    mov         eax, 1
     67    xor         ecx, ecx
     68    cpuid
     69    test        ecx, 1<<27
     70    jz          short .no_avx2          ; O/S does not support XSAVE
     71    test        ecx, 1<<28
     72    jz          short .no_avx2          ; CPU does not support AVX2
     73 
     74    xor         ecx, ecx
     75    xgetbv
     76    and         eax, 6
     77    cmp         eax, 6                  ; O/S does not manage XMM/YMM state
     78                                        ; using XSAVE
     79    jnz         short .no_avx2
     80 
     81    or          edi, JSIMD_AVX2
     82 .no_avx2:
     83 
     84    ; Check CPUID leaf 01H for MMX, SSE, and SSE2 support
     85    xor         eax, eax
     86    inc         eax
     87    cpuid
     88    mov         eax, edx                ; eax = Standard feature flags
     89 
     90    ; Check for MMX instruction support
     91    test        eax, 1<<23              ; bit23:MMX
     92    jz          short .no_mmx
     93    or          edi, byte JSIMD_MMX
     94 .no_mmx:
     95    test        eax, 1<<25              ; bit25:SSE
     96    jz          short .no_sse
     97    or          edi, byte JSIMD_SSE
     98 .no_sse:
     99    test        eax, 1<<26              ; bit26:SSE2
    100    jz          short .no_sse2
    101    or          edi, byte JSIMD_SSE2
    102 .no_sse2:
    103 
    104    ; Check for 3DNow! instruction support
    105    mov         eax, 0x80000000
    106    cpuid
    107    cmp         eax, 0x80000000
    108    jbe         short .return
    109 
    110    mov         eax, 0x80000001
    111    cpuid
    112    mov         eax, edx                ; eax = Extended feature flags
    113 
    114    test        eax, 1<<31              ; bit31:3DNow!(vendor independent)
    115    jz          short .no_3dnow
    116    or          edi, byte JSIMD_3DNOW
    117 .no_3dnow:
    118 
    119 .return:
    120    mov         eax, edi
    121 
    122    pop         edi
    123 ;   pop         esi                     ; unused
    124 ;   pop         edx                     ; need not be preserved
    125 ;   pop         ecx                     ; need not be preserved
    126    pop         ebx
    127    ret
    128 
    129 ; For some reason, the OS X linker does not honor the request to align the
    130 ; segment unless we do this.
    131    align       32