tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

jsimdcpu.asm (2326B)


      1 ;
      2 ; jsimdcpu.asm - SIMD instruction support check
      3 ;
      4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
      5 ; Copyright (C) 2016, D. R. Commander.
      6 ; Copyright (C) 2023, Aliaksiej Kandracienka.
      7 ;
      8 ; Based on
      9 ; x86 SIMD extension for IJG JPEG library
     10 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
     11 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
     12 ;
     13 ; This file should be assembled with NASM (Netwide Assembler) or Yasm.
     14 
     15 %include "jsimdext.inc"
     16 
     17 ; --------------------------------------------------------------------------
     18    SECTION     SEG_TEXT
     19    BITS        64
     20 ;
     21 ; Check if the CPU supports SIMD instructions
     22 ;
     23 ; GLOBAL(unsigned int)
     24 ; jpeg_simd_cpu_support(void)
     25 ;
     26 
     27    align       32
     28    GLOBAL_FUNCTION(jpeg_simd_cpu_support)
     29 
     30 EXTN(jpeg_simd_cpu_support):
     31    push        rbp
     32    mov         rbp, rsp
     33    push        rbx
     34    push        rdi
     35 
     36    xor         rdi, rdi                ; simd support flag
     37 
     38    ; Assume that all x86-64 processors support SSE & SSE2 instructions
     39    or          rdi, JSIMD_SSE2
     40    or          rdi, JSIMD_SSE
     41 
     42    ; Check whether CPUID leaf 07H is supported
     43    ; (leaf 07H is used to check for AVX2 instruction support)
     44    mov         rax, 0
     45    cpuid
     46    cmp         rax, 7
     47    jl          short .return           ; Maximum leaf < 07H
     48 
     49    ; Check for AVX2 instruction support
     50    mov         rax, 7
     51    xor         rcx, rcx
     52    cpuid
     53    mov         rax, rbx                ; rax = Extended feature flags
     54 
     55    test        rax, 1<<5               ; bit5:AVX2
     56    jz          short .return
     57 
     58    ; Check for AVX2 O/S support
     59    mov         rax, 1
     60    xor         rcx, rcx
     61    cpuid
     62    test        rcx, 1<<27
     63    jz          short .return           ; O/S does not support XSAVE
     64    test        rcx, 1<<28
     65    jz          short .return           ; CPU does not support AVX2
     66 
     67    xor         rcx, rcx
     68    xgetbv
     69    and         rax, 6
     70    cmp         rax, 6                  ; O/S does not manage XMM/YMM state
     71                                        ; using XSAVE
     72    jnz         short .return
     73 
     74    or          rdi, JSIMD_AVX2
     75 
     76 .return:
     77    mov         rax, rdi
     78 
     79    pop         rdi
     80    pop         rbx
     81    pop         rbp
     82    ret
     83 
     84 ; For some reason, the OS X linker does not honor the request to align the
     85 ; segment unless we do this.
     86    align       32