jsimdcpu.asm (3758B)
;
; jsimdcpu.asm - SIMD instruction support check
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler) or Yasm.

%include "jsimdext.inc"

; --------------------------------------------------------------------------
    SECTION     SEG_TEXT
    BITS        32
;
; Check if the CPU supports SIMD instructions
;
; GLOBAL(unsigned int)
; jpeg_simd_cpu_support(void)
;
; Out:   eax = bitwise OR of the JSIMD_MMX / JSIMD_SSE / JSIMD_SSE2 /
;              JSIMD_3DNOW / JSIMD_AVX2 flags for every extension detected
;              (0 if CPUID is unavailable or reports a maximum leaf of 0).
;              The JSIMD_* constants are not defined in this file;
;              presumably they come from jsimdext.inc.
; Saved: ebx, edi (pushed on entry, popped before returning)
; Clobb: ecx, edx, flags (need not be preserved; esi is unused)
;
; edi accumulates the result for the whole function because every CPUID
; invocation overwrites eax, ebx, ecx and edx.
;

    align       32
    GLOBAL_FUNCTION(jpeg_simd_cpu_support)

EXTN(jpeg_simd_cpu_support):
    push        ebx
    push        edi

    xor         edi, edi                ; simd support flag

    ; CPUID is available only if the ID bit (bit 21) of EFLAGS can be toggled.
    pushfd
    pop         eax
    mov         edx, eax                ; edx = original EFLAGS
    xor         eax, 1<<21              ; flip ID bit in EFLAGS
    push        eax
    popfd
    pushfd
    pop         eax
    xor         eax, edx                ; eax = bits that actually changed
    jz          near .return            ; CPUID is not supported

    ; Check whether CPUID leaf 07H is supported
    ; (leaf 07H is used to check for AVX2 instruction support)
    xor         eax, eax
    cpuid                               ; eax = maximum standard leaf
    test        eax, eax
    jz          near .return
    cmp         eax, 7
    jb          short .no_avx2          ; Maximum leaf < 07H
                                        ; (leaf numbers are unsigned)

    ; Check for AVX2 instruction support
    mov         eax, 7
    xor         ecx, ecx                ; sub-leaf 0
    cpuid
    test        ebx, 1<<5               ; bit5:AVX2
    jz          short .no_avx2

    ; Check for AVX2 O/S support
    mov         eax, 1
    xor         ecx, ecx
    cpuid
    test        ecx, 1<<27              ; bit27:OSXSAVE
    jz          short .no_avx2          ; O/S does not support XSAVE
    test        ecx, 1<<28              ; bit28:AVX
    jz          short .no_avx2          ; CPU does not support AVX

    xor         ecx, ecx                ; select XCR0
    xgetbv
    and         eax, 6                  ; keep bit1 (XMM) and bit2 (YMM)
    cmp         eax, 6                  ; O/S does not manage XMM/YMM state
                                        ; using XSAVE
    jnz         short .no_avx2

    or          edi, JSIMD_AVX2
.no_avx2:

    ; Check CPUID leaf 01H for MMX, SSE, and SSE2 support
    mov         eax, 1
    cpuid                               ; edx = Standard feature flags

    ; Check for MMX instruction support
    test        edx, 1<<23              ; bit23:MMX
    jz          short .no_mmx
    or          edi, byte JSIMD_MMX
.no_mmx:
    test        edx, 1<<25              ; bit25:SSE
    jz          short .no_sse
    or          edi, byte JSIMD_SSE
.no_sse:
    test        edx, 1<<26              ; bit26:SSE2
    jz          short .no_sse2
    or          edi, byte JSIMD_SSE2
.no_sse2:

    ; Check for 3DNow! instruction support
    mov         eax, 0x80000000
    cpuid                               ; eax = maximum extended leaf
    cmp         eax, 0x80000000
    jbe         short .return           ; leaf 0x80000001 is not available

    mov         eax, 0x80000001
    cpuid                               ; edx = Extended feature flags

    test        edx, 1<<31              ; bit31:3DNow!(vendor independent)
    jz          short .no_3dnow
    or          edi, byte JSIMD_3DNOW
.no_3dnow:

.return:
    mov         eax, edi                ; return the accumulated flags

    pop         edi
    pop         ebx
    ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
    align       32