SSE.cpp (8384B)
1 /* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 /* compile-time and runtime tests for whether to use SSE instructions */ 7 8 #include "SSE.h" 9 10 #include "mozilla/Attributes.h" 11 12 #ifdef HAVE_CPUID_H 13 // cpuid.h is available on gcc 4.3 and higher on i386 and x86_64 14 # include <cpuid.h> 15 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64)) 16 // MSVC 2005 or newer on x86-32 or x86-64 17 # include <intrin.h> 18 #endif 19 20 namespace { 21 22 // SSE.h has parallel #ifs which declare MOZILLA_SSE_HAVE_CPUID_DETECTION. 23 // We can't declare these functions in the header file, however, because 24 // <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to 25 // include both SSE.h and <windows.h>. 26 27 #ifdef HAVE_CPUID_H 28 29 enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 }; 30 31 static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg, 32 unsigned int bits) { 33 unsigned int regs[4]; 34 unsigned int eax, ebx, ecx, edx; 35 unsigned max = __get_cpuid_max(level & 0x80000000u, nullptr); 36 if (level > max) return false; 37 __cpuid_count(level, 0, eax, ebx, ecx, edx); 38 regs[0] = eax; 39 regs[1] = ebx; 40 regs[2] = ecx; 41 regs[3] = edx; 42 return (regs[reg] & bits) == bits; 43 } 44 45 static bool has_cpuid_bits_ex(unsigned int level, CPUIDRegister reg, 46 unsigned int bits) { 47 unsigned int regs[4]; 48 unsigned int eax, ebx, ecx, edx; 49 unsigned max = __get_cpuid_max(level & 0x80000000u, nullptr); 50 if (level > max) return false; 51 __cpuid_count(level, 1, eax, ebx, ecx, edx); 52 regs[0] = eax; 53 regs[1] = ebx; 54 regs[2] = ecx; 55 regs[3] = edx; 56 return (regs[reg] & bits) == bits; 57 } 58 59 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64)) 60 61 enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 }; 62 63 static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg, 64 unsigned int bits) { 65 // Check that the level in question is supported. 66 int regs[4]; 67 __cpuid_ex(regs, level & 0x80000000u, 1); 68 if (unsigned(regs[0]) < level) return false; 69 70 // "The __cpuid intrinsic clears the ECX register before calling the cpuid 71 // instruction." 72 __cpuid_ex(regs, level, 1); 73 return (unsigned(regs[reg]) & bits) == bits; 74 } 75 76 #elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && \ 77 (defined(__i386) || defined(__x86_64__)) 78 79 enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 }; 80 81 # ifdef __i386 82 static void moz_cpuid(int CPUInfo[4], int InfoType) { 83 asm("xchg %esi, %ebx\n" 84 "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0) 85 "cpuid\n" 86 "movl %eax, (%edi)\n" 87 "movl %ebx, 4(%edi)\n" 88 "movl %ecx, 8(%edi)\n" 89 "movl %edx, 12(%edi)\n" 90 "xchg %esi, %ebx\n" 91 : 92 : "a"(InfoType), // %eax 93 "D"(CPUInfo) // %edi 94 : "%ecx", "%edx", "%esi"); 95 } 96 static void moz_cpuid_ex(int CPUInfo[4], int InfoType) { 97 asm("xchg %esi, %ebx\n" 98 "movl 1, %ecx\n" 99 "cpuid\n" 100 "movl %eax, (%edi)\n" 101 "movl %ebx, 4(%edi)\n" 102 "movl %ecx, 8(%edi)\n" 103 "movl %edx, 12(%edi)\n" 104 "xchg %esi, %ebx\n" 105 : 106 : "a"(InfoType), // %eax 107 "D"(CPUInfo) // %edi 108 : "%ecx", "%edx", "%esi"); 109 } 110 # else 111 static void moz_cpuid(int CPUInfo[4], int InfoType) { 112 asm("xchg %rsi, %rbx\n" 113 "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0) 114 "cpuid\n" 115 "movl %eax, (%rdi)\n" 116 "movl %ebx, 4(%rdi)\n" 117 "movl %ecx, 8(%rdi)\n" 118 "movl %edx, 12(%rdi)\n" 119 "xchg %rsi, %rbx\n" 120 : 121 : "a"(InfoType), // %eax 122 "D"(CPUInfo) // %rdi 123 : "%ecx", "%edx", "%rsi"); 124 } 125 static void moz_cpuid_ex(int CPUInfo[4], int InfoType) { 126 asm("xchg %rsi, %rbx\n" 127 "movl 1, %ecx\n" 128 "cpuid\n" 129 "movl %eax, (%rdi)\n" 130 "movl %ebx, 4(%rdi)\n" 131 "movl %ecx, 8(%rdi)\n" 132 "movl %edx, 12(%rdi)\n" 133 "xchg %rsi, %rbx\n" 134 : 135 : "a"(InfoType), // %eax 136 "D"(CPUInfo) // %rdi 137 : "%ecx", "%edx", "%rsi"); 138 } 139 # endif 140 141 static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg, 142 unsigned int bits) { 143 // Check that the level in question is supported. 144 volatile int regs[4]; 145 moz_cpuid((int*)regs, level & 0x80000000u); 146 if (unsigned(regs[0]) < level) return false; 147 148 moz_cpuid((int*)regs, level); 149 return (unsigned(regs[reg]) & bits) == bits; 150 } 151 152 static bool has_cpuid_bits_ex(unsigned int level, CPUIDRegister reg, 153 unsigned int bits) { 154 // Check that the level in question is supported. 155 volatile int regs[4]; 156 moz_cpuid_ex((int*)regs, level & 0x80000000u); 157 if (unsigned(regs[0]) < level) return false; 158 159 moz_cpuid_ex((int*)regs, level); 160 return (unsigned(regs[reg]) & bits) == bits; 161 } 162 163 #endif // end CPUID declarations 164 165 } // namespace 166 167 namespace mozilla { 168 169 namespace sse_private { 170 171 #if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) 172 173 # if !defined(MOZILLA_PRESUME_MMX) 174 MOZ_RUNINIT bool mmx_enabled = has_cpuid_bits(1u, edx, (1u << 23)); 175 # endif 176 177 # if !defined(MOZILLA_PRESUME_SSE) 178 MOZ_RUNINIT bool sse_enabled = has_cpuid_bits(1u, edx, (1u << 25)); 179 # endif 180 181 # if !defined(MOZILLA_PRESUME_SSE2) 182 MOZ_RUNINIT bool sse2_enabled = has_cpuid_bits(1u, edx, (1u << 26)); 183 # endif 184 185 # if !defined(MOZILLA_PRESUME_SSE3) 186 MOZ_RUNINIT bool sse3_enabled = has_cpuid_bits(1u, ecx, (1u << 0)); 187 # endif 188 189 # if !defined(MOZILLA_PRESUME_SSSE3) 190 MOZ_RUNINIT bool ssse3_enabled = has_cpuid_bits(1u, ecx, (1u << 9)); 191 # endif 192 193 # if !defined(MOZILLA_PRESUME_SSE4A) 194 MOZ_RUNINIT bool sse4a_enabled = has_cpuid_bits(0x80000001u, ecx, (1u << 6)); 195 # endif 196 197 # if !defined(MOZILLA_PRESUME_SSE4_1) 198 MOZ_RUNINIT bool sse4_1_enabled = has_cpuid_bits(1u, ecx, (1u << 19)); 199 # endif 200 201 # if !defined(MOZILLA_PRESUME_SSE4_2) 202 MOZ_RUNINIT bool sse4_2_enabled = has_cpuid_bits(1u, ecx, (1u << 20)); 203 # endif 204 205 # if !defined(MOZILLA_PRESUME_FMA3) 206 MOZ_RUNINIT bool fma3_enabled = has_cpuid_bits(1u, ecx, (1u << 12)); 207 # endif 208 209 # if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2) 210 static bool has_avx() { 211 # if defined(MOZILLA_PRESUME_AVX) 212 return true; 213 # else 214 const unsigned AVX = 1u << 28; 215 const unsigned OSXSAVE = 1u << 27; 216 const unsigned XSAVE = 1u << 26; 217 218 const unsigned XMM_STATE = 1u << 1; 219 const unsigned YMM_STATE = 1u << 2; 220 const unsigned AVX_STATE = XMM_STATE | YMM_STATE; 221 222 return has_cpuid_bits(1u, ecx, AVX | OSXSAVE | XSAVE) && 223 // ensure the OS supports XSAVE of YMM registers 224 (xgetbv(0) & AVX_STATE) == AVX_STATE; 225 # endif // MOZILLA_PRESUME_AVX 226 } 227 # endif // !MOZILLA_PRESUME_AVX || !MOZILLA_PRESUME_AVX2 228 229 # if !defined(MOZILLA_PRESUME_AVX) 230 MOZ_RUNINIT bool avx_enabled = has_avx(); 231 # endif 232 233 # if !defined(MOZILLA_PRESUME_AVX2) 234 MOZ_RUNINIT bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u << 5)); 235 # endif 236 237 # if !defined(MOZILLA_PRESUME_AVXVNNI) 238 MOZ_RUNINIT bool avxvnni_enabled = has_cpuid_bits_ex(7u, eax, (1u << 4)); 239 # endif 240 241 # if !defined(MOZILLA_PRESUME_AES) 242 MOZ_RUNINIT bool aes_enabled = has_cpuid_bits(1u, ecx, (1u << 25)); 243 # endif 244 245 # if !defined(MOZILLA_PRESUME_SHA) 246 MOZ_RUNINIT bool sha_enabled = has_cpuid_bits_ex(7u, ebx, (1u << 29)); 247 # endif 248 249 # if !defined(MOZILLA_PRESUME_SHA512) 250 MOZ_RUNINIT bool sha512_enabled = has_cpuid_bits_ex(7u, eax, (1u << 0)); 251 # endif 252 253 // To accommodate old QEMU, put BMI behind `has_avx`. 254 // https://searchfox.org/firefox-main/rev/938e8f38c6765875e998d5c2965ad5864f5a5ee2/js/src/jit/x86-shared/Assembler-x86-shared.cpp#380-381 255 256 # if !defined(MOZILLA_PRESUME_BMI) 257 MOZ_RUNINIT bool bmi_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u << 3)); 258 # endif 259 260 # if !defined(MOZILLA_PRESUME_BMI2) 261 MOZ_RUNINIT bool bmi2_enabled = has_avx() && 262 has_cpuid_bits(7u, ebx, (1u << 3)) && 263 has_cpuid_bits(7u, ebx, (1u << 8)); 264 # endif 265 266 MOZ_RUNINIT bool has_constant_tsc = has_cpuid_bits(0x80000007u, edx, (1u << 8)); 267 268 #endif 269 270 } // namespace sse_private 271 272 #ifdef HAVE_CPUID_H 273 274 uint64_t xgetbv(uint32_t xcr) { 275 uint32_t eax, edx; 276 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr)); 277 return (uint64_t)(edx) << 32 | eax; 278 } 279 280 #endif 281 282 } // namespace mozilla