// cpu_detect.cc (10236B)
1 // Copyright 2022 The Abseil Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "absl/crc/internal/cpu_detect.h" 16 17 #include <cstdint> 18 #include <string> 19 20 #include "absl/base/config.h" 21 #include "absl/types/optional.h" // IWYU pragma: keep 22 23 #if defined(__aarch64__) && defined(__linux__) 24 #include <asm/hwcap.h> 25 #include <sys/auxv.h> 26 #endif 27 28 #if defined(__aarch64__) && defined(__APPLE__) 29 #if defined(__has_include) && __has_include(<arm/cpu_capabilities_public.h>) 30 #include <arm/cpu_capabilities_public.h> 31 #endif 32 #include <sys/sysctl.h> 33 #include <sys/types.h> 34 #endif 35 36 #if defined(_WIN32) || defined(_WIN64) 37 #include <intrin.h> 38 #endif 39 40 #if defined(__x86_64__) || defined(_M_X64) 41 #if ABSL_HAVE_BUILTIN(__cpuid) 42 // MSVC-equivalent __cpuid intrinsic declaration for clang-like compilers 43 // for non-Windows build environments. 44 extern void __cpuid(int[4], int); 45 #elif !defined(_WIN32) && !defined(_WIN64) 46 // MSVC defines this function for us. 
47 // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex 48 static void __cpuid(int cpu_info[4], int info_type) { 49 __asm__ volatile("cpuid \n\t" 50 : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), 51 "=d"(cpu_info[3]) 52 : "a"(info_type), "c"(0)); 53 } 54 #endif // !defined(_WIN32) && !defined(_WIN64) 55 #endif // defined(__x86_64__) || defined(_M_X64) 56 57 namespace absl { 58 ABSL_NAMESPACE_BEGIN 59 namespace crc_internal { 60 61 #if defined(__x86_64__) || defined(_M_X64) 62 63 namespace { 64 65 enum class Vendor { 66 kUnknown, 67 kIntel, 68 kAmd, 69 }; 70 71 Vendor GetVendor() { 72 // Get the vendor string (issue CPUID with eax = 0). 73 int cpu_info[4]; 74 __cpuid(cpu_info, 0); 75 76 std::string vendor; 77 vendor.append(reinterpret_cast<char*>(&cpu_info[1]), 4); 78 vendor.append(reinterpret_cast<char*>(&cpu_info[3]), 4); 79 vendor.append(reinterpret_cast<char*>(&cpu_info[2]), 4); 80 if (vendor == "GenuineIntel") { 81 return Vendor::kIntel; 82 } else if (vendor == "AuthenticAMD") { 83 return Vendor::kAmd; 84 } else { 85 return Vendor::kUnknown; 86 } 87 } 88 89 CpuType GetIntelCpuType() { 90 // To get general information and extended features we send eax = 1 and 91 // ecx = 0 to cpuid. The response is returned in eax, ebx, ecx and edx. 92 // (See Intel 64 and IA-32 Architectures Software Developer's Manual 93 // Volume 2A: Instruction Set Reference, A-M CPUID). 
94 // https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-2a-manual.html 95 // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex 96 int cpu_info[4]; 97 __cpuid(cpu_info, 1); 98 99 // Response in eax bits as follows: 100 // 0-3 (stepping id) 101 // 4-7 (model number), 102 // 8-11 (family code), 103 // 12-13 (processor type), 104 // 16-19 (extended model) 105 // 20-27 (extended family) 106 107 int family = (cpu_info[0] >> 8) & 0x0f; 108 int model_num = (cpu_info[0] >> 4) & 0x0f; 109 int ext_family = (cpu_info[0] >> 20) & 0xff; 110 int ext_model_num = (cpu_info[0] >> 16) & 0x0f; 111 112 int brand_id = cpu_info[1] & 0xff; 113 114 // Process the extended family and model info if necessary 115 if (family == 0x0f) { 116 family += ext_family; 117 } 118 119 if (family == 0x0f || family == 0x6) { 120 model_num += (ext_model_num << 4); 121 } 122 123 switch (brand_id) { 124 case 0: // no brand ID, so parse CPU family/model 125 switch (family) { 126 case 6: // Most PentiumIII processors are in this category 127 switch (model_num) { 128 case 0x2c: // Westmere: Gulftown 129 return CpuType::kIntelWestmere; 130 case 0x2d: // Sandybridge 131 return CpuType::kIntelSandybridge; 132 case 0x3e: // Ivybridge 133 return CpuType::kIntelIvybridge; 134 case 0x3c: // Haswell (client) 135 case 0x3f: // Haswell 136 return CpuType::kIntelHaswell; 137 case 0x4f: // Broadwell 138 case 0x56: // BroadwellDE 139 return CpuType::kIntelBroadwell; 140 case 0x55: // Skylake Xeon 141 if ((cpu_info[0] & 0x0f) < 5) { // stepping < 5 is skylake 142 return CpuType::kIntelSkylakeXeon; 143 } else { // stepping >= 5 is cascadelake 144 return CpuType::kIntelCascadelakeXeon; 145 } 146 case 0x5e: // Skylake (client) 147 return CpuType::kIntelSkylake; 148 default: 149 return CpuType::kUnknown; 150 } 151 default: 152 return CpuType::kUnknown; 153 } 154 default: 155 return CpuType::kUnknown; 156 } 157 } 158 159 CpuType GetAmdCpuType() { 160 // 
To get general information and extended features we send eax = 1 and 161 // ecx = 0 to cpuid. The response is returned in eax, ebx, ecx and edx. 162 // (See Intel 64 and IA-32 Architectures Software Developer's Manual 163 // Volume 2A: Instruction Set Reference, A-M CPUID). 164 // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex 165 int cpu_info[4]; 166 __cpuid(cpu_info, 1); 167 168 // Response in eax bits as follows: 169 // 0-3 (stepping id) 170 // 4-7 (model number), 171 // 8-11 (family code), 172 // 12-13 (processor type), 173 // 16-19 (extended model) 174 // 20-27 (extended family) 175 176 int family = (cpu_info[0] >> 8) & 0x0f; 177 int model_num = (cpu_info[0] >> 4) & 0x0f; 178 int ext_family = (cpu_info[0] >> 20) & 0xff; 179 int ext_model_num = (cpu_info[0] >> 16) & 0x0f; 180 181 if (family == 0x0f) { 182 family += ext_family; 183 model_num += (ext_model_num << 4); 184 } 185 186 switch (family) { 187 case 0x17: 188 switch (model_num) { 189 case 0x0: // Stepping Ax 190 case 0x1: // Stepping Bx 191 return CpuType::kAmdNaples; 192 case 0x30: // Stepping Ax 193 case 0x31: // Stepping Bx 194 return CpuType::kAmdRome; 195 default: 196 return CpuType::kUnknown; 197 } 198 break; 199 case 0x19: 200 switch (model_num) { 201 case 0x0: // Stepping Ax 202 case 0x1: // Stepping B0 203 return CpuType::kAmdMilan; 204 case 0x10: // Stepping A0 205 case 0x11: // Stepping B0 206 return CpuType::kAmdGenoa; 207 case 0x44: // Stepping A0 208 return CpuType::kAmdRyzenV3000; 209 default: 210 return CpuType::kUnknown; 211 } 212 break; 213 default: 214 return CpuType::kUnknown; 215 } 216 } 217 218 } // namespace 219 220 CpuType GetCpuType() { 221 switch (GetVendor()) { 222 case Vendor::kIntel: 223 return GetIntelCpuType(); 224 case Vendor::kAmd: 225 return GetAmdCpuType(); 226 default: 227 return CpuType::kUnknown; 228 } 229 } 230 231 bool SupportsArmCRC32PMULL() { return false; } 232 233 #elif defined(__aarch64__) && defined(__linux__) 234 235 #ifndef HWCAP_CPUID 236 
#define HWCAP_CPUID (1 << 11) 237 #endif 238 239 #define ABSL_INTERNAL_AARCH64_ID_REG_READ(id, val) \ 240 asm("mrs %0, " #id : "=r"(val)) 241 242 CpuType GetCpuType() { 243 // MIDR_EL1 is not visible to EL0, however the access will be emulated by 244 // linux if AT_HWCAP has HWCAP_CPUID set. 245 // 246 // This method will be unreliable on heterogeneous computing systems (ex: 247 // big.LITTLE) since the value of MIDR_EL1 will change based on the calling 248 // thread. 249 uint64_t hwcaps = getauxval(AT_HWCAP); 250 if (hwcaps & HWCAP_CPUID) { 251 uint64_t midr = 0; 252 ABSL_INTERNAL_AARCH64_ID_REG_READ(MIDR_EL1, midr); 253 uint32_t implementer = (midr >> 24) & 0xff; 254 uint32_t part_number = (midr >> 4) & 0xfff; 255 switch (implementer) { 256 case 0x41: 257 switch (part_number) { 258 case 0xd0c: return CpuType::kArmNeoverseN1; 259 case 0xd40: return CpuType::kArmNeoverseV1; 260 case 0xd49: return CpuType::kArmNeoverseN2; 261 case 0xd4f: return CpuType::kArmNeoverseV2; 262 default: 263 return CpuType::kUnknown; 264 } 265 break; 266 case 0xc0: 267 switch (part_number) { 268 case 0xac3: return CpuType::kAmpereSiryn; 269 default: 270 return CpuType::kUnknown; 271 } 272 break; 273 default: 274 return CpuType::kUnknown; 275 } 276 } 277 return CpuType::kUnknown; 278 } 279 280 bool SupportsArmCRC32PMULL() { 281 #if defined(HWCAP_CRC32) && defined(HWCAP_PMULL) 282 uint64_t hwcaps = getauxval(AT_HWCAP); 283 return (hwcaps & HWCAP_CRC32) && (hwcaps & HWCAP_PMULL); 284 #else 285 return false; 286 #endif 287 } 288 289 #elif defined(__aarch64__) && defined(__APPLE__) 290 291 CpuType GetCpuType() { return CpuType::kUnknown; } 292 293 template <typename T> 294 static absl::optional<T> ReadSysctlByName(const char* name) { 295 T val; 296 size_t val_size = sizeof(T); 297 int ret = sysctlbyname(name, &val, &val_size, nullptr, 0); 298 if (ret == -1) { 299 return absl::nullopt; 300 } 301 return val; 302 } 303 304 bool SupportsArmCRC32PMULL() { 305 // Newer XNU kernels support querying 
all capabilities in a single 306 // sysctlbyname. 307 #if defined(CAP_BIT_CRC32) && defined(CAP_BIT_FEAT_PMULL) 308 static const absl::optional<uint64_t> caps = 309 ReadSysctlByName<uint64_t>("hw.optional.arm.caps"); 310 if (caps.has_value()) { 311 constexpr uint64_t kCrc32AndPmullCaps = 312 (uint64_t{1} << CAP_BIT_CRC32) | (uint64_t{1} << CAP_BIT_FEAT_PMULL); 313 return (*caps & kCrc32AndPmullCaps) == kCrc32AndPmullCaps; 314 } 315 #endif 316 317 // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619 318 static const absl::optional<int> armv8_crc32 = 319 ReadSysctlByName<int>("hw.optional.armv8_crc32"); 320 if (armv8_crc32.value_or(0) == 0) { 321 return false; 322 } 323 // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3918855 324 static const absl::optional<int> feat_pmull = 325 ReadSysctlByName<int>("hw.optional.arm.FEAT_PMULL"); 326 if (feat_pmull.value_or(0) == 0) { 327 return false; 328 } 329 return true; 330 } 331 332 #else 333 334 CpuType GetCpuType() { return CpuType::kUnknown; } 335 336 bool SupportsArmCRC32PMULL() { return false; } 337 338 #endif 339 340 } // namespace crc_internal 341 ABSL_NAMESPACE_END 342 } // namespace absl