// Cpu-vixl.cpp
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "jit/arm64/vixl/Cpu-vixl.h"

#include "jstypes.h"

// getauxval(AT_HWCAP/AT_HWCAP2) is the OS-level feature query on Linux and
// Android; everywhere else we fall back to other mechanisms below.
#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
#include <sys/auxv.h>
#define VIXL_USE_LINUX_HWCAP 1
#endif

#include "jit/arm64/vixl/Utils-vixl.h"


namespace vixl {


// Field descriptors for the AArch64 ID registers read below. Each constant
// names the least-significant bit of a bitfield within the register.
// NOTE(review): the second constructor argument is either a signedness tag
// (Field::kSigned) or an explicit field width in bits — confirm against the
// Field overloads declared in Cpu-vixl.h. Signed fields read as -1 when the
// feature is not implemented, which is why some checks below use `>= 0`.

// ID_AA64PFR0_EL1.
const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
const IDRegister::Field AA64PFR0::kRAS(28);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);
const IDRegister::Field AA64PFR0::kCSV2(56);
const IDRegister::Field AA64PFR0::kCSV3(60);

// ID_AA64PFR1_EL1.
const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);
const IDRegister::Field AA64PFR1::kSME(24);

// ID_AA64ISAR0_EL1.
const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
const IDRegister::Field AA64ISAR0::kSHA2(12);
const IDRegister::Field AA64ISAR0::kCRC32(16);
const IDRegister::Field AA64ISAR0::kAtomic(20);
const IDRegister::Field AA64ISAR0::kRDM(28);
const IDRegister::Field AA64ISAR0::kSHA3(32);
const IDRegister::Field AA64ISAR0::kSM3(36);
const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);
const IDRegister::Field AA64ISAR0::kRNDR(60);

// ID_AA64ISAR1_EL1.
const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
const IDRegister::Field AA64ISAR1::kAPI(8);
const IDRegister::Field AA64ISAR1::kJSCVT(12);
const IDRegister::Field AA64ISAR1::kFCMA(16);
const IDRegister::Field AA64ISAR1::kLRCPC(20);
const IDRegister::Field AA64ISAR1::kGPA(24);
const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);
const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);

// ID_AA64ISAR2_EL1.
const IDRegister::Field AA64ISAR2::kWFXT(0);
const IDRegister::Field AA64ISAR2::kRPRES(4);
const IDRegister::Field AA64ISAR2::kMOPS(16);
const IDRegister::Field AA64ISAR2::kCSSC(52);

// ID_AA64MMFR0_EL1.
const IDRegister::Field AA64MMFR0::kECV(60);

// ID_AA64MMFR1_EL1.
const IDRegister::Field AA64MMFR1::kLO(16);
const IDRegister::Field AA64MMFR1::kAFP(44);

// ID_AA64MMFR2_EL1.
const IDRegister::Field AA64MMFR2::kAT(32);

// ID_AA64ZFR0_EL1 (SVE feature register).
const IDRegister::Field AA64ZFR0::kSVEver(0);
const IDRegister::Field AA64ZFR0::kAES(4);
const IDRegister::Field AA64ZFR0::kBitPerm(16);
const IDRegister::Field AA64ZFR0::kBF16(20);
const IDRegister::Field AA64ZFR0::kSHA3(32);
const IDRegister::Field AA64ZFR0::kSM4(40);
const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);

// ID_AA64SMFR0_EL1 (SME feature register). Several of these are single-bit
// flags, hence the explicit width argument.
const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);

// Decode the CPUFeatures advertised by ID_AA64PFR0_EL1. The kFP and kAdvSIMD
// fields are signed, so `>= 0` means "implemented at all".
CPUFeatures AA64PFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
  if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
  if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
  if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
  if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
  if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
  if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
  if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
  if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
  if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
  return f;
}

// Decode the CPUFeatures advertised by ID_AA64PFR1_EL1.
CPUFeatures AA64PFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
  if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
  if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
  if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
  if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
  if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3);
  if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME);
  return f;
}

// Decode the CPUFeatures advertised by ID_AA64ISAR0_EL1.
CPUFeatures AA64ISAR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
  if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
  if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
  if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
  if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
  if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
  if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
  if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
  if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
  if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
  if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
  if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
  if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
  return f;
}

// Decode the CPUFeatures advertised by ID_AA64ISAR1_EL1, including the
// pointer-authentication (PAuth) fields.
CPUFeatures AA64ISAR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
  if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
  if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
  if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
  if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
  if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
  if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
  if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
  if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
  if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);

  // Only one of these fields should be non-zero, but they have the same
  // encodings, so merge the logic.
  int apx = std::max(Get(kAPI), Get(kAPA));
  if (apx >= 1) {
    f.Combine(CPUFeatures::kPAuth);
    // APA (rather than API) indicates QARMA.
    if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
    // EnhancedPAC (`==`, not `>=`) is superseded by EnhancedPAC2 at the next
    // encoding, so it only applies at exactly 0b0010.
    if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
    if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
    if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
    if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
  }

  if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
  if (Get(kGPA) >= 1) {
    f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
  }
  return f;
}

// Decode the CPUFeatures advertised by ID_AA64ISAR2_EL1. Note kWFXT requires
// a field value of at least 2.
CPUFeatures AA64ISAR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
  if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
  if (Get(kMOPS) >= 1) f.Combine(CPUFeatures::kMOPS);
  if (Get(kCSSC) >= 1) f.Combine(CPUFeatures::kCSSC);
  return f;
}

// Decode the CPUFeatures advertised by ID_AA64MMFR0_EL1.
CPUFeatures AA64MMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kECV) >= 1) f.Combine(CPUFeatures::kECV);
  return f;
}

// Decode the CPUFeatures advertised by ID_AA64MMFR1_EL1.
CPUFeatures AA64MMFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
  if (Get(kAFP) >= 1) f.Combine(CPUFeatures::kAFP);
  return f;
}

// Decode the CPUFeatures advertised by ID_AA64MMFR2_EL1.
CPUFeatures AA64MMFR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
  return f;
}

CPUFeatures AA64ZFR0::GetCPUFeatures() const {
  // This register is only available with SVE, but reads-as-zero in its absence,
  // so it's always safe to read it.
  CPUFeatures f;
  if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
  if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kSVE_EBF16);
  if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
  if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2);
  return f;
}

// Decode the CPUFeatures advertised by ID_AA64SMFR0_EL1. The `>= 15` checks
// correspond to an all-ones (0b1111) 4-bit field; the `>= 1` checks are the
// single-bit fields declared with width 1 above.
CPUFeatures AA64SMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
  if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
  if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
  if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);
  if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
  if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);
  if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
  return f;
}

// Extract `field` from this register's cached 64-bit value, sign- or
// zero-extending according to the field's type.
int IDRegister::Get(IDRegister::Field field) const {
  int msb = field.GetMsb();
  int lsb = field.GetLsb();
  // The extracted field must fit in the `int` return type.
  VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
                     (sizeof(int) * kBitsPerByte));
  switch (field.GetType()) {
    case Field::kSigned:
      return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
    case Field::kUnsigned:
      return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
  }
  VIXL_UNREACHABLE();
  return 0;
}

// Read every ID register in VIXL_AARCH64_ID_REG_LIST and merge the features
// each one advertises. Only valid where the registers can actually be read
// (or are emulated by the kernel).
CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
  CPUFeatures f;
#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
  f.Combine(Read##NAME().GetCPUFeatures());
  VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
#undef VIXL_COMBINE_ID_REG
  return f;
}

// Ask the OS which features are available. On Linux/Android this decodes the
// AT_HWCAP/AT_HWCAP2 auxv words; on Darwin a fixed baseline is assumed.
// Optionally falls through to reading the (emulated) ID registers.
CPUFeatures CPU::InferCPUFeaturesFromOS(
    CPUFeatures::QueryIDRegistersOption option) {
  CPUFeatures features;

#ifdef VIXL_USE_LINUX_HWCAP
  // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
  // than explicit bits, but explicit bits allow us to identify features that
  // the toolchain doesn't know about.
  static const CPUFeatures::Feature kFeatureBitsLow[] =
      {// Bits 0-7
       CPUFeatures::kFP,
       CPUFeatures::kNEON,
       CPUFeatures::kNone,  // "EVTSTRM", which VIXL doesn't track.
       CPUFeatures::kAES,
       CPUFeatures::kPmull1Q,
       CPUFeatures::kSHA1,
       CPUFeatures::kSHA2,
       CPUFeatures::kCRC32,
       // Bits 8-15
       CPUFeatures::kAtomics,
       CPUFeatures::kFPHalf,
       CPUFeatures::kNEONHalf,
       CPUFeatures::kIDRegisterEmulation,
       CPUFeatures::kRDM,
       CPUFeatures::kJSCVT,
       CPUFeatures::kFcma,
       CPUFeatures::kRCpc,
       // Bits 16-23
       CPUFeatures::kDCPoP,
       CPUFeatures::kSHA3,
       CPUFeatures::kSM3,
       CPUFeatures::kSM4,
       CPUFeatures::kDotProduct,
       CPUFeatures::kSHA512,
       CPUFeatures::kSVE,
       CPUFeatures::kFHM,
       // Bits 24-31
       CPUFeatures::kDIT,
       CPUFeatures::kUSCAT,
       CPUFeatures::kRCpcImm,
       CPUFeatures::kFlagM,
       CPUFeatures::kSSBSControl,
       CPUFeatures::kSB,
       CPUFeatures::kPAuth,
       CPUFeatures::kPAuthGeneric};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);

  static const CPUFeatures::Feature kFeatureBitsHigh[] =
      {// Bits 0-7
       CPUFeatures::kDCCVADP,
       CPUFeatures::kSVE2,
       CPUFeatures::kSVEAES,
       CPUFeatures::kSVEPmull128,
       CPUFeatures::kSVEBitPerm,
       CPUFeatures::kSVESHA3,
       CPUFeatures::kSVESM4,
       CPUFeatures::kAXFlag,
       // Bits 8-15
       CPUFeatures::kFrintToFixedSizedInt,
       CPUFeatures::kSVEI8MM,
       CPUFeatures::kSVEF32MM,
       CPUFeatures::kSVEF64MM,
       CPUFeatures::kSVEBF16,
       CPUFeatures::kI8MM,
       CPUFeatures::kBF16,
       CPUFeatures::kDGH,
       // Bits 16-23
       CPUFeatures::kRNG,
       CPUFeatures::kBTI,
       CPUFeatures::kMTE,
       CPUFeatures::kECV,
       CPUFeatures::kAFP,
       CPUFeatures::kRPRES,
       CPUFeatures::kMTE3,
       CPUFeatures::kSME,
       // Bits 24-31
       CPUFeatures::kSMEi16i64,
       CPUFeatures::kSMEf64f64,
       CPUFeatures::kSMEi8i32,
       CPUFeatures::kSMEf16f32,
       CPUFeatures::kSMEb16f32,
       CPUFeatures::kSMEf32f32,
       CPUFeatures::kSMEfa64,
       CPUFeatures::kWFXT,
       // Bits 32-39
       CPUFeatures::kEBF16,
       CPUFeatures::kSVE_EBF16};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);

  // For each set bit in `hwcap`, combine the feature at the corresponding
  // index of `feature_array` into `features`.
  auto combine_features = [&features](uint64_t hwcap,
                                      const CPUFeatures::Feature* feature_array,
                                      size_t features_size) {
    for (size_t i = 0; i < features_size; i++) {
      if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);
    }
  };

  uint64_t hwcap_low = getauxval(AT_HWCAP);
  uint64_t hwcap_high = getauxval(AT_HWCAP2);

  combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
  combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));

  // MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support
  if (features.Has(CPUFeatures::kMTE)) {
    features.Combine(CPUFeatures::kMTEInstructions);
  }
#elif defined(XP_DARWIN)
  // Apple processors have kJSCVT, kDotProduct, and kAtomics features.
  features.Combine(CPUFeatures::kJSCVT, CPUFeatures::kDotProduct,
                   CPUFeatures::kAtomics);
#endif  // VIXL_USE_LINUX_HWCAP

  // If the kernel emulates `mrs` on the ID registers (kIDRegisterEmulation),
  // the registers themselves are the most complete source of feature bits.
  if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
      (features.Has(CPUFeatures::kIDRegisterEmulation))) {
    features.Combine(InferCPUFeaturesFromIDRegisters());
  }
  return features;
}


// Define CPU::Read<NAME>() for every register in VIXL_AARCH64_ID_REG_LIST.
// On AArch64 each reader executes `mrs` on the named system register; on
// other targets the readers are unreachable stubs.
#ifdef __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG)       \
  NAME CPU::Read##NAME() {                    \
    uint64_t value = 0;                       \
    __asm__("mrs %0, " MRS_ARG : "=r"(value)); \
    return NAME(value);                       \
  }
#else  // __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
  NAME CPU::Read##NAME() {              \
    VIXL_UNREACHABLE();                 \
    return NAME(0);                     \
  }
#endif  // __aarch64__

VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)

#undef VIXL_READ_ID_REG


// Initialise to smallest possible cache size.
unsigned CPU::dcache_line_size_ = 1;
unsigned CPU::icache_line_size_ = 1;


// Query the SVE vector length. This requires CPUFeatures::kSVE.
int CPU::ReadSVEVectorLengthInBits() {
#ifdef __aarch64__
  uint64_t vl;
  // To support compilers that don't understand `rdvl`, encode the value
  // directly and move it manually.
  __asm__(
      " .word 0x04bf5100\n"  // rdvl x0, #8
      " mov %[vl], x0\n"
      : [vl] "=r"(vl)
      :
      : "x0");
  VIXL_ASSERT(vl <= INT_MAX);
  return static_cast<int>(vl);
#else
  VIXL_UNREACHABLE();
  return 0;
#endif
}


}  // namespace vixl