Cpu-Features-vixl.h (24744B)
1 // Copyright 2018, VIXL authors 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are met: 6 // 7 // * Redistributions of source code must retain the above copyright notice, 8 // this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above copyright notice, 10 // this list of conditions and the following disclaimer in the documentation 11 // and/or other materials provided with the distribution. 12 // * Neither the name of ARM Limited nor the names of its contributors may be 13 // used to endorse or promote products derived from this software without 14 // specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27 #ifndef VIXL_CPU_FEATURES_H 28 #define VIXL_CPU_FEATURES_H 29 30 #include <bitset> 31 #include <ostream> 32 33 #include "jit/arm64/vixl/Globals-vixl.h" 34 35 36 namespace vixl { 37 38 39 // VIXL aims to handle and detect all architectural features that are likely to 40 // influence code-generation decisions at EL0 (user-space). 
//
//  - There may be multiple VIXL feature flags for a given architectural
//    extension. This occurs where the extension allows components to be
//    implemented independently, or where kernel support is needed, and is
//    likely to be fragmented.
//
//    For example, Pointer Authentication (kPAuth*) has a separate feature flag
//    for access to PACGA, and to indicate that the QARMA algorithm is
//    implemented.
//
//  - Conversely, some extensions have configuration options that do not affect
//    EL0, so these are presented as a single VIXL feature.
//
//    For example, the RAS extension (kRAS) has several variants, but the only
//    feature relevant to VIXL is the addition of the ESB instruction so we only
//    need a single flag.
//
//  - VIXL offers separate flags for separate features even if they're
//    architecturally linked.
//
//    For example, the architecture requires kFPHalf and kNEONHalf to be equal,
//    but they have separate hardware ID register fields so VIXL presents them
//    as separate features.
//
//  - VIXL can detect every feature for which it can generate code.
//
//  - VIXL can detect some features for which it cannot generate code.
//
// The CPUFeatures::Feature enum — derived from the macro list below — is
// frequently extended. New features may be added to the list at any point, and
// no assumptions should be made about the numerical values assigned to each
// enum constant. The symbolic names can be considered to be stable.
//
// The debug descriptions are used only for debug output. The 'cpuinfo' strings
// are informative; VIXL does not use /proc/cpuinfo for feature detection.
//
// VIXL_CPU_FEATURE_LIST is an X-macro: it invokes V(symbol, description,
// cpuinfo) once per feature, in declaration order. The third argument is NULL
// for entries that list no cpuinfo string.

// clang-format off
#define VIXL_CPU_FEATURE_LIST(V)                                               \
  /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_*  */ \
  /* registers, so that the detailed feature registers can be read          */ \
  /* directly.                                                              */ \
                                                                               \
  /* Constant name        Debug description         Linux 'cpuinfo' string. */ \
  V(kIDRegisterEmulation, "ID register emulation",  "cpuid")                   \
                                                                               \
  V(kFP,                  "FP",                     "fp")                      \
  V(kNEON,                "NEON",                   "asimd")                   \
  V(kCRC32,               "CRC32",                  "crc32")                   \
  V(kDGH,                 "DGH",                    "dgh")                     \
  /* Speculation control features.                                          */ \
  V(kCSV2,                "CSV2",                   NULL)                      \
  V(kSCXTNUM,             "SCXTNUM",                NULL)                      \
  V(kCSV3,                "CSV3",                   NULL)                      \
  V(kSB,                  "SB",                     "sb")                      \
  V(kSPECRES,             "SPECRES",                NULL)                      \
  V(kSSBS,                "SSBS",                   NULL)                      \
  V(kSSBSControl,         "SSBS (PSTATE control)",  "ssbs")                    \
  /* Cryptographic support instructions.                                    */ \
  V(kAES,                 "AES",                    "aes")                     \
  V(kSHA1,                "SHA1",                   "sha1")                    \
  V(kSHA2,                "SHA2",                   "sha2")                    \
  /* A form of PMULL{2} with a 128-bit (1Q) result.                         */ \
  V(kPmull1Q,             "Pmull1Q",                "pmull")                   \
  /* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc.              */ \
  V(kAtomics,             "Atomics",                "atomics")                 \
  /* Limited ordering regions: LDLAR, STLLR and their variants.             */ \
  V(kLORegions,           "LORegions",              NULL)                      \
  /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH.        */ \
  V(kRDM,                 "RDM",                    "asimdrdm")                \
  /* Scalable Vector Extension.                                             */ \
  V(kSVE,                 "SVE",                    "sve")                     \
  V(kSVEF64MM,            "SVE F64MM",              "svef64mm")                \
  V(kSVEF32MM,            "SVE F32MM",              "svef32mm")                \
  /* Linux reports this capability as "svei8mm" (no extra 'i').             */ \
  V(kSVEI8MM,             "SVE I8MM",               "svei8mm")                 \
  V(kSVEBF16,             "SVE BFloat16",           "svebf16")                 \
  /* SDOT and UDOT support (in NEON).                                       */ \
  V(kDotProduct,          "DotProduct",             "asimddp")                 \
  /* Int8 matrix multiplication (in NEON).                                  */ \
  V(kI8MM,                "NEON I8MM",              "i8mm")                    \
  /* Half-precision (FP16) support for FP and NEON, respectively.           */ \
  V(kFPHalf,              "FPHalf",                 "fphp")                    \
  V(kNEONHalf,            "NEONHalf",               "asimdhp")                 \
  /* BFloat16 support (in both FP and NEON.)                                */ \
  V(kBF16,                "FP/NEON BFloat 16",      "bf16")                    \
  /* The RAS extension, including the ESB instruction.                      */ \
  V(kRAS,                 "RAS",                    NULL)                      \
  /* Data cache clean to the point of persistence: DC CVAP.                 */ \
  V(kDCPoP,               "DCPoP",                  "dcpop")                   \
  /* Data cache clean to the point of deep persistence: DC CVADP.           */ \
  V(kDCCVADP,             "DCCVADP",                "dcpodp")                  \
  /* Cryptographic support instructions.                                    */ \
  V(kSHA3,                "SHA3",                   "sha3")                    \
  V(kSHA512,              "SHA512",                 "sha512")                  \
  V(kSM3,                 "SM3",                    "sm3")                     \
  V(kSM4,                 "SM4",                    "sm4")                     \
  /* Pointer authentication for addresses.                                  */ \
  V(kPAuth,               "PAuth",                  "paca")                    \
  /* Pointer authentication for addresses uses QARMA.                       */ \
  V(kPAuthQARMA,          "PAuthQARMA",             NULL)                      \
  /* Generic authentication (using the PACGA instruction).                  */ \
  V(kPAuthGeneric,        "PAuthGeneric",           "pacg")                    \
  /* Generic authentication uses QARMA.                                     */ \
  V(kPAuthGenericQARMA,   "PAuthGenericQARMA",      NULL)                      \
  /* JavaScript-style FP -> integer conversion instruction: FJCVTZS.        */ \
  V(kJSCVT,               "JSCVT",                  "jscvt")                   \
  /* Complex number support for NEON: FCMLA and FCADD.                      */ \
  V(kFcma,                "Fcma",                   "fcma")                    \
  /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
  V(kRCpc,                "RCpc",                   "lrcpc")                   \
  V(kRCpcImm,             "RCpc (imm)",             "ilrcpc")                  \
  /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF.               */ \
  V(kFlagM,               "FlagM",                  "flagm")                   \
  /* Unaligned single-copy atomicity.                                       */ \
  V(kUSCAT,               "USCAT",                  "uscat")                   \
  /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}.         */ \
  V(kFHM,                 "FHM",                    "asimdfhm")                \
  /* Data-independent timing (for selected instructions).                   */ \
  V(kDIT,                 "DIT",                    "dit")                     \
  /* Branch target identification.                                          */ \
  V(kBTI,                 "BTI",                    "bti")                     \
  /* Flag manipulation instructions: {AX,XA}FLAG                            */ \
  V(kAXFlag,              "AXFlag",                 "flagm2")                  \
  /* Random number generation extension.                                    */ \
  V(kRNG,                 "RNG",                    "rng")                     \
  /* Floating-point round to {32,64}-bit integer.                           */ \
  V(kFrintToFixedSizedInt,"Frint (bounded)",        "frint")                   \
  /* Memory Tagging Extension.                                              */ \
  V(kMTEInstructions,     "MTE (EL0 instructions)", NULL)                      \
  V(kMTE,                 "MTE",                    NULL)                      \
  V(kMTE3,                "MTE (asymmetric)",       "mte3")                    \
  /* PAuth extensions.                                                      */ \
  V(kPAuthEnhancedPAC,    "PAuth EnhancedPAC",      NULL)                      \
  V(kPAuthEnhancedPAC2,   "PAuth EnhancedPAC2",     NULL)                      \
  V(kPAuthFPAC,           "PAuth FPAC",             NULL)                      \
  V(kPAuthFPACCombined,   "PAuth FPACCombined",     NULL)                      \
  /* Scalable Vector Extension 2.                                           */ \
  V(kSVE2,                "SVE2",                   "sve2")                    \
  V(kSVESM4,              "SVE SM4",                "svesm4")                  \
  V(kSVESHA3,             "SVE SHA3",               "svesha3")                 \
  V(kSVEBitPerm,          "SVE BitPerm",            "svebitperm")              \
  V(kSVEAES,              "SVE AES",                "sveaes")                  \
  V(kSVEPmull128,         "SVE Pmull128",           "svepmull")                \
  /* Alternate floating-point behavior                                      */ \
  V(kAFP,                 "AFP",                    "afp")                     \
  /* Enhanced Counter Virtualization                                        */ \
  V(kECV,                 "ECV",                    "ecv")                     \
  /* Increased precision of Reciprocal Estimate and Square Root Estimate    */ \
  V(kRPRES,               "RPRES",                  "rpres")                   \
  /* Memory operation instructions, for memcpy, memset                      */ \
  V(kMOPS,                "Memory ops",             NULL)                      \
  /* Scalable Matrix Extension (SME)                                        */ \
  V(kSME,                 "SME",                    "sme")                     \
  V(kSMEi16i64,           "SME (i16i64)",           "smei16i64")               \
  V(kSMEf64f64,           "SME (f64f64)",           "smef64f64")               \
  V(kSMEi8i32,            "SME (i8i32)",            "smei8i32")                \
  V(kSMEf16f32,           "SME (f16f32)",           "smef16f32")               \
  V(kSMEb16f32,           "SME (b16f32)",           "smeb16f32")               \
  V(kSMEf32f32,           "SME (f32f32)",           "smef32f32")               \
  V(kSMEfa64,             "SME (fa64)",             "smefa64")                 \
  /* WFET and WFIT instruction support                                      */ \
  V(kWFXT,                "WFXT",                   "wfxt")                    \
  /* Extended BFloat16 instructions                                         */ \
  V(kEBF16,               "EBF16",                  "ebf16")                   \
  V(kSVE_EBF16,           "EBF16 (SVE)",            "sveebf16")                \
  V(kCSSC,                "CSSC",                   "cssc")                    \
  V(kGCS,                 "GCS",                    "gcs")
// clang-format on


class CPUFeaturesConstIterator;

// A representation of the set of features known to be supported by the target
// device. Each feature is represented by a simple boolean flag.
214 // 215 // - When the Assembler is asked to assemble an instruction, it asserts (in 216 // debug mode) that the necessary features are available. 217 // 218 // - TODO: The MacroAssembler relies on the Assembler's assertions, but in 219 // some cases it may be useful for macros to generate a fall-back sequence 220 // in case features are not available. 221 // 222 // - The Simulator assumes by default that all features are available, but it 223 // is possible to configure it to fail if the simulated code uses features 224 // that are not enabled. 225 // 226 // The Simulator also offers pseudo-instructions to allow features to be 227 // enabled and disabled dynamically. This is useful when you want to ensure 228 // that some features are constrained to certain areas of code. 229 // 230 // - The base Disassembler knows nothing about CPU features, but the 231 // PrintDisassembler can be configured to annotate its output with warnings 232 // about unavailable features. The Simulator uses this feature when 233 // instruction trace is enabled. 234 // 235 // - The Decoder-based components -- the Simulator and PrintDisassembler -- 236 // rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of 237 // features actually encountered so that a large block of code can be 238 // examined (either directly or through simulation), and the required 239 // features analysed later. 240 // 241 // Expected usage: 242 // 243 // // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for 244 // // compatibility with older version of VIXL. 245 // MacroAssembler masm; 246 // 247 // // Generate code only for the current CPU. 248 // masm.SetCPUFeatures(CPUFeatures::InferFromOS()); 249 // 250 // // Turn off feature checking entirely. 251 // masm.SetCPUFeatures(CPUFeatures::All()); 252 // 253 // Feature set manipulation: 254 // 255 // CPUFeatures f; // The default constructor gives an empty set. 256 // // Individual features can be added (or removed). 
257 // f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::AES); 258 // f.Remove(CPUFeatures::kNEON); 259 // 260 // // Some helpers exist for extensions that provide several features. 261 // f.Remove(CPUFeatures::All()); 262 // f.Combine(CPUFeatures::AArch64LegacyBaseline()); 263 // 264 // // Chained construction is also possible. 265 // CPUFeatures g = 266 // f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32); 267 // 268 // // Features can be queried. Where multiple features are given, they are 269 // // combined with logical AND. 270 // if (h.Has(CPUFeatures::kNEON)) { ... } 271 // if (h.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... } 272 // if (h.Has(g)) { ... } 273 // // If the empty set is requested, the result is always 'true'. 274 // VIXL_ASSERT(h.Has(CPUFeatures())); 275 // 276 // // For debug and reporting purposes, features can be enumerated (or 277 // // printed directly): 278 // std::cout << CPUFeatures::kNEON; // Prints something like "NEON". 279 // std::cout << f; // Prints something like "FP, NEON, CRC32". 280 class CPUFeatures { 281 public: 282 // clang-format off 283 // Individual features. 284 // These should be treated as opaque tokens. User code should not rely on 285 // specific numeric values or ordering. 286 enum Feature { 287 // Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that 288 // this class supports. 289 290 kNone = -1, 291 #define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL, 292 VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE) 293 #undef VIXL_DECLARE_FEATURE 294 kNumberOfFeatures 295 }; 296 // clang-format on 297 298 // By default, construct with no features enabled. 299 constexpr CPUFeatures() : features_{} {} 300 301 // Construct with some features already enabled. 302 template <typename T, typename... U> 303 CPUFeatures(T first, U... others) : features_{} { 304 Combine(first, others...); 305 } 306 307 // Construct with all features enabled. 
This can be used to disable feature 308 // checking: `Has(...)` returns true regardless of the argument. 309 static CPUFeatures All(); 310 311 // Construct an empty CPUFeatures. This is equivalent to the default 312 // constructor, but is provided for symmetry and convenience. 313 static CPUFeatures None() { return CPUFeatures(); } 314 315 // The presence of these features was assumed by version of VIXL before this 316 // API was added, so using this set by default ensures API compatibility. 317 static CPUFeatures AArch64LegacyBaseline() { 318 return CPUFeatures(kFP, kNEON, kCRC32); 319 } 320 321 // Construct a new CPUFeatures object using ID registers. This assumes that 322 // kIDRegisterEmulation is present. 323 static CPUFeatures InferFromIDRegisters(); 324 325 enum QueryIDRegistersOption { 326 kDontQueryIDRegisters, 327 kQueryIDRegistersIfAvailable 328 }; 329 330 // Construct a new CPUFeatures object based on what the OS reports. 331 static CPUFeatures InferFromOS( 332 QueryIDRegistersOption option = kQueryIDRegistersIfAvailable); 333 334 // Combine another CPUFeatures object into this one. Features that already 335 // exist in this set are left unchanged. 336 void Combine(const CPUFeatures& other); 337 338 // Combine a specific feature into this set. If it already exists in the set, 339 // the set is left unchanged. 340 void Combine(Feature feature); 341 342 // Combine multiple features (or feature sets) into this set. 343 template <typename T, typename... U> 344 void Combine(T first, U... others) { 345 Combine(first); 346 Combine(others...); 347 } 348 349 // Remove features in another CPUFeatures object from this one. 350 void Remove(const CPUFeatures& other); 351 352 // Remove a specific feature from this set. This has no effect if the feature 353 // doesn't exist in the set. 354 void Remove(Feature feature0); 355 356 // Remove multiple features (or feature sets) from this set. 357 template <typename T, typename... U> 358 void Remove(T first, U... 
others) { 359 Remove(first); 360 Remove(others...); 361 } 362 363 // Chaining helpers for convenient construction by combining other CPUFeatures 364 // or individual Features. 365 template <typename... T> 366 CPUFeatures With(T... others) const { 367 CPUFeatures f(*this); 368 f.Combine(others...); 369 return f; 370 } 371 372 template <typename... T> 373 CPUFeatures Without(T... others) const { 374 CPUFeatures f(*this); 375 f.Remove(others...); 376 return f; 377 } 378 379 // Test whether the `other` feature set is equal to or a subset of this one. 380 bool Has(const CPUFeatures& other) const; 381 382 // Test whether a single feature exists in this set. 383 // Note that `Has(kNone)` always returns true. 384 bool Has(Feature feature) const; 385 386 // Test whether all of the specified features exist in this set. 387 template <typename T, typename... U> 388 bool Has(T first, U... others) const { 389 return Has(first) && Has(others...); 390 } 391 392 // Return the number of enabled features. 393 size_t Count() const; 394 bool HasNoFeatures() const { return Count() == 0; } 395 396 // Check for equivalence. 397 bool operator==(const CPUFeatures& other) const { 398 return Has(other) && other.Has(*this); 399 } 400 bool operator!=(const CPUFeatures& other) const { return !(*this == other); } 401 402 typedef CPUFeaturesConstIterator const_iterator; 403 404 const_iterator begin() const; 405 const_iterator end() const; 406 407 private: 408 // Each bit represents a feature. This set will be extended as needed. 409 std::bitset<kNumberOfFeatures> features_; 410 411 friend std::ostream& operator<<(std::ostream& os, 412 const vixl::CPUFeatures& features); 413 }; 414 415 std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature); 416 std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features); 417 418 // This is not a proper C++ iterator type, but it simulates enough of 419 // ForwardIterator that simple loops can be written. 
420 class CPUFeaturesConstIterator { 421 public: 422 CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL, 423 CPUFeatures::Feature start = CPUFeatures::kNone) 424 : cpu_features_(cpu_features), feature_(start) { 425 VIXL_ASSERT(IsValid()); 426 } 427 428 bool operator==(const CPUFeaturesConstIterator& other) const; 429 bool operator!=(const CPUFeaturesConstIterator& other) const { 430 return !(*this == other); 431 } 432 CPUFeaturesConstIterator& operator++(); 433 CPUFeaturesConstIterator operator++(int); 434 435 CPUFeatures::Feature operator*() const { 436 VIXL_ASSERT(IsValid()); 437 return feature_; 438 } 439 440 // For proper support of C++'s simplest "Iterator" concept, this class would 441 // have to define member types (such as CPUFeaturesIterator::pointer) to make 442 // it appear as if it iterates over Feature objects in memory. That is, we'd 443 // need CPUFeatures::iterator to behave like std::vector<Feature>::iterator. 444 // This is at least partially possible -- the std::vector<bool> specialisation 445 // does something similar -- but it doesn't seem worthwhile for a 446 // special-purpose debug helper, so they are omitted here. 447 private: 448 const CPUFeatures* cpu_features_; 449 CPUFeatures::Feature feature_; 450 451 bool IsValid() const { 452 if (cpu_features_ == NULL) { 453 return feature_ == CPUFeatures::kNone; 454 } 455 return cpu_features_->Has(feature_); 456 } 457 }; 458 459 // A convenience scope for temporarily modifying a CPU features object. This 460 // allows features to be enabled for short sequences. 461 // 462 // Expected usage: 463 // 464 // { 465 // CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32); 466 // // This scope can now use CRC32, as well as anything else that was enabled 467 // // before the scope. 468 // 469 // ... 470 // 471 // // At the end of the scope, the original CPU features are restored. 
472 // } 473 class CPUFeaturesScope { 474 public: 475 // Start a CPUFeaturesScope on any object that implements 476 // `CPUFeatures* GetCPUFeatures()`. 477 template <typename T> 478 explicit CPUFeaturesScope(T* cpu_features_wrapper) 479 : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), 480 old_features_(*cpu_features_) {} 481 482 // Start a CPUFeaturesScope on any object that implements 483 // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled. 484 template <typename T, typename U, typename... V> 485 CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features) 486 : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), 487 old_features_(*cpu_features_) { 488 cpu_features_->Combine(first, features...); 489 } 490 491 ~CPUFeaturesScope() { *cpu_features_ = old_features_; } 492 493 // For advanced usage, the CPUFeatures object can be accessed directly. 494 // The scope will restore the original state when it ends. 495 496 CPUFeatures* GetCPUFeatures() const { return cpu_features_; } 497 498 void SetCPUFeatures(const CPUFeatures& cpu_features) { 499 *cpu_features_ = cpu_features; 500 } 501 502 private: 503 CPUFeatures* const cpu_features_; 504 const CPUFeatures old_features_; 505 }; 506 507 508 } // namespace vixl 509 510 #endif // VIXL_CPU_FEATURES_H