aarch64_cpudetect.c (7603B)
1 /* 2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include "config/aom_config.h" 13 14 #include "arm_cpudetect.h" 15 16 #include "aom_ports/arm.h" 17 18 #if defined(__APPLE__) 19 #include <sys/sysctl.h> 20 #endif 21 22 #if !CONFIG_RUNTIME_CPU_DETECT 23 24 static int arm_get_cpu_caps(void) { 25 // This function should actually be a no-op. There is no way to adjust any of 26 // these because the RTCD tables do not exist: the functions are called 27 // statically. 28 int flags = 0; 29 #if HAVE_NEON 30 flags |= HAS_NEON; 31 #endif // HAVE_NEON 32 return flags; 33 } 34 35 #elif defined(__APPLE__) // end !CONFIG_RUNTIME_CPU_DETECT 36 37 // sysctlbyname() parameter documentation for instruction set characteristics: 38 // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics 39 static inline bool have_feature(const char *feature) { 40 int64_t feature_present = 0; 41 size_t size = sizeof(feature_present); 42 if (sysctlbyname(feature, &feature_present, &size, NULL, 0) != 0) { 43 return false; 44 } 45 return feature_present; 46 } 47 48 static int arm_get_cpu_caps(void) { 49 int flags = 0; 50 #if HAVE_NEON 51 flags |= HAS_NEON; 52 #endif // HAVE_NEON 53 #if HAVE_ARM_CRC32 54 if (have_feature("hw.optional.armv8_crc32")) flags |= HAS_ARM_CRC32; 55 #endif // HAVE_ARM_CRC32 56 #if HAVE_NEON_DOTPROD 57 if (have_feature("hw.optional.arm.FEAT_DotProd")) flags |= HAS_NEON_DOTPROD; 58 #endif // HAVE_NEON_DOTPROD 59 #if HAVE_NEON_I8MM 60 if (have_feature("hw.optional.arm.FEAT_I8MM")) flags |= HAS_NEON_I8MM; 61 #endif // HAVE_NEON_I8MM 62 return flags; 63 } 64 65 #elif defined(_WIN32) // end __APPLE__ 66 67 static int arm_get_cpu_caps(void) { 68 int flags = 0; 69 // IsProcessorFeaturePresent() parameter documentation: 70 // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters 71 #if HAVE_NEON 72 flags |= HAS_NEON; // Neon is mandatory in Armv8.0-A. 73 #endif // HAVE_NEON 74 #if HAVE_ARM_CRC32 75 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) { 76 flags |= HAS_ARM_CRC32; 77 } 78 #endif // HAVE_ARM_CRC32 79 #if HAVE_NEON_DOTPROD 80 // Support for PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK 81 // 20348, supported by Windows 11 and Windows Server 2022. 82 #if defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) 83 if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) { 84 flags |= HAS_NEON_DOTPROD; 85 } 86 #endif // defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) 87 #endif // HAVE_NEON_DOTPROD 88 #if HAVE_NEON_I8MM 89 // Support for PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE was added in Windows SDK 90 // 26100. 91 #if defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE) 92 // There's no PF_* flag that indicates whether plain I8MM is available 93 // or not. But if SVE_I8MM is available, that also implies that 94 // regular I8MM is available. 95 if (IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)) { 96 flags |= HAS_NEON_I8MM; 97 } 98 #endif // defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE) 99 #endif // HAVE_NEON_I8MM 100 #if HAVE_SVE 101 // Support for PF_ARM_SVE_INSTRUCTIONS_AVAILABLE was added in Windows SDK 26100. 102 #if defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) 103 if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) { 104 flags |= HAS_SVE; 105 } 106 #endif // defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) 107 #endif // HAVE_SVE 108 #if HAVE_SVE2 109 // Support for PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE was added in Windows SDK 110 // 26100. 111 #if defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) 112 if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)) { 113 flags |= HAS_SVE2; 114 } 115 #endif // defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) 116 #endif // HAVE_SVE2 117 return flags; 118 } 119 120 #elif defined(AOM_USE_ANDROID_CPU_FEATURES) 121 122 static int arm_get_cpu_caps(void) { 123 int flags = 0; 124 #if HAVE_NEON 125 flags |= HAS_NEON; // Neon is mandatory in Armv8.0-A. 126 #endif // HAVE_NEON 127 return flags; 128 } 129 130 #elif defined(__linux__) // end defined(AOM_USE_ANDROID_CPU_FEATURES) 131 132 #include <sys/auxv.h> 133 134 // Define hwcap values ourselves: building with an old auxv header where these 135 // hwcap values are not defined should not prevent features from being enabled. 136 #define AOM_AARCH64_HWCAP_CRC32 (1 << 7) 137 #define AOM_AARCH64_HWCAP_ASIMDDP (1 << 20) 138 #define AOM_AARCH64_HWCAP_SVE (1 << 22) 139 #define AOM_AARCH64_HWCAP2_SVE2 (1 << 1) 140 #define AOM_AARCH64_HWCAP2_I8MM (1 << 13) 141 142 static int arm_get_cpu_caps(void) { 143 int flags = 0; 144 #if HAVE_ARM_CRC32 || HAVE_NEON_DOTPROD || HAVE_SVE 145 unsigned long hwcap = getauxval(AT_HWCAP); 146 #endif 147 #if HAVE_NEON_I8MM || HAVE_SVE2 148 unsigned long hwcap2 = getauxval(AT_HWCAP2); 149 #endif 150 151 #if HAVE_NEON 152 flags |= HAS_NEON; // Neon is mandatory in Armv8.0-A. 153 #endif // HAVE_NEON 154 #if HAVE_ARM_CRC32 155 if (hwcap & AOM_AARCH64_HWCAP_CRC32) flags |= HAS_ARM_CRC32; 156 #endif // HAVE_ARM_CRC32 157 #if HAVE_NEON_DOTPROD 158 if (hwcap & AOM_AARCH64_HWCAP_ASIMDDP) flags |= HAS_NEON_DOTPROD; 159 #endif // HAVE_NEON_DOTPROD 160 #if HAVE_NEON_I8MM 161 if (hwcap2 & AOM_AARCH64_HWCAP2_I8MM) flags |= HAS_NEON_I8MM; 162 #endif // HAVE_NEON_I8MM 163 #if HAVE_SVE 164 if (hwcap & AOM_AARCH64_HWCAP_SVE) flags |= HAS_SVE; 165 #endif // HAVE_SVE 166 #if HAVE_SVE2 167 if (hwcap2 & AOM_AARCH64_HWCAP2_SVE2) flags |= HAS_SVE2; 168 #endif // HAVE_SVE2 169 return flags; 170 } 171 172 #elif defined(__Fuchsia__) // end __linux__ 173 174 #include <zircon/features.h> 175 #include <zircon/syscalls.h> 176 177 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/894282. 178 #ifndef ZX_ARM64_FEATURE_ISA_I8MM 179 #define ZX_ARM64_FEATURE_ISA_I8MM ((uint32_t)(1u << 19)) 180 #endif 181 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/895083. 182 #ifndef ZX_ARM64_FEATURE_ISA_SVE 183 #define ZX_ARM64_FEATURE_ISA_SVE ((uint32_t)(1u << 20)) 184 #endif 185 186 static int arm_get_cpu_caps(void) { 187 int flags = 0; 188 #if HAVE_NEON 189 flags |= HAS_NEON; // Neon is mandatory in Armv8.0-A. 190 #endif // HAVE_NEON 191 uint32_t features; 192 zx_status_t status = zx_system_get_features(ZX_FEATURE_KIND_CPU, &features); 193 if (status != ZX_OK) return flags; 194 #if HAVE_ARM_CRC32 195 if (features & ZX_ARM64_FEATURE_ISA_CRC32) flags |= HAS_ARM_CRC32; 196 #endif // HAVE_ARM_CRC32 197 #if HAVE_NEON_DOTPROD 198 if (features & ZX_ARM64_FEATURE_ISA_DP) flags |= HAS_NEON_DOTPROD; 199 #endif // HAVE_NEON_DOTPROD 200 #if HAVE_NEON_I8MM 201 if (features & ZX_ARM64_FEATURE_ISA_I8MM) flags |= HAS_NEON_I8MM; 202 #endif // HAVE_NEON_I8MM 203 #if HAVE_SVE 204 if (features & ZX_ARM64_FEATURE_ISA_SVE) flags |= HAS_SVE; 205 #endif // HAVE_SVE 206 return flags; 207 } 208 209 #else // end __Fuchsia__ 210 #error \ 211 "Runtime CPU detection selected, but no CPU detection method " \ 212 "available for your platform. Rerun cmake with -DCONFIG_RUNTIME_CPU_DETECT=0." 213 #endif 214 215 int aom_arm_cpu_caps(void) { 216 int flags = 0; 217 if (!arm_cpu_env_flags(&flags)) { 218 flags = arm_get_cpu_caps() & arm_cpu_env_mask(); 219 } 220 221 // Restrict flags: FEAT_I8MM assumes that FEAT_DotProd is available. 222 if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_NEON_I8MM; 223 224 // Restrict flags: SVE assumes that FEAT_{DotProd,I8MM} are available. 225 if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_SVE; 226 if (!(flags & HAS_NEON_I8MM)) flags &= ~HAS_SVE; 227 228 // Restrict flags: SVE2 assumes that FEAT_SVE is available. 229 if (!(flags & HAS_SVE)) flags &= ~HAS_SVE2; 230 231 return flags; 232 }