tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

aarch64_cpudetect.c (7603B)


      1 /*
      2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include "config/aom_config.h"
     13 
     14 #include "arm_cpudetect.h"
     15 
     16 #include "aom_ports/arm.h"
     17 
     18 #if defined(__APPLE__)
     19 #include <sys/sysctl.h>
     20 #endif
     21 
     22 #if !CONFIG_RUNTIME_CPU_DETECT
     23 
     24 static int arm_get_cpu_caps(void) {
     25  // This function should actually be a no-op. There is no way to adjust any of
     26  // these because the RTCD tables do not exist: the functions are called
     27  // statically.
     28  int flags = 0;
     29 #if HAVE_NEON
     30  flags |= HAS_NEON;
     31 #endif  // HAVE_NEON
     32  return flags;
     33 }
     34 
     35 #elif defined(__APPLE__)  // end !CONFIG_RUNTIME_CPU_DETECT
     36 
     37 // sysctlbyname() parameter documentation for instruction set characteristics:
     38 // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
     39 static inline bool have_feature(const char *feature) {
     40  int64_t feature_present = 0;
     41  size_t size = sizeof(feature_present);
     42  if (sysctlbyname(feature, &feature_present, &size, NULL, 0) != 0) {
     43    return false;
     44  }
     45  return feature_present;
     46 }
     47 
     48 static int arm_get_cpu_caps(void) {
     49  int flags = 0;
     50 #if HAVE_NEON
     51  flags |= HAS_NEON;
     52 #endif  // HAVE_NEON
     53 #if HAVE_ARM_CRC32
     54  if (have_feature("hw.optional.armv8_crc32")) flags |= HAS_ARM_CRC32;
     55 #endif  // HAVE_ARM_CRC32
     56 #if HAVE_NEON_DOTPROD
     57  if (have_feature("hw.optional.arm.FEAT_DotProd")) flags |= HAS_NEON_DOTPROD;
     58 #endif  // HAVE_NEON_DOTPROD
     59 #if HAVE_NEON_I8MM
     60  if (have_feature("hw.optional.arm.FEAT_I8MM")) flags |= HAS_NEON_I8MM;
     61 #endif  // HAVE_NEON_I8MM
     62  return flags;
     63 }
     64 
     65 #elif defined(_WIN32)  // end __APPLE__
     66 
     67 static int arm_get_cpu_caps(void) {
     68  int flags = 0;
     69 // IsProcessorFeaturePresent() parameter documentation:
     70 // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters
     71 #if HAVE_NEON
     72  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
     73 #endif  // HAVE_NEON
     74 #if HAVE_ARM_CRC32
     75  if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) {
     76    flags |= HAS_ARM_CRC32;
     77  }
     78 #endif  // HAVE_ARM_CRC32
     79 #if HAVE_NEON_DOTPROD
     80 // Support for PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK
     81 // 20348, supported by Windows 11 and Windows Server 2022.
     82 #if defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
     83  if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
     84    flags |= HAS_NEON_DOTPROD;
     85  }
     86 #endif  // defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
     87 #endif  // HAVE_NEON_DOTPROD
     88 #if HAVE_NEON_I8MM
     89 // Support for PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE was added in Windows SDK
     90 // 26100.
     91 #if defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
     92  // There's no PF_* flag that indicates whether plain I8MM is available
     93  // or not. But if SVE_I8MM is available, that also implies that
     94  // regular I8MM is available.
     95  if (IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)) {
     96    flags |= HAS_NEON_I8MM;
     97  }
     98 #endif  // defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
     99 #endif  // HAVE_NEON_I8MM
    100 #if HAVE_SVE
    101 // Support for PF_ARM_SVE_INSTRUCTIONS_AVAILABLE was added in Windows SDK 26100.
    102 #if defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
    103  if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) {
    104    flags |= HAS_SVE;
    105  }
    106 #endif  // defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
    107 #endif  // HAVE_SVE
    108 #if HAVE_SVE2
    109 // Support for PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE was added in Windows SDK
    110 // 26100.
    111 #if defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
    112  if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)) {
    113    flags |= HAS_SVE2;
    114  }
    115 #endif  // defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
    116 #endif  // HAVE_SVE2
    117  return flags;
    118 }
    119 
    120 #elif defined(AOM_USE_ANDROID_CPU_FEATURES)
    121 
    122 static int arm_get_cpu_caps(void) {
    123  int flags = 0;
    124 #if HAVE_NEON
    125  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
    126 #endif  // HAVE_NEON
    127  return flags;
    128 }
    129 
    130 #elif defined(__linux__)  // end defined(AOM_USE_ANDROID_CPU_FEATURES)
    131 
    132 #include <sys/auxv.h>
    133 
    134 // Define hwcap values ourselves: building with an old auxv header where these
    135 // hwcap values are not defined should not prevent features from being enabled.
    136 #define AOM_AARCH64_HWCAP_CRC32 (1 << 7)
    137 #define AOM_AARCH64_HWCAP_ASIMDDP (1 << 20)
    138 #define AOM_AARCH64_HWCAP_SVE (1 << 22)
    139 #define AOM_AARCH64_HWCAP2_SVE2 (1 << 1)
    140 #define AOM_AARCH64_HWCAP2_I8MM (1 << 13)
    141 
    142 static int arm_get_cpu_caps(void) {
    143  int flags = 0;
    144 #if HAVE_ARM_CRC32 || HAVE_NEON_DOTPROD || HAVE_SVE
    145  unsigned long hwcap = getauxval(AT_HWCAP);
    146 #endif
    147 #if HAVE_NEON_I8MM || HAVE_SVE2
    148  unsigned long hwcap2 = getauxval(AT_HWCAP2);
    149 #endif
    150 
    151 #if HAVE_NEON
    152  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
    153 #endif  // HAVE_NEON
    154 #if HAVE_ARM_CRC32
    155  if (hwcap & AOM_AARCH64_HWCAP_CRC32) flags |= HAS_ARM_CRC32;
    156 #endif  // HAVE_ARM_CRC32
    157 #if HAVE_NEON_DOTPROD
    158  if (hwcap & AOM_AARCH64_HWCAP_ASIMDDP) flags |= HAS_NEON_DOTPROD;
    159 #endif  // HAVE_NEON_DOTPROD
    160 #if HAVE_NEON_I8MM
    161  if (hwcap2 & AOM_AARCH64_HWCAP2_I8MM) flags |= HAS_NEON_I8MM;
    162 #endif  // HAVE_NEON_I8MM
    163 #if HAVE_SVE
    164  if (hwcap & AOM_AARCH64_HWCAP_SVE) flags |= HAS_SVE;
    165 #endif  // HAVE_SVE
    166 #if HAVE_SVE2
    167  if (hwcap2 & AOM_AARCH64_HWCAP2_SVE2) flags |= HAS_SVE2;
    168 #endif  // HAVE_SVE2
    169  return flags;
    170 }
    171 
    172 #elif defined(__Fuchsia__)  // end __linux__
    173 
    174 #include <zircon/features.h>
    175 #include <zircon/syscalls.h>
    176 
    177 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/894282.
    178 #ifndef ZX_ARM64_FEATURE_ISA_I8MM
    179 #define ZX_ARM64_FEATURE_ISA_I8MM ((uint32_t)(1u << 19))
    180 #endif
    181 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/895083.
    182 #ifndef ZX_ARM64_FEATURE_ISA_SVE
    183 #define ZX_ARM64_FEATURE_ISA_SVE ((uint32_t)(1u << 20))
    184 #endif
    185 
    186 static int arm_get_cpu_caps(void) {
    187  int flags = 0;
    188 #if HAVE_NEON
    189  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
    190 #endif  // HAVE_NEON
    191  uint32_t features;
    192  zx_status_t status = zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
    193  if (status != ZX_OK) return flags;
    194 #if HAVE_ARM_CRC32
    195  if (features & ZX_ARM64_FEATURE_ISA_CRC32) flags |= HAS_ARM_CRC32;
    196 #endif  // HAVE_ARM_CRC32
    197 #if HAVE_NEON_DOTPROD
    198  if (features & ZX_ARM64_FEATURE_ISA_DP) flags |= HAS_NEON_DOTPROD;
    199 #endif  // HAVE_NEON_DOTPROD
    200 #if HAVE_NEON_I8MM
    201  if (features & ZX_ARM64_FEATURE_ISA_I8MM) flags |= HAS_NEON_I8MM;
    202 #endif  // HAVE_NEON_I8MM
    203 #if HAVE_SVE
    204  if (features & ZX_ARM64_FEATURE_ISA_SVE) flags |= HAS_SVE;
    205 #endif  // HAVE_SVE
    206  return flags;
    207 }
    208 
    209 #else  // end __Fuchsia__
    210 #error \
    211    "Runtime CPU detection selected, but no CPU detection method " \
    212 "available for your platform. Rerun cmake with -DCONFIG_RUNTIME_CPU_DETECT=0."
    213 #endif
    214 
    215 int aom_arm_cpu_caps(void) {
    216  int flags = 0;
    217  if (!arm_cpu_env_flags(&flags)) {
    218    flags = arm_get_cpu_caps() & arm_cpu_env_mask();
    219  }
    220 
    221  // Restrict flags: FEAT_I8MM assumes that FEAT_DotProd is available.
    222  if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_NEON_I8MM;
    223 
    224  // Restrict flags: SVE assumes that FEAT_{DotProd,I8MM} are available.
    225  if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_SVE;
    226  if (!(flags & HAS_NEON_I8MM)) flags &= ~HAS_SVE;
    227 
    228  // Restrict flags: SVE2 assumes that FEAT_SVE is available.
    229  if (!(flags & HAS_SVE)) flags &= ~HAS_SVE2;
    230 
    231  return flags;
    232 }