cpu.c (13852B)
1 /* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19 #include "config.h" 20 21 #if HAVE_SCHED_GETAFFINITY 22 #ifndef _GNU_SOURCE 23 # define _GNU_SOURCE 24 #endif 25 #include <sched.h> 26 #endif 27 28 #include <stddef.h> 29 #include <stdint.h> 30 #include <stdatomic.h> 31 32 #include "attributes.h" 33 #include "cpu.h" 34 #include "cpu_internal.h" 35 #include "opt.h" 36 #include "common.h" 37 38 #if HAVE_GETPROCESSAFFINITYMASK || HAVE_WINRT 39 #include <windows.h> 40 #endif 41 #if HAVE_SYSCTL 42 #if HAVE_SYS_PARAM_H 43 #include <sys/param.h> 44 #endif 45 #include <sys/types.h> 46 #include <sys/sysctl.h> 47 #endif 48 #if HAVE_UNISTD_H 49 #include <unistd.h> 50 #endif 51 52 #if HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO 53 #include <sys/auxv.h> 54 #endif 55 56 static atomic_int cpu_flags = -1; 57 static atomic_int cpu_count = -1; 58 59 static int get_cpu_flags(void) 60 { 61 #if ARCH_MIPS 62 return ff_get_cpu_flags_mips(); 63 #elif ARCH_AARCH64 64 return ff_get_cpu_flags_aarch64(); 65 #elif ARCH_ARM 66 return ff_get_cpu_flags_arm(); 67 #elif ARCH_PPC 68 return ff_get_cpu_flags_ppc(); 69 #elif ARCH_RISCV 70 return ff_get_cpu_flags_riscv(); 71 #elif ARCH_WASM 72 return ff_get_cpu_flags_wasm(); 73 #elif ARCH_X86 74 return ff_get_cpu_flags_x86(); 75 #elif ARCH_LOONGARCH 76 return ff_get_cpu_flags_loongarch(); 77 #endif 78 return 0; 79 } 80 81 void av_force_cpu_flags(int arg){ 82 if (ARCH_X86 && 83 (arg & ( AV_CPU_FLAG_3DNOW | 84 AV_CPU_FLAG_3DNOWEXT | 85 AV_CPU_FLAG_MMXEXT | 86 AV_CPU_FLAG_SSE | 87 AV_CPU_FLAG_SSE2 | 88 AV_CPU_FLAG_SSE2SLOW | 89 AV_CPU_FLAG_SSE3 | 90 AV_CPU_FLAG_SSE3SLOW | 91 AV_CPU_FLAG_SSSE3 | 92 AV_CPU_FLAG_SSE4 | 93 AV_CPU_FLAG_SSE42 | 94 AV_CPU_FLAG_AVX | 95 AV_CPU_FLAG_AVXSLOW | 96 AV_CPU_FLAG_XOP | 97 AV_CPU_FLAG_FMA3 | 98 AV_CPU_FLAG_FMA4 | 99 AV_CPU_FLAG_AVX2 | 100 AV_CPU_FLAG_AVX512 )) 101 && !(arg & AV_CPU_FLAG_MMX)) { 102 av_log(NULL, AV_LOG_WARNING, "MMX implied by specified flags\n"); 103 arg |= AV_CPU_FLAG_MMX; 104 } 105 106 atomic_store_explicit(&cpu_flags, arg, memory_order_relaxed); 107 } 108 109 int av_get_cpu_flags(void) 110 { 111 int flags = atomic_load_explicit(&cpu_flags, memory_order_relaxed); 112 if (flags == -1) { 113 flags = get_cpu_flags(); 114 atomic_store_explicit(&cpu_flags, flags, memory_order_relaxed); 115 } 116 return flags; 117 } 118 119 int av_parse_cpu_caps(unsigned *flags, const char *s) 120 { 121 static const AVOption cpuflags_opts[] = { 122 { "flags" , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = "flags" }, 123 #if ARCH_PPC 124 { "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ALTIVEC }, .unit = "flags" }, 125 { "vsx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VSX }, .unit = "flags" }, 126 { "power8" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_POWER8 }, .unit = "flags" }, 127 #elif ARCH_X86 128 { "mmx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX }, .unit = "flags" }, 129 { "mmx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX2 }, .unit = "flags" }, 130 { "mmxext" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX2 }, .unit = "flags" }, 131 { "sse" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE }, .unit = "flags" }, 132 { "sse2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE2 }, .unit = "flags" }, 133 { "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE2SLOW }, .unit = "flags" }, 134 { "sse3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE3 }, .unit = "flags" }, 135 { "sse3slow", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE3SLOW }, .unit = "flags" }, 136 { "ssse3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSSE3 }, .unit = "flags" }, 137 { "atom" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ATOM }, .unit = "flags" }, 138 { "sse4.1" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE4 }, .unit = "flags" }, 139 { "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE42 }, .unit = "flags" }, 140 { "avx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX }, .unit = "flags" }, 141 { "avxslow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVXSLOW }, .unit = "flags" }, 142 { "xop" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_XOP }, .unit = "flags" }, 143 { "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA3 }, .unit = "flags" }, 144 { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA4 }, .unit = "flags" }, 145 { "avx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX2 }, .unit = "flags" }, 146 { "bmi1" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_BMI1 }, .unit = "flags" }, 147 { "bmi2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_BMI2 }, .unit = "flags" }, 148 { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOW }, .unit = "flags" }, 149 { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOWEXT }, .unit = "flags" }, 150 { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" }, 151 { "aesni", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI }, .unit = "flags" }, 152 { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512 }, .unit = "flags" }, 153 { "avx512icl", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512ICL }, .unit = "flags" }, 154 { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" }, 155 156 #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX 157 #define CPU_FLAG_P3 CPU_FLAG_P2 | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE 158 #define CPU_FLAG_P4 CPU_FLAG_P3| AV_CPU_FLAG_SSE2 159 { "pentium2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_P2 }, .unit = "flags" }, 160 { "pentium3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_P3 }, .unit = "flags" }, 161 { "pentium4", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_P4 }, .unit = "flags" }, 162 163 #define CPU_FLAG_K62 AV_CPU_FLAG_MMX | AV_CPU_FLAG_3DNOW 164 #define CPU_FLAG_ATHLON CPU_FLAG_K62 | AV_CPU_FLAG_CMOV | AV_CPU_FLAG_3DNOWEXT | AV_CPU_FLAG_MMX2 165 #define CPU_FLAG_ATHLONXP CPU_FLAG_ATHLON | AV_CPU_FLAG_SSE 166 #define CPU_FLAG_K8 CPU_FLAG_ATHLONXP | AV_CPU_FLAG_SSE2 167 { "k6", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX }, .unit = "flags" }, 168 { "k62", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_K62 }, .unit = "flags" }, 169 { "athlon", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_ATHLON }, .unit = "flags" }, 170 { "athlonxp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_ATHLONXP }, .unit = "flags" }, 171 { "k8", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_K8 }, .unit = "flags" }, 172 #elif ARCH_ARM 173 { "armv5te", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV5TE }, .unit = "flags" }, 174 { "armv6", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6 }, .unit = "flags" }, 175 { "armv6t2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6T2 }, .unit = "flags" }, 176 { "vfp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP }, .unit = "flags" }, 177 { "vfp_vm", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP_VM }, .unit = "flags" }, 178 { "vfpv3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFPV3 }, .unit = "flags" }, 179 { "neon", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_NEON }, .unit = "flags" }, 180 { "setend", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SETEND }, .unit = "flags" }, 181 #elif ARCH_AARCH64 182 { "armv8", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV8 }, .unit = "flags" }, 183 { "neon", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_NEON }, .unit = "flags" }, 184 { "vfp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP }, .unit = "flags" }, 185 { "dotprod", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_DOTPROD }, .unit = "flags" }, 186 { "i8mm", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_I8MM }, .unit = "flags" }, 187 { "sve", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE }, .unit = "flags" }, 188 { "sve2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE2 }, .unit = "flags" }, 189 #elif ARCH_MIPS 190 { "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" }, 191 { "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" }, 192 #elif ARCH_LOONGARCH 193 { "lsx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX }, .unit = "flags" }, 194 { "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX }, .unit = "flags" }, 195 #elif ARCH_RISCV 196 { "rvi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVI }, .unit = "flags" }, 197 { "rvb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVB }, .unit = "flags" }, 198 { "zve32x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_I32 }, .unit = "flags" }, 199 { "zve32f", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_F32 }, .unit = "flags" }, 200 { "zve64x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_I64 }, .unit = "flags" }, 201 { "zve64d", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_F64 }, .unit = "flags" }, 202 { "zbb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVB_BASIC }, .unit = "flags" }, 203 { "zvbb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVBB }, .unit = "flags" }, 204 { "misaligned", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_MISALIGNED }, .unit = "flags" }, 205 #elif ARCH_WASM 206 { "simd128", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SIMD128 }, .unit = "flags" }, 207 #endif 208 { NULL }, 209 }; 210 static const AVClass class = { 211 .class_name = "cpuflags", 212 .item_name = av_default_item_name, 213 .option = cpuflags_opts, 214 .version = LIBAVUTIL_VERSION_INT, 215 }; 216 const AVClass *pclass = &class; 217 218 return av_opt_eval_flags(&pclass, &cpuflags_opts[0], s, flags); 219 } 220 221 int av_cpu_count(void) 222 { 223 static atomic_int printed = 0; 224 225 int nb_cpus = 1; 226 int count = 0; 227 #if HAVE_WINRT 228 SYSTEM_INFO sysinfo; 229 #endif 230 #if HAVE_SCHED_GETAFFINITY && defined(CPU_COUNT) 231 cpu_set_t cpuset; 232 233 CPU_ZERO(&cpuset); 234 235 if (!sched_getaffinity(0, sizeof(cpuset), &cpuset)) 236 nb_cpus = CPU_COUNT(&cpuset); 237 #elif HAVE_GETPROCESSAFFINITYMASK 238 DWORD_PTR proc_aff, sys_aff; 239 if (GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff)) 240 nb_cpus = av_popcount64(proc_aff); 241 #elif HAVE_SYSCTL && defined(HW_NCPUONLINE) 242 int mib[2] = { CTL_HW, HW_NCPUONLINE }; 243 size_t len = sizeof(nb_cpus); 244 245 if (sysctl(mib, 2, &nb_cpus, &len, NULL, 0) == -1) 246 nb_cpus = 0; 247 #elif HAVE_SYSCTL && defined(HW_NCPU) 248 int mib[2] = { CTL_HW, HW_NCPU }; 249 size_t len = sizeof(nb_cpus); 250 251 if (sysctl(mib, 2, &nb_cpus, &len, NULL, 0) == -1) 252 nb_cpus = 0; 253 #elif HAVE_SYSCONF && defined(_SC_NPROC_ONLN) 254 nb_cpus = sysconf(_SC_NPROC_ONLN); 255 #elif HAVE_SYSCONF && defined(_SC_NPROCESSORS_ONLN) 256 nb_cpus = sysconf(_SC_NPROCESSORS_ONLN); 257 #elif HAVE_WINRT 258 GetNativeSystemInfo(&sysinfo); 259 nb_cpus = sysinfo.dwNumberOfProcessors; 260 #endif 261 262 if (!atomic_exchange_explicit(&printed, 1, memory_order_relaxed)) 263 av_log(NULL, AV_LOG_DEBUG, "detected %d logical cores\n", nb_cpus); 264 265 count = atomic_load_explicit(&cpu_count, memory_order_relaxed); 266 267 if (count > 0) { 268 nb_cpus = count; 269 av_log(NULL, AV_LOG_DEBUG, "overriding to %d logical cores\n", nb_cpus); 270 } 271 272 return nb_cpus; 273 } 274 275 void av_cpu_force_count(int count) 276 { 277 atomic_store_explicit(&cpu_count, count, memory_order_relaxed); 278 } 279 280 size_t av_cpu_max_align(void) 281 { 282 #if ARCH_MIPS 283 return ff_get_cpu_max_align_mips(); 284 #elif ARCH_AARCH64 285 return ff_get_cpu_max_align_aarch64(); 286 #elif ARCH_ARM 287 return ff_get_cpu_max_align_arm(); 288 #elif ARCH_PPC 289 return ff_get_cpu_max_align_ppc(); 290 #elif ARCH_WASM 291 return ff_get_cpu_max_align_wasm(); 292 #elif ARCH_X86 293 return ff_get_cpu_max_align_x86(); 294 #elif ARCH_LOONGARCH 295 return ff_get_cpu_max_align_loongarch(); 296 #endif 297 298 return 8; 299 } 300 301 unsigned long ff_getauxval(unsigned long type) 302 { 303 #if HAVE_GETAUXVAL 304 return getauxval(type); 305 #elif HAVE_ELF_AUX_INFO 306 unsigned long aux = 0; 307 int ret = elf_aux_info(type, &aux, sizeof(aux)); 308 if (ret != 0) { 309 errno = ret; 310 } 311 return aux; 312 #else 313 errno = ENOSYS; 314 return 0; 315 #endif 316 }