// sysinfo.cc (15312B)
1 // Copyright 2017 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "absl/base/internal/sysinfo.h" 16 17 #include "absl/base/attributes.h" 18 19 #ifdef _WIN32 20 #include <windows.h> 21 #else 22 #include <fcntl.h> 23 #include <pthread.h> 24 #include <sys/stat.h> 25 #include <sys/types.h> 26 #include <unistd.h> 27 #endif 28 29 #ifdef __linux__ 30 #include <sys/syscall.h> 31 #endif 32 33 #if defined(__APPLE__) || defined(__FreeBSD__) 34 #include <sys/sysctl.h> 35 #endif 36 37 #ifdef __FreeBSD__ 38 #include <pthread_np.h> 39 #endif 40 41 #ifdef __NetBSD__ 42 #include <lwp.h> 43 #endif 44 45 #if defined(__myriad2__) 46 #include <rtems.h> 47 #endif 48 49 #if defined(__Fuchsia__) 50 #include <zircon/process.h> 51 #endif 52 53 #include <string.h> 54 55 #include <cassert> 56 #include <cerrno> 57 #include <cstdint> 58 #include <cstdio> 59 #include <cstdlib> 60 #include <ctime> 61 #include <limits> 62 #include <thread> // NOLINT(build/c++11) 63 #include <utility> 64 #include <vector> 65 66 #include "absl/base/call_once.h" 67 #include "absl/base/config.h" 68 #include "absl/base/internal/raw_logging.h" 69 #include "absl/base/internal/spinlock.h" 70 #include "absl/base/internal/unscaledcycleclock.h" 71 #include "absl/base/thread_annotations.h" 72 73 namespace absl { 74 ABSL_NAMESPACE_BEGIN 75 namespace base_internal { 76 77 namespace { 78 79 #if defined(_WIN32) 80 81 // Returns number of bits set in `bitMask` 82 DWORD 
Win32CountSetBits(ULONG_PTR bitMask) { 83 for (DWORD bitSetCount = 0; ; ++bitSetCount) { 84 if (bitMask == 0) return bitSetCount; 85 bitMask &= bitMask - 1; 86 } 87 } 88 89 // Returns the number of logical CPUs using GetLogicalProcessorInformation(), or 90 // 0 if the number of processors is not available or can not be computed. 91 // https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getlogicalprocessorinformation 92 int Win32NumCPUs() { 93 #pragma comment(lib, "kernel32.lib") 94 using Info = SYSTEM_LOGICAL_PROCESSOR_INFORMATION; 95 96 DWORD info_size = sizeof(Info); 97 Info* info(static_cast<Info*>(malloc(info_size))); 98 if (info == nullptr) return 0; 99 100 bool success = GetLogicalProcessorInformation(info, &info_size); 101 if (!success && GetLastError() == ERROR_INSUFFICIENT_BUFFER) { 102 free(info); 103 info = static_cast<Info*>(malloc(info_size)); 104 if (info == nullptr) return 0; 105 success = GetLogicalProcessorInformation(info, &info_size); 106 } 107 108 DWORD logicalProcessorCount = 0; 109 if (success) { 110 Info* ptr = info; 111 DWORD byteOffset = 0; 112 while (byteOffset + sizeof(Info) <= info_size) { 113 switch (ptr->Relationship) { 114 case RelationProcessorCore: 115 logicalProcessorCount += Win32CountSetBits(ptr->ProcessorMask); 116 break; 117 118 case RelationNumaNode: 119 case RelationCache: 120 case RelationProcessorPackage: 121 // Ignore other entries 122 break; 123 124 default: 125 // Ignore unknown entries 126 break; 127 } 128 byteOffset += sizeof(Info); 129 ptr++; 130 } 131 } 132 free(info); 133 return static_cast<int>(logicalProcessorCount); 134 } 135 136 #endif 137 138 } // namespace 139 140 static int GetNumCPUs() { 141 #if defined(__myriad2__) 142 return 1; 143 #elif defined(_WIN32) 144 const int hardware_concurrency = Win32NumCPUs(); 145 return hardware_concurrency ? 
hardware_concurrency : 1; 146 #elif defined(_AIX) 147 return sysconf(_SC_NPROCESSORS_ONLN); 148 #else 149 // Other possibilities: 150 // - Read /sys/devices/system/cpu/online and use cpumask_parse() 151 // - sysconf(_SC_NPROCESSORS_ONLN) 152 return static_cast<int>(std::thread::hardware_concurrency()); 153 #endif 154 } 155 156 #if defined(_WIN32) 157 158 static double GetNominalCPUFrequency() { 159 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \ 160 !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) 161 // UWP apps don't have access to the registry and currently don't provide an 162 // API informing about CPU nominal frequency. 163 return 1.0; 164 #else 165 #pragma comment(lib, "advapi32.lib") // For Reg* functions. 166 HKEY key; 167 // Use the Reg* functions rather than the SH functions because shlwapi.dll 168 // pulls in gdi32.dll which makes process destruction much more costly. 169 if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, 170 "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0, 171 KEY_READ, &key) == ERROR_SUCCESS) { 172 DWORD type = 0; 173 DWORD data = 0; 174 DWORD data_size = sizeof(data); 175 auto result = RegQueryValueExA(key, "~MHz", nullptr, &type, 176 reinterpret_cast<LPBYTE>(&data), &data_size); 177 RegCloseKey(key); 178 if (result == ERROR_SUCCESS && type == REG_DWORD && 179 data_size == sizeof(data)) { 180 return data * 1e6; // Value is MHz. 181 } 182 } 183 return 1.0; 184 #endif // WINAPI_PARTITION_APP && !WINAPI_PARTITION_DESKTOP 185 } 186 187 #elif defined(CTL_HW) && defined(HW_CPU_FREQ) 188 189 static double GetNominalCPUFrequency() { 190 unsigned freq; 191 size_t size = sizeof(freq); 192 int mib[2] = {CTL_HW, HW_CPU_FREQ}; 193 if (sysctl(mib, 2, &freq, &size, nullptr, 0) == 0) { 194 return static_cast<double>(freq); 195 } 196 return 1.0; 197 } 198 199 #else 200 201 // Helper function for reading a long from a file. Returns true if successful 202 // and the memory location pointed to by value is set to the value read. 
203 static bool ReadLongFromFile(const char *file, long *value) { 204 bool ret = false; 205 #if defined(_POSIX_C_SOURCE) 206 const int file_mode = (O_RDONLY | O_CLOEXEC); 207 #else 208 const int file_mode = O_RDONLY; 209 #endif 210 211 int fd = open(file, file_mode); 212 if (fd != -1) { 213 char line[1024]; 214 char *err; 215 memset(line, '\0', sizeof(line)); 216 ssize_t len; 217 do { 218 len = read(fd, line, sizeof(line) - 1); 219 } while (len < 0 && errno == EINTR); 220 if (len <= 0) { 221 ret = false; 222 } else { 223 const long temp_value = strtol(line, &err, 10); 224 if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { 225 *value = temp_value; 226 ret = true; 227 } 228 } 229 close(fd); 230 } 231 return ret; 232 } 233 234 #if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY) 235 236 // Reads a monotonic time source and returns a value in 237 // nanoseconds. The returned value uses an arbitrary epoch, not the 238 // Unix epoch. 239 static int64_t ReadMonotonicClockNanos() { 240 struct timespec t; 241 #ifdef CLOCK_MONOTONIC_RAW 242 int rc = clock_gettime(CLOCK_MONOTONIC_RAW, &t); 243 #else 244 int rc = clock_gettime(CLOCK_MONOTONIC, &t); 245 #endif 246 if (rc != 0) { 247 ABSL_INTERNAL_LOG( 248 FATAL, "clock_gettime() failed: (" + std::to_string(errno) + ")"); 249 } 250 return int64_t{t.tv_sec} * 1000000000 + t.tv_nsec; 251 } 252 253 class UnscaledCycleClockWrapperForInitializeFrequency { 254 public: 255 static int64_t Now() { return base_internal::UnscaledCycleClock::Now(); } 256 }; 257 258 struct TimeTscPair { 259 int64_t time; // From ReadMonotonicClockNanos(). 260 int64_t tsc; // From UnscaledCycleClock::Now(). 261 }; 262 263 // Returns a pair of values (monotonic kernel time, TSC ticks) that 264 // approximately correspond to each other. This is accomplished by 265 // doing several reads and picking the reading with the lowest 266 // latency. 
This approach is used to minimize the probability that 267 // our thread was preempted between clock reads. 268 static TimeTscPair GetTimeTscPair() { 269 int64_t best_latency = std::numeric_limits<int64_t>::max(); 270 TimeTscPair best; 271 for (int i = 0; i < 10; ++i) { 272 int64_t t0 = ReadMonotonicClockNanos(); 273 int64_t tsc = UnscaledCycleClockWrapperForInitializeFrequency::Now(); 274 int64_t t1 = ReadMonotonicClockNanos(); 275 int64_t latency = t1 - t0; 276 if (latency < best_latency) { 277 best_latency = latency; 278 best.time = t0; 279 best.tsc = tsc; 280 } 281 } 282 return best; 283 } 284 285 // Measures and returns the TSC frequency by taking a pair of 286 // measurements approximately `sleep_nanoseconds` apart. 287 static double MeasureTscFrequencyWithSleep(int sleep_nanoseconds) { 288 auto t0 = GetTimeTscPair(); 289 struct timespec ts; 290 ts.tv_sec = 0; 291 ts.tv_nsec = sleep_nanoseconds; 292 while (nanosleep(&ts, &ts) != 0 && errno == EINTR) {} 293 auto t1 = GetTimeTscPair(); 294 double elapsed_ticks = t1.tsc - t0.tsc; 295 double elapsed_time = (t1.time - t0.time) * 1e-9; 296 return elapsed_ticks / elapsed_time; 297 } 298 299 // Measures and returns the TSC frequency by calling 300 // MeasureTscFrequencyWithSleep(), doubling the sleep interval until the 301 // frequency measurement stabilizes. 302 static double MeasureTscFrequency() { 303 double last_measurement = -1.0; 304 int sleep_nanoseconds = 1000000; // 1 millisecond. 305 for (int i = 0; i < 8; ++i) { 306 double measurement = MeasureTscFrequencyWithSleep(sleep_nanoseconds); 307 if (measurement * 0.99 < last_measurement && 308 last_measurement < measurement * 1.01) { 309 // Use the current measurement if it is within 1% of the 310 // previous measurement. 
311 return measurement; 312 } 313 last_measurement = measurement; 314 sleep_nanoseconds *= 2; 315 } 316 return last_measurement; 317 } 318 319 #endif // ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY 320 321 static double GetNominalCPUFrequency() { 322 long freq = 0; 323 324 // Google's production kernel has a patch to export the TSC 325 // frequency through sysfs. If the kernel is exporting the TSC 326 // frequency use that. There are issues where cpuinfo_max_freq 327 // cannot be relied on because the BIOS may be exporting an invalid 328 // p-state (on x86) or p-states may be used to put the processor in 329 // a new mode (turbo mode). Essentially, those frequencies cannot 330 // always be relied upon. The same reasons apply to /proc/cpuinfo as 331 // well. 332 if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { 333 return freq * 1e3; // Value is kHz. 334 } 335 336 #if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY) 337 // On these platforms, the TSC frequency is the nominal CPU 338 // frequency. But without having the kernel export it directly 339 // though /sys/devices/system/cpu/cpu0/tsc_freq_khz, there is no 340 // other way to reliably get the TSC frequency, so we have to 341 // measure it ourselves. Some CPUs abuse cpuinfo_max_freq by 342 // exporting "fake" frequencies for implementing new features. For 343 // example, Intel's turbo mode is enabled by exposing a p-state 344 // value with a higher frequency than that of the real TSC 345 // rate. Because of this, we prefer to measure the TSC rate 346 // ourselves on i386 and x86-64. 347 return MeasureTscFrequency(); 348 #else 349 350 // If CPU scaling is in effect, we want to use the *maximum* 351 // frequency, not whatever CPU speed some random processor happens 352 // to be using now. 353 if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", 354 &freq)) { 355 return freq * 1e3; // Value is kHz. 
356 } 357 358 return 1.0; 359 #endif // !ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY 360 } 361 362 #endif 363 364 ABSL_CONST_INIT static once_flag init_num_cpus_once; 365 ABSL_CONST_INIT static int num_cpus = 0; 366 367 // NumCPUs() may be called before main() and before malloc is properly 368 // initialized, therefore this must not allocate memory. 369 int NumCPUs() { 370 base_internal::LowLevelCallOnce( 371 &init_num_cpus_once, []() { num_cpus = GetNumCPUs(); }); 372 return num_cpus; 373 } 374 375 // A default frequency of 0.0 might be dangerous if it is used in division. 376 ABSL_CONST_INIT static once_flag init_nominal_cpu_frequency_once; 377 ABSL_CONST_INIT static double nominal_cpu_frequency = 1.0; 378 379 // NominalCPUFrequency() may be called before main() and before malloc is 380 // properly initialized, therefore this must not allocate memory. 381 double NominalCPUFrequency() { 382 base_internal::LowLevelCallOnce( 383 &init_nominal_cpu_frequency_once, 384 []() { nominal_cpu_frequency = GetNominalCPUFrequency(); }); 385 return nominal_cpu_frequency; 386 } 387 388 #if defined(_WIN32) 389 390 pid_t GetTID() { 391 return pid_t{GetCurrentThreadId()}; 392 } 393 394 #elif defined(__linux__) 395 396 #ifndef SYS_gettid 397 #define SYS_gettid __NR_gettid 398 #endif 399 400 pid_t GetTID() { 401 return static_cast<pid_t>(syscall(SYS_gettid)); 402 } 403 404 #elif defined(__akaros__) 405 406 pid_t GetTID() { 407 // Akaros has a concept of "vcore context", which is the state the program 408 // is forced into when we need to make a user-level scheduling decision, or 409 // run a signal handler. This is analogous to the interrupt context that a 410 // CPU might enter if it encounters some kind of exception. 411 // 412 // There is no current thread context in vcore context, but we need to give 413 // a reasonable answer if asked for a thread ID (e.g., in a signal handler). 414 // Thread 0 always exists, so if we are in vcore context, we return that. 
415 // 416 // Otherwise, we know (since we are using pthreads) that the uthread struct 417 // current_uthread is pointing to is the first element of a 418 // struct pthread_tcb, so we extract and return the thread ID from that. 419 // 420 // TODO(dcross): Akaros anticipates moving the thread ID to the uthread 421 // structure at some point. We should modify this code to remove the cast 422 // when that happens. 423 if (in_vcore_context()) 424 return 0; 425 return reinterpret_cast<struct pthread_tcb *>(current_uthread)->id; 426 } 427 428 #elif defined(__myriad2__) 429 430 pid_t GetTID() { 431 uint32_t tid; 432 rtems_task_ident(RTEMS_SELF, 0, &tid); 433 return tid; 434 } 435 436 #elif defined(__APPLE__) 437 438 pid_t GetTID() { 439 uint64_t tid; 440 // `nullptr` here implies this thread. This only fails if the specified 441 // thread is invalid or the pointer-to-tid is null, so we needn't worry about 442 // it. 443 pthread_threadid_np(nullptr, &tid); 444 return static_cast<pid_t>(tid); 445 } 446 447 #elif defined(__FreeBSD__) 448 449 pid_t GetTID() { return static_cast<pid_t>(pthread_getthreadid_np()); } 450 451 #elif defined(__OpenBSD__) 452 453 pid_t GetTID() { return getthrid(); } 454 455 #elif defined(__NetBSD__) 456 457 pid_t GetTID() { return static_cast<pid_t>(_lwp_self()); } 458 459 #elif defined(__native_client__) 460 461 pid_t GetTID() { 462 auto* thread = pthread_self(); 463 static_assert(sizeof(pid_t) == sizeof(thread), 464 "In NaCL int expected to be the same size as a pointer"); 465 return reinterpret_cast<pid_t>(thread); 466 } 467 468 #elif defined(__Fuchsia__) 469 470 pid_t GetTID() { 471 // Use our thread handle as the TID, which should be unique within this 472 // process (but may not be globally unique). The handle value was chosen over 473 // a kernel object ID (KOID) because zx_handle_t (32-bits) can be cast to a 474 // pid_t type without loss of precision, but a zx_koid_t (64-bits) cannot. 
475 return static_cast<pid_t>(zx_thread_self()); 476 } 477 478 #else 479 480 // Fallback implementation of `GetTID` using `pthread_self`. 481 pid_t GetTID() { 482 // `pthread_t` need not be arithmetic per POSIX; platforms where it isn't 483 // should be handled above. 484 return static_cast<pid_t>(pthread_self()); 485 } 486 487 #endif 488 489 // GetCachedTID() caches the thread ID in thread-local storage (which is a 490 // userspace construct) to avoid unnecessary system calls. Without this caching, 491 // it can take roughly 98ns, while it takes roughly 1ns with this caching. 492 pid_t GetCachedTID() { 493 #ifdef ABSL_HAVE_THREAD_LOCAL 494 static thread_local pid_t thread_id = GetTID(); 495 return thread_id; 496 #else 497 return GetTID(); 498 #endif // ABSL_HAVE_THREAD_LOCAL 499 } 500 501 } // namespace base_internal 502 ABSL_NAMESPACE_END 503 } // namespace absl