PHC.cpp (80803B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 // PHC is a probabilistic heap checker. A tiny fraction of randomly chosen heap 8 // allocations are subject to some expensive checking via the use of OS page 9 // access protection. A failed check triggers a crash, whereupon useful 10 // information about the failure is put into the crash report. The cost and 11 // coverage for each user is minimal, but spread over the entire user base the 12 // coverage becomes significant. 13 // 14 // The idea comes from Chromium, where it is called GWP-ASAN. (Firefox uses PHC 15 // as the name because GWP-ASAN is long, awkward, and doesn't have any 16 // particular meaning.) 17 // 18 // In the current implementation up to 64 allocations per process can become 19 // PHC allocations. These allocations must be page-sized or smaller. Each PHC 20 // allocation gets its own page, and when the allocation is freed its page is 21 // marked inaccessible until the page is reused for another allocation. This 22 // means that a use-after-free defect (which includes double-frees) will be 23 // caught if the use occurs before the page is reused for another allocation. 24 // The crash report will contain stack traces for the allocation site, the free 25 // site, and the use-after-free site, which is often enough to diagnose the 26 // defect. 27 // 28 // Also, each PHC allocation is followed by a guard page. The PHC allocation is 29 // positioned so that its end abuts the guard page (or as close as possible, 30 // given alignment constraints). This means that a bounds violation at the end 31 // of the allocation (overflow) will be caught. 
The crash report will contain 32 // stack traces for the allocation site and the bounds violation use site, 33 // which is often enough to diagnose the defect. 34 // 35 // (A bounds violation at the start of the allocation (underflow) will not be 36 // caught, unless it is sufficiently large to hit the preceding allocation's 37 // guard page, which is not that likely. It would be possible to look more 38 // assiduously for underflow by randomly placing some allocations at the end of 39 // the page and some at the start of the page, and GWP-ASAN does this. PHC does 40 // not, however, because overflow is likely to be much more common than 41 // underflow in practice.) 42 // 43 // We use a simple heuristic to categorize a guard page access as overflow or 44 // underflow: if the address falls in the lower half of the guard page, we 45 // assume it is overflow, otherwise we assume it is underflow. More 46 // sophisticated heuristics are possible, but this one is very simple, and it is 47 // likely that most overflows/underflows in practice are very close to the page 48 // boundary. 49 // 50 // The design space for the randomization strategy is large. The current 51 // implementation has a large random delay before it starts operating, and a 52 // small random delay between each PHC allocation attempt. Each freed PHC 53 // allocation is quarantined for a medium random delay before being reused, in 54 // order to increase the chance of catching UAFs. 55 // 56 // The basic cost of PHC's operation is as follows. 57 // 58 // - The physical memory cost is 64 pages plus some metadata (including stack 59 // traces) for each page. This amounts to 256 KiB per process on 60 // architectures with 4 KiB pages and 1024 KiB on macOS/AArch64 which uses 61 // 16 KiB pages. 62 // 63 // - The virtual memory cost is the physical memory cost plus the guard pages: 64 // another 64 pages. 
This amounts to another 256 KiB per process on 65 // architectures with 4 KiB pages and 1024 KiB on macOS/AArch64 which uses 66 // 16 KiB pages. PHC is currently only enabled on 64-bit platforms so the 67 // impact of the virtual memory usage is negligible. 68 // 69 // - Every allocation requires a size check and a decrement-and-check of an 70 // atomic counter. When the counter reaches zero a PHC allocation can occur, 71 // which involves marking a page as accessible and getting a stack trace for 72 // the allocation site. Otherwise, mozjemalloc performs the allocation. 73 // 74 // - Every deallocation requires a range check on the pointer to see if it 75 // involves a PHC allocation. (The choice to only do PHC allocations that are 76 // a page or smaller enables this range check, because the 64 pages are 77 // contiguous. Allowing larger allocations would make this more complicated, 78 // and we definitely don't want something as slow as a hash table lookup on 79 // every deallocation.) PHC deallocations involve marking a page as 80 // inaccessible and getting a stack trace for the deallocation site. 81 // 82 // Note that calls to realloc(), free(), and malloc_usable_size() will 83 // immediately crash if the given pointer falls within a page allocation's 84 // page, but does not point to the start of the allocation itself. 85 // 86 // void* p = malloc(64); 87 // free(p + 1); // p+1 doesn't point to the allocation start; crash 88 // 89 // Such crashes will not have the PHC fields in the crash report. 90 // 91 // PHC-specific tests can be run with the following commands: 92 // - gtests: `./mach gtest '*PHC*'` 93 // - xpcshell-tests: `./mach test toolkit/crashreporter/test/unit` 94 // - This runs some non-PHC tests as well. 
95 96 #include "PHC.h" 97 98 #include <stdlib.h> 99 #include <time.h> 100 101 #include <algorithm> 102 103 #ifdef XP_WIN 104 # include <process.h> 105 #else 106 # include <sys/mman.h> 107 # include <sys/types.h> 108 # include <pthread.h> 109 # include <unistd.h> 110 #endif 111 112 #include "mozjemalloc.h" 113 #include "Chunk.h" 114 #include "FdPrintf.h" 115 #include "Mutex.h" 116 #include "mozilla/Assertions.h" 117 #include "mozilla/Atomics.h" 118 #include "mozilla/Attributes.h" 119 #include "mozilla/CheckedInt.h" 120 #include "mozilla/Maybe.h" 121 #include "mozilla/StackWalk.h" 122 #include "mozilla/ThreadLocal.h" 123 #include "mozilla/XorShift128PlusRNG.h" 124 125 using namespace mozilla; 126 127 //--------------------------------------------------------------------------- 128 // Utilities 129 //--------------------------------------------------------------------------- 130 131 #ifdef ANDROID 132 // Android doesn't have pthread_atfork defined in pthread.h. 133 extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void), 134 void (*)(void)); 135 #endif 136 137 #ifndef DISALLOW_COPY_AND_ASSIGN 138 # define DISALLOW_COPY_AND_ASSIGN(T) \ 139 T(const T&); \ 140 void operator=(const T&) 141 #endif 142 143 // This class provides infallible operations for the small number of heap 144 // allocations that PHC does for itself. It would be nice if we could use the 145 // InfallibleAllocPolicy from mozalloc, but PHC cannot use mozalloc. 
146 class InfallibleAllocPolicy { 147 public: 148 static void AbortOnFailure(const void* aP) { 149 if (!aP) { 150 MOZ_CRASH("PHC failed to allocate"); 151 } 152 } 153 154 template <class T> 155 static T* new_() { 156 void* p = MozJemalloc::malloc(sizeof(T)); 157 AbortOnFailure(p); 158 return new (p) T; 159 } 160 161 template <class T> 162 static T* new_(size_t n) { 163 void* p = MozJemalloc::malloc(sizeof(T) * n); 164 AbortOnFailure(p); 165 return new (p) T[n]; 166 } 167 168 // Realloc for arrays, because we don't know the original size we can't 169 // initialize the elements past that size. The caller must do that. 170 template <class T> 171 static T* realloc(T* aOldArray, size_t n) { 172 void* p = MozJemalloc::realloc(aOldArray, sizeof(T) * n); 173 AbortOnFailure(p); 174 return reinterpret_cast<T*>(p); 175 } 176 }; 177 178 //--------------------------------------------------------------------------- 179 // Stack traces 180 //--------------------------------------------------------------------------- 181 182 // This code is similar to the equivalent code within DMD. 183 184 class StackTrace : public phc::StackTrace { 185 public: 186 StackTrace() = default; 187 188 void Clear() { mLength = 0; } 189 190 void Fill(); 191 192 private: 193 static void StackWalkCallback(uint32_t aFrameNumber, void* aPc, void* aSp, 194 void* aClosure) { 195 StackTrace* st = (StackTrace*)aClosure; 196 MOZ_ASSERT(st->mLength < kMaxFrames); 197 st->mPcs[st->mLength] = aPc; 198 st->mLength++; 199 MOZ_ASSERT(st->mLength == aFrameNumber); 200 } 201 }; 202 203 // WARNING WARNING WARNING: this function must only be called when PHC::mMutex 204 // is *not* locked, otherwise we might get deadlocks. 205 // 206 // How? On Windows, MozStackWalk() can lock a mutex, M, from the shared library 207 // loader. Another thread might call malloc() while holding M locked (when 208 // loading a shared library) and try to lock PHC::mMutex, causing a deadlock. 
// So PHC::mMutex can't be locked during the call to MozStackWalk(). (For
// details, see https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8. On
// Linux, something similar can happen; see bug 824340. So we just disallow it
// on all platforms.)
//
// In DMD, to avoid this problem we temporarily unlock the equivalent mutex for
// the MozStackWalk() call. But that's grotty, and things are a bit different
// here, so we just require that stack traces be obtained before locking
// PHC::mMutex.
//
// Unfortunately, there is no reliable way at compile-time or run-time to ensure
// this pre-condition. Hence this large comment.
//
void StackTrace::Fill() {
  mLength = 0;

  // These ifdefs should be kept in sync with the conditions in
  // phc_implies_frame_pointers in build/moz.configure/memory.configure
#if defined(XP_WIN) && defined(_M_IX86)
  // This avoids MozStackWalk(), which causes unusably slow startup on Win32
  // when it is called during static initialization (see bug 1241684).
  //
  // This code is cribbed from the Gecko Profiler, which also uses
  // FramePointerStackWalk() on Win32: Registers::SyncPopulate() for the
  // frame pointer, and GetStackTop() for the stack end.
  CONTEXT context;
  RtlCaptureContext(&context);
  void** fp = reinterpret_cast<void**>(context.Ebp);

  PNT_TIB pTib = reinterpret_cast<PNT_TIB>(NtCurrentTeb());
  void* stackEnd = static_cast<void*>(pTib->StackBase);
  FramePointerStackWalk(StackWalkCallback, kMaxFrames, this, fp, stackEnd);
#elif defined(XP_DARWIN)
  // This avoids MozStackWalk(), which has become unusably slow on Mac due to
  // changes in libunwind.
  //
  // This code is cribbed from the Gecko Profiler, which also uses
  // FramePointerStackWalk() on Mac: Registers::SyncPopulate() for the frame
  // pointer, and GetStackTop() for the stack end.
  //
  // __builtin_frame_address(1) is the caller's frame pointer; the pragmas
  // silence the warning about using a non-zero argument.
#  pragma GCC diagnostic push
#  pragma GCC diagnostic ignored "-Wframe-address"
  void** fp = reinterpret_cast<void**>(__builtin_frame_address(1));
#  pragma GCC diagnostic pop
  void* stackEnd = pthread_get_stackaddr_np(pthread_self());
  FramePointerStackWalk(StackWalkCallback, kMaxFrames, this, fp, stackEnd);
#else
  // All other platforms: the generic (slower, frame-pointer-less) walker.
  MozStackWalk(StackWalkCallback, nullptr, kMaxFrames, this);
#endif
}

//---------------------------------------------------------------------------
// Logging
//---------------------------------------------------------------------------

// Change this to 1 to enable some PHC logging. Useful for debugging.
#define PHC_LOGGING 0

// Printf-style logging; the definition appears later in the file.
static void Log(const char* fmt, ...);

//---------------------------------------------------------------------------
// Array implementation
//---------------------------------------------------------------------------

// Unlike mfbt/Array.h this array has a dynamic size, but unlike a vector its
// size is set explicitly rather than grown as needed.
274 template <typename T> 275 class PHCArray { 276 private: 277 size_t mCapacity = 0; 278 T* mArray = nullptr; 279 280 public: 281 PHCArray() {} 282 283 ~PHCArray() { 284 for (size_t i = 0; i < mCapacity; i++) { 285 mArray[i].~T(); 286 } 287 MozJemalloc::free(mArray); 288 } 289 290 const T& operator[](size_t aIndex) const { 291 MOZ_ASSERT(aIndex < mCapacity); 292 return mArray[aIndex]; 293 } 294 T& operator[](size_t aIndex) { 295 MOZ_ASSERT(aIndex < mCapacity); 296 return mArray[aIndex]; 297 } 298 299 T* begin() { return mArray; } 300 const T* begin() const { return mArray; } 301 const T* end() const { return &mArray[mCapacity]; } 302 303 void Init(size_t aCapacity) { 304 MOZ_ASSERT(mCapacity == 0); 305 MOZ_ASSERT(mArray == nullptr); 306 307 mArray = InfallibleAllocPolicy::new_<T>(aCapacity); 308 mCapacity = aCapacity; 309 } 310 311 size_t Capacity() const { return mCapacity; } 312 313 void GrowTo(size_t aNewCapacity) { 314 MOZ_ASSERT(aNewCapacity > mCapacity); 315 if (mCapacity == 0) { 316 Init(aNewCapacity); 317 return; 318 } 319 mArray = InfallibleAllocPolicy::realloc<T>(mArray, aNewCapacity); 320 for (size_t i = mCapacity; i < aNewCapacity; i++) { 321 new (&mArray[i]) T(); 322 } 323 mCapacity = aNewCapacity; 324 } 325 326 size_t SizeOfExcludingThis() { 327 return MozJemalloc::malloc_usable_size(mArray); 328 } 329 }; 330 331 //--------------------------------------------------------------------------- 332 // Global state 333 //--------------------------------------------------------------------------- 334 335 // Throughout this entire file time is measured as the number of sub-page 336 // allocations performed (by PHC and mozjemalloc combined). `Time` is 64-bit 337 // because we could have more than 2**32 allocations in a long-running session. 338 // `Delay` is 32-bit because the delays used within PHC are always much smaller 339 // than 2**32. Delay must be unsigned so that IsPowerOfTwo() can work on some 340 // Delay values. 
using Time = uint64_t;   // A moment in time.
using Delay = uint32_t;  // A time duration.
// Largest delay PHC works with; half the Delay range.
static constexpr Delay DELAY_MAX = UINT32_MAX / 2;

// PHC only runs if the page size is 4 KiB; anything more is uncommon and would
// use too much memory. So we hardwire this size for all platforms but macOS
// on ARM processors. For the latter we make an exception because the minimum
// page size supported is 16KiB so there's no way to go below that.
static const size_t kPageSize =
#if defined(XP_DARWIN) && defined(__aarch64__)
    16384
#else
    4096
#endif
    ;

// We align the PHC area to a multiple of the jemalloc and JS GC chunk size
// (both use 1MB aligned chunks) so that their address computations don't lead
// from non-PHC memory into PHC memory causing misleading PHC stacks to be
// attached to a crash report.
static const size_t kPhcAlign = 1024 * 1024;

static_assert(IsPowerOfTwo(kPhcAlign));
static_assert((kPhcAlign % kPageSize) == 0);

// PHC will reserve some address space this large, then depending on runtime
// configuration will use a smaller fraction of it. Making
// kPhcVirtualReservation the upper-bound of PHC's memory size.
// * On 32bit systems with less available address space we choose a more
//   moderate value.
// * On 64bit systems we set the limit to so that there are no more than 32,768
//   mappings, half of Linux's default limit (Bug 1969856). For 4KB pages
//   that's 128MB.
#ifdef HAVE_64BIT_BUILD
#  if defined(XP_DARWIN) && defined(__aarch64__)
// 16 KiB pages: a proportionally larger reservation keeps the page count.
static const size_t kPhcVirtualReservation = 512 * 1024 * 1024;
#  else
static const size_t kPhcVirtualReservation = 128 * 1024 * 1024;
#  endif
#else
static const size_t kPhcVirtualReservation = 2 * 1024 * 1024;
#endif

// The amount to decrement from the shared allocation delay each time a thread's
// local allocation delay reaches zero.
static const Delay kDelayDecrementAmount = 256;

// When PHC is disabled on the current thread wait this many allocations before
// accessing sAllocDelay once more.
static const Delay kDelayBackoffAmount = 64;

// When PHC is disabled globally reset the shared delay by this many allocations
// to keep code running on the fast path.
static const Delay kDelayResetWhenDisabled = 64 * 1024;

// The default state for PHC. Either Enabled or OnlyFree.
#define DEFAULT_STATE mozilla::phc::OnlyFree

// The maximum time. Used as the sentinel mReuseTime of in-use pages.
static const Time kMaxTime = ~(Time(0));

// Truncate aRnd to the range (1 .. aAvgDelay*2). If aRnd is random, this
// results in an average value of aAvgDelay + 0.5, which is close enough to
// aAvgDelay. aAvgDelay must be a power-of-two for speed.
constexpr Delay Rnd64ToDelay(Delay aAvgDelay, uint64_t aRnd) {
  MOZ_ASSERT(IsPowerOfTwo(aAvgDelay), "must be a power of two");

  // Masking by (2*aAvgDelay - 1) is a cheap modulo; +1 keeps the result
  // non-zero.
  return (aRnd & (uint64_t(aAvgDelay) * 2 - 1)) + 1;
}

// Clamp a pref-supplied probability and round it up to the power-of-two Delay
// that Rnd64ToDelay() requires.
static Delay CheckProbability(int64_t aProb) {
  // Limit delays calculated from prefs to 0x80000000, this is the largest
  // power-of-two that fits in a Delay since it is a uint32_t.
  // The minimum is 2 that way not every allocation goes straight to PHC.
  return RoundUpPow2(std::clamp(aProb, int64_t(2), int64_t(0x80000000)));
}

// On MacOS, the first __thread/thread_local access calls malloc, which leads
// to an infinite loop. So we use pthread-based TLS instead, which somehow
// doesn't have this problem.
#if !defined(XP_DARWIN)
#  define PHC_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T)
#else
#  define PHC_THREAD_LOCAL(T) \
    detail::ThreadLocal<T, detail::ThreadLocalKeyStorage>
#endif

// Lifecycle of an allocation page; see AllocPageInfo below for the invariants
// attached to each state.
enum class AllocPageState {
  NeverAllocated = 0,
  InUse = 1,
  Freed = 2,
};

// Metadata for each allocation page.
class AllocPageInfo {
 public:
  AllocPageInfo()
      : mState(AllocPageState::NeverAllocated),
        mBaseAddr(nullptr),
        mReuseTime(0) {}

  // The current allocation page state.
  AllocPageState mState;

  // The arena that the allocation is nominally from. This isn't meaningful
  // within PHC, which has no arenas. But it is necessary for reallocation of
  // page allocations as normal allocations, such as in this code:
  //
  //   p = moz_arena_malloc(arenaId, 4096);
  //   realloc(p, 8192);
  //
  // The realloc is more than one page, and thus too large for PHC to handle.
  // Therefore, if PHC handles the first allocation, it must ask mozjemalloc
  // to allocate the 8192 bytes in the correct arena, and to do that, it must
  // call MozJemalloc::moz_arena_malloc with the correct arenaId under the
  // covers. Therefore it must record that arenaId.
  //
  // This field is also needed for jemalloc_ptr_info() to work, because it
  // also returns the arena ID (but only in debug builds).
  //
  // - NeverAllocated: must be 0.
  // - InUse | Freed: can be any valid arena ID value.
  Maybe<arena_id_t> mArenaId;

  // The starting address of the allocation. Will not be the same as the page
  // address unless the allocation is a full page.
  // - NeverAllocated: must be 0.
  // - InUse | Freed: must be within the allocation page.
  uint8_t* mBaseAddr;

  // Usable size is computed as the number of bytes between the pointer and
  // the end of the allocation page. This might be bigger than the requested
  // size, especially if an outsized alignment is requested.
  size_t UsableSize() const {
    return mState == AllocPageState::NeverAllocated
               ? 0
               : kPageSize -
                     (reinterpret_cast<uintptr_t>(mBaseAddr) & (kPageSize - 1));
  }

  // The internal fragmentation for this allocation: the part of the page
  // before mBaseAddr. Zero unless the page is in use.
  size_t FragmentationBytes() const {
    MOZ_ASSERT(kPageSize >= UsableSize());
    return mState == AllocPageState::InUse ? kPageSize - UsableSize() : 0;
  }

  // The allocation stack.
  // - NeverAllocated: Nothing.
  // - InUse | Freed: Some.
  Maybe<StackTrace> mAllocStack;

  // The free stack.
  // - NeverAllocated | InUse: Nothing.
  // - Freed: Some.
  Maybe<StackTrace> mFreeStack;

  // The time at which the page is available for reuse, as measured against
  // mNow. When the page is in use this value will be kMaxTime.
  // - NeverAllocated: must be 0.
  // - InUse: must be kMaxTime.
  // - Freed: must be > 0 and < kMaxTime.
  Time mReuseTime;

#if PHC_LOGGING
  // When the page was freed; only tracked for logging builds.
  Time mFreeTime;
#endif

  // The next index for a free list of pages.
  Maybe<uintptr_t> mNextPage;

  // Check the invariants that must hold while this page is InUse.
  void AssertInUse() const {
    MOZ_ASSERT(mState == AllocPageState::InUse);
    // There is nothing to assert about mArenaId.
    MOZ_ASSERT(mBaseAddr);
    MOZ_ASSERT(UsableSize() > 0);
    MOZ_ASSERT(mAllocStack.isSome());
    MOZ_ASSERT(mFreeStack.isNothing());
    MOZ_ASSERT(mReuseTime == kMaxTime);
    MOZ_ASSERT(!mNextPage);
  }

  // Check the invariants for a page that is NeverAllocated or Freed.
  void AssertNotInUse() const {
    // We can assert a lot about `NeverAllocated` pages, but not much about
    // `Freed` pages.
#ifdef DEBUG
    bool isFresh = mState == AllocPageState::NeverAllocated;
    MOZ_ASSERT(isFresh || mState == AllocPageState::Freed);
    MOZ_ASSERT_IF(isFresh, mArenaId == Nothing());
    MOZ_ASSERT(isFresh == (mBaseAddr == nullptr));
    MOZ_ASSERT(isFresh == (mAllocStack.isNothing()));
    MOZ_ASSERT(isFresh == (mFreeStack.isNothing()));
    MOZ_ASSERT(mReuseTime != kMaxTime);
#endif
  }

  bool IsPageInUse() const { return mState == AllocPageState::InUse; }
  bool IsPageFreed() const { return mState == AllocPageState::Freed; }

  // A page can be (re)allocated if it isn't in use and its quarantine delay
  // has expired.
  bool IsPageAllocatable(Time aNow) const {
    return !IsPageInUse() && aNow >= mReuseTime;
  }

  // Transition NeverAllocated/Freed -> InUse, recording the allocation stack.
  void SetInUse(const Maybe<arena_id_t>& aArenaId, uint8_t* aBaseAddr,
                const StackTrace& aAllocStack) {
    AssertNotInUse();
    mState = AllocPageState::InUse;
    mArenaId = aArenaId;
    mBaseAddr = aBaseAddr;
    mAllocStack = Some(aAllocStack);
    mFreeStack = Nothing();
    mReuseTime = kMaxTime;

    MOZ_ASSERT(!mNextPage);
  }

  // In-place realloc: keep the page InUse but update the base address and
  // record the (more recent) realloc stack.
  void ResizeInUse(const Maybe<arena_id_t>& aArenaId, uint8_t* aNewBaseAddr,
                   const StackTrace& aAllocStack) {
    AssertInUse();

    // page.mState is not changed.
    if (aArenaId.isSome()) {
      // Crash if the arenas don't match.
      MOZ_RELEASE_ASSERT(mArenaId == aArenaId);
    }
    mBaseAddr = aNewBaseAddr;
    // We could just keep the original alloc stack, but the realloc stack is
    // more recent and therefore seems more useful.
    mAllocStack = Some(aAllocStack);
    // mFreeStack is not changed.
    // mReuseTime is not changed.
    // mNextPage is not changed.
  }

  // Transition InUse -> Freed, recording the free stack and quarantining the
  // page until aNow + aReuseDelay.
  void SetPageFreed(const Maybe<arena_id_t>& aArenaId,
                    const StackTrace& aFreeStack, Delay aReuseDelay,
                    Time aNow) {
    AssertInUse();

    mState = AllocPageState::Freed;

    // page.mArenaId is left unchanged, for jemalloc_ptr_info() calls that
    // occur after freeing (e.g. in the PtrInfo test in TestJemalloc.cpp).
    if (aArenaId.isSome()) {
      // Crash if the arenas don't match.
      MOZ_RELEASE_ASSERT(mArenaId == aArenaId);
    }

    // page.musableSize is left unchanged, for reporting on UAF, and for
    // jemalloc_ptr_info() calls that occur after freeing (e.g. in the PtrInfo
    // test in TestJemalloc.cpp).

    // page.mAllocStack is left unchanged, for reporting on UAF.

    mFreeStack = Some(aFreeStack);
#if PHC_LOGGING
    mFreeTime = aNow;
#endif
    mReuseTime = aNow + aReuseDelay;
  }
};

// The virtual address space reserved by PHC. It is shared, immutable global
// state. Initialized by phc_init() and never changed after that. phc_init()
// runs early enough that no synchronization is needed.
class PHCRegion {
 private:
  // The bounds of PHC's virtual address space. These are only ever set once
  // before any threads are spawned, after that they're read only, and therefore
  // can be accessed without a lock.
  uint8_t* mPagesStart = nullptr;
  uint8_t* mPagesLimit = nullptr;

 public:
  // Allocates the allocation pages and the guard pages, contiguously.
  bool AllocVirtualAddresses() {
    MOZ_ASSERT(!mPagesStart || !mPagesLimit);

    // The memory allocated here is never freed, because it would happen at
    // process termination when it would be of little use.

    // On Windows in particular we want to control how the memory is initially
    // reserved. Windows pages memory in immediately which creates performance
    // problems and could affect stability.
    // Reserve (but do not commit) the whole region in one aligned mapping.
    void* pages =
        pages_mmap_aligned(kPhcVirtualReservation, kPhcAlign, ReserveOnly);
    if (!pages) {
      return false;
    }

    mPagesStart = static_cast<uint8_t*>(pages);
    mPagesLimit = mPagesStart + kPhcVirtualReservation;
    Log("AllocVirtualAddresses at %p..%p\n", mPagesStart, mPagesLimit);
    return true;
  }

  constexpr PHCRegion() {}

  // True if aPtr lies in the guard page that precedes the first allocation
  // page (the region starts with a guard page; see AllocPagePtr()).
  bool IsInFirstGuardPage(const void* aPtr) {
    MOZ_ASSERT(mPagesStart != nullptr && mPagesLimit != nullptr);
    return mPagesStart <= aPtr && aPtr < mPagesStart + kPageSize;
  }

  // Get the address of the allocation page referred to via an index. Used when
  // marking the page as accessible/inaccessible.
  uint8_t* AllocPagePtr(uintptr_t aIndex) {
    MOZ_ASSERT(mPagesStart != nullptr && mPagesLimit != nullptr);
    // Multiply by two and add one to account for allocation pages *and* guard
    // pages.
    return mPagesStart + (2 * aIndex + 1) * kPageSize;
  }

  // Cheap range check used on every deallocation to see if a pointer could be
  // a PHC allocation.
  MOZ_ALWAYS_INLINE bool WithinBounds(const void* aPtr) const {
    MOZ_ASSERT(mPagesStart && mPagesLimit);
    return aPtr >= mPagesStart && aPtr < mPagesLimit;
  }

  const uint8_t* PagesStart() const { return mPagesStart; }

  // Bytes reserved, minus one page; zero if nothing was reserved.
  size_t ReservedBytes() const {
    return mPagesStart ? kPhcVirtualReservation - kPageSize : 0;
  }
};

class PtrKind;

// Shared, mutable global state. Many fields are protected by sMutex; functions
// that access those fields should take a PHCLock as proof that mMutex is held.
// Other fields are TLS or Atomic and don't need the lock.
class PHC {
 public:
  // The RNG seeds here are poor, but non-reentrant since this can be called
  // from malloc(). SetState() will reset the RNG later.
  PHC() : mRNG(RandomSeed<1>(), RandomSeed<2>()) {
    mMutex.Init();
    // All three TLS slots must initialise or PHC cannot operate safely.
    if (!tlsIsDisabled.init()) {
      MOZ_CRASH();
    }
    if (!tlsAllocDelay.init()) {
      MOZ_CRASH();
    }
    if (!tlsLastDelay.init()) {
      MOZ_CRASH();
    }

    // This constructor is part of PHC's very early initialisation,
    // see phc_init(), and if PHC is default-on it'll start marking allocations
    // and we must setup the delay. However once XPCOM starts it'll call
    // SetState() which will re-initialise the RNG and allocation delay.

#ifdef EARLY_BETA_OR_EARLIER
    Resize(16 * 1024 * 1024);
#else
    // Before Bug 1867191 PHC used no more than approximately 1.1MB when it was
    // set to a round number of 256 pages. To keep the size the same we now
    // specify this strange total size, but will follow-up with a more sensible
    // maximum in the future.
    Resize((1024 + 128) * 1024);
#endif

    {
      MutexAutoLock lock(mMutex);
      ForceSetNewAllocDelay(Rnd64ToDelay(mAvgFirstAllocDelay, Random64()));
    }
  }

  // Grow (never shrink) the page table to cover aSizeBytes of allocation
  // pages, capped by the virtual reservation; new pages go on the free list.
  void Resize(size_t aSizeBytes) {
    // -1 since the last page in the virtual address space must be a guard page.
    size_t max_pages = (kPhcVirtualReservation / kPageSize / 2) - 1;
    size_t size_pages = aSizeBytes / kPageSize;
    size_pages = std::min(size_pages, max_pages);

    MutexAutoLock lock(mMutex);

    size_t old_size_pages = NumAllocPages();
    if (size_pages > old_size_pages) {
      Log("Growing PHC storage from %zu to %zu\n", old_size_pages, size_pages);
      mAllocPages.GrowTo(size_pages);
      for (size_t i = old_size_pages; i < size_pages; i++) {
        AppendPageToFreeList(i);
      }
    } else if (size_pages < old_size_pages) {
      // Shrinking is deliberately unsupported.
      Log("Shrink requested and ignored.");
    }
  }

  uint64_t Random64() MOZ_REQUIRES(mMutex) { return mRNG.next(); }

  PtrKind GetPtrKind(const void* aPtr);

  // Get the address of the allocation page referred to via an index. Used
  // when checking pointers against page boundaries.
  uint8_t* AllocPageBaseAddr(uintptr_t aIndex) MOZ_REQUIRES(mMutex) {
    return mAllocPages[aIndex].mBaseAddr;
  }

  Maybe<arena_id_t> PageArena(uintptr_t aIndex) MOZ_REQUIRES(mMutex) {
    const AllocPageInfo& page = mAllocPages[aIndex];
    page.AssertInUse();

    return page.mArenaId;
  }

  size_t PageUsableSize(uintptr_t aIndex) MOZ_REQUIRES(mMutex) {
    const AllocPageInfo& page = mAllocPages[aIndex];
    page.AssertInUse();

    return page.UsableSize();
  }

  // Sum allocated/fragmentation/metadata byte counts into aInfo.
  void GetMemoryUsage(phc::MemoryUsage& aInfo) MOZ_EXCLUDES(mMutex) {
    MutexAutoLock lock(mMutex);

    aInfo = phc::MemoryUsage();
    for (const auto& page : mAllocPages) {
      if (page.IsPageInUse()) {
        aInfo.mAllocatedBytes += page.UsableSize();
        aInfo.mFragmentationBytes += page.FragmentationBytes();
      }
    }

    // We know `this` is heap allocated.
    aInfo.mMetadataBytes = MozJemalloc::malloc_usable_size(this) +
                           mAllocPages.SizeOfExcludingThis();
  }

  void SetPageInUse(uintptr_t aIndex, const Maybe<arena_id_t>& aArenaId,
                    uint8_t* aBaseAddr, const StackTrace& aAllocStack)
      MOZ_REQUIRES(mMutex) {
    mAllocPages[aIndex].SetInUse(aArenaId, aBaseAddr, aAllocStack);
  }

#if PHC_LOGGING
  Time GetFreeTime(uintptr_t aIndex) const MOZ_REQUIRES(mMutex) {
    return mAllocPages[aIndex].mFreeTime;
  }
#endif

  void ResizePageInUse(uintptr_t aIndex, const Maybe<arena_id_t>& aArenaId,
                       uint8_t* aNewBaseAddr, const StackTrace& aAllocStack)
      MOZ_REQUIRES(mMutex) {
    mAllocPages[aIndex].ResizeInUse(aArenaId, aNewBaseAddr, aAllocStack);
  };

  // Mark a page as freed (quarantined for aReuseDelay) and put it back on the
  // free list.
  void SetPageFreed(uintptr_t aIndex, const Maybe<arena_id_t>& aArenaId,
                    const StackTrace& aFreeStack, Delay aReuseDelay)
      MOZ_REQUIRES(mMutex) {
    AllocPageInfo& page = mAllocPages[aIndex];

    page.SetPageFreed(aArenaId, aFreeStack, aReuseDelay, Now());

    MOZ_ASSERT(!page.mNextPage);
    AppendPageToFreeList(aIndex);
  }

  static void CrashOnGuardPage(void* aPtr) {
    // An operation on a guard page? This is a bounds violation. Deliberately
    // touch the page in question to cause a crash that triggers the usual PHC
    // machinery.
    Log("CrashOnGuardPage(%p), bounds violation\n", aPtr);
    *static_cast<uint8_t*>(aPtr) = 0;
    MOZ_CRASH("unreachable");
  }

  // Validate a pointer passed to free()/realloc()/etc.: it must be the exact
  // base address of a live allocation, otherwise crash.
  void EnsureValidAndInUse(void* aPtr, uintptr_t aIndex) MOZ_REQUIRES(mMutex) {
    const AllocPageInfo& page = mAllocPages[aIndex];

    // The pointer must point to the start of the allocation.
    MOZ_RELEASE_ASSERT(page.mBaseAddr == aPtr);

    if (page.mState == AllocPageState::Freed) {
      Log("EnsureValidAndInUse(%p), use-after-free\n", aPtr);
      // An operation on a freed page? This is a particular kind of
      // use-after-free. Deliberately touch the page in question, in order to
      // cause a crash that triggers the usual PHC machinery. But unlock mMutex
      // first, because that self-same PHC machinery needs to re-lock it, and
      // the crash causes non-local control flow so mMutex won't be unlocked
      // the normal way in the caller.
      mMutex.Unlock();
      *static_cast<uint8_t*>(aPtr) = 0;
      MOZ_CRASH("unreachable");
    }
  }

  // This expects sPHC::mMutex to be locked but can't check it with a parameter
  // since we try-lock it.
  void FillAddrInfo(uintptr_t aIndex, const void* aBaseAddr, bool isGuardPage,
                    phc::AddrInfo& aOut) MOZ_REQUIRES(mMutex) {
    const AllocPageInfo& page = mAllocPages[aIndex];
    // Map the page's state to the crash-report address kind.
    if (isGuardPage) {
      aOut.mKind = phc::AddrInfo::Kind::GuardPage;
    } else {
      switch (page.mState) {
        case AllocPageState::NeverAllocated:
          aOut.mKind = phc::AddrInfo::Kind::NeverAllocatedPage;
          break;

        case AllocPageState::InUse:
          aOut.mKind = phc::AddrInfo::Kind::InUsePage;
          break;

        case AllocPageState::Freed:
          aOut.mKind = phc::AddrInfo::Kind::FreedPage;
          break;

        default:
          MOZ_CRASH();
      }
    }
    aOut.mBaseAddr = page.mBaseAddr;
    aOut.mUsableSize = page.UsableSize();
    aOut.mAllocStack = page.mAllocStack;
    aOut.mFreeStack = page.mFreeStack;
  }

  // Fill *aInfo for jemalloc_ptr_info(): report live/freed allocations only
  // when aPtr is inside the allocation's usable bytes.
  void FillJemallocPtrInfo(const void* aPtr, uintptr_t aIndex,
                           jemalloc_ptr_info_t* aInfo) MOZ_REQUIRES(mMutex) {
    const AllocPageInfo& page = mAllocPages[aIndex];
    switch (page.mState) {
      case AllocPageState::NeverAllocated:
        break;

      case AllocPageState::InUse: {
        // Only return TagLiveAlloc if the pointer is within the bounds of the
        // allocation's usable size.
        uint8_t* base = page.mBaseAddr;
        uint8_t* limit = base + page.UsableSize();
        if (base <= aPtr && aPtr < limit) {
          *aInfo = {TagLiveAlloc, page.mBaseAddr, page.UsableSize(),
                    page.mArenaId.valueOr(0)};
          return;
        }
        break;
      }

      case AllocPageState::Freed: {
        // Only return TagFreedAlloc if the pointer is within the bounds of the
        // former allocation's usable size.
878 uint8_t* base = page.mBaseAddr; 879 uint8_t* limit = base + page.UsableSize(); 880 if (base <= aPtr && aPtr < limit) { 881 *aInfo = {TagFreedAlloc, page.mBaseAddr, page.UsableSize(), 882 page.mArenaId.valueOr(0)}; 883 return; 884 } 885 break; 886 } 887 888 default: 889 MOZ_CRASH(); 890 } 891 892 // Pointers into guard pages will end up here, as will pointers into 893 // allocation pages that aren't within the allocation's bounds. 894 *aInfo = {TagUnknown, nullptr, 0, 0}; 895 } 896 897 #ifndef XP_WIN 898 static void prefork() MOZ_NO_THREAD_SAFETY_ANALYSIS { 899 PHC::sPHC->mMutex.Lock(); 900 } 901 static void postfork_parent() MOZ_NO_THREAD_SAFETY_ANALYSIS { 902 PHC::sPHC->mMutex.Unlock(); 903 } 904 static void postfork_child() { PHC::sPHC->mMutex.Init(); } 905 #endif 906 907 void IncPageAllocHits() MOZ_REQUIRES(mMutex) { 908 #if PHC_LOGGING 909 mPageAllocHits++; 910 #endif 911 } 912 void IncPageAllocMisses() MOZ_REQUIRES(mMutex) { 913 #if PHC_LOGGING 914 mPageAllocMisses++; 915 #endif 916 } 917 918 phc::PHCStats GetPageStatsLocked() MOZ_REQUIRES(mMutex) { 919 phc::PHCStats stats; 920 921 for (const auto& page : mAllocPages) { 922 stats.mSlotsAllocated += page.IsPageInUse() ? 1 : 0; 923 stats.mSlotsFreed += page.IsPageFreed() ? 1 : 0; 924 } 925 stats.mSlotsUnused = 926 NumAllocPages() - stats.mSlotsAllocated - stats.mSlotsFreed; 927 928 return stats; 929 } 930 931 phc::PHCStats GetPageStats() MOZ_EXCLUDES(mMutex) { 932 MutexAutoLock lock(mMutex); 933 return GetPageStatsLocked(); 934 } 935 936 #if PHC_LOGGING 937 size_t PageAllocHits() MOZ_REQUIRES(mMutex) { return mPageAllocHits; } 938 size_t PageAllocAttempts() MOZ_REQUIRES(mMutex) { 939 return mPageAllocHits + mPageAllocMisses; 940 } 941 942 // This is an integer because FdPrintf only supports integer printing. 
943 size_t PageAllocHitRate() MOZ_REQUIRES(mMutex) { 944 return mPageAllocHits * 100 / (mPageAllocHits + mPageAllocMisses); 945 } 946 #endif 947 948 void LogNoAlloc(size_t aReqSize, size_t aAlignment, Delay newAllocDelay); 949 950 // Should we make new PHC allocations? 951 bool ShouldMakeNewAllocations() const { 952 return mPhcState == mozilla::phc::Enabled; 953 } 954 955 using PHCState = mozilla::phc::PHCState; 956 void SetState(PHCState aState) { 957 if (mPhcState != PHCState::Enabled && aState == PHCState::Enabled) { 958 MutexAutoLock lock(mMutex); 959 // Reset the RNG at this point with a better seed. 960 ResetRNG(); 961 ForceSetNewAllocDelay(Rnd64ToDelay(mAvgFirstAllocDelay, Random64())); 962 } 963 964 mPhcState = aState; 965 } 966 967 void ResetRNG() MOZ_REQUIRES(mMutex) { 968 mRNG = non_crypto::XorShift128PlusRNG(RandomSeed<0>(), RandomSeed<1>()); 969 } 970 971 void SetProbabilities(int64_t aAvgDelayFirst, int64_t aAvgDelayNormal, 972 int64_t aAvgDelayPageReuse) MOZ_EXCLUDES(mMutex) { 973 MutexAutoLock lock(mMutex); 974 975 mAvgFirstAllocDelay = CheckProbability(aAvgDelayFirst); 976 mAvgAllocDelay = CheckProbability(aAvgDelayNormal); 977 mAvgPageReuseDelay = CheckProbability(aAvgDelayPageReuse); 978 } 979 980 static void DisableOnCurrentThread() { 981 MOZ_ASSERT(!tlsIsDisabled.get()); 982 tlsIsDisabled.set(true); 983 } 984 985 void EnableOnCurrentThread() { 986 MOZ_ASSERT(tlsIsDisabled.get()); 987 tlsIsDisabled.set(false); 988 } 989 990 static bool IsDisabledOnCurrentThread() { return tlsIsDisabled.get(); } 991 992 static Time Now() { 993 if (!sPHC) { 994 return 0; 995 } 996 997 return sPHC->mNow; 998 } 999 1000 void AdvanceNow(uint32_t delay = 0) { 1001 mNow += tlsLastDelay.get() - delay; 1002 tlsLastDelay.set(delay); 1003 } 1004 1005 // Decrements the delay and returns true if it's time to make a new PHC 1006 // allocation. 
  // Decrement this thread's local portion of the allocation delay, spilling
  // over to the shared delay (sAllocDelay) when the local portion is
  // exhausted. Returns true when it's time to attempt a new PHC allocation.
  static bool DecrementDelay() {
    const Delay alloc_delay = tlsAllocDelay.get();

    if (MOZ_LIKELY(alloc_delay > 0)) {
      tlsAllocDelay.set(alloc_delay - 1);
      return false;
    }
    // The local delay has expired, check the shared delay. This path is also
    // executed on a new thread's first allocation, the result is the same: all
    // the thread's TLS fields will be initialised.

    // This accesses sPHC but we want to ensure it's still a static member
    // function so that sPHC isn't dereferenced until after the hot path above.
    MOZ_ASSERT(sPHC);
    sPHC->AdvanceNow();

    // Use an atomic fetch-and-subtract. This uses unsigned underflow semantics
    // to avoid doing a full compare-and-swap.
    Delay new_delay = (sAllocDelay -= kDelayDecrementAmount);
    Delay old_delay = new_delay + kDelayDecrementAmount;
    if (MOZ_LIKELY(new_delay < DELAY_MAX)) {
      // Normal case, we decremented the shared delay but it's not yet
      // underflowed.
      tlsAllocDelay.set(kDelayDecrementAmount);
      tlsLastDelay.set(kDelayDecrementAmount);
      Log("Update sAllocDelay <- %zu, tlsAllocDelay <- %zu\n",
          size_t(new_delay), size_t(kDelayDecrementAmount));
      return false;
    }

    if (old_delay < new_delay) {
      // The shared delay only just underflowed, so unless we hit exactly zero
      // we should set our local counter and continue.
      Log("Update sAllocDelay <- %zu, tlsAllocDelay <- %zu\n",
          size_t(new_delay), size_t(old_delay));
      if (old_delay == 0) {
        // We don't need to set tlsAllocDelay because it's already zero, we know
        // because the condition at the beginning of this function failed.
        return true;
      }
      tlsAllocDelay.set(old_delay);
      tlsLastDelay.set(old_delay);
      return false;
    }

    // The delay underflowed on another thread or a previous failed allocation
    // by this thread. Return true and attempt the next allocation, if the
    // other thread wins we'll check for that before committing.
    Log("Update sAllocDelay <- %zu, tlsAllocDelay <- %zu\n", size_t(new_delay),
        size_t(alloc_delay));
    return true;
  }

  static void ResetLocalAllocDelay(Delay aDelay = 0) {
    // We could take some delay from the shared delay but we'd need a
    // compare-and-swap because this is called on paths that don't make
    // allocations. Or we can set the local delay to zero and let it get
    // initialised on the next allocation.
    tlsAllocDelay.set(aDelay);
    tlsLastDelay.set(aDelay);
  }

  // Unconditionally overwrite the shared delay; used when there is no race to
  // win (e.g. enabling/disabling PHC).
  static void ForceSetNewAllocDelay(Delay aNewAllocDelay) {
    Log("Setting sAllocDelay <- %zu\n", size_t(aNewAllocDelay));
    sAllocDelay = aNewAllocDelay;
    ResetLocalAllocDelay();
  }

  // Set a new allocation delay and return true if the delay was less than zero
  // (but it's unsigned so interpret it as signed) indicating that we won the
  // race to make the next allocation.
  static bool SetNewAllocDelay(Delay aNewAllocDelay) {
    bool cas_retry;
    do {
      // We read the current delay on every iteration, we consider that the PHC
      // allocation is still "up for grabs" if sAllocDelay < 0. This is safe
      // even while other threads continuing to fetch-and-subtract sAllocDelay
      // in DecrementDelay(), up to DELAY_MAX (2^31) calls to DecrementDelay().
      Delay read_delay = sAllocDelay;
      if (read_delay < DELAY_MAX) {
        // Another thread already set a valid delay.
        Log("Observe delay %zu this thread lost the race\n",
            size_t(read_delay));
        ResetLocalAllocDelay();
        return false;
      } else {
        Log("Preparing for CAS, read sAllocDelay %zu\n", size_t(read_delay));
      }

      cas_retry = !sAllocDelay.compareExchange(read_delay, aNewAllocDelay);
      if (cas_retry) {
        Log("Lost the CAS, sAllocDelay is now %zu\n", size_t(sAllocDelay));
        cpu_pause();
        // We raced against another thread and lost.
      }
    } while (cas_retry);
    Log("Won the CAS, set sAllocDelay = %zu\n", size_t(sAllocDelay));
    ResetLocalAllocDelay();
    return true;
  }

  static Delay LocalAllocDelay() { return tlsAllocDelay.get(); }
  static Delay SharedAllocDelay() { return sAllocDelay; }

  static Delay LastDelay() { return tlsLastDelay.get(); }

  // Pop the head of the free-page list if it is reusable at time |now|.
  // Pages are appended at the tail when freed, so the list is ordered by free
  // time: if the head isn't reusable yet, no page on the list is.
  Maybe<uintptr_t> PopNextFreeIfAllocatable(Time now) MOZ_REQUIRES(mMutex) {
    if (!mFreePageListHead) {
      return Nothing();
    }

    uintptr_t index = mFreePageListHead.value();

    MOZ_RELEASE_ASSERT(index < NumAllocPages());
    AllocPageInfo& page = mAllocPages[index];
    page.AssertNotInUse();

    if (!page.IsPageAllocatable(now)) {
      return Nothing();
    }

    mFreePageListHead = page.mNextPage;
    page.mNextPage = Nothing();
    if (!mFreePageListHead) {
      mFreePageListTail = Nothing();
    }

    return Some(index);
  }

  // Put a page popped by PopNextFreeIfAllocatable() back at the head of the
  // free list, preserving the list's free-time ordering.
  void UnpopNextFree(uintptr_t index) MOZ_REQUIRES(mMutex) {
    AllocPageInfo& page = mAllocPages[index];
    MOZ_ASSERT(!page.mNextPage);

    page.mNextPage = mFreePageListHead;
    mFreePageListHead = Some(index);
    if (!mFreePageListTail) {
      mFreePageListTail = Some(index);
    }
  }

  // Append a just-freed page to the tail of the free-page list. The page must
  // not already be on the list.
  void AppendPageToFreeList(uintptr_t aIndex) MOZ_REQUIRES(mMutex) {
    MOZ_RELEASE_ASSERT(aIndex < NumAllocPages());
    AllocPageInfo& page = mAllocPages[aIndex];
    MOZ_ASSERT(!page.mNextPage);
    MOZ_ASSERT(mFreePageListHead != Some(aIndex) &&
               mFreePageListTail != Some(aIndex));

    if (!mFreePageListTail) {
      // The list is empty this page will become the beginning and end.
      MOZ_ASSERT(!mFreePageListHead);
      mFreePageListHead = Some(aIndex);
    } else {
      MOZ_ASSERT(mFreePageListTail.value() < NumAllocPages());
      AllocPageInfo& tail_page = mAllocPages[mFreePageListTail.value()];
      MOZ_ASSERT(!tail_page.mNextPage);
      tail_page.mNextPage = Some(aIndex);
    }
    page.mNextPage = Nothing();
    mFreePageListTail = Some(aIndex);
  }

 private:
  template <int N>
  uint64_t RandomSeed() {
    // An older version of this code used RandomUint64() here, but on Mac that
    // function uses arc4random(), which can allocate, which would cause
    // re-entry, which would be bad. So we just use time(), a local variable
    // address and a global variable address. These are mediocre sources of
    // entropy, but good enough for PHC.
    static_assert(N == 0 || N == 1 || N == 2, "must be 0, 1 or 2");
    uint64_t seed;
    if (N == 0) {
      time_t t = time(nullptr);
      seed = t ^ (t << 32);
    } else if (N == 1) {
      seed = uintptr_t(&seed) ^ (uintptr_t(&seed) << 32);
    } else {
      seed = uintptr_t(&sRegion) ^ (uintptr_t(&sRegion) << 32);
    }
    return seed;
  }

 public:
  // Attempt a page allocation if the time and the size are right. Allocated
  // memory is zeroed if aZero is true. On failure, the caller should attempt a
  // normal allocation via MozJemalloc. Can be called in a context where
  // PHC::mMutex is locked.
  void* MaybePageAlloc(const Maybe<arena_id_t>& aArenaId, size_t aReqSize,
                       size_t aAlignment, bool aZero);

  // Protect the page at aIndex and record it as freed (defined out-of-line).
  void FreePage(uintptr_t aIndex, const Maybe<arena_id_t>& aArenaId,
                const StackTrace& aFreeStack, Delay aReuseDelay);

  // This handles both free and moz_arena_free.
  void PageFree(const Maybe<arena_id_t>& aArenaId, void* aPtr);

  Maybe<void*> PageRealloc(const Maybe<arena_id_t>& aArenaId, void* aOldPtr,
                           size_t aNewSize);

  void PagePtrInfo(const void* aPtr, jemalloc_ptr_info_t* aInfo);

  size_t PtrUsableSize(usable_ptr_t aPtr);

  bool IsPHCAllocation(const void* aPtr, mozilla::phc::AddrInfo* aOut);

  void Crash(const char* aMessage);

 private:
  // To improve locality we try to order this file by how frequently different
  // fields are modified and place all the modified-together fields early and
  // ideally within a single cache line.
  // The mutex that protects the other members.
  alignas(kCacheLineSize) Mutex mMutex MOZ_UNANNOTATED;

  // The current time. We use ReleaseAcquire semantics since we attempt to
  // update this by larger increments and don't want to lose an entire update.
  Atomic<Time, ReleaseAcquire> mNow;

  // This will only ever be updated from one thread. The other threads should
  // eventually get the update.
  Atomic<PHCState, Relaxed> mPhcState =
      Atomic<PHCState, Relaxed>(DEFAULT_STATE);

  // RNG for deciding which allocations to treat specially. It doesn't need to
  // be high quality.
  //
  // This is a raw pointer for the reason explained in the comment above
  // PHC's constructor. Don't change it to UniquePtr or anything like that.
  non_crypto::XorShift128PlusRNG mRNG MOZ_GUARDED_BY(mMutex);

  // A linked list of free pages. Pages are allocated from the head of the list
  // and returned to the tail. The list will naturally order itself by "last
  // freed time" so if the head of the list can't satisfy an allocation due to
  // time then none of the pages can.
  Maybe<uintptr_t> mFreePageListHead MOZ_GUARDED_BY(mMutex);
  Maybe<uintptr_t> mFreePageListTail MOZ_GUARDED_BY(mMutex);

#if PHC_LOGGING
  // How many allocations that could have been page allocs actually were? As
  // constrained kNumAllocPages. If the hit ratio isn't close to 100% it's
  // likely that the global constants are poorly chosen.
  size_t mPageAllocHits MOZ_GUARDED_BY(mMutex) = 0;
  size_t mPageAllocMisses MOZ_GUARDED_BY(mMutex) = 0;
#endif

  // The remaining fields are updated much less often, place them on the next
  // cache line.

  // The average delay before doing any page allocations at the start of a
  // process. Note that roughly 1 million allocations occur in the main process
  // while starting the browser. The delay range is 1..gAvgFirstAllocDelay*2.
  alignas(kCacheLineSize) Delay mAvgFirstAllocDelay
      MOZ_GUARDED_BY(mMutex) = 64 * 1024;

  // The average delay until the next attempted page allocation, once we get
  // past the first delay. The delay range is 1..kAvgAllocDelay*2.
  Delay mAvgAllocDelay MOZ_GUARDED_BY(mMutex) = 16 * 1024;

  // The average delay before reusing a freed page. Should be significantly
  // larger than kAvgAllocDelay, otherwise there's not much point in having it.
  // The delay range is (kAvgAllocDelay / 2)..(kAvgAllocDelay / 2 * 3). This is
  // different to the other delay ranges in not having a minimum of 1, because
  // that's such a short delay that there is a high likelihood of bad stacks in
  // any crash report.
  Delay mAvgPageReuseDelay MOZ_GUARDED_BY(mMutex) = 256 * 1024;

  // When true, PHC does as little as possible.
  //
  // (a) It does not allocate any new page allocations.
  //
  // (b) It avoids doing any operations that might call malloc/free/etc., which
  //     would cause re-entry into PHC. (In practice, MozStackWalk() is the
  //     only such operation.) Note that calls to the functions in MozJemalloc
  //     are ok.
  //
  // For example, replace_malloc() will just fall back to mozjemalloc. However,
  // operations involving existing allocations are more complex, because those
  // existing allocations may be page allocations. For example, if
  // replace_free() is passed a page allocation on a PHC-disabled thread, it
  // will free the page allocation in the usual way, but it will get a dummy
  // freeStack in order to avoid calling MozStackWalk(), as per (b) above.
  //
  // This single disabling mechanism has two distinct uses.
  //
  // - It's used to prevent re-entry into PHC, which can cause correctness
  //   problems. For example, consider this sequence.
  //
  //   1. enter replace_free()
  //   2. which calls PageFree()
  //   3. which calls MozStackWalk()
  //   4. which locks a mutex M, and then calls malloc
  //   5. enter replace_malloc()
  //   6. which calls MaybePageAlloc()
  //   7. which calls MozStackWalk()
  //   8. which (re)locks a mutex M --> deadlock
  //
  //   We avoid this sequence by "disabling" the thread in PageFree() (at step
  //   2), which causes MaybePageAlloc() to fail, avoiding the call to
  //   MozStackWalk() (at step 7).
  //
  //   In practice, realloc or free of a PHC allocation is unlikely on a thread
  //   that is disabled because of this use: MozStackWalk() will probably only
  //   realloc/free allocations that it allocated itself, but those won't be
  //   page allocations because PHC is disabled before calling MozStackWalk().
  //
  //   (Note that MaybePageAlloc() could safely do a page allocation so long as
  //   it avoided calling MozStackWalk() by getting a dummy allocStack. But it
  //   wouldn't be useful, and it would prevent the second use below.)
  //
  // - It's used to prevent PHC allocations in some tests that rely on
  //   mozjemalloc's exact allocation behaviour, which PHC does not replicate
  //   exactly. (Note that (b) isn't necessary for this use -- MozStackWalk()
  //   could be safely called -- but it is necessary for the first use above.)
  //
  static PHC_THREAD_LOCAL(bool) tlsIsDisabled;

  // Delay until the next attempt at a page allocation. The delay is made up of
  // two parts the global delay and each thread's local portion of that delay:
  //
  //     delay = sDelay + sum_all_threads(tlsAllocDelay)
  //
  // Threads use their local delay to reduce contention on the shared delay.
  //
  // See the comment in MaybePageAlloc() for an explanation of why it uses
  // ReleaseAcquire semantics.
  static Atomic<Delay, ReleaseAcquire> sAllocDelay;
  static PHC_THREAD_LOCAL(Delay) tlsAllocDelay;

  // The last value we set tlsAllocDelay to before starting to count down.
  static PHC_THREAD_LOCAL(Delay) tlsLastDelay;

  // Using mfbt/Array.h makes MOZ_GUARDED_BY more reliable than a C array.
  PHCArray<AllocPageInfo> mAllocPages MOZ_GUARDED_BY(mMutex);

 public:
  // There are two kinds of page.
  // - Allocation pages, from which allocations are made.
  // - Guard pages, which are never touched by PHC.
  //
  size_t NumAllocPages() const MOZ_REQUIRES(mMutex) {
    return mAllocPages.Capacity();
  }

  // These page kinds are interleaved; each allocation page has a guard page on
  // either side.
  size_t NumAllPages() const MOZ_REQUIRES(mMutex) {
    return NumAllocPages() * 2 + 1;
  }

  Delay GetAvgAllocDelay() MOZ_REQUIRES(mMutex) { return mAvgAllocDelay; }
  Delay GetAvgFirstAllocDelay() MOZ_REQUIRES(mMutex) {
    return mAvgFirstAllocDelay;
  }
  Delay GetAvgPageReuseDelay() MOZ_REQUIRES(mMutex) {
    return mAvgPageReuseDelay;
  }
  // Random reuse delay in the range (avg/2) .. (avg/2 * 3).
  Delay ReuseDelay() MOZ_REQUIRES(mMutex) {
    Delay avg_reuse_delay = GetAvgPageReuseDelay();
    return (avg_reuse_delay / 2) +
           Rnd64ToDelay(avg_reuse_delay / 2, Random64());
  }

  // Both of these are accessed early on hot code paths. We make them both
  // static variables rathan making sRegion a member of sPHC to keep these hot
  // code paths as fast as possible. They're both "write once" so they can
  // share a cache line.
  static PHCRegion sRegion;
  static PHC* sPHC;
};

// Maps a pointer to a PHC-specific structure:
// - A guard page (it is unspecified which one)
// - An allocation page (with an index < kNumAllocPages)
//
// PtrKind should only be used on pointers that are within PHC's virtual address
// range. Callers should usually check sRegion.WithinBounds() first, if
// successful then PHC::GetPtrKind() can be used safely.
//
// The standard way of handling a PtrKind is to check sRegion.WithinBounds()
// first, and if that succeeds, to call GetPtrKind and check IsGuardPage(), and
// if that fails, then this is a PHC pointer.
class PtrKind {
 private:
  enum class Tag : uint8_t {
    GuardPage,
    AllocPage,
  };

  Tag mTag;
  uintptr_t mIndex;  // Only used if mTag == Tag::AllocPage.

 protected:
  // Detect what a pointer points to. This constructor must be fast because it
  // is called for every call to free(), realloc(), malloc_usable_size(), and
  // jemalloc_ptr_info().
  PtrKind(const void* aPtr, const uint8_t* aPagesStart) {
    uintptr_t offset = static_cast<const uint8_t*>(aPtr) - aPagesStart;
    uintptr_t allPageIndex = offset / kPageSize;

    if (allPageIndex & 1) {
      // Odd-indexed pages are allocation pages.
      uintptr_t allocPageIndex = allPageIndex / 2;
      mTag = Tag::AllocPage;
      mIndex = allocPageIndex;
    } else {
      // Even-numbered pages are guard pages.
      mTag = Tag::GuardPage;
    }
  }
  friend PtrKind PHC::GetPtrKind(const void* aPtr);

 public:
  bool IsGuardPage() const { return mTag == Tag::GuardPage; }

  // This should only be called after IsGuardPage() has returned false.
  // Returns Nothing() if the index is out of range for aNumPages.
  Maybe<uintptr_t> AllocPageIndex(uintptr_t aNumPages) const {
    MOZ_RELEASE_ASSERT(mTag == Tag::AllocPage);

    if (mIndex < aNumPages) {
      return Some(mIndex);
    } else {
      return Nothing();
    }
  }
};

PtrKind PHC::GetPtrKind(const void* aPtr) {
  MOZ_ASSERT(sRegion.WithinBounds(aPtr));
  return PtrKind(aPtr, sRegion.PagesStart());
}

// These globals are read together and hardly ever written. They should be on
// the same cache line. They should be in a different cache line to data that
// is manipulated often (sMutex and mNow are members of sPHC for that reason) so
// that this cache line can be shared amoung cores.
alignas(kCacheLineSize) PHCRegion PHC::sRegion;
PHC* PHC::sPHC;

PHC_THREAD_LOCAL(bool) PHC::tlsIsDisabled;
PHC_THREAD_LOCAL(Delay) PHC::tlsAllocDelay;
Atomic<Delay, ReleaseAcquire> PHC::sAllocDelay;
PHC_THREAD_LOCAL(Delay) PHC::tlsLastDelay;

// When PHC wants to crash we first have to unlock so that the crash reporter
// can call into PHC to lockup its pointer. That also means that before calling
// PHCCrash please ensure that state is consistent.
// Because this can report an arbitrary string, use of it must be reviewed by
// Firefox data stewards.
void PHC::Crash(const char* aMessage) MOZ_REQUIRES(mMutex) {
  mMutex.Unlock();
  MOZ_CRASH_UNSAFE(aMessage);
}

// RAII guard that disables PHC on the current thread for its lifetime; used
// around stack-walking to prevent re-entry into PHC (see tlsIsDisabled).
class AutoDisableOnCurrentThread {
 public:
  AutoDisableOnCurrentThread(const AutoDisableOnCurrentThread&) = delete;

  const AutoDisableOnCurrentThread& operator=(
      const AutoDisableOnCurrentThread&) = delete;

  explicit AutoDisableOnCurrentThread() { PHC::DisableOnCurrentThread(); }
  ~AutoDisableOnCurrentThread() { PHC::sPHC->EnableOnCurrentThread(); }
};

//---------------------------------------------------------------------------
// Initialisation
//---------------------------------------------------------------------------

// WARNING: this function runs *very* early -- before all static initializers
// have run. For this reason, non-scalar globals (sPHC) are allocated
// dynamically (so we can guarantee their construction in this function) rather
// than statically. sRegion is allocated statically to avoid an extra
// dereference.
//
// If initialisation fails sPHC will be null. Returning bool won't help the
// caller as there's nothing they can do.
void phc_init() {
  // We must only initialise once.
  MOZ_ASSERT(!PHC::sPHC);

  // PHC's page bookkeeping assumes the compile-time kPageSize; bail out on
  // systems with a different kernel page size.
  if (GetKernelPageSize() != kPageSize) {
    return;
  }

  if (!PHC::sRegion.AllocVirtualAddresses()) {
    return;
  }

  // sPHC is never freed. It lives for the life of the process.
  PHC::sPHC = InfallibleAllocPolicy::new_<PHC>();

#ifndef XP_WIN
  // Avoid deadlocks when forking by acquiring our state lock prior to forking
  // and releasing it after forking. See |LogAlloc|'s |phc_init| for
  // in-depth details.
  pthread_atfork(PHC::prefork, PHC::postfork_parent, PHC::postfork_child);
#endif
}

//---------------------------------------------------------------------------
// Page allocation operations
//---------------------------------------------------------------------------

// This is the hot-path for testing if we should make a PHC allocation, it
// should be inlined into the caller while the remainder of the tests that are
// in MaybePageAlloc need not be inlined.
static MOZ_ALWAYS_INLINE bool ShouldPageAllocHot(size_t aReqSize) {
  if (MOZ_UNLIKELY(!PHC::sPHC)) {
    return false;
  }

  // PHC only handles allocations that fit in a single page.
  if (MOZ_UNLIKELY(aReqSize > kPageSize)) {
    return false;
  }

  // Decrement the delay. If it's zero, we do a page allocation and reset the
  // delay to a random number.
  if (MOZ_LIKELY(!PHC::DecrementDelay())) {
    return false;
  }

  return true;
}

void PHC::LogNoAlloc(size_t aReqSize, size_t aAlignment, Delay newAllocDelay)
    MOZ_REQUIRES(mMutex) {
  // No pages are available, or VirtualAlloc/mprotect failed.
#if PHC_LOGGING
  phc::PHCStats stats = GetPageStatsLocked();
  Log("No PageAlloc(%zu, %zu), sAllocDelay <- %zu, fullness %zu/%zu/%zu, "
      "hits %zu/%zu (%zu%%)\n",
      aReqSize, aAlignment, size_t(newAllocDelay), stats.mSlotsAllocated,
      stats.mSlotsFreed, NumAllocPages(), PageAllocHits(), PageAllocAttempts(),
      PageAllocHitRate());
#endif
}

void* PHC::MaybePageAlloc(const Maybe<arena_id_t>& aArenaId, size_t aReqSize,
                          size_t aAlignment, bool aZero) {
  MOZ_ASSERT(IsPowerOfTwo(aAlignment));
  if (!ShouldMakeNewAllocations()) {
    // Reset the allocation delay so that we take the fast path most of the
    // time. Rather than take the lock and use the RNG which are unnecessary
    // when PHC is disabled, instead set the delay to a reasonably high number,
    // the default average first allocation delay. This is reset when PHC is
    // re-enabled anyway.
    ForceSetNewAllocDelay(kDelayResetWhenDisabled);
    return nullptr;
  }

  if (IsDisabledOnCurrentThread()) {
    // We don't reset sAllocDelay since that might affect other threads. We
    // assume this is okay because either this thread will be re-enabled after
    // less than DELAY_MAX allocations or that there are other active threads
    // that will reset sAllocDelay. We do reset our local delay which will
    // cause this thread to "back off" from updating sAllocDelay on future
    // allocations.
    ResetLocalAllocDelay(kDelayBackoffAmount);
    return nullptr;
  }

  // Disable on this thread *before* getting the stack trace.
  AutoDisableOnCurrentThread disable;

  // Get the stack trace *before* locking the mutex. If we return nullptr then
  // it was a waste, but it's not so frequent, and doing a stack walk while
  // the mutex is locked is problematic (see the big comment on
  // StackTrace::Fill() for details).
  StackTrace allocStack;
  allocStack.Fill();

  MutexAutoLock lock(mMutex);

  Time now = Now();

  Delay newAllocDelay = Rnd64ToDelay(GetAvgAllocDelay(), Random64());
  if (!SetNewAllocDelay(newAllocDelay)) {
    // Another thread won the race for this allocation slot.
    return nullptr;
  }

  // Pages are allocated from a free list populated in order of when they're
  // freed. If the page at the head of the list is too recently freed to be
  // reused then no other pages on the list will be either.

  Maybe<uintptr_t> mb_index = PopNextFreeIfAllocatable(now);
  if (!mb_index) {
    IncPageAllocMisses();
    LogNoAlloc(aReqSize, aAlignment, newAllocDelay);
    return nullptr;
  }
  uintptr_t index = mb_index.value();

#if PHC_LOGGING
  Time lifetime = 0;
#endif
  uint8_t* pagePtr = sRegion.AllocPagePtr(index);
  MOZ_ASSERT(pagePtr);
  // Make the page accessible again (it was protected while free/unused).
  bool ok =
#ifdef XP_WIN
      !!VirtualAlloc(pagePtr, kPageSize, MEM_COMMIT, PAGE_READWRITE);
#else
      mprotect(pagePtr, kPageSize, PROT_READ | PROT_WRITE) == 0;
#endif

  if (!ok) {
    UnpopNextFree(index);
    IncPageAllocMisses();
    LogNoAlloc(aReqSize, aAlignment, newAllocDelay);
    return nullptr;
  }

  size_t usableSize = MozJemalloc::malloc_good_size(aReqSize);
  MOZ_ASSERT(usableSize > 0);

  // Put the allocation as close to the end of the page as possible,
  // allowing for alignment requirements.
  uint8_t* ptr = pagePtr + kPageSize - usableSize;
  if (aAlignment != 1) {
    ptr = reinterpret_cast<uint8_t*>(
        (reinterpret_cast<uintptr_t>(ptr) & ~(aAlignment - 1)));
  }

#if PHC_LOGGING
  Time then = GetFreeTime(index);
  lifetime = then != 0 ? now - then : 0;
#endif

  SetPageInUse(index, aArenaId, ptr, allocStack);

  if (aZero) {
    memset(ptr, 0, usableSize);
  } else {
#ifdef DEBUG
    // Junk-fill in debug builds to catch use of uninitialised memory.
    memset(ptr, kAllocJunk, usableSize);
#endif
  }

  IncPageAllocHits();
#if PHC_LOGGING
  phc::PHCStats stats = GetPageStatsLocked();
  Log("PageAlloc(%zu, %zu) -> %p[%zu]/%p (%zu) (z%zu), sAllocDelay <- %zu, "
      "fullness %zu/%zu/%zu, hits %zu/%zu (%zu%%), lifetime %zu\n",
      aReqSize, aAlignment, pagePtr, index, ptr, usableSize,
      size_t(newAllocDelay), size_t(SharedAllocDelay()), stats.mSlotsAllocated,
      stats.mSlotsFreed, NumAllocPages(), PageAllocHits(), PageAllocAttempts(),
      PageAllocHitRate(), lifetime);
#endif

  return ptr;
}

// Decommit/protect the page at aIndex so any further touch faults, then mark
// it freed with the given free stack and reuse delay.
void PHC::FreePage(uintptr_t aIndex, const Maybe<arena_id_t>& aArenaId,
                   const StackTrace& aFreeStack, Delay aReuseDelay)
    MOZ_REQUIRES(mMutex) {
  void* pagePtr = sRegion.AllocPagePtr(aIndex);

#ifdef XP_WIN
  if (!VirtualFree(pagePtr, kPageSize, MEM_DECOMMIT)) {
    Crash("VirtualFree failed");
  }
#else
  // Replace the mapping in place (MAP_FIXED) with an inaccessible one.
  if (mmap(pagePtr, kPageSize, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON,
           -1, 0) == MAP_FAILED) {
    Crash("mmap failed");
  }
#endif

  SetPageFreed(aIndex, aArenaId, aFreeStack, aReuseDelay);
}

//---------------------------------------------------------------------------
// replace-malloc machinery
//---------------------------------------------------------------------------

// This handles malloc, moz_arena_malloc, and realloc-with-a-nullptr.
MOZ_ALWAYS_INLINE static void* PageMalloc(const Maybe<arena_id_t>& aArenaId,
                                          size_t aReqSize) {
  void* ptr =
      ShouldPageAllocHot(aReqSize)
          // The test on aArenaId here helps the compiler optimise away
          // the construction of Nothing() in the caller.
          ? PHC::sPHC->MaybePageAlloc(aArenaId.isSome() ? aArenaId : Nothing(),
                                      aReqSize, /* aAlignment */ 1,
                                      /* aZero */ false)
          : nullptr;
  return ptr ? ptr
             : (aArenaId.isSome()
                    ? MozJemalloc::moz_arena_malloc(*aArenaId, aReqSize)
                    : MozJemalloc::malloc(aReqSize));
}

inline void* MozJemallocPHC::malloc(size_t aReqSize) {
  return PageMalloc(Nothing(), aReqSize);
}

// This handles both calloc and moz_arena_calloc.
MOZ_ALWAYS_INLINE static void* PageCalloc(const Maybe<arena_id_t>& aArenaId,
                                          size_t aNum, size_t aReqSize) {
  // Reject aNum * aReqSize overflow up front, as calloc must.
  CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aNum) * aReqSize;
  if (!checkedSize.isValid()) {
    return nullptr;
  }

  void* ptr =
      ShouldPageAllocHot(checkedSize.value())
          // The test on aArenaId here helps the compiler optimise away
          // the construction of Nothing() in the caller.
          ? PHC::sPHC->MaybePageAlloc(aArenaId.isSome() ? aArenaId : Nothing(),
                                      checkedSize.value(), /* aAlignment */ 1,
                                      /* aZero */ true)
          : nullptr;
  return ptr ? ptr
             : (aArenaId.isSome()
                    ? MozJemalloc::moz_arena_calloc(*aArenaId, aNum, aReqSize)
                    : MozJemalloc::calloc(aNum, aReqSize));
}

inline void* MozJemallocPHC::calloc(size_t aNum, size_t aReqSize) {
  return PageCalloc(Nothing(), aNum, aReqSize);
}

// Fast check: is aPtr inside PHC's reserved virtual address region?
MOZ_ALWAYS_INLINE static bool FastIsPHCPtr(const void* aPtr) {
  if (MOZ_UNLIKELY(!PHC::sPHC)) {
    return false;
  }

  return PHC::sRegion.WithinBounds(aPtr);
}

// This function handles both realloc and moz_arena_realloc.
//
// As always, realloc is complicated, and doubly so when there are two
// different kinds of allocations in play. Here are the possible transitions,
// and what we do in practice.
//
// - normal-to-normal: This is straightforward and obviously necessary.
//
// - normal-to-page: This is disallowed because it would require getting the
//   arenaId of the normal allocation, which isn't possible in non-DEBUG builds
//   for security reasons.
//
// - page-to-page: This is done whenever possible, i.e. whenever the new size
//   is less than or equal to 4 KiB. This choice counterbalances the
//   disallowing of normal-to-page allocations, in order to avoid biasing
//   towards or away from page allocations. It always occurs in-place.
//
// - page-to-normal: this is done only when necessary, i.e. only when the new
//   size is greater than 4 KiB. This choice naturally flows from the
//   prior choice on page-to-page transitions.
//
// In summary: realloc doesn't change the allocation kind unless it must.
//
// This function may return:
// - Some(pointer) when PHC handled the reallocation.
// - Some(nullptr) when PHC should have handled a page-to-normal transition
//   but couldn't because of OOM.
// - Nothing() when PHC is disabled or the original allocation was not
//   under PHC.
MOZ_ALWAYS_INLINE static Maybe<void*> MaybePageRealloc(
    const Maybe<arena_id_t>& aArenaId, void* aOldPtr, size_t aNewSize) {
  if (!aOldPtr) {
    // Null pointer. Treat like malloc(aNewSize).
    return Some(PageMalloc(aArenaId, aNewSize));
  }

  if (MOZ_UNLIKELY(!FastIsPHCPtr(aOldPtr))) {
    // A normal-to-normal transition.
    return Nothing();
  }

  return PHC::sPHC->PageRealloc(aArenaId, aOldPtr, aNewSize);
}

// Realloc of a pointer known to be inside PHC's region. See the comment on
// MaybePageRealloc above for the transition rules and return values.
Maybe<void*> PHC::PageRealloc(const Maybe<arena_id_t>& aArenaId, void* aOldPtr,
                              size_t aNewSize) MOZ_EXCLUDES(mMutex) {
  PtrKind pk = GetPtrKind(aOldPtr);

  if (pk.IsGuardPage()) {
    CrashOnGuardPage(aOldPtr);
  }

  // A page-to-something transition.
  AdvanceNow(LocalAllocDelay());

  // Note that `disable` has no effect unless it is emplaced below.
  Maybe<AutoDisableOnCurrentThread> disable;
  // Get the stack trace *before* locking the mutex.
  StackTrace stack;
  if (IsDisabledOnCurrentThread()) {
    // PHC is disabled on this thread. Leave the stack empty.
  } else {
    // Disable on this thread *before* getting the stack trace.
    disable.emplace();
    stack.Fill();
  }

  MutexAutoLock lock(mMutex);

  Maybe<uintptr_t> mb_index = pk.AllocPageIndex(NumAllocPages());
  if (!mb_index) {
    Crash("Realloc of invalid pointer");
  }
  // At this point we know we have an allocation page.
  uintptr_t index = mb_index.value();

  // Check for realloc() of a freed block.
  EnsureValidAndInUse(aOldPtr, index);

  if (aNewSize <= kPageSize && ShouldMakeNewAllocations()) {
    // A page-to-page transition. Just keep using the page allocation. We do
    // this even if the thread is disabled, because it doesn't create a new
    // page allocation. Note that ResizePageInUse() checks aArenaId.
    //
    // Move the bytes with memmove(), because the old allocation and the new
    // allocation overlap. Move the usable size rather than the requested size,
    // because the user might have used malloc_usable_size() and filled up the
    // usable size. The new allocation is re-positioned so its end abuts the
    // guard page, to keep catching overflows.
    size_t oldUsableSize = PageUsableSize(index);
    size_t newUsableSize = MozJemalloc::malloc_good_size(aNewSize);
    uint8_t* pagePtr = sRegion.AllocPagePtr(index);
    uint8_t* newPtr = pagePtr + kPageSize - newUsableSize;
    memmove(newPtr, aOldPtr, std::min(oldUsableSize, aNewSize));
    ResizePageInUse(index, aArenaId, newPtr, stack);
    Log("PageRealloc-Reuse(%p, %zu) -> %p\n", aOldPtr, aNewSize, newPtr);
    return Some(newPtr);
  }

  // A page-to-normal transition (with the new size greater than page-sized).
  // (Note that aArenaId is checked below.)
  void* newPtr;
  if (aArenaId.isSome()) {
    newPtr = MozJemalloc::moz_arena_malloc(*aArenaId, aNewSize);
  } else {
    // Preserve the arena the page allocation originally came from, if any.
    Maybe<arena_id_t> oldArenaId = PageArena(index);
    newPtr = (oldArenaId.isSome()
                  ? MozJemalloc::moz_arena_malloc(*oldArenaId, aNewSize)
                  : MozJemalloc::malloc(aNewSize));
  }
  if (!newPtr) {
    // OOM on the normal-heap side; return Some(nullptr) so the caller
    // reports failure rather than falling back to mozjemalloc's realloc.
    return Some(nullptr);
  }

  Delay reuseDelay = ReuseDelay();

  // Copy the usable size rather than the requested size, because the user
  // might have used malloc_usable_size() and filled up the usable size. Note
  // that FreePage() checks aArenaId (via SetPageFreed()).
  size_t oldUsableSize = PageUsableSize(index);
  memcpy(newPtr, aOldPtr, std::min(oldUsableSize, aNewSize));
  FreePage(index, aArenaId, stack, reuseDelay);
  Log("PageRealloc-Free(%p[%zu], %zu) -> %p, %zu delay, reuse at ~%zu\n",
      aOldPtr, index, aNewSize, newPtr, size_t(reuseDelay),
      size_t(Now()) + reuseDelay);

  return Some(newPtr);
}

// The replace-malloc entry point for realloc/moz_arena_realloc: let PHC try
// first, otherwise defer to mozjemalloc.
MOZ_ALWAYS_INLINE static void* PageRealloc(const Maybe<arena_id_t>& aArenaId,
                                           void* aOldPtr, size_t aNewSize) {
  Maybe<void*> ptr = MaybePageRealloc(aArenaId, aOldPtr, aNewSize);

  return ptr.isSome()
             ? *ptr
             : (aArenaId.isSome() ? MozJemalloc::moz_arena_realloc(
                                        *aArenaId, aOldPtr, aNewSize)
                                  : MozJemalloc::realloc(aOldPtr, aNewSize));
}

inline void* MozJemallocPHC::realloc(void* aOldPtr, size_t aNewSize) {
  return PageRealloc(Nothing(), aOldPtr, aNewSize);
}

// Free of a pointer known to be inside PHC's region. Crashes on a guard-page
// pointer, an out-of-range pointer, or a double-free; otherwise protects the
// page and quarantines it for a random reuse delay.
void PHC::PageFree(const Maybe<arena_id_t>& aArenaId, void* aPtr)
    MOZ_EXCLUDES(mMutex) {
  PtrKind pk = GetPtrKind(aPtr);

  if (pk.IsGuardPage()) {
    PHC::CrashOnGuardPage(aPtr);
  }

  AdvanceNow(LocalAllocDelay());

  // Note that `disable` has no effect unless it is emplaced below.
  Maybe<AutoDisableOnCurrentThread> disable;
  // Get the stack trace *before* locking the mutex.
  StackTrace freeStack;
  if (IsDisabledOnCurrentThread()) {
    // PHC is disabled on this thread. Leave the stack empty.
  } else {
    // Disable on this thread *before* getting the stack trace.
    disable.emplace();
    freeStack.Fill();
  }

  MutexAutoLock lock(mMutex);

  Maybe<uintptr_t> mb_index = pk.AllocPageIndex(NumAllocPages());
  if (!mb_index) {
    Crash("free of invalid pointer");
  }
  // At this point we know we have an allocation page.
  uintptr_t index = mb_index.value();

  // Check for a double-free.
  EnsureValidAndInUse(aPtr, index);

  // Note that FreePage() checks aArenaId (via SetPageFreed()).
  Delay reuseDelay = ReuseDelay();
  FreePage(index, aArenaId, freeStack, reuseDelay);

#if PHC_LOGGING
  phc::PHCStats stats = GetPageStatsLocked();
  Log("PageFree(%p[%zu]), %zu delay, reuse at ~%zu, fullness %zu/%zu/%zu\n",
      aPtr, index, size_t(reuseDelay), size_t(Now()) + reuseDelay,
      stats.mSlotsAllocated, stats.mSlotsFreed, NumAllocPages());
#endif
}

// The replace-malloc entry point for free/moz_arena_free.
MOZ_ALWAYS_INLINE static void PageFree(const Maybe<arena_id_t>& aArenaId,
                                       void* aPtr) {
  if (MOZ_UNLIKELY(FastIsPHCPtr(aPtr))) {
    // The ternary expression here helps the compiler optimise away the
    // construction of Nothing() in the caller.
    PHC::sPHC->PageFree(aArenaId.isSome() ? aArenaId : Nothing(), aPtr);
    return;
  }

  aArenaId.isSome() ? MozJemalloc::moz_arena_free(*aArenaId, aPtr)
                    : MozJemalloc::free(aPtr);
}

inline void MozJemallocPHC::free(void* aPtr) { PageFree(Nothing(), aPtr); }

// This handles memalign and moz_arena_memalign.
1941 MOZ_ALWAYS_INLINE static void* PageMemalign(const Maybe<arena_id_t>& aArenaId, 1942 size_t aAlignment, 1943 size_t aReqSize) { 1944 MOZ_RELEASE_ASSERT(IsPowerOfTwo(aAlignment)); 1945 1946 // PHC can't satisfy an alignment greater than a page size, so fall back to 1947 // mozjemalloc in that case. 1948 void* ptr = nullptr; 1949 if (ShouldPageAllocHot(aReqSize) && aAlignment <= kPageSize) { 1950 // The test on aArenaId here helps the compiler optimise away 1951 // the construction of Nothing() in the caller. 1952 ptr = PHC::sPHC->MaybePageAlloc(aArenaId.isSome() ? aArenaId : Nothing(), 1953 aReqSize, aAlignment, /* aZero */ false); 1954 } 1955 return ptr ? ptr 1956 : (aArenaId.isSome() 1957 ? MozJemalloc::moz_arena_memalign(*aArenaId, aAlignment, 1958 aReqSize) 1959 : MozJemalloc::memalign(aAlignment, aReqSize)); 1960 } 1961 1962 inline void* MozJemallocPHC::memalign(size_t aAlignment, size_t aReqSize) { 1963 return PageMemalign(Nothing(), aAlignment, aReqSize); 1964 } 1965 1966 inline size_t MozJemallocPHC::malloc_usable_size(usable_ptr_t aPtr) { 1967 if (MOZ_LIKELY(!FastIsPHCPtr(aPtr))) { 1968 // Not a page allocation. Measure it normally. 1969 return MozJemalloc::malloc_usable_size(aPtr); 1970 } 1971 1972 return PHC::sPHC->PtrUsableSize(aPtr); 1973 } 1974 1975 size_t PHC::PtrUsableSize(usable_ptr_t aPtr) MOZ_EXCLUDES(mMutex) { 1976 PtrKind pk = GetPtrKind(aPtr); 1977 1978 if (pk.IsGuardPage()) { 1979 CrashOnGuardPage(const_cast<void*>(aPtr)); 1980 } 1981 1982 MutexAutoLock lock(mMutex); 1983 1984 Maybe<uintptr_t> index = pk.AllocPageIndex(NumAllocPages()); 1985 if (!index) { 1986 Crash("PtrUsableSize() of invalid pointer"); 1987 } 1988 1989 // At this point we know aPtr lands within an allocation page. But if aPtr 1990 // points to memory before the base address of the allocation, we return 0. 
1991 void* pageBaseAddr = AllocPageBaseAddr(index.value()); 1992 1993 if (MOZ_UNLIKELY(aPtr < pageBaseAddr)) { 1994 return 0; 1995 } 1996 1997 return PageUsableSize(index.value()); 1998 } 1999 2000 inline void MozJemallocPHC::jemalloc_stats_internal( 2001 jemalloc_stats_t* aStats, jemalloc_bin_stats_t* aBinStats) { 2002 MozJemalloc::jemalloc_stats_internal(aStats, aBinStats); 2003 2004 if (!PHC::sPHC) { 2005 // If we're not initialised, then we're not using any additional memory and 2006 // have nothing to add to the report. 2007 return; 2008 } 2009 2010 // Add PHC's memory usage to the allocator's. 2011 phc::MemoryUsage mem_info; 2012 PHC::sPHC->GetMemoryUsage(mem_info); 2013 aStats->allocated += mem_info.mAllocatedBytes; 2014 aStats->waste += mem_info.mFragmentationBytes; 2015 aStats->mapped += PHC::sRegion.ReservedBytes() - mem_info.mAllocatedBytes - 2016 mem_info.mFragmentationBytes; 2017 2018 // guards is the gap between `allocated` and `mapped`. In some ways this 2019 // almost fits into aStats->wasted since it feels like wasted memory. However 2020 // wasted should only include committed memory and these guard pages are 2021 // uncommitted. Therefore we don't include it anywhere. 2022 // size_t guards = mapped - allocated; 2023 2024 // aStats.page_cache and aStats.bin_unused are left unchanged because PHC 2025 // doesn't have anything corresponding to those. 2026 2027 // The metadata is stored in normal heap allocations, so they're measured by 2028 // mozjemalloc as `allocated`. Move them into `bookkeeping`. 2029 // They're also reported under explicit/heap-overhead/phc/fragmentation in 2030 // about:memory. 
2031 aStats->allocated -= mem_info.mMetadataBytes; 2032 aStats->bookkeeping += mem_info.mMetadataBytes; 2033 } 2034 2035 inline void MozJemallocPHC::jemalloc_stats_lite(jemalloc_stats_lite_t* aStats) { 2036 MozJemalloc::jemalloc_stats_lite(aStats); 2037 } 2038 2039 inline void MozJemallocPHC::jemalloc_ptr_info(const void* aPtr, 2040 jemalloc_ptr_info_t* aInfo) { 2041 if (MOZ_LIKELY(!FastIsPHCPtr(aPtr))) { 2042 // Not a page allocation. 2043 MozJemalloc::jemalloc_ptr_info(aPtr, aInfo); 2044 return; 2045 } 2046 2047 PHC::sPHC->PagePtrInfo(aPtr, aInfo); 2048 } 2049 2050 void PHC::PagePtrInfo(const void* aPtr, jemalloc_ptr_info_t* aInfo) 2051 MOZ_EXCLUDES(mMutex) { 2052 // We need to implement this properly, because various code locations do 2053 // things like checking that allocations are in the expected arena. 2054 2055 PtrKind pk = GetPtrKind(aPtr); 2056 2057 if (pk.IsGuardPage()) { 2058 // Treat a guard page as unknown because there's no better alternative. 2059 *aInfo = {TagUnknown, nullptr, 0, 0}; 2060 return; 2061 } 2062 2063 MutexAutoLock lock(mMutex); 2064 2065 // At this point we know we have an allocation page. 
2066 Maybe<uintptr_t> index = pk.AllocPageIndex(NumAllocPages()); 2067 2068 if (!index) { 2069 Crash("JemallocPtrInfo of invalid pointer"); 2070 } 2071 2072 FillJemallocPtrInfo(aPtr, index.value(), aInfo); 2073 #if DEBUG 2074 Log("JemallocPtrInfo(%p[%zu]) -> {%zu, %p, %zu, %zu}\n", aPtr, index.value(), 2075 size_t(aInfo->tag), aInfo->addr, aInfo->size, aInfo->arenaId); 2076 #else 2077 Log("JemallocPtrInfo(%p[%zu]) -> {%zu, %p, %zu}\n", aPtr, index.value(), 2078 size_t(aInfo->tag), aInfo->addr, aInfo->size); 2079 #endif 2080 } 2081 2082 inline void* MozJemallocPHC::moz_arena_malloc(arena_id_t aArenaId, 2083 size_t aReqSize) { 2084 return PageMalloc(Some(aArenaId), aReqSize); 2085 } 2086 2087 inline void* MozJemallocPHC::moz_arena_calloc(arena_id_t aArenaId, size_t aNum, 2088 size_t aReqSize) { 2089 return PageCalloc(Some(aArenaId), aNum, aReqSize); 2090 } 2091 2092 inline void* MozJemallocPHC::moz_arena_realloc(arena_id_t aArenaId, 2093 void* aOldPtr, size_t aNewSize) { 2094 return PageRealloc(Some(aArenaId), aOldPtr, aNewSize); 2095 } 2096 2097 inline void MozJemallocPHC::moz_arena_free(arena_id_t aArenaId, void* aPtr) { 2098 return PageFree(Some(aArenaId), aPtr); 2099 } 2100 2101 inline void* MozJemallocPHC::moz_arena_memalign(arena_id_t aArenaId, 2102 size_t aAlignment, 2103 size_t aReqSize) { 2104 return PageMemalign(Some(aArenaId), aAlignment, aReqSize); 2105 } 2106 2107 bool PHC::IsPHCAllocation(const void* aPtr, mozilla::phc::AddrInfo* aOut) { 2108 PtrKind pk = GetPtrKind(aPtr); 2109 2110 bool isGuardPage = false; 2111 if (pk.IsGuardPage()) { 2112 if ((uintptr_t(aPtr) % kPageSize) < (kPageSize / 2)) { 2113 // The address is in the lower half of a guard page, so it's probably an 2114 // overflow. But first check that it is not on the very first guard 2115 // page, in which case it cannot be an overflow, and we ignore it. 2116 if (sRegion.IsInFirstGuardPage(aPtr)) { 2117 return false; 2118 } 2119 2120 // Get the allocation page preceding this guard page. 
2121 pk = GetPtrKind(static_cast<const uint8_t*>(aPtr) - kPageSize); 2122 2123 } else { 2124 // The address is in the upper half of a guard page, so it's probably an 2125 // underflow. Get the allocation page following this guard page. 2126 pk = GetPtrKind(static_cast<const uint8_t*>(aPtr) + kPageSize); 2127 } 2128 2129 // Make a note of the fact that we hit a guard page. 2130 isGuardPage = true; 2131 } 2132 2133 if (aOut) { 2134 if (mMutex.TryLock()) { 2135 // At this point we know we have an allocation page. 2136 Maybe<uintptr_t> index = pk.AllocPageIndex(NumAllocPages()); 2137 if (!index) { 2138 mMutex.Unlock(); 2139 return false; 2140 } 2141 FillAddrInfo(index.value(), aPtr, isGuardPage, *aOut); 2142 Log("IsPHCAllocation: %zu, %p, %zu, %zu, %zu\n", size_t(aOut->mKind), 2143 aOut->mBaseAddr, aOut->mUsableSize, 2144 aOut->mAllocStack.isSome() ? aOut->mAllocStack->mLength : 0, 2145 aOut->mFreeStack.isSome() ? aOut->mFreeStack->mLength : 0); 2146 mMutex.Unlock(); 2147 } else { 2148 Log("IsPHCAllocation: PHC is locked\n"); 2149 aOut->mPhcWasLocked = true; 2150 } 2151 } 2152 return true; 2153 } 2154 2155 namespace mozilla::phc { 2156 2157 bool IsPHCAllocation(const void* aPtr, AddrInfo* aOut) { 2158 if (MOZ_LIKELY(!FastIsPHCPtr(aPtr))) { 2159 return false; 2160 } 2161 2162 return PHC::sPHC->IsPHCAllocation(aPtr, aOut); 2163 } 2164 2165 void DisablePHCOnCurrentThread() { 2166 PHC::DisableOnCurrentThread(); 2167 Log("DisablePHCOnCurrentThread: %zu\n", 0ul); 2168 } 2169 2170 void ReenablePHCOnCurrentThread() { 2171 PHC::sPHC->EnableOnCurrentThread(); 2172 Log("ReenablePHCOnCurrentThread: %zu\n", 0ul); 2173 } 2174 2175 bool IsPHCEnabledOnCurrentThread() { 2176 bool enabled = !PHC::IsDisabledOnCurrentThread(); 2177 Log("IsPHCEnabledOnCurrentThread: %zu\n", size_t(enabled)); 2178 return enabled; 2179 } 2180 2181 void PHCMemoryUsage(MemoryUsage& aMemoryUsage) { 2182 aMemoryUsage = MemoryUsage(); 2183 if (PHC::sPHC) { 2184 PHC::sPHC->GetMemoryUsage(aMemoryUsage); 2185 } 2186 
} 2187 2188 void SetPHCSize(size_t aSizeBytes) { 2189 if (PHC::sPHC) { 2190 PHC::sPHC->Resize(aSizeBytes); 2191 } 2192 } 2193 2194 void GetPHCStats(PHCStats& aStats) { 2195 if (!PHC::sPHC) { 2196 aStats = PHCStats(); 2197 return; 2198 } 2199 2200 aStats = PHC::sPHC->GetPageStats(); 2201 } 2202 2203 // Enable or Disable PHC at runtime. If PHC is disabled it will still trap 2204 // bad uses of previous allocations, but won't track any new allocations. 2205 void SetPHCState(PHCState aState) { 2206 if (!PHC::sPHC) { 2207 return; 2208 } 2209 2210 PHC::sPHC->SetState(aState); 2211 } 2212 2213 void SetPHCProbabilities(int64_t aAvgDelayFirst, int64_t aAvgDelayNormal, 2214 int64_t aAvgDelayPageReuse) { 2215 if (!PHC::sPHC) { 2216 return; 2217 } 2218 2219 PHC::sPHC->SetProbabilities(aAvgDelayFirst, aAvgDelayNormal, 2220 aAvgDelayPageReuse); 2221 } 2222 2223 } // namespace mozilla::phc 2224 2225 #if PHC_LOGGING 2226 static size_t GetPid() { return size_t(getpid()); } 2227 2228 static size_t GetTid() { 2229 # if defined(XP_WIN) 2230 return size_t(GetCurrentThreadId()); 2231 # else 2232 return size_t(pthread_self()); 2233 # endif 2234 } 2235 #endif // PHC_LOGGING 2236 2237 static void Log(const char* fmt, ...) { 2238 #if PHC_LOGGING 2239 # if defined(XP_WIN) 2240 # define LOG_STDERR \ 2241 reinterpret_cast<intptr_t>(GetStdHandle(STD_ERROR_HANDLE)) 2242 # else 2243 # define LOG_STDERR 2 2244 # endif 2245 2246 char buf[256]; 2247 size_t pos = SNPrintf(buf, sizeof(buf), "PHC[%zu,%zu,~%zu] ", GetPid(), 2248 GetTid(), size_t(PHC::Now())); 2249 va_list vargs; 2250 va_start(vargs, fmt); 2251 pos += VSNPrintf(&buf[pos], sizeof(buf) - pos, fmt, vargs); 2252 MOZ_ASSERT(pos < sizeof(buf)); 2253 va_end(vargs); 2254 2255 FdPuts(LOG_STDERR, buf, pos); 2256 #endif // PHC_LOGGING 2257 }