DMD.cpp (59963B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include <ctype.h> 8 #include <errno.h> 9 #include <limits.h> 10 #include <stdarg.h> 11 #include <stdio.h> 12 #include <stdlib.h> 13 #include <string.h> 14 15 #if !defined(MOZ_PROFILING) 16 # error "DMD requires MOZ_PROFILING" 17 #endif 18 19 #ifdef XP_WIN 20 # include <windows.h> 21 # include <process.h> 22 #else 23 # include <pthread.h> 24 # include <sys/types.h> 25 # include <unistd.h> 26 #endif 27 28 #ifdef ANDROID 29 # include <android/log.h> 30 #endif 31 32 #include "nscore.h" 33 34 #include "mozilla/Assertions.h" 35 #include "mozilla/CheckedArithmetic.h" 36 #include "mozilla/FastBernoulliTrial.h" 37 #include "mozilla/HashFunctions.h" 38 #include "mozilla/HashTable.h" 39 #include "mozilla/IntegerPrintfMacros.h" 40 #include "mozilla/JSONWriter.h" 41 #include "mozilla/Likely.h" 42 #include "mozilla/MemoryReporting.h" 43 #include "mozilla/PodOperations.h" 44 #include "mozilla/StackWalk.h" 45 #include "mozilla/ThreadLocal.h" 46 47 // CodeAddressService is defined entirely in the header, so this does not make 48 // DMD depend on XPCOM's object file. 49 #include "CodeAddressService.h" 50 51 // replace_malloc.h needs to be included before replace_malloc_bridge.h, 52 // which DMD.h includes, so DMD.h needs to be included after replace_malloc.h. 
#include "replace_malloc.h"
#include "DMD.h"

namespace mozilla {
namespace dmd {

// Glue object handed to the replace-malloc machinery so external code can
// reach DMD's function table via ReplaceMallocBridge.
class DMDBridge : public ReplaceMallocBridge {
  virtual DMDFuncs* GetDMDFuncs() override;
};

// NOTE(review): gDMDBridge is presumably instantiated/registered in DMD's
// Init(); the registration site is not visible in this part of the file.
static DMDBridge* gDMDBridge;
static DMDFuncs gDMDFuncs;

DMDFuncs* DMDBridge::GetDMDFuncs() { return &gDMDFuncs; }

// Convenience printf-style wrapper that forwards to DMDFuncs::StatusMsg(),
// which prefixes the message with "DMD[<pid>]" (or routes it to the Android
// log).
MOZ_FORMAT_PRINTF(1, 2)
inline void StatusMsg(const char* aFmt, ...) {
  va_list ap;
  va_start(ap, aFmt);
  gDMDFuncs.StatusMsg(aFmt, ap);
  va_end(ap);
}

//---------------------------------------------------------------------------
// Utilities
//---------------------------------------------------------------------------

#ifndef DISALLOW_COPY_AND_ASSIGN
#  define DISALLOW_COPY_AND_ASSIGN(T) \
    T(const T&);                      \
    void operator=(const T&)
#endif

// The underlying (un-replaced) allocator's function table. DMD's own
// allocations go directly through this table, so they are not themselves
// intercepted.
static malloc_table_t gMallocTable;

// This provides infallible allocations (they abort on OOM). We use it for all
// of DMD's own allocations, which fall into the following three cases.
//
// - Direct allocations (the easy case).
//
// - Indirect allocations in mozilla::{Vector,HashSet,HashMap} -- this class
//   serves as their AllocPolicy.
//
// - Other indirect allocations (e.g. MozStackWalk) -- see the comments on
//   Thread::mBlockIntercepts and in replace_malloc for how these work.
//
// It would be nice if we could use the InfallibleAllocPolicy from mozalloc,
// but DMD cannot use mozalloc.
//
class InfallibleAllocPolicy {
  // Crashes the process (via MOZ_CRASH) if |aP| is null.
  static void ExitOnFailure(const void* aP);

 public:
  // Fallible: returns nullptr on overflow of |aNumElems * sizeof(T)| or OOM.
  template <typename T>
  static T* maybe_pod_malloc(size_t aNumElems) {
    size_t size;
    if (MOZ_UNLIKELY(!mozilla::SafeMul(aNumElems, sizeof(T), &size))) {
      return nullptr;
    }
    return (T*)gMallocTable.malloc(size);
  }

  // Fallible: calloc itself performs the |aNumElems * sizeof(T)| overflow
  // check.
  template <typename T>
  static T* maybe_pod_calloc(size_t aNumElems) {
    return (T*)gMallocTable.calloc(aNumElems, sizeof(T));
  }

  // Fallible: returns nullptr on overflow of |aNewSize * sizeof(T)| or OOM.
  // |aOldSize| is unused; it is part of the AllocPolicy interface.
  template <typename T>
  static T* maybe_pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
    size_t size;
    if (MOZ_UNLIKELY(!mozilla::SafeMul(aNewSize, sizeof(T), &size))) {
      return nullptr;
    }
    return (T*)gMallocTable.realloc(aPtr, size);
  }

  // The remaining allocation functions are infallible: they abort on OOM
  // rather than returning nullptr.

  static void* malloc_(size_t aSize) {
    void* p = gMallocTable.malloc(aSize);
    ExitOnFailure(p);
    return p;
  }

  template <typename T>
  static T* pod_malloc(size_t aNumElems) {
    T* p = maybe_pod_malloc<T>(aNumElems);
    ExitOnFailure(p);
    return p;
  }

  static void* calloc_(size_t aCount, size_t aSize) {
    void* p = gMallocTable.calloc(aCount, aSize);
    ExitOnFailure(p);
    return p;
  }

  template <typename T>
  static T* pod_calloc(size_t aNumElems) {
    T* p = maybe_pod_calloc<T>(aNumElems);
    ExitOnFailure(p);
    return p;
  }

  static void* realloc_(void* aPtr, size_t aNewSize) {
    void* p = gMallocTable.realloc(aPtr, aNewSize);
    ExitOnFailure(p);
    return p;
  }

  template <typename T>
  static T* pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
    T* p = maybe_pod_realloc(aPtr, aOldSize, aNewSize);
    ExitOnFailure(p);
    return p;
  }

  static void* memalign_(size_t aAlignment, size_t aSize) {
    void* p = gMallocTable.memalign(aAlignment, aSize);
    ExitOnFailure(p);
    return p;
  }

  // |aSize| is unused; it is part of the AllocPolicy interface.
  template <typename T>
  static void free_(T* aPtr, size_t aSize = 0) {
    gMallocTable.free(aPtr);
  }

  // Infallible strdup replacement (DMD cannot use the vanilla strdup, which
  // would be intercepted).
  static char* strdup_(const char* aStr) {
    char* s = (char*)InfallibleAllocPolicy::malloc_(strlen(aStr) + 1);
    strcpy(s, aStr);
    return s;
  }

  // Infallible placement-new helpers for 0- and 1-argument constructors.
  template <class T>
  static T* new_() {
    void* mem = malloc_(sizeof(T));
    return new (mem) T;
  }

  template <class T, typename P1>
  static T* new_(const P1& aP1) {
    void* mem = malloc_(sizeof(T));
    return new (mem) T(aP1);
  }

  // Counterpart to new_(): runs the destructor, then frees. Null-safe.
  template <class T>
  static void delete_(T* aPtr) {
    if (aPtr) {
      aPtr->~T();
      InfallibleAllocPolicy::free_(aPtr);
    }
  }

  static void reportAllocOverflow() { ExitOnFailure(nullptr); }
  bool checkSimulatedOOM() const { return true; }
};

// This is only needed because of the |const void*| vs |void*| arg mismatch.
static size_t MallocSizeOf(const void* aPtr) {
  return gMallocTable.malloc_usable_size(const_cast<void*>(aPtr));
}

// Prints a status message to stderr prefixed with "DMD[<pid>] ", or to the
// Android log under the "DMD" tag.
void DMDFuncs::StatusMsg(const char* aFmt, va_list aAp) {
#ifdef ANDROID
  __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, aAp);
#else
  // The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL.
  size_t size = strlen(aFmt) + 64;
  char* fmt = (char*)InfallibleAllocPolicy::malloc_(size);
  snprintf(fmt, size, "DMD[%d] %s", getpid(), aFmt);
  vfprintf(stderr, fmt, aAp);
  InfallibleAllocPolicy::free_(fmt);
#endif
}

/* static */
void InfallibleAllocPolicy::ExitOnFailure(const void* aP) {
  if (!aP) {
    MOZ_CRASH("DMD out of memory; aborting");
  }
}

// Returns |part| as a percentage of |whole|; 0 when |whole| is 0.
static double Percent(size_t part, size_t whole) {
  return (whole == 0) ? 0 : 100 * (double)part / whole;
}

// Commifies the number.
239 static char* Show(size_t n, char* buf, size_t buflen) { 240 int nc = 0, i = 0, lasti = buflen - 2; 241 buf[lasti + 1] = '\0'; 242 if (n == 0) { 243 buf[lasti - i] = '0'; 244 i++; 245 } else { 246 while (n > 0) { 247 if (((i - nc) % 3) == 0 && i != 0) { 248 buf[lasti - i] = ','; 249 i++; 250 nc++; 251 } 252 buf[lasti - i] = static_cast<char>((n % 10) + '0'); 253 i++; 254 n /= 10; 255 } 256 } 257 int firstCharIndex = lasti - i + 1; 258 259 MOZ_ASSERT(firstCharIndex >= 0); 260 return &buf[firstCharIndex]; 261 } 262 263 //--------------------------------------------------------------------------- 264 // Options (Part 1) 265 //--------------------------------------------------------------------------- 266 267 class Options { 268 template <typename T> 269 struct NumOption { 270 const T mDefault; 271 const T mMax; 272 T mActual; 273 NumOption(T aDefault, T aMax) 274 : mDefault(aDefault), mMax(aMax), mActual(aDefault) {} 275 }; 276 277 // DMD has several modes. These modes affect what data is recorded and 278 // written to the output file, and the written data affects the 279 // post-processing that dmd.py can do. 280 // 281 // Users specify the mode as soon as DMD starts. This leads to minimal memory 282 // usage and log file size. It has the disadvantage that is inflexible -- if 283 // you want to change modes you have to re-run DMD. But in practice changing 284 // modes seems to be rare, so it's not much of a problem. 285 // 286 // An alternative possibility would be to always record and output *all* the 287 // information needed for all modes. This would let you choose the mode when 288 // running dmd.py, and so you could do multiple kinds of profiling on a 289 // single DMD run. But if you are only interested in one of the simpler 290 // modes, you'd pay the price of (a) increased memory usage and (b) *very* 291 // large log files. 
292 // 293 // Finally, another alternative possibility would be to do mode selection 294 // partly at DMD startup or recording, and then partly in dmd.py. This would 295 // give some extra flexibility at moderate memory and file size cost. But 296 // certain mode pairs wouldn't work, which would be confusing. 297 // 298 enum class Mode { 299 // For each live block, this mode outputs: size (usable and slop) and 300 // (possibly) and allocation stack. This mode is good for live heap 301 // profiling. 302 Live, 303 304 // Like "Live", but for each live block it also outputs: zero or more 305 // report stacks. This mode is good for identifying where memory reporters 306 // should be added. This is the default mode. 307 DarkMatter, 308 309 // Like "Live", but also outputs the same data for dead blocks. This mode 310 // does cumulative heap profiling, which is good for identifying where large 311 // amounts of short-lived allocations ("heap churn") occur. 312 Cumulative, 313 314 // Like "Live", but this mode also outputs for each live block the address 315 // of the block and the values contained in the blocks. This mode is useful 316 // for investigating leaks, by helping to figure out which blocks refer to 317 // other blocks. This mode force-enables full stacks coverage. 318 Scan 319 }; 320 321 // With full stacks, every heap block gets a stack trace recorded for it. 322 // This is complete but slow. 323 // 324 // With partial stacks, not all heap blocks will get a stack trace recorded. 325 // A Bernoulli trial (see mfbt/FastBernoulliTrial.h for details) is performed 326 // for each heap block to decide if it gets one. Because bigger heap blocks 327 // are more likely to get a stack trace, even though most heap *blocks* won't 328 // get a stack trace, most heap *bytes* will. 
329 enum class Stacks { Full, Partial }; 330 331 char* mDMDEnvVar; // a saved copy, for later printing 332 333 Mode mMode; 334 Stacks mStacks; 335 bool mShowDumpStats; 336 337 void BadArg(const char* aArg); 338 static const char* ValueIfMatch(const char* aArg, const char* aOptionName); 339 static bool GetLong(const char* aArg, const char* aOptionName, long aMin, 340 long aMax, long* aValue); 341 static bool GetBool(const char* aArg, const char* aOptionName, bool* aValue); 342 343 public: 344 explicit Options(const char* aDMDEnvVar); 345 346 bool IsLiveMode() const { return mMode == Mode::Live; } 347 bool IsDarkMatterMode() const { return mMode == Mode::DarkMatter; } 348 bool IsCumulativeMode() const { return mMode == Mode::Cumulative; } 349 bool IsScanMode() const { return mMode == Mode::Scan; } 350 351 const char* ModeString() const; 352 353 const char* DMDEnvVar() const { return mDMDEnvVar; } 354 355 bool DoFullStacks() const { return mStacks == Stacks::Full; } 356 size_t ShowDumpStats() const { return mShowDumpStats; } 357 }; 358 359 static Options* gOptions; 360 361 //--------------------------------------------------------------------------- 362 // The global lock 363 //--------------------------------------------------------------------------- 364 365 // MutexBase implements the platform-specific parts of a mutex. 
366 367 #ifdef XP_WIN 368 369 class MutexBase { 370 CRITICAL_SECTION mCS; 371 372 DISALLOW_COPY_AND_ASSIGN(MutexBase); 373 374 public: 375 MutexBase() { InitializeCriticalSection(&mCS); } 376 ~MutexBase() { DeleteCriticalSection(&mCS); } 377 378 void Lock() { EnterCriticalSection(&mCS); } 379 void Unlock() { LeaveCriticalSection(&mCS); } 380 }; 381 382 #else 383 384 class MutexBase { 385 pthread_mutex_t mMutex; 386 387 MutexBase(const MutexBase&) = delete; 388 389 const MutexBase& operator=(const MutexBase&) = delete; 390 391 public: 392 MutexBase() { pthread_mutex_init(&mMutex, nullptr); } 393 394 void Lock() { pthread_mutex_lock(&mMutex); } 395 void Unlock() { pthread_mutex_unlock(&mMutex); } 396 }; 397 398 #endif 399 400 class Mutex : private MutexBase { 401 bool mIsLocked; 402 403 Mutex(const Mutex&) = delete; 404 405 const Mutex& operator=(const Mutex&) = delete; 406 407 public: 408 Mutex() : mIsLocked(false) {} 409 410 void Lock() { 411 MutexBase::Lock(); 412 MOZ_ASSERT(!mIsLocked); 413 mIsLocked = true; 414 } 415 416 void Unlock() { 417 MOZ_ASSERT(mIsLocked); 418 mIsLocked = false; 419 MutexBase::Unlock(); 420 } 421 422 bool IsLocked() { return mIsLocked; } 423 }; 424 425 // This lock must be held while manipulating global state such as 426 // gStackTraceTable, gLiveBlockTable, gDeadBlockTable. Note that gOptions is 427 // *not* protected by this lock because it is only written to by Options(), 428 // which is only invoked at start-up and in ResetEverything(), which is only 429 // used by SmokeDMD.cpp. 
static Mutex* gStateLock = nullptr;

// RAII guard: holds gStateLock for the enclosing scope.
class AutoLockState {
  AutoLockState(const AutoLockState&) = delete;

  const AutoLockState& operator=(const AutoLockState&) = delete;

 public:
  AutoLockState() { gStateLock->Lock(); }
  ~AutoLockState() { gStateLock->Unlock(); }
};

// RAII guard: temporarily releases an already-held gStateLock for the
// enclosing scope, re-acquiring it on exit.
class AutoUnlockState {
  AutoUnlockState(const AutoUnlockState&) = delete;

  const AutoUnlockState& operator=(const AutoUnlockState&) = delete;

 public:
  AutoUnlockState() { gStateLock->Unlock(); }
  ~AutoUnlockState() { gStateLock->Lock(); }
};

//---------------------------------------------------------------------------
// Per-thread blocking of intercepts
//---------------------------------------------------------------------------

// On MacOS, the first __thread/thread_local access calls malloc, which leads
// to an infinite loop. So we use pthread-based TLS instead, which somehow
// doesn't have this problem.
#if !defined(XP_DARWIN)
#  define DMD_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T)
#else
#  define DMD_THREAD_LOCAL(T) \
    detail::ThreadLocal<T, detail::ThreadLocalKeyStorage>
#endif

// Per-thread state, lazily created on first use and stored in TLS.
class Thread {
  // Required for allocation via InfallibleAllocPolicy::new_.
  friend class InfallibleAllocPolicy;

  // When true, this blocks intercepts, which allows malloc interception
  // functions to themselves call malloc. (Nb: for direct calls to malloc we
  // can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes
  // indirectly call vanilla malloc via functions like MozStackWalk.)
  bool mBlockIntercepts;

  Thread() : mBlockIntercepts(false) {}

  Thread(const Thread&) = delete;

  const Thread& operator=(const Thread&) = delete;

  static DMD_THREAD_LOCAL(Thread*) tlsThread;

 public:
  // Must be called once before any Fetch(); crashes if TLS setup fails.
  static void Init() {
    if (!tlsThread.init()) {
      MOZ_CRASH();
    }
  }

  // Returns the calling thread's Thread object, creating it on first use.
  static Thread* Fetch() {
    Thread* t = tlsThread.get();
    if (MOZ_UNLIKELY(!t)) {
      // This memory is never freed, even if the thread dies. It's a leak, but
      // only a tiny one.
      t = InfallibleAllocPolicy::new_<Thread>();
      tlsThread.set(t);
    }

    return t;
  }

  // Blocking does not nest: asserts that intercepts weren't already blocked.
  bool BlockIntercepts() {
    MOZ_ASSERT(!mBlockIntercepts);
    return mBlockIntercepts = true;
  }

  bool UnblockIntercepts() {
    MOZ_ASSERT(mBlockIntercepts);
    return mBlockIntercepts = false;
  }

  bool InterceptsAreBlocked() const { return mBlockIntercepts; }
};

DMD_THREAD_LOCAL(Thread*) Thread::tlsThread;

// An object of this class must be created (on the stack) before running any
// code that might allocate.
// RAII guard: blocks malloc-intercept handling on this thread for the
// enclosing scope, so that DMD's own code may call the (replaced) malloc.
class AutoBlockIntercepts {
  Thread* const mT;

  AutoBlockIntercepts(const AutoBlockIntercepts&) = delete;

  const AutoBlockIntercepts& operator=(const AutoBlockIntercepts&) = delete;

 public:
  explicit AutoBlockIntercepts(Thread* aT) : mT(aT) { mT->BlockIntercepts(); }
  ~AutoBlockIntercepts() {
    MOZ_ASSERT(mT->InterceptsAreBlocked());
    mT->UnblockIntercepts();
  }
};

//---------------------------------------------------------------------------
// Location service
//---------------------------------------------------------------------------

// Lock policy for CodeAddressService: it reuses the global state lock.
struct DescribeCodeAddressLock {
  static void Unlock() { gStateLock->Unlock(); }
  static void Lock() { gStateLock->Lock(); }
  static bool IsLocked() { return gStateLock->IsLocked(); }
};

typedef CodeAddressService<InfallibleAllocPolicy, DescribeCodeAddressLock>
    CodeAddressService;

//---------------------------------------------------------------------------
// Stack traces
//---------------------------------------------------------------------------

// A fixed-capacity allocation stack trace. Instances are interned in
// gStackTraceTable and shared by all blocks with the same trace.
class StackTrace {
 public:
  static const uint32_t MaxFrames = 24;

 private:
  uint32_t mLength;             // The number of PCs.
  const void* mPcs[MaxFrames];  // The PCs themselves.

 public:
  StackTrace() : mLength(0) {}
  StackTrace(const StackTrace& aOther) : mLength(aOther.mLength) {
    PodCopy(mPcs, aOther.mPcs, mLength);
  }

  uint32_t Length() const { return mLength; }
  const void* Pc(uint32_t i) const {
    MOZ_ASSERT(i < mLength);
    return mPcs[i];
  }

  // Size in bytes of the used portion of mPcs.
  uint32_t Size() const { return mLength * sizeof(mPcs[0]); }

  // The stack trace returned by this function is interned in gStackTraceTable,
  // and so is immortal and unmovable.
  static const StackTrace* Get(Thread* aT);

  // Hash policy.

  typedef StackTrace* Lookup;

  static mozilla::HashNumber hash(const StackTrace* const& aSt) {
    return mozilla::HashBytes(aSt->mPcs, aSt->Size());
  }

  static bool match(const StackTrace* const& aA, const StackTrace* const& aB) {
    return aA->mLength == aB->mLength &&
           memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0;
  }

 private:
  // Callback for the stack walkers below: appends |aPc| to the trace being
  // built in |aClosure|.
  static void StackWalkCallback(uint32_t aFrameNumber, void* aPc, void* aSp,
                                void* aClosure) {
    StackTrace* st = (StackTrace*)aClosure;
    MOZ_ASSERT(st->mLength < MaxFrames);
    st->mPcs[st->mLength] = aPc;
    st->mLength++;
    MOZ_ASSERT(st->mLength == aFrameNumber);
  }
};

typedef mozilla::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy>
    StackTraceTable;
static StackTraceTable* gStackTraceTable = nullptr;

typedef mozilla::HashSet<const StackTrace*,
                         mozilla::DefaultHasher<const StackTrace*>,
                         InfallibleAllocPolicy>
    StackTraceSet;

typedef mozilla::HashSet<const void*, mozilla::DefaultHasher<const void*>,
                         InfallibleAllocPolicy>
    PointerSet;
typedef mozilla::HashMap<const void*, uint32_t,
                         mozilla::DefaultHasher<const void*>,
                         InfallibleAllocPolicy>
    PointerIdMap;

// We won't GC the stack trace table until it exceeds this many elements.
static uint32_t gGCStackTraceTableWhenSizeExceeds = 4 * 1024;

// Captures the calling thread's current stack trace and interns it in
// gStackTraceTable, returning the shared immortal copy.
/* static */ const StackTrace* StackTrace::Get(Thread* aT) {
  MOZ_ASSERT(gStateLock->IsLocked());
  MOZ_ASSERT(aT->InterceptsAreBlocked());

  // On Windows, MozStackWalk can acquire a lock from the shared library
  // loader. Another thread might call malloc while holding that lock (when
  // loading a shared library). So we can't be in gStateLock during the call
  // to MozStackWalk. For details, see
  // https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8
  // On Linux, something similar can happen; see bug 824340.
  // So let's just release it on all platforms.
  StackTrace tmp;
  {
    AutoUnlockState unlock;
    // In each of the following cases, skipFrames is chosen so that the
    // first frame in each stack trace is a replace_* function (or as close as
    // possible, given the vagaries of inlining on different platforms).
#if defined(XP_WIN) && defined(_M_IX86)
    // This avoids MozStackWalk(), which causes unusably slow startup on Win32
    // when it is called during static initialization (see bug 1241684).
    //
    // This code is cribbed from the Gecko Profiler, which also uses
    // FramePointerStackWalk() on Win32: REGISTERS_SYNC_POPULATE() for the
    // frame pointer, and GetStackTop() for the stack end.
    CONTEXT context;
    RtlCaptureContext(&context);
    void** fp = reinterpret_cast<void**>(context.Ebp);

    PNT_TIB pTib = reinterpret_cast<PNT_TIB>(NtCurrentTeb());
    void* stackEnd = static_cast<void*>(pTib->StackBase);
    FramePointerStackWalk(StackWalkCallback, MaxFrames, &tmp, fp, stackEnd);
#elif defined(XP_MACOSX)
    // This avoids MozStackWalk(), which has become unusably slow on Mac due to
    // changes in libunwind.
    //
    // This code is cribbed from the Gecko Profiler, which also uses
    // FramePointerStackWalk() on Mac: REGISTERS_SYNC_POPULATE() for the frame
    // pointer, and GetStackTop() for the stack end.
#  pragma GCC diagnostic push
#  pragma GCC diagnostic ignored "-Wframe-address"
    void** fp = reinterpret_cast<void**>(__builtin_frame_address(1));
#  pragma GCC diagnostic pop
    void* stackEnd = pthread_get_stackaddr_np(pthread_self());
    FramePointerStackWalk(StackWalkCallback, MaxFrames, &tmp, fp, stackEnd);
#else
    MozStackWalk(StackWalkCallback, nullptr, MaxFrames, &tmp);
#endif
  }

  // Intern |tmp|: return the previously recorded copy if this trace has been
  // seen before, otherwise add a heap-allocated copy to the table.
  StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp);
  if (!p) {
    StackTrace* stnew = InfallibleAllocPolicy::new_<StackTrace>(tmp);
    MOZ_ALWAYS_TRUE(gStackTraceTable->add(p, stnew));
  }
  return *p;
}

//---------------------------------------------------------------------------
// Heap blocks
//---------------------------------------------------------------------------

// This class combines a 2-byte-aligned pointer (i.e. one whose bottom bit
// is zero) with a 1-bit tag.
//
// |T| is the pointer type, e.g. |int*|, not the pointed-to type. This makes
// it easier to have const pointers, e.g. |TaggedPtr<const int*>|.
template <typename T>
class TaggedPtr {
  // The pointer and the integer view share storage; the tag lives in the
  // (always-zero) bottom bit of the pointer.
  union {
    T mPtr;
    uintptr_t mUint;
  };

  static const uintptr_t kTagMask = uintptr_t(0x1);
  static const uintptr_t kPtrMask = ~kTagMask;

  static bool IsTwoByteAligned(T aPtr) {
    return (uintptr_t(aPtr) & kTagMask) == 0;
  }

 public:
  TaggedPtr() : mPtr(nullptr) {}

  TaggedPtr(T aPtr, bool aBool) : mPtr(aPtr) {
    MOZ_ASSERT(IsTwoByteAligned(aPtr));
    uintptr_t tag = uintptr_t(aBool);
    MOZ_ASSERT(tag <= kTagMask);
    mUint |= (tag & kTagMask);
  }

  void Set(T aPtr, bool aBool) {
    MOZ_ASSERT(IsTwoByteAligned(aPtr));
    mPtr = aPtr;
    uintptr_t tag = uintptr_t(aBool);
    MOZ_ASSERT(tag <= kTagMask);
    mUint |= (tag & kTagMask);
  }

  T Ptr() const { return reinterpret_cast<T>(mUint & kPtrMask); }

  bool Tag() const { return bool(mUint & kTagMask); }
};

// A live heap block. Stores both basic data and data about reports, if we're
// in DarkMatter mode.
class LiveBlock {
  const void* mPtr;
  const size_t mReqSize;  // size requested

  // The stack trace where this block was allocated, or nullptr if we didn't
  // record one.
  const StackTrace* const mAllocStackTrace;

  // This array has two elements because we record at most two reports of a
  // block.
  // - Ptr: |mReportStackTrace| - stack trace where this block was reported.
  //   nullptr if not reported.
  // - Tag bit 0: |mReportedOnAlloc| - was the block reported immediately on
  //   allocation? If so, DMD must not clear the report at the end of
  //   Analyze(). Only relevant if |mReportStackTrace| is non-nullptr.
  //
  // |mPtr| is used as the key in LiveBlockTable, so it's ok for this member
  // to be |mutable|.
  //
  // Only used in DarkMatter mode.
  mutable TaggedPtr<const StackTrace*> mReportStackTrace_mReportedOnAlloc[2];

 public:
  LiveBlock(const void* aPtr, size_t aReqSize,
            const StackTrace* aAllocStackTrace)
      : mPtr(aPtr), mReqSize(aReqSize), mAllocStackTrace(aAllocStackTrace) {}

  const void* Address() const { return mPtr; }

  size_t ReqSize() const { return mReqSize; }

  // Slop is the allocator's rounding-up: usable size minus requested size.
  size_t SlopSize() const { return MallocSizeOf(mPtr) - mReqSize; }

  const StackTrace* AllocStackTrace() const { return mAllocStackTrace; }

  const StackTrace* ReportStackTrace1() const {
    MOZ_ASSERT(gOptions->IsDarkMatterMode());
    return mReportStackTrace_mReportedOnAlloc[0].Ptr();
  }

  const StackTrace* ReportStackTrace2() const {
    MOZ_ASSERT(gOptions->IsDarkMatterMode());
    return mReportStackTrace_mReportedOnAlloc[1].Ptr();
  }

  bool ReportedOnAlloc1() const {
    MOZ_ASSERT(gOptions->IsDarkMatterMode());
    return mReportStackTrace_mReportedOnAlloc[0].Tag();
  }

  bool ReportedOnAlloc2() const {
    MOZ_ASSERT(gOptions->IsDarkMatterMode());
    return mReportStackTrace_mReportedOnAlloc[1].Tag();
  }

  // Inserts every stack trace this block references into |aStackTraces|;
  // used to compute the set of live traces during trace-table GC.
  void AddStackTracesToTable(StackTraceSet& aStackTraces) const {
    if (AllocStackTrace()) {
      MOZ_ALWAYS_TRUE(aStackTraces.put(AllocStackTrace()));
    }
    if (gOptions->IsDarkMatterMode()) {
      if (ReportStackTrace1()) {
        MOZ_ALWAYS_TRUE(aStackTraces.put(ReportStackTrace1()));
      }
      if (ReportStackTrace2()) {
        MOZ_ALWAYS_TRUE(aStackTraces.put(ReportStackTrace2()));
      }
    }
  }

  // Number of reports recorded for this block (0, 1 or 2); report 2 can only
  // be present if report 1 is.
  uint32_t NumReports() const {
    MOZ_ASSERT(gOptions->IsDarkMatterMode());
    if (ReportStackTrace2()) {
      MOZ_ASSERT(ReportStackTrace1());
      return 2;
    }
    if (ReportStackTrace1()) {
      return 1;
    }
    return 0;
  }

  // This is |const| thanks to the |mutable| fields above.
  void Report(Thread* aT, bool aReportedOnAlloc) const {
    MOZ_ASSERT(gOptions->IsDarkMatterMode());
    // We don't bother recording reports after the 2nd one.
    uint32_t numReports = NumReports();
    if (numReports < 2) {
      mReportStackTrace_mReportedOnAlloc[numReports].Set(StackTrace::Get(aT),
                                                         aReportedOnAlloc);
    }
  }

  // Clears any report that wasn't made at allocation time, keeping
  // reported-on-alloc entries (compacted into slot 0 when necessary).
  void UnreportIfNotReportedOnAlloc() const {
    MOZ_ASSERT(gOptions->IsDarkMatterMode());
    if (!ReportedOnAlloc1() && !ReportedOnAlloc2()) {
      mReportStackTrace_mReportedOnAlloc[0].Set(nullptr, 0);
      mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);

    } else if (!ReportedOnAlloc1() && ReportedOnAlloc2()) {
      // Shift the 2nd report down to the 1st one.
      mReportStackTrace_mReportedOnAlloc[0] =
          mReportStackTrace_mReportedOnAlloc[1];
      mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);

    } else if (ReportedOnAlloc1() && !ReportedOnAlloc2()) {
      mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0);
    }
  }

  // Hash policy.

  typedef const void* Lookup;

  static mozilla::HashNumber hash(const void* const& aPtr) {
    return mozilla::HashGeneric(aPtr);
  }

  static bool match(const LiveBlock& aB, const void* const& aPtr) {
    return aB.mPtr == aPtr;
  }
};

// A table of live blocks where the lookup key is the block address.
typedef mozilla::HashSet<LiveBlock, LiveBlock, InfallibleAllocPolicy>
    LiveBlockTable;
static LiveBlockTable* gLiveBlockTable = nullptr;

class AggregatedLiveBlockHashPolicy {
 public:
  typedef const LiveBlock* const Lookup;

  // NOTE(review): in DarkMatter mode, hash() folds in the ReportedOnAlloc
  // flags while match() compares the ReportStackTrace pointers instead.
  // Blocks that compare equal under match() but differ in their
  // reported-on-alloc flags would hash differently -- confirm this mismatch
  // is intended (or benign) for the aggregation table.
  static mozilla::HashNumber hash(const LiveBlock* const& aB) {
    return gOptions->IsDarkMatterMode()
               ? mozilla::HashGeneric(
                     aB->ReqSize(), aB->SlopSize(), aB->AllocStackTrace(),
                     aB->ReportedOnAlloc1(), aB->ReportedOnAlloc2())
               : mozilla::HashGeneric(aB->ReqSize(), aB->SlopSize(),
                                      aB->AllocStackTrace());
  }

  static bool match(const LiveBlock* const& aA, const LiveBlock* const& aB) {
    return gOptions->IsDarkMatterMode()
               ? aA->ReqSize() == aB->ReqSize() &&
                     aA->SlopSize() == aB->SlopSize() &&
                     aA->AllocStackTrace() == aB->AllocStackTrace() &&
                     aA->ReportStackTrace1() == aB->ReportStackTrace1() &&
                     aA->ReportStackTrace2() == aB->ReportStackTrace2()
               : aA->ReqSize() == aB->ReqSize() &&
                     aA->SlopSize() == aB->SlopSize() &&
                     aA->AllocStackTrace() == aB->AllocStackTrace();
  }
};

// A table of live blocks where the lookup key is everything but the block
// address. For aggregating similar live blocks at output time.
typedef mozilla::HashMap<const LiveBlock*, size_t,
                         AggregatedLiveBlockHashPolicy, InfallibleAllocPolicy>
    AggregatedLiveBlockTable;

// A freed heap block.
class DeadBlock {
  const size_t mReqSize;   // size requested
  const size_t mSlopSize;  // slop above size requested

  // The stack trace where this block was allocated.
  const StackTrace* const mAllocStackTrace;

 public:
  DeadBlock() : mReqSize(0), mSlopSize(0), mAllocStackTrace(nullptr) {}

  // Snapshot of a LiveBlock at free time; the slop is captured here because
  // the block's usable size can no longer be queried once it is freed.
  explicit DeadBlock(const LiveBlock& aLb)
      : mReqSize(aLb.ReqSize()),
        mSlopSize(aLb.SlopSize()),
        mAllocStackTrace(aLb.AllocStackTrace()) {}

  ~DeadBlock() = default;

  size_t ReqSize() const { return mReqSize; }
  size_t SlopSize() const { return mSlopSize; }

  const StackTrace* AllocStackTrace() const { return mAllocStackTrace; }

  void AddStackTracesToTable(StackTraceSet& aStackTraces) const {
    if (AllocStackTrace()) {
      MOZ_ALWAYS_TRUE(aStackTraces.put(AllocStackTrace()));
    }
  }

  // Hash policy.

  typedef DeadBlock Lookup;

  static mozilla::HashNumber hash(const DeadBlock& aB) {
    return mozilla::HashGeneric(aB.ReqSize(), aB.SlopSize(),
                                aB.AllocStackTrace());
  }

  static bool match(const DeadBlock& aA, const DeadBlock& aB) {
    return aA.ReqSize() == aB.ReqSize() && aA.SlopSize() == aB.SlopSize() &&
           aA.AllocStackTrace() == aB.AllocStackTrace();
  }
};

// For each unique DeadBlock value we store a count of how many actual dead
// blocks have that value.
typedef mozilla::HashMap<DeadBlock, size_t, DeadBlock, InfallibleAllocPolicy>
    DeadBlockTable;
static DeadBlockTable* gDeadBlockTable = nullptr;

// Add the dead block to the dead block table, if that's appropriate.
// Acquires gStateLock internally, so the caller must NOT already hold it.
void MaybeAddToDeadBlockTable(const DeadBlock& aDb) {
  if (gOptions->IsCumulativeMode() && aDb.AllocStackTrace()) {
    AutoLockState lock;
    if (DeadBlockTable::AddPtr p = gDeadBlockTable->lookupForAdd(aDb)) {
      p->value() += 1;
    } else {
      MOZ_ALWAYS_TRUE(gDeadBlockTable->add(p, aDb, 1));
    }
  }
}

// Add a pointer to each live stack trace into the given StackTraceSet. (A
// stack trace is live if it's used by one of the live blocks.)
static void GatherUsedStackTraces(StackTraceSet& aStackTraces) {
  MOZ_ASSERT(gStateLock->IsLocked());
  MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());

  aStackTraces.clear();
  MOZ_ALWAYS_TRUE(aStackTraces.reserve(512));

  for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) {
    iter.get().AddStackTracesToTable(aStackTraces);
  }

  for (auto iter = gDeadBlockTable->iter(); !iter.done(); iter.next()) {
    iter.get().key().AddStackTracesToTable(aStackTraces);
  }
}

// Delete stack traces that we aren't using, and compact our hashtable.
967 static void GCStackTraces() { 968 MOZ_ASSERT(gStateLock->IsLocked()); 969 MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); 970 971 StackTraceSet usedStackTraces; 972 GatherUsedStackTraces(usedStackTraces); 973 974 // Delete all unused stack traces from gStackTraceTable. The ModIterator 975 // destructor will automatically rehash and compact the table. 976 for (auto iter = gStackTraceTable->modIter(); !iter.done(); iter.next()) { 977 StackTrace* const& st = iter.get(); 978 if (!usedStackTraces.has(st)) { 979 iter.remove(); 980 InfallibleAllocPolicy::delete_(st); 981 } 982 } 983 984 // Schedule a GC when we have twice as many stack traces as we had right after 985 // this GC finished. 986 gGCStackTraceTableWhenSizeExceeds = 2 * gStackTraceTable->count(); 987 } 988 989 //--------------------------------------------------------------------------- 990 // malloc/free callbacks 991 //--------------------------------------------------------------------------- 992 993 static FastBernoulliTrial* gBernoulli; 994 995 // In testing, a probability of 0.003 resulted in ~25% of heap blocks getting 996 // a stack trace and ~80% of heap bytes getting a stack trace. (This is 997 // possible because big heap blocks are more likely to get a stack trace.) 998 // 999 // We deliberately choose not to give the user control over this probability 1000 // (other than effectively setting it to 1 via --stacks=full) because it's 1001 // quite inscrutable and generally the user just wants "faster and imprecise" 1002 // or "slower and precise". 1003 // 1004 // The random number seeds are arbitrary and were obtained from random.org. If 1005 // you change them you'll need to change the tests as well, because their 1006 // expected output is based on the particular sequence of trial results that we 1007 // get with these seeds. 
// (Re)construct gBernoulli in place with DMD's fixed sampling probability and
// seeds. Placement-new is used because gBernoulli's storage is raw memory
// obtained with InfallibleAllocPolicy::malloc_ in Init(), not a static object.
static void ResetBernoulli() {
  new (gBernoulli)
      FastBernoulliTrial(0.003, 0x8e26eeee166bc8ca, 0x56820f304a9c9ae0);
}

// Record a successful allocation of |aReqSize| bytes at |aPtr| in the live
// block table. |aT| is the current thread; intercepts are blocked for the
// duration so DMD's own allocations aren't recorded.
static void AllocCallback(void* aPtr, size_t aReqSize, Thread* aT) {
  if (!aPtr) {
    return;
  }

  AutoLockState lock;
  AutoBlockIntercepts block(aT);

  size_t actualSize = gMallocTable.malloc_usable_size(aPtr);

  // We may or may not record the allocation stack trace, depending on the
  // options and the outcome of a Bernoulli trial.
  bool getTrace = gOptions->DoFullStacks() || gBernoulli->trial(actualSize);
  LiveBlock b(aPtr, aReqSize, getTrace ? StackTrace::Get(aT) : nullptr);
  LiveBlockTable::AddPtr p = gLiveBlockTable->lookupForAdd(aPtr);
  if (!p) {
    // Most common case: there wasn't a record already.
    MOZ_ALWAYS_TRUE(gLiveBlockTable->add(p, b));
  } else {
    // Edge-case: there was a record for the same address. We'll assume the
    // allocator is not giving out a pointer to an existing allocation, so
    // this means the previously recorded allocation was freed while we were
    // blocking interceptions. This can happen while processing the data in
    // e.g. AnalyzeImpl.
    if (gOptions->IsCumulativeMode()) {
      // Copy it out so it can be added to the dead block list later.
      // NOTE(review): MaybeAddToDeadBlockTable acquires the state lock, which
      // this function already holds via |lock| -- confirm the lock is safe to
      // re-enter on this path.
      DeadBlock db(*p);
      MaybeAddToDeadBlockTable(db);
    }
    gLiveBlockTable->remove(p);
    MOZ_ALWAYS_TRUE(gLiveBlockTable->putNew(aPtr, b));
  }
}

// Free-side bookkeeping: remove |aPtr|'s record from the live block table. In
// cumulative mode the record is copied into |*aDeadBlock| (via placement-new)
// so the caller can add it to the dead block table after this returns.
static void FreeCallback(void* aPtr, Thread* aT, DeadBlock* aDeadBlock) {
  if (!aPtr) {
    return;
  }

  AutoLockState lock;
  AutoBlockIntercepts block(aT);

  if (LiveBlockTable::Ptr lb = gLiveBlockTable->lookup(aPtr)) {
    if (gOptions->IsCumulativeMode()) {
      // Copy it out so it can be added to the dead block list later.
      new (aDeadBlock) DeadBlock(*lb);
    }
    gLiveBlockTable->remove(lb);
  } else {
    // We have no record of the block. It must be a bogus pointer, or one that
    // DMD wasn't able to see allocated. This should be extremely rare.
  }

  // Frees are a convenient point to periodically discard stack traces that no
  // block references any more.
  if (gStackTraceTable->count() > gGCStackTraceTableWhenSizeExceeds) {
    GCStackTraces();
  }
}

//---------------------------------------------------------------------------
// malloc/free interception
//---------------------------------------------------------------------------

// Forward declaration; defined in the "DMD start-up" section below.
static bool Init(malloc_table_t* aMallocTable);

}  // namespace dmd
}  // namespace mozilla

// Intercepted malloc: perform the real allocation, then record it.
static void* replace_malloc(size_t aSize) {
  using namespace mozilla::dmd;

  Thread* t = Thread::Fetch();
  if (t->InterceptsAreBlocked()) {
    // Intercepts are blocked, which means this must be a call to malloc
    // triggered indirectly by DMD (e.g. via MozStackWalk). Be infallible.
    return InfallibleAllocPolicy::malloc_(aSize);
  }

  // This must be a call to malloc from outside DMD. Intercept it.
  void* ptr = gMallocTable.malloc(aSize);
  AllocCallback(ptr, aSize, t);
  return ptr;
}

// Intercepted calloc; same blocked/unblocked split as replace_malloc.
static void* replace_calloc(size_t aCount, size_t aSize) {
  using namespace mozilla::dmd;

  Thread* t = Thread::Fetch();
  if (t->InterceptsAreBlocked()) {
    return InfallibleAllocPolicy::calloc_(aCount, aSize);
  }

  // |aCount * aSize| could overflow, but if that happens then
  // |gMallocTable.calloc()| will return nullptr and |AllocCallback()| will
  // return immediately without using the overflowed value.
  void* ptr = gMallocTable.calloc(aCount, aSize);
  AllocCallback(ptr, aCount * aSize, t);
  return ptr;
}

// Intercepted realloc. The tricky part is keeping the table consistent if the
// underlying realloc fails or moves the block.
static void* replace_realloc(void* aOldPtr, size_t aSize) {
  using namespace mozilla::dmd;

  Thread* t = Thread::Fetch();
  if (t->InterceptsAreBlocked()) {
    return InfallibleAllocPolicy::realloc_(aOldPtr, aSize);
  }

  // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|.
  if (!aOldPtr) {
    return replace_malloc(aSize);
  }

  // Be very careful here!  Must remove the block from the table before doing
  // the realloc to avoid races, just like in replace_free().
  // Nb: This does an unnecessary hashtable remove+add if the block doesn't
  // move, but doing better isn't worth the effort.
  DeadBlock db;
  FreeCallback(aOldPtr, t, &db);
  void* ptr = gMallocTable.realloc(aOldPtr, aSize);
  if (ptr) {
    AllocCallback(ptr, aSize, t);
    MaybeAddToDeadBlockTable(db);
  } else {
    // If realloc fails, we undo the prior operations by re-inserting the old
    // pointer into the live block table. We don't have to do anything with the
    // dead block list because the dead block hasn't yet been inserted. The
    // block will end up looking like it was allocated for the first time here,
    // which is untrue, and the slop bytes will be zero, which may be untrue.
    // But this case is rare and doing better isn't worth the effort.
    AllocCallback(aOldPtr, gMallocTable.malloc_usable_size(aOldPtr), t);
  }
  return ptr;
}

// Intercepted memalign; same pattern as replace_malloc.
static void* replace_memalign(size_t aAlignment, size_t aSize) {
  using namespace mozilla::dmd;

  Thread* t = Thread::Fetch();
  if (t->InterceptsAreBlocked()) {
    return InfallibleAllocPolicy::memalign_(aAlignment, aSize);
  }

  void* ptr = gMallocTable.memalign(aAlignment, aSize);
  AllocCallback(ptr, aSize, t);
  return ptr;
}

// Intercepted free: update the table first, then release the memory.
static void replace_free(void* aPtr) {
  using namespace mozilla::dmd;

  Thread* t = Thread::Fetch();
  if (t->InterceptsAreBlocked()) {
    return InfallibleAllocPolicy::free_(aPtr);
  }

  // Do the actual free after updating the table. Otherwise, another thread
  // could call malloc and get the freed block and update the table, and then
  // our update here would remove the newly-malloc'd block.
  DeadBlock db;
  FreeCallback(aPtr, t, &db);
  MaybeAddToDeadBlockTable(db);
  gMallocTable.free(aPtr);
}

// Entry point called by the replace-malloc machinery. If DMD initializes
// successfully (i.e. $DMD is set), install the replace_* functions above and
// expose the bridge so Gecko can reach DMDFuncs.
void replace_init(malloc_table_t* aMallocTable, ReplaceMallocBridge** aBridge) {
  if (mozilla::dmd::Init(aMallocTable)) {
#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE
#define MALLOC_DECL(name, ...) aMallocTable->name = replace_##name;
#include "malloc_decls.h"
    *aBridge = mozilla::dmd::gDMDBridge;
  }
}

namespace mozilla {
namespace dmd {

//---------------------------------------------------------------------------
// Options (Part 2)
//---------------------------------------------------------------------------

// Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah"
// (where "blah" is non-empty) and return the pointer to "blah". Leading
// whitespace must already have been stripped from |aArg| (the parser below
// does so, and ValueIfMatch asserts it).
const char* Options::ValueIfMatch(const char* aArg, const char* aOptionName) {
  MOZ_ASSERT(!isspace(*aArg));  // any leading whitespace should not remain
  size_t optionLen = strlen(aOptionName);
  // Match "<name>=" and require at least one char after the '='.
  if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' &&
      aArg[optionLen + 1]) {
    return aArg + optionLen + 1;
  }
  return nullptr;
}

// Extracts a |long| value for an option from an argument. It must be within
// the range |aMin..aMax| (inclusive).
bool Options::GetLong(const char* aArg, const char* aOptionName, long aMin,
                      long aMax, long* aValue) {
  if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) {
    char* endPtr;
    *aValue = strtol(optionValue, &endPtr, /* base */ 10);
    // Reject trailing junk (*endPtr != '\0'), out-of-range values, and
    // LONG_MIN/LONG_MAX -- strtol also returns those on under/overflow, so
    // they can't be distinguished from genuine values.
    if (!*endPtr && aMin <= *aValue && *aValue <= aMax && *aValue != LONG_MIN &&
        *aValue != LONG_MAX) {
      return true;
    }
  }
  return false;
}

// Extracts a |bool| value for an option -- encoded as "yes" or "no" -- from an
// argument.
bool Options::GetBool(const char* aArg, const char* aOptionName, bool* aValue) {
  if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) {
    if (strcmp(optionValue, "yes") == 0) {
      *aValue = true;
      return true;
    }
    if (strcmp(optionValue, "no") == 0) {
      *aValue = false;
      return true;
    }
  }
  return false;
}

// Parse the $DMD environment variable into option settings. A value of "1"
// means "enabled with the defaults set above". Otherwise the string is a
// whitespace-separated argument list; it is tokenized by temporarily writing
// '\0' into a private copy (mDMDEnvVar) and restoring the byte afterwards.
Options::Options(const char* aDMDEnvVar)
    : mDMDEnvVar(aDMDEnvVar ? InfallibleAllocPolicy::strdup_(aDMDEnvVar)
                            : nullptr),
      mMode(Mode::DarkMatter),
      mStacks(Stacks::Partial),
      mShowDumpStats(false) {
  char* e = mDMDEnvVar;
  if (e && strcmp(e, "1") != 0) {
    bool isEnd = false;
    while (!isEnd) {
      // Consume leading whitespace.
      while (isspace(*e)) {
        e++;
      }

      // Save the start of the arg.
      const char* arg = e;

      // Find the first char after the arg, and temporarily change it to '\0'
      // to isolate the arg.
      while (!isspace(*e) && *e != '\0') {
        e++;
      }
      char replacedChar = *e;
      isEnd = replacedChar == '\0';
      *e = '\0';

      // Handle arg
      bool myBool;
      if (strcmp(arg, "--mode=live") == 0) {
        mMode = Mode::Live;
      } else if (strcmp(arg, "--mode=dark-matter") == 0) {
        mMode = Mode::DarkMatter;
      } else if (strcmp(arg, "--mode=cumulative") == 0) {
        mMode = Mode::Cumulative;
      } else if (strcmp(arg, "--mode=scan") == 0) {
        mMode = Mode::Scan;

      } else if (strcmp(arg, "--stacks=full") == 0) {
        mStacks = Stacks::Full;
      } else if (strcmp(arg, "--stacks=partial") == 0) {
        mStacks = Stacks::Partial;

      } else if (GetBool(arg, "--show-dump-stats", &myBool)) {
        mShowDumpStats = myBool;

      } else if (strcmp(arg, "") == 0) {
        // This can only happen if there is trailing whitespace. Ignore.
        MOZ_ASSERT(isEnd);

      } else {
        BadArg(arg);
      }

      // Undo the temporary isolation.
      *e = replacedChar;
    }
  }

  // Scan mode needs every block's allocation stack, so it forces full stacks
  // regardless of any --stacks argument.
  if (mMode == Mode::Scan) {
    mStacks = Stacks::Full;
  }
}

// Report an unrecognized $DMD argument and abort the process.
void Options::BadArg(const char* aArg) {
  StatusMsg("\n");
  StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg);
  StatusMsg("See the output of |mach help run| for the allowed options.\n");
  exit(1);
}

// Human-readable name of the current mode, as emitted in the JSON output.
const char* Options::ModeString() const {
  switch (mMode) {
    case Mode::Live:
      return "live";
    case Mode::DarkMatter:
      return "dark-matter";
    case Mode::Cumulative:
      return "cumulative";
    case Mode::Scan:
      return "scan";
    default:
      MOZ_ASSERT(false);
      return "(unknown DMD mode)";
  }
}

//---------------------------------------------------------------------------
// DMD start-up
//---------------------------------------------------------------------------

#ifndef XP_WIN
// pthread_atfork handlers: hold the state lock across fork() so the child
// doesn't inherit it in a locked (and then never-unlockable) state.
static void prefork() {
  if (gStateLock) {
    gStateLock->Lock();
  }
}

static void postfork() {
  if (gStateLock) {
    gStateLock->Unlock();
  }
}
#endif

// WARNING: this function runs *very* early -- before all static initializers
// have run. For this reason, non-scalar globals such as gStateLock and
// gStackTraceTable are allocated dynamically (so we can guarantee their
// construction in this function) rather than statically.
static bool Init(malloc_table_t* aMallocTable) {
  // DMD is controlled by the |DMD| environment variable.
  const char* e = getenv("DMD");

  if (!e) {
    return false;
  }
  // Initialize the function table first, because StatusMsg uses
  // InfallibleAllocPolicy::malloc_, which uses it.
  gMallocTable = *aMallocTable;

  StatusMsg("$DMD = '%s'\n", e);

  gDMDBridge = InfallibleAllocPolicy::new_<DMDBridge>();

#ifndef XP_WIN
  // Avoid deadlocks when forking by acquiring our state lock prior to forking
  // and releasing it after forking. See |LogAlloc|'s |replace_init| for
  // in-depth details.
  //
  // Note: This must run after attempting an allocation so as to give the
  // system malloc a chance to insert its own atfork handler.
  pthread_atfork(prefork, postfork, postfork);
#endif
  // Parse $DMD env var.
  gOptions = InfallibleAllocPolicy::new_<Options>(e);

  gStateLock = InfallibleAllocPolicy::new_<Mutex>();

  gBernoulli = (FastBernoulliTrial*)InfallibleAllocPolicy::malloc_(
      sizeof(FastBernoulliTrial));
  ResetBernoulli();

  Thread::Init();

  {
    AutoLockState lock;

    gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>(8192);
    gLiveBlockTable = InfallibleAllocPolicy::new_<LiveBlockTable>(8192);

    // Create this even if the mode isn't Cumulative (albeit with a small
    // size), in case the mode is changed later on (as is done by SmokeDMD.cpp,
    // for example).
    size_t tableSize = gOptions->IsCumulativeMode() ? 8192 : 4;
    gDeadBlockTable = InfallibleAllocPolicy::new_<DeadBlockTable>(tableSize);
  }

  return true;
}

//---------------------------------------------------------------------------
// Block reporting and unreporting
//---------------------------------------------------------------------------

// Mark the live block at |aPtr| as reported (by a memory reporter). Only
// meaningful in dark-matter mode; a no-op otherwise.
static void ReportHelper(const void* aPtr, bool aReportedOnAlloc) {
  if (!gOptions->IsDarkMatterMode() || !aPtr) {
    return;
  }

  Thread* t = Thread::Fetch();

  AutoBlockIntercepts block(t);
  AutoLockState lock;

  if (LiveBlockTable::Ptr p = gLiveBlockTable->lookup(aPtr)) {
    p->Report(t, aReportedOnAlloc);
  } else {
    // We have no record of the block. It must be a bogus pointer. This should
    // be extremely rare because Report() is almost always called in
    // conjunction with a malloc_size_of-style function. Print a message so
    // that we get some feedback.
    StatusMsg("Unknown pointer %p\n", aPtr);
  }
}

void DMDFuncs::Report(const void* aPtr) {
  ReportHelper(aPtr, /* onAlloc */ false);
}

void DMDFuncs::ReportOnAlloc(const void* aPtr) {
  ReportHelper(aPtr, /* onAlloc */ true);
}

//---------------------------------------------------------------------------
// DMD output
//---------------------------------------------------------------------------

// The version number of the output format. Increment this if you make
// backwards-incompatible changes to the format. See DMD.h for the version
// history.
static const int kOutputVersionNumber = 5;

// Note that, unlike most SizeOf* functions, this function does not take a
// |mozilla::MallocSizeOf| argument. That's because those arguments are
// primarily to aid DMD track heap blocks... but DMD deliberately doesn't track
// heap blocks it allocated for itself!
1443 // 1444 // SizeOfInternal should be called while you're holding the state lock and 1445 // while intercepts are blocked; SizeOf acquires the lock and blocks 1446 // intercepts. 1447 1448 static void SizeOfInternal(Sizes* aSizes) { 1449 MOZ_ASSERT(gStateLock->IsLocked()); 1450 MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); 1451 1452 aSizes->Clear(); 1453 1454 StackTraceSet usedStackTraces; 1455 GatherUsedStackTraces(usedStackTraces); 1456 1457 for (auto iter = gStackTraceTable->iter(); !iter.done(); iter.next()) { 1458 StackTrace* const& st = iter.get(); 1459 1460 if (usedStackTraces.has(st)) { 1461 aSizes->mStackTracesUsed += MallocSizeOf(st); 1462 } else { 1463 aSizes->mStackTracesUnused += MallocSizeOf(st); 1464 } 1465 } 1466 1467 aSizes->mStackTraceTable = 1468 gStackTraceTable->shallowSizeOfIncludingThis(MallocSizeOf); 1469 1470 aSizes->mLiveBlockTable = 1471 gLiveBlockTable->shallowSizeOfIncludingThis(MallocSizeOf); 1472 1473 aSizes->mDeadBlockTable = 1474 gDeadBlockTable->shallowSizeOfIncludingThis(MallocSizeOf); 1475 } 1476 1477 void DMDFuncs::SizeOf(Sizes* aSizes) { 1478 aSizes->Clear(); 1479 1480 AutoBlockIntercepts block(Thread::Fetch()); 1481 AutoLockState lock; 1482 SizeOfInternal(aSizes); 1483 } 1484 1485 void DMDFuncs::ClearReports() { 1486 if (!gOptions->IsDarkMatterMode()) { 1487 return; 1488 } 1489 1490 AutoLockState lock; 1491 1492 // Unreport all blocks that were marked reported by a memory reporter. This 1493 // excludes those that were reported on allocation, because they need to keep 1494 // their reported marking. 1495 for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { 1496 iter.get().UnreportIfNotReportedOnAlloc(); 1497 } 1498 } 1499 1500 class ToIdStringConverter final { 1501 public: 1502 ToIdStringConverter() : mIdMap(512), mNextId(0) {} 1503 1504 // Converts a pointer to a unique ID. Reuses the existing ID for the pointer 1505 // if it's been seen before. 
1506 const char* ToIdString(const void* aPtr) { 1507 uint32_t id; 1508 PointerIdMap::AddPtr p = mIdMap.lookupForAdd(aPtr); 1509 if (!p) { 1510 id = mNextId++; 1511 MOZ_ALWAYS_TRUE(mIdMap.add(p, aPtr, id)); 1512 } else { 1513 id = p->value(); 1514 } 1515 return Base32(id); 1516 } 1517 1518 size_t sizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const { 1519 return mIdMap.shallowSizeOfExcludingThis(aMallocSizeOf); 1520 } 1521 1522 private: 1523 // This function converts an integer to base-32. We use base-32 values for 1524 // indexing into the traceTable and the frameTable, for the following reasons. 1525 // 1526 // - Base-32 gives more compact indices than base-16. 1527 // 1528 // - 32 is a power-of-two, which makes the necessary div/mod calculations 1529 // fast. 1530 // 1531 // - We can (and do) choose non-numeric digits for base-32. When 1532 // inspecting/debugging the JSON output, non-numeric indices are easier to 1533 // search for than numeric indices. 1534 // 1535 char* Base32(uint32_t aN) { 1536 static const char digits[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef"; 1537 1538 char* b = mIdBuf + kIdBufLen - 1; 1539 *b = '\0'; 1540 do { 1541 b--; 1542 if (b == mIdBuf) { 1543 MOZ_CRASH("Base32 buffer too small"); 1544 } 1545 *b = digits[aN % 32]; 1546 aN /= 32; 1547 } while (aN); 1548 1549 return b; 1550 } 1551 1552 PointerIdMap mIdMap; 1553 uint32_t mNextId; 1554 1555 // |mIdBuf| must have space for at least eight chars, which is the space 1556 // needed to hold 'Dffffff' (including the terminating null char), which is 1557 // the base-32 representation of 0xffffffff. 1558 static const size_t kIdBufLen = 16; 1559 char mIdBuf[kIdBufLen]; 1560 }; 1561 1562 // Helper class for converting a pointer value to a string. 
class ToStringConverter {
 public:
  // Prints |aPtr| as bare hex (no "0x" prefix) into an internal buffer and
  // returns it. The buffer is reused by the next call.
  const char* ToPtrString(const void* aPtr) {
    // Nb: the "- 1" is conservative; snprintf's size argument already
    // accounts for the terminating null.
    snprintf(kPtrBuf, sizeof(kPtrBuf) - 1, "%" PRIxPTR, (uintptr_t)aPtr);
    return kPtrBuf;
  }

 private:
  char kPtrBuf[32];
};

// Writes the payload of |aBlock| as an array of pointer-sized words (as hex
// strings). Used in scan mode so heap scanners can chase pointers.
static void WriteBlockContents(JSONWriter& aWriter, const LiveBlock& aBlock) {
  size_t numWords = aBlock.ReqSize() / sizeof(uintptr_t*);
  if (numWords == 0) {
    return;
  }

  aWriter.StartArrayProperty("contents", aWriter.SingleLineStyle);
  {
    const uintptr_t** block = (const uintptr_t**)aBlock.Address();
    ToStringConverter sc;
    for (size_t i = 0; i < numWords; ++i) {
      aWriter.StringElement(MakeStringSpan(sc.ToPtrString(block[i])));
    }
  }
  aWriter.EndArray();
}

// Produces the full JSON dump: invocation info, the (aggregated) block list,
// the stack trace table, and the stack frame table. See DMD.h for the format.
static void AnalyzeImpl(UniquePtr<JSONWriteFunc> aWriter) {
  // Some blocks may have been allocated while creating |aWriter|. Those blocks
  // will be freed at the end of this function when |write| is destroyed. The
  // allocations will have occurred while intercepts were not blocked, so the
  // frees better be as well, otherwise we'll get assertion failures.
  // Therefore, this declaration must precede the AutoBlockIntercepts
  // declaration, to ensure that |write| is destroyed *after* intercepts are
  // unblocked.
  JSONWriter writer(std::move(aWriter));

  AutoBlockIntercepts block(Thread::Fetch());
  AutoLockState lock;

  // Allocate this on the heap instead of the stack because it's fairly large.
  auto locService = InfallibleAllocPolicy::new_<CodeAddressService>();

  StackTraceSet usedStackTraces(512);
  PointerSet usedPcs(512);

  size_t iscSize;

  static int analysisCount = 1;
  StatusMsg("Dump %d {\n", analysisCount++);

  writer.Start();
  {
    writer.IntProperty("version", kOutputVersionNumber);

    writer.StartObjectProperty("invocation");
    {
      const char* var = gOptions->DMDEnvVar();
      if (var) {
        writer.StringProperty("dmdEnvVar", MakeStringSpan(var));
      } else {
        writer.NullProperty("dmdEnvVar");
      }

      writer.StringProperty("mode", MakeStringSpan(gOptions->ModeString()));
    }
    writer.EndObject();

    StatusMsg("  Constructing the heap block list...\n");

    ToIdStringConverter isc;
    ToStringConverter sc;

    writer.StartArrayProperty("blockList");
    {
      // Lambda that writes out a live block.
      auto writeLiveBlock = [&](const LiveBlock& aB, size_t aNum) {
        // Nb: the call sites below also add aB's traces; duplicate insertion
        // into the set is harmless.
        aB.AddStackTracesToTable(usedStackTraces);

        MOZ_ASSERT_IF(gOptions->IsScanMode(), aNum == 1);

        writer.StartObjectElement(writer.SingleLineStyle);
        {
          // Scan mode additionally records the block's address and contents.
          if (gOptions->IsScanMode()) {
            writer.StringProperty("addr",
                                  MakeStringSpan(sc.ToPtrString(aB.Address())));
            WriteBlockContents(writer, aB);
          }
          writer.IntProperty("req", aB.ReqSize());
          if (aB.SlopSize() > 0) {
            writer.IntProperty("slop", aB.SlopSize());
          }

          if (aB.AllocStackTrace()) {
            writer.StringProperty(
                "alloc", MakeStringSpan(isc.ToIdString(aB.AllocStackTrace())));
          }

          // "reps" (report stack traces) only exist in dark-matter mode.
          if (gOptions->IsDarkMatterMode() && aB.NumReports() > 0) {
            writer.StartArrayProperty("reps");
            {
              if (aB.ReportStackTrace1()) {
                writer.StringElement(
                    MakeStringSpan(isc.ToIdString(aB.ReportStackTrace1())));
              }
              if (aB.ReportStackTrace2()) {
                writer.StringElement(
                    MakeStringSpan(isc.ToIdString(aB.ReportStackTrace2())));
              }
            }
            writer.EndArray();
          }

          if (aNum > 1) {
            writer.IntProperty("num", aNum);
          }
        }
        writer.EndObject();
      };

      // Live blocks.
      if (!gOptions->IsScanMode()) {
        // At this point we typically have many LiveBlocks that differ only in
        // their address. Aggregate them to reduce the size of the output file.
        AggregatedLiveBlockTable agg(8192);
        for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) {
          const LiveBlock& b = iter.get();
          b.AddStackTracesToTable(usedStackTraces);

          if (AggregatedLiveBlockTable::AddPtr p = agg.lookupForAdd(&b)) {
            p->value() += 1;
          } else {
            MOZ_ALWAYS_TRUE(agg.add(p, &b, 1));
          }
        }

        // Now iterate over the aggregated table.
        for (auto iter = agg.iter(); !iter.done(); iter.next()) {
          const LiveBlock& b = *iter.get().key();
          size_t num = iter.get().value();
          writeLiveBlock(b, num);
        }

      } else {
        // In scan mode we cannot aggregate because we print each live block's
        // address and contents.
        for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) {
          const LiveBlock& b = iter.get();
          b.AddStackTracesToTable(usedStackTraces);

          writeLiveBlock(b, 1);
        }
      }

      // Dead blocks (cumulative mode); already aggregated by value.
      for (auto iter = gDeadBlockTable->iter(); !iter.done(); iter.next()) {
        const DeadBlock& b = iter.get().key();
        b.AddStackTracesToTable(usedStackTraces);

        size_t num = iter.get().value();
        MOZ_ASSERT(num > 0);

        writer.StartObjectElement(writer.SingleLineStyle);
        {
          writer.IntProperty("req", b.ReqSize());
          if (b.SlopSize() > 0) {
            writer.IntProperty("slop", b.SlopSize());
          }
          if (b.AllocStackTrace()) {
            writer.StringProperty(
                "alloc", MakeStringSpan(isc.ToIdString(b.AllocStackTrace())));
          }

          if (num > 1) {
            writer.IntProperty("num", num);
          }
        }
        writer.EndObject();
      }
    }
    writer.EndArray();

    StatusMsg("  Constructing the stack trace table...\n");

    // Each used stack trace becomes a traceTable entry: an array of frame IDs.
    writer.StartObjectProperty("traceTable");
    {
      for (auto iter = usedStackTraces.iter(); !iter.done(); iter.next()) {
        const StackTrace* const st = iter.get();
        writer.StartArrayProperty(MakeStringSpan(isc.ToIdString(st)),
                                  writer.SingleLineStyle);
        {
          for (uint32_t i = 0; i < st->Length(); i++) {
            const void* pc = st->Pc(i);
            writer.StringElement(MakeStringSpan(isc.ToIdString(pc)));
            // Collect the set of distinct PCs for the frameTable below.
            MOZ_ALWAYS_TRUE(usedPcs.put(pc));
          }
        }
        writer.EndArray();
      }
    }
    writer.EndObject();

    StatusMsg("  Constructing the stack frame table...\n");

    // Each distinct PC becomes a frameTable entry: its symbolicated location.
    writer.StartObjectProperty("frameTable");
    {
      static const size_t locBufLen = 1024;
      char locBuf[locBufLen];

      for (auto iter = usedPcs.iter(); !iter.done(); iter.next()) {
        const void* const pc = iter.get();

        // Use 0 for the frame number. See the JSON format description comment
        // in DMD.h to understand why.
        locService->GetLocation(0, pc, locBuf, locBufLen);
        writer.StringProperty(MakeStringSpan(isc.ToIdString(pc)),
                              MakeStringSpan(locBuf));
      }
    }
    writer.EndObject();

    // Capture this before |isc| goes out of scope; it's only printed below if
    // --show-dump-stats is on.
    iscSize = isc.sizeOfExcludingThis(MallocSizeOf);
  }
  writer.End();

  if (gOptions->ShowDumpStats()) {
    Sizes sizes;
    SizeOfInternal(&sizes);

    static const size_t kBufLen = 64;
    char buf1[kBufLen];
    char buf2[kBufLen];
    char buf3[kBufLen];

    StatusMsg("  Execution measurements {\n");

    StatusMsg("    Data structures that persist after Dump() ends {\n");

    StatusMsg("      Used stack traces:    %10s bytes\n",
              Show(sizes.mStackTracesUsed, buf1, kBufLen));

    StatusMsg("      Unused stack traces:  %10s bytes\n",
              Show(sizes.mStackTracesUnused, buf1, kBufLen));

    StatusMsg("      Stack trace table:    %10s bytes (%s entries, %s used)\n",
              Show(sizes.mStackTraceTable, buf1, kBufLen),
              Show(gStackTraceTable->capacity(), buf2, kBufLen),
              Show(gStackTraceTable->count(), buf3, kBufLen));

    StatusMsg("      Live block table:     %10s bytes (%s entries, %s used)\n",
              Show(sizes.mLiveBlockTable, buf1, kBufLen),
              Show(gLiveBlockTable->capacity(), buf2, kBufLen),
              Show(gLiveBlockTable->count(), buf3, kBufLen));

    StatusMsg("      Dead block table:     %10s bytes (%s entries, %s used)\n",
              Show(sizes.mDeadBlockTable, buf1, kBufLen),
              Show(gDeadBlockTable->capacity(), buf2, kBufLen),
              Show(gDeadBlockTable->count(), buf3, kBufLen));

    StatusMsg("    }\n");
    StatusMsg("    Data structures that are destroyed after Dump() ends {\n");

    StatusMsg(
        "      Location service:      %10s bytes\n",
        Show(locService->SizeOfIncludingThis(MallocSizeOf), buf1, kBufLen));
    StatusMsg("      Used stack traces set: %10s bytes\n",
              Show(usedStackTraces.shallowSizeOfExcludingThis(MallocSizeOf),
                   buf1, kBufLen));
    StatusMsg(
        "      Used PCs set:          %10s bytes\n",
        Show(usedPcs.shallowSizeOfExcludingThis(MallocSizeOf), buf1, kBufLen));
    StatusMsg("      Pointer ID map:        %10s bytes\n",
              Show(iscSize, buf1, kBufLen));

    StatusMsg("    }\n");
    StatusMsg("    Counts {\n");

    size_t hits = locService->NumCacheHits();
    size_t misses = locService->NumCacheMisses();
    size_t requests = hits + misses;
    StatusMsg("      Location service:    %10s requests\n",
              Show(requests, buf1, kBufLen));

    size_t count = locService->CacheCount();
    size_t capacity = locService->CacheCapacity();
    StatusMsg(
        "      Location service cache:  "
        "%4.1f%% hit rate, %.1f%% occupancy at end\n",
        Percent(hits, requests), Percent(count, capacity));

    StatusMsg("    }\n");
    StatusMsg("  }\n");
  }

  InfallibleAllocPolicy::delete_(locService);

  StatusMsg("}\n");
}

// Public entry point: dump the JSON, then clear reporter markings so the next
// analysis starts fresh.
void DMDFuncs::Analyze(UniquePtr<JSONWriteFunc> aWriter) {
  AnalyzeImpl(std::move(aWriter));
  ClearReports();
}

//---------------------------------------------------------------------------
// Testing
//---------------------------------------------------------------------------

// Test-only: re-parse options from |aOptions|, drop all recorded blocks, and
// reset the sampling state so test runs are deterministic.
void DMDFuncs::ResetEverything(const char* aOptions) {
  AutoLockState lock;

  // Reset options.
  InfallibleAllocPolicy::delete_(gOptions);
  gOptions = InfallibleAllocPolicy::new_<Options>(aOptions);

  // Clear all existing blocks.
  gLiveBlockTable->clear();
  gDeadBlockTable->clear();

  // Reset gBernoulli to a deterministic state. (Its current state depends on
  // all previous trials.)
  ResetBernoulli();
}

}  // namespace dmd
}  // namespace mozilla