Replay.cpp (34560B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #define MOZ_MEMORY_IMPL 8 #include "mozmemory_wrap.h" 9 10 #ifdef _WIN32 11 # include <windows.h> 12 # include <io.h> 13 typedef intptr_t ssize_t; 14 #else 15 # include <sys/mman.h> 16 # include <unistd.h> 17 #endif 18 #ifdef XP_LINUX 19 # include <fcntl.h> 20 # include <stdlib.h> 21 #endif 22 #include <algorithm> 23 #include <cmath> 24 #include <cstdio> 25 #include <cstring> 26 27 #include "mozilla/Assertions.h" 28 #include "mozilla/MathAlgorithms.h" 29 #include "mozilla/Maybe.h" 30 #include "FdPrintf.h" 31 32 using namespace mozilla; 33 34 static void die(const char* message) { 35 /* Here, it doesn't matter that fprintf may allocate memory. */ 36 fprintf(stderr, "%s\n", message); 37 exit(1); 38 } 39 40 #ifdef XP_LINUX 41 MOZ_RUNINIT static size_t sPageSize = []() { return sysconf(_SC_PAGESIZE); }(); 42 #endif 43 44 /* We don't want to be using malloc() to allocate our internal tracking 45 * data, because that would change the parameters of what is being measured, 46 * so we want to use data types that directly use mmap/VirtualAlloc. 
*/ 47 template <typename T, size_t Len> 48 class MappedArray { 49 public: 50 MappedArray() : mPtr(nullptr) { 51 #ifdef XP_LINUX 52 MOZ_RELEASE_ASSERT(!((sizeof(T) * Len) & (sPageSize - 1)), 53 "MappedArray size must be a multiple of the page size"); 54 #endif 55 } 56 57 ~MappedArray() { 58 if (mPtr) { 59 #ifdef _WIN32 60 VirtualFree(mPtr, sizeof(T) * Len, MEM_RELEASE); 61 #elif defined(XP_LINUX) 62 munmap(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(mPtr) - 63 sPageSize), 64 sizeof(T) * Len + sPageSize * 2); 65 #else 66 munmap(mPtr, sizeof(T) * Len); 67 #endif 68 } 69 } 70 71 T& operator[](size_t aIndex) const { 72 if (mPtr) { 73 return mPtr[aIndex]; 74 } 75 76 #ifdef _WIN32 77 mPtr = reinterpret_cast<T*>(VirtualAlloc( 78 nullptr, sizeof(T) * Len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE)); 79 if (mPtr == nullptr) { 80 die("VirtualAlloc error"); 81 } 82 #else 83 size_t data_size = sizeof(T) * Len; 84 size_t size = data_size; 85 # ifdef XP_LINUX 86 // See below 87 size += sPageSize * 2; 88 # endif 89 mPtr = reinterpret_cast<T*>(mmap(nullptr, size, PROT_READ | PROT_WRITE, 90 MAP_ANON | MAP_PRIVATE, -1, 0)); 91 if (mPtr == MAP_FAILED) { 92 die("Mmap error"); 93 } 94 # ifdef XP_LINUX 95 // On Linux we request a page on either side of the allocation and 96 // mprotect them. This prevents mappings in /proc/self/smaps from being 97 // merged and allows us to parse this file to calculate the allocator's RSS. 98 MOZ_ASSERT(0 == mprotect(mPtr, sPageSize, 0)); 99 MOZ_ASSERT(0 == mprotect(reinterpret_cast<void*>( 100 reinterpret_cast<uintptr_t>(mPtr) + data_size + 101 sPageSize), 102 sPageSize, 0)); 103 mPtr = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(mPtr) + sPageSize); 104 # endif 105 #endif 106 return mPtr[aIndex]; 107 } 108 109 bool ownsMapping(uintptr_t addr) const { return addr == (uintptr_t)mPtr; } 110 111 bool allocated() const { return !!mPtr; } 112 113 private: 114 mutable T* mPtr; 115 }; 116 117 /* Type for records of allocations. 
 */
struct MemSlot {
  // Pointer returned by the replayed allocator call; null when the slot is
  // not live.
  void* mPtr;

  // mRequest is only valid if mPtr is non-null. It doesn't need to be cleared
  // when memory is freed or realloc()ed.
  size_t mRequest;
};

/* An almost infinite list of slots.
 * In essence, this is a linked list of arrays of groups of slots.
 * Each group is 1MB. On 64-bits, one group allows to store 64k allocations.
 * Each MemSlotList instance can store 1023 such groups, which means more
 * than 67M allocations. In case more would be needed, we chain to another
 * MemSlotList, and so on.
 * Using 1023 groups makes the MemSlotList itself page sized on 32-bits
 * and 2 pages-sized on 64-bits.
 */
class MemSlotList {
  static constexpr size_t kGroups = 1024 - 1;
  static constexpr size_t kGroupSize = (1024 * 1024) / sizeof(MemSlot);

  MappedArray<MemSlot, kGroupSize> mSlots[kGroups];
  // Chained list, only mapped once the local groups are exhausted.
  MappedArray<MemSlotList, 1> mNext;

 public:
  // Returns the slot at aIndex, recursing into the chained MemSlotList for
  // indices beyond the local groups. Accessing a slot maps its group lazily.
  MemSlot& operator[](size_t aIndex) const {
    if (aIndex < kGroupSize * kGroups) {
      return mSlots[aIndex / kGroupSize][aIndex % kGroupSize];
    }
    aIndex -= kGroupSize * kGroups;
    return mNext[0][aIndex];
  }

  // Ask if any of the memory-mapped buffers use this range.
  bool ownsMapping(uintptr_t aStart) const {
    for (const auto& slot : mSlots) {
      if (slot.allocated() && slot.ownsMapping(aStart)) {
        return true;
      }
    }
    // Check both the mapping that stores the chained MemSlotList itself and,
    // if that list exists, the mappings it owns in turn.
    return mNext.ownsMapping(aStart) ||
           (mNext.allocated() && mNext[0].ownsMapping(aStart));
  }
};

/* Helper class for memory buffers.
 * A Buffer is a non-owning (pointer, length) view; the Split* operations
 * carve views out of a parent buffer by mutating the parent in place. */
class Buffer {
 public:
  Buffer() : mBuf(nullptr), mLength(0) {}

  Buffer(const void* aBuf, size_t aLength)
      : mBuf(reinterpret_cast<const char*>(aBuf)), mLength(aLength) {}

  /* Constructor for string literals. The trailing NUL is excluded from the
   * length. */
  template <size_t Size>
  explicit Buffer(const char (&aStr)[Size]) : mBuf(aStr), mLength(Size - 1) {}

  /* Returns a sub-buffer up-to but not including the given aNeedle character.
   * The "parent" buffer itself is altered to begin after the aNeedle
   * character.
   * If the aNeedle character is not found, return the entire buffer, and empty
   * the "parent" buffer. */
  Buffer SplitChar(char aNeedle) {
    char* buf = const_cast<char*>(mBuf);
    char* c = reinterpret_cast<char*>(memchr(buf, aNeedle, mLength));
    if (!c) {
      return Split(mLength);
    }

    Buffer result = Split(c - buf);
    // Remove the aNeedle character itself.
    Split(1);
    return result;
  }

  // Advance to the position after aNeedle. This is like SplitChar but does not
  // return the skipped portion.
  void Skip(char aNeedle, unsigned nTimes = 1) {
    for (unsigned i = 0; i < nTimes; i++) {
      SplitChar(aNeedle);
    }
  }

  // Drop leading whitespace characters (as defined by IsSpace below).
  void SkipWhitespace() {
    while (mLength > 0) {
      if (!IsSpace(mBuf[0])) {
        break;
      }
      mBuf++;
      mLength--;
    }
  }

  // Locale-independent whitespace test, mirroring the C isspace() set.
  static bool IsSpace(char c) {
    switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
        return true;
    }
    return false;
  }

  /* Returns a sub-buffer of at most aLength characters. The "parent" buffer is
   * amputated of those aLength characters. If the "parent" buffer is smaller
   * than aLength, then its length is used instead. */
  Buffer Split(size_t aLength) {
    Buffer result(mBuf, std::min(aLength, mLength));
    mLength -= result.mLength;
    mBuf += result.mLength;
    return result;
  }

  /* Move the buffer (including its content) to the memory address of the aOther
   * buffer. The ranges may overlap (memmove). */
  void Slide(Buffer aOther) {
    memmove(const_cast<char*>(aOther.mBuf), mBuf, mLength);
    mBuf = aOther.mBuf;
  }

  /* Returns whether the two involved buffers have the same content. */
  bool operator==(Buffer aOther) {
    return mLength == aOther.mLength &&
           (mBuf == aOther.mBuf || !strncmp(mBuf, aOther.mBuf, mLength));
  }

  bool operator!=(Buffer aOther) { return !(*this == aOther); }

  /* Returns true if the buffer is not empty. */
  explicit operator bool() { return mLength; }

  // Unchecked byte access; n is trusted to be < Length().
  char operator[](size_t n) const { return mBuf[n]; }

  /* Returns the memory location of the buffer. */
  const char* get() { return mBuf; }

  /* Returns the memory location of the end of the buffer (technically, the
   * first byte after the buffer). */
  const char* GetEnd() { return mBuf + mLength; }

  /* Extend the buffer over the content of the other buffer, assuming it is
   * adjacent. */
  void Extend(Buffer aOther) {
    MOZ_ASSERT(aOther.mBuf == GetEnd());
    mLength += aOther.mLength;
  }

  size_t Length() const { return mLength; }

 private:
  const char* mBuf;
  size_t mLength;
};

/* Helper class to read from a file descriptor line by line.
 */
class FdReader {
 public:
  // aNeedClose: when true, the destructor closes aFd.
  explicit FdReader(int aFd, bool aNeedClose = false)
      : mFd(aFd),
        mNeedClose(aNeedClose),
        mData(&mRawBuf, 0),
        mBuf(&mRawBuf, sizeof(mRawBuf)) {}

  // Move constructor: takes over the fd and copies the raw buffer bytes.
  // NOTE(review): mData is reset to empty rather than re-pointed at the
  // copied bytes, so any buffered-but-unread data is effectively dropped on
  // move. The only in-file user (SMapsReader::open) moves a freshly
  // constructed reader before any ReadLine, so nothing is lost there —
  // confirm before moving a reader that has already been read from.
  FdReader(FdReader&& aOther) noexcept
      : mFd(aOther.mFd),
        mNeedClose(aOther.mNeedClose),
        mData(&mRawBuf, 0),
        mBuf(&mRawBuf, sizeof(mRawBuf)) {
    memcpy(mRawBuf, aOther.mRawBuf, sizeof(mRawBuf));
    aOther.mFd = -1;
    aOther.mNeedClose = false;
    aOther.mData = Buffer();
    aOther.mBuf = Buffer();
  }

  FdReader& operator=(const FdReader&) = delete;
  FdReader(const FdReader&) = delete;

  ~FdReader() {
    if (mNeedClose) {
      close(mFd);
    }
  }

  /* Read a line from the file descriptor and returns it as a Buffer instance */
  Buffer ReadLine() {
    while (true) {
      Buffer result = mData.SplitChar('\n');

      /* There are essentially three different cases here:
       * - '\n' was found "early". In this case, the end of the result buffer
       *   is before the beginning of the mData buffer (since SplitChar
       *   amputated it).
       * - '\n' was found as the last character of mData. In this case, mData
       *   is empty, but still points at the end of mBuf. result points to what
       *   used to be in mData, without the last character.
       * - '\n' was not found. In this case too, mData is empty and points at
       *   the end of mBuf. But result points to the entire buffer that used to
       *   be pointed by mData.
       * Only in the latter case do both result and mData's end match, and it's
       * the only case where we need to refill the buffer.
       */
      if (result.GetEnd() != mData.GetEnd()) {
        return result;
      }

      /* Since SplitChar emptied mData, make it point to what it had before. */
      mData = result;

      /* And move it to the beginning of the read buffer. */
      mData.Slide(mBuf);

      FillBuffer();

      // End of file with no pending data: return an empty buffer.
      if (!mData) {
        return Buffer();
      }
    }
  }

 private:
  /* Fill the read buffer: append bytes from the fd into the space between the
   * end of mData and the end of mBuf, extending mData over what was read. */
  void FillBuffer() {
    size_t size = mBuf.GetEnd() - mData.GetEnd();
    Buffer remainder(mData.GetEnd(), size);

    ssize_t len = 1;
    while (remainder && len > 0) {
      len = ::read(mFd, const_cast<char*>(remainder.get()), size);
      if (len < 0) {
        die("Read error");
      }
      size -= len;
      mData.Extend(remainder.Split(len));
    }
  }

  /* File descriptor to read from. */
  int mFd;
  // Whether the destructor should close mFd.
  bool mNeedClose;

  /* Part of data that was read from the file descriptor but not returned with
   * ReadLine yet. */
  Buffer mData;
  /* Buffer representation of mRawBuf */
  Buffer mBuf;
  /* read() buffer */
  char mRawBuf[4096];
};

MOZ_BEGIN_EXTERN_C

/* Function declarations for all the replace_malloc _impl functions.
 * See memory/build/replace_malloc.c */
#define MALLOC_DECL(name, return_type, ...) \
  return_type name##_impl(__VA_ARGS__);
#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC
#include "malloc_decls.h"

#define MALLOC_DECL(name, return_type, ...) \
  return_type name(__VA_ARGS__);
#define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC
#include "malloc_decls.h"

MOZ_END_EXTERN_C

/* Parse a decimal (Base == 10) or hexadecimal (Base == 16) number from the
 * whole of aBuf. Dies on an empty buffer or any character outside the base's
 * digit set. Overflow is not detected. */
template <unsigned Base = 10>
size_t parseNumber(Buffer aBuf) {
  if (!aBuf) {
    die("Malformed input");
  }

  size_t result = 0;
  for (const char *c = aBuf.get(), *end = aBuf.GetEnd(); c < end; c++) {
    result *= Base;
    if ((*c >= '0' && *c <= '9')) {
      result += *c - '0';
    } else if (Base == 16 && *c >= 'a' && *c <= 'f') {
      result += *c - 'a' + 10;
    } else if (Base == 16 && *c >= 'A' && *c <= 'F') {
      result += *c - 'A' + 10;
    } else {
      die("Malformed input");
    }
  }
  return result;
}

/* Rounded integer percentage of a out of b; 0 when b is 0. */
static size_t percent(size_t a, size_t b) {
  if (!b) {
    return 0;
  }
  return size_t(round(double(a) / double(b) * 100.0));
}

/* Histogram of request sizes for a single allocation size class. Buckets are
 * power-of-two sized (1 << mShift bytes each), covering
 * (mNextSmallest, mMaxSize]. */
class Distribution {
 public:
  // Default constructor used for array initialisation.
  Distribution()
      : mMaxSize(0),
        mNextSmallest(0),
        mShift(0),
        mArrayOffset(0),
        mArraySlots(0),
        mTotalRequests(0),
        mRequests{0} {}

  // max_size: largest request belonging to this class;
  // next_smallest: largest size of the class below (exclusive lower bound);
  // bucket_size: requested bucket width, rounded up to a power of two.
  Distribution(size_t max_size, size_t next_smallest, size_t bucket_size)
      : mMaxSize(max_size),
        mNextSmallest(next_smallest),
        mShift(CeilingLog2(bucket_size)),
        mArrayOffset(1 + next_smallest),
        mArraySlots((max_size - next_smallest) >> mShift),
        mTotalRequests(0),
        mRequests{
            0,
        } {
    MOZ_ASSERT(mMaxSize);
    MOZ_RELEASE_ASSERT(mArraySlots <= MAX_NUM_BUCKETS);
  }

  Distribution& operator=(const Distribution& aOther) = default;

  // Record one request. request is trusted to be within
  // (mNextSmallest, mMaxSize].
  void addRequest(size_t request) {
    MOZ_ASSERT(mMaxSize);

    mRequests[(request - mArrayOffset) >> mShift]++;
    mTotalRequests++;
  }

  void printDist(platform_handle_t std_err) {
    MOZ_ASSERT(mMaxSize);

    // The translation to turn a slot index into a memory request size.
    const size_t array_offset_add = (1 << mShift) + mNextSmallest;

    FdPrintf(std_err, "\n%zu-bin Distribution:\n", mMaxSize);
    FdPrintf(std_err, " request : count percent\n");
    size_t range_start = mNextSmallest + 1;
    for (size_t j = 0; j < mArraySlots; j++) {
      size_t range_end = (j << mShift) + array_offset_add;
      FdPrintf(std_err, "%5zu - %5zu: %6zu %6zu%%\n", range_start, range_end,
               mRequests[j], percent(mRequests[j], mTotalRequests));
      range_start = range_end + 1;
    }
  }

  size_t maxSize() const { return mMaxSize; }

 private:
  static constexpr size_t MAX_NUM_BUCKETS = 16;

  // If size is zero this distribution is uninitialised.
  size_t mMaxSize;
  size_t mNextSmallest;

  // Parameters to convert a size into a slot number.
  unsigned mShift;
  unsigned mArrayOffset;

  // The number of slots.
  unsigned mArraySlots;

  size_t mTotalRequests;
  size_t mRequests[MAX_NUM_BUCKETS];
};

#ifdef XP_LINUX
/* One mapping parsed from /proc/self/smaps. */
struct MemoryMap {
  uintptr_t mStart;
  uintptr_t mEnd;
  bool mReadable;
  bool mPrivate;
  bool mAnon;
  bool mIsStack;
  bool mIsSpecial;
  size_t mRSS;

  bool IsCandidate() const {
    // Candidate mappings are:
    //  * readable,
    //  * private (not shared),
    //  * anonymous (not file-backed, not a special area such as the stack
    //    or vdso).
    //
    // The only mappings we're falsely including are the .bss segments for
    // shared libraries.
    return mReadable && mPrivate && mAnon && !mIsStack && !mIsSpecial;
  }
};

/* Line-oriented parser for /proc/self/smaps, built on FdReader. */
class SMapsReader : private FdReader {
 private:
  explicit SMapsReader(FdReader&& reader) : FdReader(std::move(reader)) {}

 public:
  // Returns Nothing() (after perror) if the smaps file can't be opened.
  static Maybe<SMapsReader> open() {
    int fd = ::open(FILENAME, O_RDONLY);
    if (fd < 0) {
      perror(FILENAME);
      return mozilla::Nothing();
    }

    return Some(SMapsReader(FdReader(fd, true)));
  }

  // Parse the next mapping entry (header line plus its Rss field), or
  // Nothing() at end of file.
  Maybe<MemoryMap> readMap(platform_handle_t aStdErr) {
    // This is not very tolerant of format changes because things like
    // parseNumber will crash if they get a bad value. TODO: make this
    // soft-fail.

    Buffer line = ReadLine();
    if (!line) {
      return Nothing();
    }

    // We're going to be at the start of an entry, start tokenising the first
    // line.

    // Range
    Buffer range = line.SplitChar(' ');
    uintptr_t range_start = parseNumber<16>(range.SplitChar('-'));
    uintptr_t range_end = parseNumber<16>(range);

    // Mode.
    Buffer mode = line.SplitChar(' ');
    if (mode.Length() != 4) {
      FdPrintf(aStdErr, "Couldn't parse SMAPS file\n");
      return Nothing();
    }
    bool readable = mode[0] == 'r';
    bool private_ = mode[3] == 'p';

    // Offset, device and inode. An anonymous mapping has offset 0, device
    // 00:00 and inode 0.
    line.SkipWhitespace();
    bool zero_offset = !parseNumber<16>(line.SplitChar(' '));
    line.SkipWhitespace();
    bool no_device = line.SplitChar(' ') == Buffer("00:00");
    line.SkipWhitespace();
    bool zero_inode = !parseNumber(line.SplitChar(' '));
    bool is_anon = zero_offset && no_device && zero_inode;

    // Filename, or empty for anon mappings.
    line.SkipWhitespace();
    Buffer filename = line.SplitChar(' ');

    bool is_stack;
    bool is_special;
    if (filename && filename[0] == '[') {
      is_stack = filename == Buffer("[stack]");
      is_special = filename == Buffer("[vdso]") ||
                   filename == Buffer("[vvar]") ||
                   filename == Buffer("[vsyscall]");
    } else {
      is_stack = false;
      is_special = false;
    }

    size_t rss = 0;
    while ((line = ReadLine())) {
      Buffer field = line.SplitChar(':');
      if (field == Buffer("VmFlags")) {
        // This is the last field, at least in the current format. Break this
        // loop to read the next mapping.
        break;
      }

      if (field == Buffer("Rss")) {
        line.SkipWhitespace();
        Buffer value = line.SplitChar(' ');
        // The Rss field is reported in kB.
        rss = parseNumber(value) * 1024;
      }
    }

    return Some(MemoryMap({range_start, range_end, readable, private_, is_anon,
                           is_stack, is_special, rss}));
  }

  static constexpr char FILENAME[] = "/proc/self/smaps";
};
#endif  // XP_LINUX

/* Class to handle dispatching the replay function calls to replace-malloc. */
class Replay {
 public:
  Replay() {
#ifdef _WIN32
    // See comment in FdPrintf.h as to why native win32 handles are used.
607 mStdErr = GetStdHandle(STD_ERROR_HANDLE); 608 #else 609 mStdErr = fileno(stderr); 610 #endif 611 #ifdef XP_LINUX 612 BuildInitialMapInfo(); 613 #endif 614 } 615 616 void enableSlopCalculation() { mCalculateSlop = true; } 617 void enableMemset() { mDoMemset = true; } 618 619 MemSlot& operator[](size_t index) const { return mSlots[index]; } 620 621 void malloc(Buffer& aArgs, Buffer& aResult) { 622 MemSlot& aSlot = SlotForResult(aResult); 623 mOps++; 624 size_t size = parseNumber(aArgs); 625 aSlot.mPtr = ::malloc_impl(size); 626 if (aSlot.mPtr) { 627 aSlot.mRequest = size; 628 MaybeCommit(aSlot); 629 if (mCalculateSlop) { 630 mTotalRequestedSize += size; 631 mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); 632 } 633 } 634 } 635 636 void posix_memalign(Buffer& aArgs, Buffer& aResult) { 637 MemSlot& aSlot = SlotForResult(aResult); 638 mOps++; 639 size_t alignment = parseNumber(aArgs.SplitChar(',')); 640 size_t size = parseNumber(aArgs); 641 void* ptr; 642 if (::posix_memalign_impl(&ptr, alignment, size) == 0) { 643 aSlot.mPtr = ptr; 644 aSlot.mRequest = size; 645 MaybeCommit(aSlot); 646 if (mCalculateSlop) { 647 mTotalRequestedSize += size; 648 mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); 649 } 650 } else { 651 aSlot.mPtr = nullptr; 652 } 653 } 654 655 void aligned_alloc(Buffer& aArgs, Buffer& aResult) { 656 MemSlot& aSlot = SlotForResult(aResult); 657 mOps++; 658 size_t alignment = parseNumber(aArgs.SplitChar(',')); 659 size_t size = parseNumber(aArgs); 660 aSlot.mPtr = ::aligned_alloc_impl(alignment, size); 661 if (aSlot.mPtr) { 662 aSlot.mRequest = size; 663 MaybeCommit(aSlot); 664 if (mCalculateSlop) { 665 mTotalRequestedSize += size; 666 mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); 667 } 668 } 669 } 670 671 void calloc(Buffer& aArgs, Buffer& aResult) { 672 MemSlot& aSlot = SlotForResult(aResult); 673 mOps++; 674 size_t num = parseNumber(aArgs.SplitChar(',')); 675 size_t size = parseNumber(aArgs); 676 aSlot.mPtr = 
::calloc_impl(num, size); 677 if (aSlot.mPtr) { 678 aSlot.mRequest = num * size; 679 MaybeCommit(aSlot); 680 if (mCalculateSlop) { 681 mTotalRequestedSize += num * size; 682 mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); 683 } 684 } 685 } 686 687 void realloc(Buffer& aArgs, Buffer& aResult) { 688 MemSlot& aSlot = SlotForResult(aResult); 689 mOps++; 690 Buffer dummy = aArgs.SplitChar('#'); 691 if (dummy) { 692 die("Malformed input"); 693 } 694 size_t slot_id = parseNumber(aArgs.SplitChar(',')); 695 size_t size = parseNumber(aArgs); 696 MemSlot& old_slot = (*this)[slot_id]; 697 void* old_ptr = old_slot.mPtr; 698 old_slot.mPtr = nullptr; 699 aSlot.mPtr = ::realloc_impl(old_ptr, size); 700 if (aSlot.mPtr) { 701 aSlot.mRequest = size; 702 MaybeCommit(aSlot); 703 if (mCalculateSlop) { 704 mTotalRequestedSize += size; 705 mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); 706 } 707 } 708 } 709 710 void free(Buffer& aArgs, Buffer& aResult) { 711 if (aResult) { 712 die("Malformed input"); 713 } 714 mOps++; 715 Buffer dummy = aArgs.SplitChar('#'); 716 if (dummy) { 717 die("Malformed input"); 718 } 719 size_t slot_id = parseNumber(aArgs); 720 MemSlot& slot = (*this)[slot_id]; 721 ::free_impl(slot.mPtr); 722 slot.mPtr = nullptr; 723 } 724 725 void memalign(Buffer& aArgs, Buffer& aResult) { 726 MemSlot& aSlot = SlotForResult(aResult); 727 mOps++; 728 size_t alignment = parseNumber(aArgs.SplitChar(',')); 729 size_t size = parseNumber(aArgs); 730 aSlot.mPtr = ::memalign_impl(alignment, size); 731 if (aSlot.mPtr) { 732 aSlot.mRequest = size; 733 MaybeCommit(aSlot); 734 if (mCalculateSlop) { 735 mTotalRequestedSize += size; 736 mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); 737 } 738 } 739 } 740 741 void valloc(Buffer& aArgs, Buffer& aResult) { 742 MemSlot& aSlot = SlotForResult(aResult); 743 mOps++; 744 size_t size = parseNumber(aArgs); 745 aSlot.mPtr = ::valloc_impl(size); 746 if (aSlot.mPtr) { 747 aSlot.mRequest = size; 748 
MaybeCommit(aSlot); 749 if (mCalculateSlop) { 750 mTotalRequestedSize += size; 751 mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); 752 } 753 } 754 } 755 756 void jemalloc_stats(Buffer& aArgs, Buffer& aResult) { 757 if (aArgs || aResult) { 758 die("Malformed input"); 759 } 760 mOps++; 761 jemalloc_stats_t stats; 762 // Using a variable length array here is a GCC & Clang extension. But it 763 // allows us to place this on the stack and not alter jemalloc's profiling. 764 const size_t num_bins = ::jemalloc_stats_num_bins(); 765 const size_t MAX_NUM_BINS = 100; 766 if (num_bins > MAX_NUM_BINS) { 767 die("Exceeded maximum number of jemalloc stats bins"); 768 } 769 jemalloc_bin_stats_t bin_stats[MAX_NUM_BINS] = {{0}}; 770 ::jemalloc_stats_internal(&stats, bin_stats); 771 772 #ifdef XP_LINUX 773 size_t rss = get_rss(); 774 #endif 775 776 size_t num_objects = 0; 777 size_t num_sloppy_objects = 0; 778 size_t total_allocated = 0; 779 size_t total_slop = 0; 780 size_t large_slop = 0; 781 size_t large_used = 0; 782 size_t huge_slop = 0; 783 size_t huge_used = 0; 784 size_t bin_slop[MAX_NUM_BINS] = {0}; 785 786 for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) { 787 MemSlot& slot = mSlots[slot_id]; 788 if (slot.mPtr) { 789 size_t used = ::malloc_usable_size_impl(slot.mPtr); 790 size_t slop = used - slot.mRequest; 791 total_allocated += used; 792 total_slop += slop; 793 num_objects++; 794 if (slop) { 795 num_sloppy_objects++; 796 } 797 798 if (used <= 799 (stats.subpage_max ? stats.subpage_max : stats.quantum_wide_max)) { 800 // We know that this is an inefficient linear search, but there's a 801 // small number of bins and this is simple. 
802 for (unsigned i = 0; i < num_bins; i++) { 803 auto& bin = bin_stats[i]; 804 if (used == bin.size) { 805 bin_slop[i] += slop; 806 break; 807 } 808 } 809 } else if (used <= stats.large_max) { 810 large_slop += slop; 811 large_used += used; 812 } else { 813 huge_slop += slop; 814 huge_used += used; 815 } 816 } 817 } 818 819 // This formula corresponds to the calculation of wasted (from committed and 820 // the other parameters) within jemalloc_stats() 821 size_t committed = stats.allocated + stats.waste + stats.pages_dirty + 822 stats.bookkeeping + stats.bin_unused; 823 824 FdPrintf(mStdErr, "\n"); 825 FdPrintf(mStdErr, "Objects: %9zu\n", num_objects); 826 FdPrintf(mStdErr, "Slots: %9zu\n", mNumUsedSlots); 827 FdPrintf(mStdErr, "Ops: %9zu\n", mOps); 828 FdPrintf(mStdErr, "mapped: %9zu\n", stats.mapped); 829 FdPrintf(mStdErr, "committed: %9zu\n", committed); 830 #ifdef XP_LINUX 831 if (rss) { 832 FdPrintf(mStdErr, "rss: %9zu\n", rss); 833 } 834 #endif 835 FdPrintf(mStdErr, "allocated: %9zu\n", stats.allocated); 836 FdPrintf(mStdErr, "waste: %9zu\n", stats.waste); 837 FdPrintf(mStdErr, "dirty: %9zu\n", stats.pages_dirty); 838 FdPrintf(mStdErr, "fresh: %9zu\n", stats.pages_fresh); 839 FdPrintf(mStdErr, "madvised: %9zu\n", stats.pages_madvised); 840 FdPrintf(mStdErr, "bookkeep: %9zu\n", stats.bookkeeping); 841 FdPrintf(mStdErr, "bin-unused: %9zu\n", stats.bin_unused); 842 FdPrintf(mStdErr, "quantum-max: %9zu\n", stats.quantum_max); 843 FdPrintf(mStdErr, "quantum-wide-max: %9zu\n", stats.quantum_wide_max); 844 FdPrintf(mStdErr, "subpage-max: %9zu\n", stats.subpage_max); 845 FdPrintf(mStdErr, "large-max: %9zu\n", stats.large_max); 846 if (mCalculateSlop) { 847 size_t slop = mTotalAllocatedSize - mTotalRequestedSize; 848 FdPrintf(mStdErr, 849 "Total slop for all allocations: %zuKiB/%zuKiB (%zu%%)\n", 850 slop / 1024, mTotalAllocatedSize / 1024, 851 percent(slop, mTotalAllocatedSize)); 852 } 853 FdPrintf(mStdErr, "Live sloppy objects: %zu/%zu (%zu%%)\n", 854 
num_sloppy_objects, num_objects, 855 percent(num_sloppy_objects, num_objects)); 856 FdPrintf(mStdErr, "Live sloppy bytes: %zuKiB/%zuKiB (%zu%%)\n", 857 total_slop / 1024, total_allocated / 1024, 858 percent(total_slop, total_allocated)); 859 860 FdPrintf(mStdErr, "\n%8s %11s %10s %8s %9s %9s %8s\n", "bin-size", 861 "unused (c)", "total (c)", "used (c)", "non-full (r)", "total (r)", 862 "used (r)"); 863 for (unsigned i = 0; i < num_bins; i++) { 864 auto& bin = bin_stats[i]; 865 MOZ_ASSERT(bin.size); 866 FdPrintf(mStdErr, "%8zu %8zuKiB %7zuKiB %7zu%% %12zu %9zu %7zu%%\n", 867 bin.size, bin.bytes_unused / 1024, bin.bytes_total / 1024, 868 percent(bin.bytes_total - bin.bytes_unused, bin.bytes_total), 869 bin.num_non_full_runs, bin.num_runs, 870 percent(bin.num_runs - bin.num_non_full_runs, bin.num_runs)); 871 } 872 873 FdPrintf(mStdErr, "\n%5s %8s %9s %7s\n", "bin", "slop", "used", "percent"); 874 for (unsigned i = 0; i < num_bins; i++) { 875 auto& bin = bin_stats[i]; 876 size_t used = bin.bytes_total - bin.bytes_unused; 877 FdPrintf(mStdErr, "%5zu %8zu %9zu %6zu%%\n", bin.size, bin_slop[i], used, 878 percent(bin_slop[i], used)); 879 } 880 FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "large", large_slop, large_used, 881 percent(large_slop, large_used)); 882 FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "huge", huge_slop, huge_used, 883 percent(huge_slop, huge_used)); 884 885 print_distributions(stats, bin_stats); 886 } 887 888 private: 889 /* 890 * Create and print frequency distributions of memory requests. 891 */ 892 void print_distributions(jemalloc_stats_t& stats, 893 jemalloc_bin_stats_t* bin_stats) { 894 const size_t num_bins = ::jemalloc_stats_num_bins(); 895 896 // We compute distributions for all of the bins for small allocations 897 // (num_bins) plus two more distributions for larger allocations. 
898 Distribution dists[num_bins + 2]; 899 900 unsigned last_size = 0; 901 unsigned num_dists = 0; 902 for (unsigned i = 0; i < num_bins; i++) { 903 auto& bin = bin_stats[i]; 904 auto& dist = dists[num_dists++]; 905 906 MOZ_ASSERT(bin.size); 907 if (bin.size <= 16) { 908 // 1 byte buckets. 909 dist = Distribution(bin.size, last_size, 1); 910 } else if (bin.size <= stats.quantum_max) { 911 // 4 buckets, (4 bytes per bucket with a 16 byte quantum). 912 dist = Distribution(bin.size, last_size, stats.quantum / 4); 913 } else if (bin.size <= stats.quantum_wide_max) { 914 // 8 buckets, (32 bytes per bucket with a 256 byte quantum-wide). 915 dist = Distribution(bin.size, last_size, stats.quantum_wide / 8); 916 } else { 917 // 16 buckets. 918 dist = Distribution(bin.size, last_size, (bin.size - last_size) / 16); 919 } 920 last_size = bin.size; 921 } 922 923 // 16 buckets. 924 dists[num_dists] = Distribution(stats.page_size, last_size, 925 (stats.page_size - last_size) / 16); 926 num_dists++; 927 928 // Buckets are 1/4 of the page size (12 buckets). 929 dists[num_dists] = 930 Distribution(stats.page_size * 4, stats.page_size, stats.page_size / 4); 931 num_dists++; 932 933 MOZ_RELEASE_ASSERT(num_dists <= num_bins + 2); 934 935 for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) { 936 MemSlot& slot = mSlots[slot_id]; 937 if (slot.mPtr) { 938 for (size_t i = 0; i < num_dists; i++) { 939 if (slot.mRequest <= dists[i].maxSize()) { 940 dists[i].addRequest(slot.mRequest); 941 break; 942 } 943 } 944 } 945 } 946 947 for (unsigned i = 0; i < num_dists; i++) { 948 dists[i].printDist(mStdErr); 949 } 950 } 951 952 #ifdef XP_LINUX 953 size_t get_rss() { 954 if (mGetRSSFailed) { 955 return 0; 956 } 957 958 // On Linux we can determine the RSS of the heap area by examining the 959 // smaps file. 
960 mozilla::Maybe<SMapsReader> reader = SMapsReader::open(); 961 if (!reader) { 962 mGetRSSFailed = true; 963 return 0; 964 } 965 966 size_t rss = 0; 967 while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) { 968 if (map->IsCandidate() && !mSlots.ownsMapping(map->mStart) && 969 !InitialMapsContains(map->mStart)) { 970 rss += map->mRSS; 971 } 972 } 973 974 return rss; 975 } 976 977 bool InitialMapsContains(uintptr_t aRangeStart) { 978 for (unsigned i = 0; i < mNumInitialMaps; i++) { 979 MOZ_ASSERT(i < MAX_INITIAL_MAPS); 980 981 if (mInitialMaps[i] == aRangeStart) { 982 return true; 983 } 984 } 985 return false; 986 } 987 988 public: 989 void BuildInitialMapInfo() { 990 if (mGetRSSFailed) { 991 return; 992 } 993 994 Maybe<SMapsReader> reader = SMapsReader::open(); 995 if (!reader) { 996 mGetRSSFailed = true; 997 return; 998 } 999 1000 while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) { 1001 if (map->IsCandidate()) { 1002 if (mNumInitialMaps >= MAX_INITIAL_MAPS) { 1003 FdPrintf(mStdErr, "Too many initial mappings, can't compute RSS\n"); 1004 mGetRSSFailed = false; 1005 return; 1006 } 1007 1008 mInitialMaps[mNumInitialMaps++] = map->mStart; 1009 } 1010 } 1011 } 1012 #endif 1013 1014 private: 1015 MemSlot& SlotForResult(Buffer& aResult) { 1016 /* Parse result value and get the corresponding slot. */ 1017 Buffer dummy = aResult.SplitChar('='); 1018 Buffer dummy2 = aResult.SplitChar('#'); 1019 if (dummy || dummy2) { 1020 die("Malformed input"); 1021 } 1022 1023 size_t slot_id = parseNumber(aResult); 1024 mNumUsedSlots = std::max(mNumUsedSlots, slot_id + 1); 1025 1026 return mSlots[slot_id]; 1027 } 1028 1029 void MaybeCommit(MemSlot& aSlot) { 1030 if (mDoMemset) { 1031 // Write any byte, 0x55 isn't significant. 1032 memset(aSlot.mPtr, 0x55, aSlot.mRequest); 1033 } 1034 } 1035 1036 platform_handle_t mStdErr; 1037 size_t mOps = 0; 1038 1039 // The number of slots that have been used. 
It is used to iterate over slots 1040 // without accessing those we haven't initialised. 1041 size_t mNumUsedSlots = 0; 1042 1043 MemSlotList mSlots; 1044 size_t mTotalRequestedSize = 0; 1045 size_t mTotalAllocatedSize = 0; 1046 // Whether to calculate slop for all allocations over the runtime of a 1047 // process. 1048 bool mCalculateSlop = false; 1049 bool mDoMemset = false; 1050 1051 #ifdef XP_LINUX 1052 // If we have a failure reading smaps info then this is used to disable that 1053 // feature. 1054 bool mGetRSSFailed = false; 1055 1056 // The initial memory mappings are recorded here at start up. We exclude 1057 // memory in these mappings when computing RSS. We assume they do not grow 1058 // and that no regions are allocated near them, this is true because they'll 1059 // only record the .bss and .data segments from our binary and shared objects 1060 // or regions that logalloc-replay has created for MappedArrays. 1061 // 1062 // 64 should be enough for anybody. 1063 static constexpr unsigned MAX_INITIAL_MAPS = 64; 1064 uintptr_t mInitialMaps[MAX_INITIAL_MAPS]; 1065 unsigned mNumInitialMaps = 0; 1066 #endif // XP_LINUX 1067 }; 1068 1069 MOZ_RUNINIT static Replay replay; 1070 1071 int main(int argc, const char* argv[]) { 1072 size_t first_pid = 0; 1073 FdReader reader(0); 1074 1075 for (int i = 1; i < argc; i++) { 1076 const char* option = argv[i]; 1077 if (strcmp(option, "-s") == 0) { 1078 // Do accounting to calculate allocation slop. 1079 replay.enableSlopCalculation(); 1080 } else if (strcmp(option, "-c") == 0) { 1081 // Touch memory as we allocate it. 1082 replay.enableMemset(); 1083 } else { 1084 fprintf(stderr, "Unknown command line option: %s\n", option); 1085 return EXIT_FAILURE; 1086 } 1087 } 1088 1089 /* Read log from stdin and dispatch function calls to the Replay instance. 1090 * The log format is essentially: 1091 * <pid> <tid> <function>([<args>])[=<result>] 1092 * <args> is a comma separated list of arguments. 
   *
   * The logs are expected to be preprocessed so that allocations are
   * attributed a tracking slot. The input is trusted not to have crazy
   * values for these slot numbers.
   *
   * <result>, as well as some of the args to some of the function calls are
   * such slot numbers.
   */
  while (true) {
    Buffer line = reader.ReadLine();

    if (!line) {
      break;
    }

    size_t pid = parseNumber(line.SplitChar(' '));
    if (!first_pid) {
      first_pid = pid;
    }

    /* The log may contain data for several processes, only entries for the
     * very first that appears are treated. */
    if (first_pid != pid) {
      continue;
    }

    /* The log contains thread ids for manual analysis, but we just ignore them
     * for now. */
    parseNumber(line.SplitChar(' '));

    /* Tokenise "<function>(<args>)…": what remains of the line after the
     * closing parenthesis is the "=#<slot>" result tail, if any. */
    Buffer func = line.SplitChar('(');
    Buffer args = line.SplitChar(')');

    if (func == Buffer("jemalloc_stats")) {
      replay.jemalloc_stats(args, line);
    } else if (func == Buffer("free")) {
      replay.free(args, line);
    } else if (func == Buffer("malloc")) {
      replay.malloc(args, line);
    } else if (func == Buffer("posix_memalign")) {
      replay.posix_memalign(args, line);
    } else if (func == Buffer("aligned_alloc")) {
      replay.aligned_alloc(args, line);
    } else if (func == Buffer("calloc")) {
      replay.calloc(args, line);
    } else if (func == Buffer("realloc")) {
      replay.realloc(args, line);
    } else if (func == Buffer("memalign")) {
      replay.memalign(args, line);
    } else if (func == Buffer("valloc")) {
      replay.valloc(args, line);
    } else {
      die("Malformed input");
    }
  }

  return 0;
}