tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 9d46a4ed09733f321860f3c16d92f5e5efc87be4
parent feb93ee611fba15b4611c9622ce7bae0304a29fc
Author: Paul Bone <paul@bone.id.au>
Date:   Tue, 18 Nov 2025 02:07:23 +0000

Bug 1987055 - pt 5. Add a 'logical' page size r=glandium,profiler-reviewers,mstange

Mozjemalloc now has a concept of logical page size for its own
structures that may be different from the system's page size.

Differential Revision: https://phabricator.services.mozilla.com/D259219

Diffstat:
Mmemory/build/BaseAlloc.cpp | 4++--
Mmemory/build/Globals.h | 44+++++++++++++++++++++++++++++---------------
Mmemory/build/Globals_inc.h | 24+++++++++++++++++++-----
Mmemory/build/Utils.h | 2++
Mmemory/build/mozjemalloc.cpp | 173++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Mmemory/build/mozjemalloc_profiling.h | 4++++
Mtools/profiler/core/memory_markers.cpp | 14++++++++++----
7 files changed, 185 insertions(+), 80 deletions(-)

diff --git a/memory/build/BaseAlloc.cpp b/memory/build/BaseAlloc.cpp @@ -27,7 +27,7 @@ bool BaseAlloc::pages_alloc(size_t minsize) MOZ_REQUIRES(mMutex) { mPastAddr = base_pages + csize; // Leave enough pages for minsize committed, since otherwise they would // have to be immediately recommitted. - size_t pminsize = PAGE_CEILING(minsize); + size_t pminsize = REAL_PAGE_CEILING(minsize); mNextDecommitted = base_pages + pminsize; if (pminsize < csize) { pages_decommit(reinterpret_cast<void*>(mNextDecommitted), csize - pminsize); @@ -54,7 +54,7 @@ void* BaseAlloc::alloc(size_t aSize) { mNextAddr = mNextAddr + csize; // Make sure enough pages are committed for the new allocation. if (mNextAddr > mNextDecommitted) { - uintptr_t pbase_next_addr = PAGE_CEILING(mNextAddr); + uintptr_t pbase_next_addr = REAL_PAGE_CEILING(mNextAddr); if (!pages_commit(reinterpret_cast<void*>(mNextDecommitted), mNextAddr - mNextDecommitted)) { diff --git a/memory/build/Globals.h b/memory/build/Globals.h @@ -48,29 +48,46 @@ namespace mozilla { +// mozjemalloc has two values for page size. +// +// gPageSize: A logical page size used for mozjemalloc's own structures. +// gRealPageSize The actual page size used by the OS & Hardware. +// +// They can be different so that we can continue to use 4KB pages on systems +// with a larger page size. (WIP see Bug 1980047). +// +// On x86-64 they are both 4KiB. However Apple Silicon has a 16KiB page size, +// so gRealPageSize will be 16KiB, but in order to keep the number of +// regions-per-run to 256 we want to limit gPageSize to 4KiB. (4096 / 16 = +// 256). Other platforms with different gRealPageSizes might also have +// different gRealPageSize and gPageSize. +// +// gPageSize is always less than or equal to gRealPageSize. +// #ifdef MALLOC_STATIC_PAGESIZE -// VM page size. It must divide the runtime CPU page size or the code -// will abort. 
// Platform specific page size conditions copied from js/public/HeapAPI.h # if defined(__powerpc64__) -static const size_t gPageSize = 64_KiB; +static const size_t gRealPageSize = 64_KiB; # elif defined(__loongarch64) -static const size_t gPageSize = 16_KiB; +static const size_t gRealPageSize = 16_KiB; # else -static const size_t gPageSize = 4_KiB; +static const size_t gRealPageSize = 4_KiB; # endif -static const size_t gRealPageSize = gPageSize; +static const size_t gPageSize = gRealPageSize; #else - -// When MALLOC_OPTIONS contains one or several `P`s, the page size used -// across the allocator is multiplied by 2 for each `P`, but we also keep -// the real page size for code paths that need it. gPageSize is thus a -// power of two greater or equal to gRealPageSize. +// When MALLOC_OPTIONS contains one or several `P`s, gPageSize will be +// doubled for each `P`. Likewise each 'p' will halve gPageSize. extern size_t gRealPageSize; extern size_t gPageSize; - #endif +// Return the smallest pagesize multiple that is >= s. +#define PAGE_CEILING(s) (((s) + gPageSizeMask) & ~gPageSizeMask) +#define REAL_PAGE_CEILING(s) (((s) + gRealPageSizeMask) & ~gRealPageSizeMask) + +#define PAGES_PER_REAL_PAGE_CEILING(s) \ + (((s) + gPagesPerRealPage - 1) & ~(gPagesPerRealPage - 1)) + #ifdef MALLOC_STATIC_PAGESIZE # define GLOBAL(type, name, value) static const type name = value; # define GLOBAL_LOG2 LOG2 @@ -118,9 +135,6 @@ void DefineGlobals(); // Return the smallest sub page-size that is >= a. #define SUBPAGE_CEILING(a) (RoundUpPow2(a)) -// Return the smallest pagesize multiple that is >= s. 
-#define PAGE_CEILING(s) (((s) + gPageSizeMask) & ~gPageSizeMask) - // Number of all the small-allocated classes #define NUM_SMALL_CLASSES \ (kNumQuantumClasses + kNumQuantumWideClasses + gNumSubPageClasses) diff --git a/memory/build/Globals_inc.h b/memory/build/Globals_inc.h @@ -15,25 +15,37 @@ GLOBAL(uint8_t, gNumSubPageClasses, []() GLOBAL_CONSTEXPR -> uint8_t { }()) GLOBAL(uint8_t, gPageSize2Pow, GLOBAL_LOG2(gPageSize)) +GLOBAL(uint8_t, gRealPageSize2Pow, GLOBAL_LOG2(gRealPageSize)) GLOBAL(size_t, gPageSizeMask, gPageSize - 1) +GLOBAL(size_t, gRealPageSizeMask, gRealPageSize - 1) + +// For system calls that allocate pages we use this to round-up to a real +// page boundary. +GLOBAL(size_t, gPagesPerRealPage, + gPageSize < gRealPageSize ? gRealPageSize / gPageSize : 1); // Number of pages in a chunk. GLOBAL(size_t, gChunkNumPages, kChunkSize >> gPageSize2Pow) // Number of pages necessary for a chunk header plus a guard page. GLOBAL(size_t, gChunkHeaderNumPages, - 1 + (((sizeof(arena_chunk_t) + - sizeof(arena_chunk_map_t) * gChunkNumPages + gPageSizeMask) & - ~gPageSizeMask) >> - gPageSize2Pow)) + gPagesPerRealPage + + (std::max(PAGE_CEILING(sizeof(arena_chunk_t) + + sizeof(arena_chunk_map_t) * gChunkNumPages), + REAL_PAGE_CEILING(sizeof(arena_chunk_t) + + sizeof(arena_chunk_map_t) * + gChunkNumPages)) >> + gPageSize2Pow)); // One chunk, minus the header, minus a guard page GLOBAL(size_t, gMaxLargeClass, - kChunkSize - gPageSize - (gChunkHeaderNumPages << gPageSize2Pow)) + kChunkSize - gRealPageSize - (gChunkHeaderNumPages << gPageSize2Pow)) // Various checks that regard configuration. 
GLOBAL_ASSERT(1ULL << gPageSize2Pow == gPageSize, "Page size is not a power of two"); +GLOBAL_ASSERT(1ULL << gRealPageSize2Pow == gRealPageSize, + "Real page size is not a power of two"); GLOBAL_ASSERT(kQuantum >= sizeof(void*)); GLOBAL_ASSERT(kQuantum <= kQuantumWide); GLOBAL_ASSERT(!kNumQuantumWideClasses || @@ -44,4 +56,6 @@ GLOBAL_ASSERT(kQuantumWide <= kMaxQuantumClass); GLOBAL_ASSERT(gMaxSubPageClass >= kMinSubPageClass || gMaxSubPageClass == 0); GLOBAL_ASSERT(gMaxLargeClass >= gMaxSubPageClass); GLOBAL_ASSERT(kChunkSize >= gPageSize); +GLOBAL_ASSERT(kChunkSize >= gRealPageSize); +GLOBAL_ASSERT(gPagesPerRealPage < gChunkHeaderNumPages); GLOBAL_ASSERT(kQuantum * 4 <= kChunkSize); diff --git a/memory/build/Utils.h b/memory/build/Utils.h @@ -201,6 +201,8 @@ unsigned inline operator/(unsigned num, FastDivisor<T> divisor) { #define ALIGNMENT_CEILING(s, alignment) \ (((s) + ((alignment) - 1)) & (~((alignment) - 1))) +#define ALIGNMENT_FLOOR(s, alignment) ((s) & (~((alignment) - 1))) + static inline const char* _getprogname(void) { return "<jemalloc>"; } #ifdef XP_WIN diff --git a/memory/build/mozjemalloc.cpp b/memory/build/mozjemalloc.cpp @@ -819,11 +819,11 @@ struct arena_t { // This is used internally by FindDirtyPages to actually perform scanning // within a chunk's page tables. It finds the first dirty page within the // chunk. - bool ScanForFirstDirtyPage(); + bool ScanForFirstDirtyPage() MOZ_REQUIRES(mArena.mLock); // After ScanForFirstDirtyPage() returns true, this may be used to find the // last dirty page within the same run. - bool ScanForLastDirtyPage(); + bool ScanForLastDirtyPage() MOZ_REQUIRES(mArena.mLock); // Returns a pair, the first field indicates if there are more dirty pages // remaining in the current chunk. 
The second field if non-null points to a @@ -834,7 +834,8 @@ struct arena_t { // FinishPurgingInChunk() is used whenever we decide to stop purging in a // chunk, This could be because there are no more dirty pages, or the chunk // is dying, or we hit the arena-level threshold. - void FinishPurgingInChunk(bool aAddToMAdvised) MOZ_REQUIRES(mArena.mLock); + void FinishPurgingInChunk(bool aAddToMAdvised, bool aAddToDirty) + MOZ_REQUIRES(mArena.mLock); explicit PurgeInfo(arena_t& arena, arena_chunk_t* chunk, PurgeStats& stats) : mArena(arena), mChunk(chunk), mPurgeStats(stats) {} @@ -1487,6 +1488,9 @@ bool arena_t::SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge, // pages in one operation, in order to reduce system call // overhead. if (chunk->mPageMap[run_ind + i].bits & CHUNK_MAP_DECOMMITTED) { + // The start of the decommitted area is on a real page boundary. + MOZ_ASSERT((run_ind + i) % gPagesPerRealPage == 0); + // Advance i+j to just past the index of the last page // to commit. Clear CHUNK_MAP_DECOMMITTED along the way. size_t j; @@ -1503,6 +1507,9 @@ bool arena_t::SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge, // here. if (i + j == need_pages) { size_t extra_commit = ExtraCommitPages(j, rem_pages); + extra_commit = + PAGES_PER_REAL_PAGE_CEILING(run_ind + i + j + extra_commit) - + run_ind - i - j; for (; i + j < need_pages + extra_commit && (chunk->mPageMap[run_ind + i + j].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED); @@ -1511,6 +1518,8 @@ bool arena_t::SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge, (CHUNK_MAP_FRESH | CHUNK_MAP_MADVISED)) == 0); } } + // The end of the decommitted area is on a real page boundary. + MOZ_ASSERT((run_ind + i + j) % gPagesPerRealPage == 0); if (!pages_commit( (void*)(uintptr_t(chunk) + ((run_ind + i) << gPageSize2Pow)), @@ -1612,25 +1621,35 @@ void arena_t::InitChunk(arena_chunk_t* aChunk, size_t aMinCommittedPages) { // Clear the bits for the real header pages. 
size_t i; - for (i = 0; i < gChunkHeaderNumPages - 1; i++) { + for (i = 0; i < gChunkHeaderNumPages - gPagesPerRealPage; i++) { aChunk->mPageMap[i].bits = 0; } - mStats.committed += gChunkHeaderNumPages - 1; + mStats.committed += gChunkHeaderNumPages - gPagesPerRealPage; // Decommit the last header page (=leading page) as a guard. - pages_decommit((void*)(uintptr_t(aChunk) + (i << gPageSize2Pow)), gPageSize); - aChunk->mPageMap[i++].bits = CHUNK_MAP_DECOMMITTED; + MOZ_ASSERT(i % gPagesPerRealPage == 0); + pages_decommit((void*)(uintptr_t(aChunk) + (i << gPageSize2Pow)), + gRealPageSize); + for (; i < gChunkHeaderNumPages; i++) { + aChunk->mPageMap[i].bits = CHUNK_MAP_DECOMMITTED; + } // If MALLOC_DECOMMIT is enabled then commit only the pages we're about to // use. Otherwise commit all of them. #ifdef MALLOC_DECOMMIT - size_t n_fresh_pages = + // The number of usable pages in the chunk, in other words, the total number + // of pages in the chunk, minus the number of pages in the chunk header + // (including the guard page at the beginning of the chunk), and the number of + // pages for the guard page at the end of the chunk. + size_t chunk_usable_pages = + gChunkNumPages - gChunkHeaderNumPages - gPagesPerRealPage; + size_t n_fresh_pages = PAGES_PER_REAL_PAGE_CEILING( aMinCommittedPages + - ExtraCommitPages( - aMinCommittedPages, - gChunkNumPages - gChunkHeaderNumPages - aMinCommittedPages - 1); + ExtraCommitPages(aMinCommittedPages, + chunk_usable_pages - aMinCommittedPages)); #else - size_t n_fresh_pages = gChunkNumPages - 1 - gChunkHeaderNumPages; + size_t n_fresh_pages = + gChunkNumPages - gPagesPerRealPage - gChunkHeaderNumPages; #endif // The committed pages are marked as Fresh. Our caller, SplitRun will update @@ -1644,12 +1663,13 @@ void arena_t::InitChunk(arena_chunk_t* aChunk, size_t aMinCommittedPages) { #ifndef MALLOC_DECOMMIT // If MALLOC_DECOMMIT isn't defined then all the pages are fresh and setup in // the loop above. 
- MOZ_ASSERT(i == gChunkNumPages - 1); + MOZ_ASSERT(i == gChunkNumPages - gPagesPerRealPage); #endif // If MALLOC_DECOMMIT is defined, then this will decommit the remainder of the // chunk plus the last page which is a guard page, if it is not defined it // will only decommit the guard page. + MOZ_ASSERT(i % gPagesPerRealPage == 0); pages_decommit((void*)(uintptr_t(aChunk) + (i << gPageSize2Pow)), (gChunkNumPages - i) << gPageSize2Pow); for (; i < gChunkNumPages; i++) { @@ -1658,11 +1678,13 @@ void arena_t::InitChunk(arena_chunk_t* aChunk, size_t aMinCommittedPages) { // aMinCommittedPages will create a valid run. MOZ_ASSERT(aMinCommittedPages > 0); - MOZ_ASSERT(aMinCommittedPages <= gChunkNumPages - gChunkHeaderNumPages - 1); + MOZ_ASSERT(aMinCommittedPages <= + gChunkNumPages - gChunkHeaderNumPages - gPagesPerRealPage); // Create the run. aChunk->mPageMap[gChunkHeaderNumPages].bits |= gMaxLargeClass; - aChunk->mPageMap[gChunkNumPages - 2].bits |= gMaxLargeClass; + aChunk->mPageMap[gChunkNumPages - gPagesPerRealPage - 1].bits |= + gMaxLargeClass; mRunsAvail.Insert(&aChunk->mPageMap[gChunkHeaderNumPages]); } @@ -1687,9 +1709,10 @@ bool arena_t::RemoveChunk(arena_chunk_t* aChunk) { // Count the number of madvised/fresh pages and update the stats. size_t madvised = 0; size_t fresh = 0; - for (size_t i = gChunkHeaderNumPages; i < gChunkNumPages - 1; i++) { - // There must not be any pages that are not fresh, madvised, decommitted - // or dirty. + for (size_t i = gChunkHeaderNumPages; i < gChunkNumPages - gPagesPerRealPage; + i++) { + // There must not be any pages that are not fresh, madvised, decommitted or + // dirty. 
MOZ_ASSERT(aChunk->mPageMap[i].bits & (CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED | CHUNK_MAP_DIRTY)); MOZ_ASSERT((aChunk->mPageMap[i].bits & CHUNK_MAP_BUSY) == 0); @@ -1711,7 +1734,7 @@ bool arena_t::RemoveChunk(arena_chunk_t* aChunk) { #endif mStats.mapped -= kChunkSize; - mStats.committed -= gChunkHeaderNumPages - 1; + mStats.committed -= gChunkHeaderNumPages - gPagesPerRealPage; return true; } @@ -2024,7 +2047,7 @@ ArenaPurgeResult arena_t::Purge(PurgeCondition aCond, PurgeStats& aStats) { if (!continue_purge_chunk || !continue_purge_arena) { // We're going to stop purging here so update the chunk's bookkeeping. - purge_info.FinishPurgingInChunk(true); + purge_info.FinishPurgingInChunk(true, continue_purge_chunk); purge_info.mArena.mIsPurgePending = false; } } // MaybeMutexAutoLock @@ -2086,7 +2109,7 @@ bool arena_t::PurgeInfo::FindDirtyPages(bool aPurgedOnce) { if (mChunk->mNumDirty == 0 || mChunk->mDying) { // Add the chunk to the mChunksMAdvised list if it's had at least one // madvise. - FinishPurgingInChunk(aPurgedOnce); + FinishPurgingInChunk(aPurgedOnce, false); return false; } @@ -2101,8 +2124,12 @@ bool arena_t::PurgeInfo::FindDirtyPages(bool aPurgedOnce) { // On the other hand: // * Now accessing those pages will require either pages_commit() or a page // fault to ensure they're available. 
- MOZ_ALWAYS_TRUE(ScanForFirstDirtyPage()); - MOZ_ALWAYS_TRUE(ScanForLastDirtyPage()); + do { + if (!ScanForFirstDirtyPage()) { + FinishPurgingInChunk(aPurgedOnce, false); + return false; + } + } while (!ScanForLastDirtyPage()); MOZ_ASSERT(mFreeRunInd >= gChunkHeaderNumPages); MOZ_ASSERT(mFreeRunInd <= mDirtyInd); @@ -2111,34 +2138,31 @@ bool arena_t::PurgeInfo::FindDirtyPages(bool aPurgedOnce) { MOZ_ASSERT(mDirtyLen != 0); MOZ_ASSERT(mDirtyLen <= mFreeRunLen); MOZ_ASSERT(mDirtyInd + mDirtyLen <= mFreeRunInd + mFreeRunLen); + MOZ_ASSERT(mDirtyInd % gPagesPerRealPage == 0); + MOZ_ASSERT(mDirtyLen % gPagesPerRealPage == 0); // Count the number of dirty pages and clear their bits. mDirtyNPages = 0; for (size_t i = 0; i < mDirtyLen; i++) { size_t& bits = mChunk->mPageMap[mDirtyInd + i].bits; - - // We must not find any busy pages because this chunk shouldn't be in the - // dirty list. - MOZ_ASSERT(!(bits & CHUNK_MAP_BUSY)); - if (bits & CHUNK_MAP_DIRTY) { - MOZ_ASSERT((bits & CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED) == 0); mDirtyNPages++; bits ^= CHUNK_MAP_DIRTY; } } + MOZ_ASSERT(mDirtyNPages > 0); MOZ_ASSERT(mDirtyNPages <= mChunk->mNumDirty); MOZ_ASSERT(mDirtyNPages <= mDirtyLen); + mChunk->mNumDirty -= mDirtyNPages; + mArena.mNumDirty -= mDirtyNPages; + // Mark the run as busy so that another thread freeing memory won't try to // coalesce it. mChunk->mPageMap[mFreeRunInd].bits |= CHUNK_MAP_BUSY; mChunk->mPageMap[FreeRunLastInd()].bits |= CHUNK_MAP_BUSY; - mChunk->mNumDirty -= mDirtyNPages; - mArena.mNumDirty -= mDirtyNPages; - // Before we unlock ensure that no other thread can allocate from these // pages. if (mArena.mSpare != mChunk) { @@ -2152,8 +2176,8 @@ bool arena_t::PurgeInfo::ScanForFirstDirtyPage() { // Scan in two nested loops. The outer loop iterates over runs, and the inner // loop iterates over pages within unallocated runs. 
size_t run_pages; - for (size_t run_idx = mChunk->mDirtyRunHint; run_idx < gChunkNumPages; - run_idx += run_pages) { + for (size_t run_idx = mChunk->mDirtyRunHint; + run_idx < gChunkNumPages - gPagesPerRealPage; run_idx += run_pages) { size_t run_bits = mChunk->mPageMap[run_idx].bits; // We must not find any busy pages because this chunk shouldn't be in // the dirty list. @@ -2186,16 +2210,23 @@ bool arena_t::PurgeInfo::ScanForFirstDirtyPage() { // Scan for dirty pages. for (size_t page_idx = run_idx; page_idx < run_idx + run_pages; page_idx++) { - size_t page_bits = mChunk->mPageMap[page_idx].bits; + size_t& page_bits = mChunk->mPageMap[page_idx].bits; // We must not find any busy pages because this chunk shouldn't be in // the dirty list. MOZ_ASSERT((page_bits & CHUNK_MAP_BUSY) == 0); if (page_bits & CHUNK_MAP_DIRTY) { MOZ_ASSERT((page_bits & CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED) == 0); - mDirtyInd = page_idx; + MOZ_ASSERT(mChunk->mDirtyRunHint <= run_idx); mChunk->mDirtyRunHint = run_idx; - return true; + + if ((page_idx & (gPagesPerRealPage - 1)) == 0) { + mDirtyInd = page_idx; + return true; + } + + // This dirty page isn't aligned and can't be purged. + mPurgeStats.pages_unpurgable++; } } } @@ -2204,15 +2235,26 @@ bool arena_t::PurgeInfo::ScanForFirstDirtyPage() { } bool arena_t::PurgeInfo::ScanForLastDirtyPage() { - for (size_t i = mFreeRunInd + mFreeRunLen - 1; i >= mFreeRunInd; i--) { - size_t bits = mChunk->mPageMap[i].bits; - MOZ_ASSERT((bits & CHUNK_MAP_BUSY) == 0); + mDirtyLen = 0; + for (size_t i = FreeRunLastInd(); i >= mDirtyInd; i--) { + size_t& bits = mChunk->mPageMap[i].bits; if (bits & CHUNK_MAP_DIRTY) { - mDirtyLen = i - mDirtyInd + 1; - return true; + // We must not find any busy pages because this chunk shouldn't be in the + // dirty list. 
+ MOZ_ASSERT(!(bits & CHUNK_MAP_BUSY)); + + if ((i & (gPagesPerRealPage - 1)) == gPagesPerRealPage - 1) { + mDirtyLen = i - mDirtyInd + 1; + return true; + } + + // This dirty page's end isn't aligned with a real page's end. + mPurgeStats.pages_unpurgable++; } } + // Advance the dirty page hint so that the next scan will make progress. + mChunk->mDirtyRunHint = FreeRunLastInd() + 1; return false; } @@ -2277,7 +2319,8 @@ std::pair<bool, arena_chunk_t*> arena_t::PurgeInfo::UpdatePagesAndCounts() { // A dying chunk doesn't need to be coaleased, it will already have one // large run. MOZ_ASSERT(mFreeRunInd == gChunkHeaderNumPages && - mFreeRunLen == gChunkNumPages - gChunkHeaderNumPages - 1); + mFreeRunLen == + gChunkNumPages - gChunkHeaderNumPages - gPagesPerRealPage); return std::make_pair(false, mChunk); } @@ -2300,7 +2343,8 @@ std::pair<bool, arena_chunk_t*> arena_t::PurgeInfo::UpdatePagesAndCounts() { return std::make_pair(mChunk->mNumDirty != 0, chunk_to_release); } -void arena_t::PurgeInfo::FinishPurgingInChunk(bool aAddToMAdvised) { +void arena_t::PurgeInfo::FinishPurgingInChunk(bool aAddToMAdvised, + bool aAddToDirty) { // If there's no more purge activity for this chunk then finish up while // we still have the lock. MOZ_ASSERT(mChunk->mIsPurging); @@ -2317,7 +2361,7 @@ void arena_t::PurgeInfo::FinishPurgingInChunk(bool aAddToMAdvised) { return; } - if (mChunk->mNumDirty != 0) { + if (mChunk->mNumDirty != 0 && aAddToDirty) { // Put the semi-processed chunk on the front of the queue so that it is // the first chunk processed next time. mArena.mChunksDirty.pushFront(mChunk); @@ -2346,7 +2390,7 @@ size_t arena_t::TryCoalesce(arena_chunk_t* aChunk, size_t run_ind, MOZ_ASSERT(size == run_pages << gPageSize2Pow); // Try to coalesce forward. 
- if (run_ind + run_pages < gChunkNumPages - 1 && + if (run_ind + run_pages < gChunkNumPages - gPagesPerRealPage && (aChunk->mPageMap[run_ind + run_pages].bits & (CHUNK_MAP_ALLOCATED | CHUNK_MAP_BUSY)) == 0) { size_t nrun_size = @@ -3698,7 +3742,7 @@ void* arena_t::PallocHuge(size_t aSize, size_t aAlignment, bool aZero) { // We're going to configure guard pages in the region between the // page-aligned size and the chunk-aligned size, so if those are the same // then we need to force that region into existence. - csize = CHUNK_CEILING(aSize + gPageSize); + csize = CHUNK_CEILING(aSize + gRealPageSize); if (csize < aSize) { // size is large enough to cause size_t wrap-around. return nullptr; @@ -3716,7 +3760,8 @@ void* arena_t::PallocHuge(size_t aSize, size_t aAlignment, bool aZero) { ExtentAlloc::dealloc(node); return nullptr; } - psize = PAGE_CEILING(aSize); + psize = REAL_PAGE_CEILING(aSize); + MOZ_ASSERT(psize < csize); #ifdef MOZ_DEBUG if (aZero) { chunk_assert_zero(ret, psize); @@ -3772,7 +3817,7 @@ void* arena_t::RallocHuge(void* aPtr, size_t aSize, size_t aOldSize) { // Avoid moving the allocation if the size class would not change. if (aOldSize > gMaxLargeClass && CHUNK_CEILING(aSize + gPageSize) == CHUNK_CEILING(aOldSize + gPageSize)) { - size_t psize = PAGE_CEILING(aSize); + size_t psize = REAL_PAGE_CEILING(aSize); if (aSize < aOldSize) { MaybePoison((void*)((uintptr_t)aPtr + aSize), aOldSize - aSize); } @@ -3859,7 +3904,7 @@ static void huge_dalloc(void* aPtr, arena_t* aArena) { MOZ_RELEASE_ASSERT(node->mArenaId == node->mArena->mId); huge.Remove(node); - mapped = CHUNK_CEILING(node->mSize + gPageSize); + mapped = CHUNK_CEILING(node->mSize + gRealPageSize); huge_allocated -= node->mSize; huge_mapped -= mapped; huge_operations++; @@ -3893,14 +3938,15 @@ static bool malloc_init_hard() { // We assume that the page size is a power of 2. 
MOZ_ASSERT(IsPowerOfTwo(page_size)); #ifdef MALLOC_STATIC_PAGESIZE - if (gPageSize % page_size) { + if (gRealPageSize % page_size) { _malloc_message( _getprogname(), "Compile-time page size does not divide the runtime one.\n"); MOZ_CRASH(); } #else - gRealPageSize = gPageSize = page_size; + gPageSize = page_size; + gRealPageSize = page_size; #endif // Get runtime configuration. @@ -3964,16 +4010,30 @@ static bool malloc_init_hard() { break; # ifndef MALLOC_STATIC_PAGESIZE case 'P': - MOZ_ASSERT(gPageSize >= 4_KiB); + MOZ_ASSERT(gPageSize >= 1_KiB); MOZ_ASSERT(gPageSize <= 64_KiB); prefix_arg = prefix_arg ? prefix_arg : 1; gPageSize <<= prefix_arg; // We know that if the shift causes gPageSize to be zero then it's // because it shifted all the bits off. We didn't start with zero. // Therefore if gPageSize is out of bounds we set it to 64KiB. - if (gPageSize < 4_KiB || gPageSize > 64_KiB) { + if (gPageSize < 1_KiB || gPageSize > 64_KiB) { gPageSize = 64_KiB; } + // We also limit gPageSize to be no larger than gRealPageSize, there's + // no reason to support this. + if (gPageSize > gRealPageSize) { + gPageSize = gRealPageSize; + } + break; + case 'p': + MOZ_ASSERT(gPageSize >= 1_KiB); + MOZ_ASSERT(gPageSize <= 64_KiB); + prefix_arg = prefix_arg ? prefix_arg : 1; + gPageSize >>= prefix_arg; + if (gPageSize < 1_KiB) { + gPageSize = 1_KiB; + } break; # endif #endif @@ -3997,6 +4057,7 @@ static bool malloc_init_hard() { } } + MOZ_ASSERT(gPageSize <= gRealPageSize); #ifndef MALLOC_STATIC_PAGESIZE DefineGlobals(); #endif @@ -4436,6 +4497,10 @@ static size_t hard_purge_chunk(arena_chunk_t* aChunk) { // We could use mincore to find out which pages are actually // present, but it's not clear that's better. if (npages > 0) { + // i and npages should be aligned because they needed to be for the + // purge code that set CHUNK_MAP_MADVISED. 
+ MOZ_ASSERT((i % gPagesPerRealPage) == 0); + MOZ_ASSERT((npages % gPagesPerRealPage) == 0); pages_decommit(((char*)aChunk) + (i << gPageSize2Pow), npages << gPageSize2Pow); (void)pages_commit(((char*)aChunk) + (i << gPageSize2Pow), diff --git a/memory/build/mozjemalloc_profiling.h b/memory/build/mozjemalloc_profiling.h @@ -26,6 +26,10 @@ struct PurgeStats { // The total number of pages that were cleaned (includes previously an pages). size_t pages_total = 0; + // The number of pages that can't be purged because of logical/hardware + // page alignment. + size_t pages_unpurgable = 0; + size_t system_calls = 0; size_t chunks = 0; diff --git a/tools/profiler/core/memory_markers.cpp b/tools/profiler/core/memory_markers.cpp @@ -29,6 +29,8 @@ struct PurgeArenaMarker : mozilla::BaseMarkerType<PurgeArenaMarker> { MS::Format::Integer}, {"pages_clean", MS::InputType::Uint32, "Number of clean pages amoung dirty pages cleaned", MS::Format::Integer}, + {"pages_unpurgable", MS::InputType::Uint32, + "Number of dirty pages skipped due to alignment", MS::Format::Integer}, {"syscalls", MS::InputType::Uint32, "Number of system calls", MS::Format::Integer}, {"chunks", MS::InputType::Uint32, "Number of chunks processed", @@ -38,8 +40,8 @@ struct PurgeArenaMarker : mozilla::BaseMarkerType<PurgeArenaMarker> { static void StreamJSONMarkerData( mozilla::baseprofiler::SpliceableJSONWriter& aWriter, uint32_t aId, const String8View& aLabel, const String8View& aCaller, - uint32_t aPagesDirty, uint32_t aPagesTotal, uint32_t aSyscalls, - uint32_t aChunks, const String8View& aResult) { + uint32_t aPagesDirty, uint32_t aPagesTotal, uint32_t aPagesUnpurgable, + uint32_t aSyscalls, uint32_t aChunks, const String8View& aResult) { aWriter.IntProperty("id", aId); aWriter.StringProperty("label", aLabel); aWriter.StringProperty("caller", aCaller); @@ -48,6 +50,9 @@ struct PurgeArenaMarker : mozilla::BaseMarkerType<PurgeArenaMarker> { if (pages_clean) {
aWriter.IntProperty("pages_clean", aPagesTotal - aPagesDirty); } + if (aPagesUnpurgable) { + aWriter.IntProperty("pages_unpurgable", aPagesUnpurgable); + } aWriter.IntProperty("syscalls", aSyscalls); aWriter.IntProperty("chunks", aChunks); aWriter.StringProperty("result", aResult); @@ -84,8 +89,9 @@ class GeckoProfilerMallocCallbacks : public MallocProfilerCallbacks { PurgeArenaMarker, aStats.arena_id, ProfilerString8View::WrapNullTerminatedString(aStats.arena_label), ProfilerString8View::WrapNullTerminatedString(aStats.caller), - aStats.pages_dirty, aStats.pages_total, aStats.system_calls, - aStats.chunks, ProfilerString8View::WrapNullTerminatedString(result)); + aStats.pages_dirty, aStats.pages_total, aStats.pages_unpurgable, + aStats.system_calls, aStats.chunks, + ProfilerString8View::WrapNullTerminatedString(result)); } }; } // namespace profiler