tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 9d46a4ed09733f321860f3c16d92f5e5efc87be4
parent feb93ee611fba15b4611c9622ce7bae0304a29fc
Author: Paul Bone <paul@bone.id.au>
Date:   Tue, 18 Nov 2025 02:07:23 +0000

Bug 1987055 - pt 5. Add a 'logical' page size r=glandium,profiler-reviewers,mstange

Mozjemalloc now has a concept of logical page size for its own
structures that may be different from the system's page size.

Differential Revision: https://phabricator.services.mozilla.com/D259219

Diffstat:
Mmemory/build/BaseAlloc.cpp | 4++--
Mmemory/build/Globals.h | 44+++++++++++++++++++++++++++++---------------
Mmemory/build/Globals_inc.h | 24+++++++++++++++++++-----
Mmemory/build/Utils.h | 2++
Mmemory/build/mozjemalloc.cpp | 173++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Mmemory/build/mozjemalloc_profiling.h | 4++++
Mtools/profiler/core/memory_markers.cpp | 14++++++++++----
7 files changed, 185 insertions(+), 80 deletions(-)

diff --git a/memory/build/BaseAlloc.cpp b/memory/build/BaseAlloc.cpp @@ -27,7 +27,7 @@ bool BaseAlloc::pages_alloc(size_t minsize) MOZ_REQUIRES(mMutex) { mPastAddr = base_pages + csize; // Leave enough pages for minsize committed, since otherwise they would // have to be immediately recommitted. - size_t pminsize = PAGE_CEILING(minsize); + size_t pminsize = REAL_PAGE_CEILING(minsize); mNextDecommitted = base_pages + pminsize; if (pminsize < csize) { pages_decommit(reinterpret_cast<void*>(mNextDecommitted), csize - pminsize); @@ -54,7 +54,7 @@ void* BaseAlloc::alloc(size_t aSize) { mNextAddr = mNextAddr + csize; // Make sure enough pages are committed for the new allocation. if (mNextAddr > mNextDecommitted) { - uintptr_t pbase_next_addr = PAGE_CEILING(mNextAddr); + uintptr_t pbase_next_addr = REAL_PAGE_CEILING(mNextAddr); if (!pages_commit(reinterpret_cast<void*>(mNextDecommitted), mNextAddr - mNextDecommitted)) { diff --git a/memory/build/Globals.h b/memory/build/Globals.h @@ -48,29 +48,46 @@ namespace mozilla { +// mozjemalloc has two values for page size. +// +// gPageSize: A logical page size used for mozjemalloc's own structures. +// gRealPageSize The actual page size used by the OS & Hardware. +// +// They can be different so that we can continue to use 4KB pages on systems +// with a larger page size. (WIP see Bug 1980047). +// +// On x86-64 they are both 4KiB. However Apple Silicon has a 16KiB page size, +// so gRealPageSize will be 16KiB, but in order to keep the number of +// regions-per-run to 256 we want to limit gPageSize to 4KiB. (4096 / 16 = +// 256). Other platforms with different gRealPageSizes might also have +// different gRealPageSize and gPageSize. +// +// gPageSize is always less than or equal to gRealPageSize. +// #ifdef MALLOC_STATIC_PAGESIZE -// VM page size. It must divide the runtime CPU page size or the code -// will abort. 
// Platform specific page size conditions copied from js/public/HeapAPI.h # if defined(__powerpc64__) -static const size_t gPageSize = 64_KiB; +static const size_t gRealPageSize = 64_KiB; # elif defined(__loongarch64) -static const size_t gPageSize = 16_KiB; +static const size_t gRealPageSize = 16_KiB; # else -static const size_t gPageSize = 4_KiB; +static const size_t gRealPageSize = 4_KiB; # endif -static const size_t gRealPageSize = gPageSize; +static const size_t gPageSize = gRealPageSize; #else - -// When MALLOC_OPTIONS contains one or several `P`s, the page size used -// across the allocator is multiplied by 2 for each `P`, but we also keep -// the real page size for code paths that need it. gPageSize is thus a -// power of two greater or equal to gRealPageSize. +// When MALLOC_OPTIONS contains one or several `P`s, gPageSize will be +// doubled for each `P`. Likewise each 'p' will halve gPageSize. extern size_t gRealPageSize; extern size_t gPageSize; - #endif +// Return the smallest pagesize multiple that is >= s. +#define PAGE_CEILING(s) (((s) + gPageSizeMask) & ~gPageSizeMask) +#define REAL_PAGE_CEILING(s) (((s) + gRealPageSizeMask) & ~gRealPageSizeMask) + +#define PAGES_PER_REAL_PAGE_CEILING(s) \ + (((s) + gPagesPerRealPage - 1) & ~(gPagesPerRealPage - 1)) + #ifdef MALLOC_STATIC_PAGESIZE # define GLOBAL(type, name, value) static const type name = value; # define GLOBAL_LOG2 LOG2 @@ -118,9 +135,6 @@ void DefineGlobals(); // Return the smallest sub page-size that is >= a. #define SUBPAGE_CEILING(a) (RoundUpPow2(a)) -// Return the smallest pagesize multiple that is >= s. 
-#define PAGE_CEILING(s) (((s) + gPageSizeMask) & ~gPageSizeMask) - // Number of all the small-allocated classes #define NUM_SMALL_CLASSES \ (kNumQuantumClasses + kNumQuantumWideClasses + gNumSubPageClasses) diff --git a/memory/build/Globals_inc.h b/memory/build/Globals_inc.h @@ -15,25 +15,37 @@ GLOBAL(uint8_t, gNumSubPageClasses, []() GLOBAL_CONSTEXPR -> uint8_t { }()) GLOBAL(uint8_t, gPageSize2Pow, GLOBAL_LOG2(gPageSize)) +GLOBAL(uint8_t, gRealPageSize2Pow, GLOBAL_LOG2(gRealPageSize)) GLOBAL(size_t, gPageSizeMask, gPageSize - 1) +GLOBAL(size_t, gRealPageSizeMask, gRealPageSize - 1) + +// For system calls that allocate pages we use this to round-up to a real +// page boundary. +GLOBAL(size_t, gPagesPerRealPage, + gPageSize < gRealPageSize ? gRealPageSize / gPageSize : 1); // Number of pages in a chunk. GLOBAL(size_t, gChunkNumPages, kChunkSize >> gPageSize2Pow) // Number of pages necessary for a chunk header plus a guard page. GLOBAL(size_t, gChunkHeaderNumPages, - 1 + (((sizeof(arena_chunk_t) + - sizeof(arena_chunk_map_t) * gChunkNumPages + gPageSizeMask) & - ~gPageSizeMask) >> - gPageSize2Pow)) + gPagesPerRealPage + + (std::max(PAGE_CEILING(sizeof(arena_chunk_t) + + sizeof(arena_chunk_map_t) * gChunkNumPages), + REAL_PAGE_CEILING(sizeof(arena_chunk_t) + + sizeof(arena_chunk_map_t) * + gChunkNumPages)) >> + gPageSize2Pow)); // One chunk, minus the header, minus a guard page GLOBAL(size_t, gMaxLargeClass, - kChunkSize - gPageSize - (gChunkHeaderNumPages << gPageSize2Pow)) + kChunkSize - gRealPageSize - (gChunkHeaderNumPages << gPageSize2Pow)) // Various checks that regard configuration. 
GLOBAL_ASSERT(1ULL << gPageSize2Pow == gPageSize, "Page size is not a power of two"); +GLOBAL_ASSERT(1ULL << gRealPageSize2Pow == gRealPageSize, + "Real page size is not a power of two"); GLOBAL_ASSERT(kQuantum >= sizeof(void*)); GLOBAL_ASSERT(kQuantum <= kQuantumWide); GLOBAL_ASSERT(!kNumQuantumWideClasses || @@ -44,4 +56,6 @@ GLOBAL_ASSERT(kQuantumWide <= kMaxQuantumClass); GLOBAL_ASSERT(gMaxSubPageClass >= kMinSubPageClass || gMaxSubPageClass == 0); GLOBAL_ASSERT(gMaxLargeClass >= gMaxSubPageClass); GLOBAL_ASSERT(kChunkSize >= gPageSize); +GLOBAL_ASSERT(kChunkSize >= gRealPageSize); +GLOBAL_ASSERT(gPagesPerRealPage < gChunkHeaderNumPages); GLOBAL_ASSERT(kQuantum * 4 <= kChunkSize); diff --git a/memory/build/Utils.h b/memory/build/Utils.h @@ -201,6 +201,8 @@ unsigned inline operator/(unsigned num, FastDivisor<T> divisor) { #define ALIGNMENT_CEILING(s, alignment) \ (((s) + ((alignment) - 1)) & (~((alignment) - 1))) +#define ALIGNMENT_FLOOR(s, alignment) ((s) & (~((alignment) - 1))) + static inline const char* _getprogname(void) { return "<jemalloc>"; } #ifdef XP_WIN diff --git a/memory/build/mozjemalloc.cpp b/memory/build/mozjemalloc.cpp @@ -819,11 +819,11 @@ struct arena_t { // This is used internally by FindDirtyPages to actually perform scanning // within a chunk's page tables. It finds the first dirty page within the // chunk. - bool ScanForFirstDirtyPage(); + bool ScanForFirstDirtyPage() MOZ_REQUIRES(mArena.mLock); // After ScanForFirstDirtyPage() returns true, this may be used to find the // last dirty page within the same run. - bool ScanForLastDirtyPage(); + bool ScanForLastDirtyPage() MOZ_REQUIRES(mArena.mLock); // Returns a pair, the first field indicates if there are more dirty pages // remaining in the current chunk. 
The second field if non-null points to a @@ -834,7 +834,8 @@ struct arena_t { // FinishPurgingInChunk() is used whenever we decide to stop purging in a // chunk, This could be because there are no more dirty pages, or the chunk // is dying, or we hit the arena-level threshold. - void FinishPurgingInChunk(bool aAddToMAdvised) MOZ_REQUIRES(mArena.mLock); + void FinishPurgingInChunk(bool aAddToMAdvised, bool aAddToDirty) + MOZ_REQUIRES(mArena.mLock); explicit PurgeInfo(arena_t& arena, arena_chunk_t* chunk, PurgeStats& stats) : mArena(arena), mChunk(chunk), mPurgeStats(stats) {} @@ -1487,6 +1488,9 @@ bool arena_t::SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge, // pages in one operation, in order to reduce system call // overhead. if (chunk->mPageMap[run_ind + i].bits & CHUNK_MAP_DECOMMITTED) { + // The start of the decommitted area is on a real page boundary. + MOZ_ASSERT((run_ind + i) % gPagesPerRealPage == 0); + // Advance i+j to just past the index of the last page // to commit. Clear CHUNK_MAP_DECOMMITTED along the way. size_t j; @@ -1503,6 +1507,9 @@ bool arena_t::SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge, // here. if (i + j == need_pages) { size_t extra_commit = ExtraCommitPages(j, rem_pages); + extra_commit = + PAGES_PER_REAL_PAGE_CEILING(run_ind + i + j + extra_commit) - + run_ind - i - j; for (; i + j < need_pages + extra_commit && (chunk->mPageMap[run_ind + i + j].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED); @@ -1511,6 +1518,8 @@ bool arena_t::SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge, (CHUNK_MAP_FRESH | CHUNK_MAP_MADVISED)) == 0); } } + // The end of the decommitted area is on a real page boundary. + MOZ_ASSERT((run_ind + i + j) % gPagesPerRealPage == 0); if (!pages_commit( (void*)(uintptr_t(chunk) + ((run_ind + i) << gPageSize2Pow)), @@ -1612,25 +1621,35 @@ void arena_t::InitChunk(arena_chunk_t* aChunk, size_t aMinCommittedPages) { // Clear the bits for the real header pages. 
size_t i; - for (i = 0; i < gChunkHeaderNumPages - 1; i++) { + for (i = 0; i < gChunkHeaderNumPages - gPagesPerRealPage; i++) { aChunk->mPageMap[i].bits = 0; } - mStats.committed += gChunkHeaderNumPages - 1; + mStats.committed += gChunkHeaderNumPages - gPagesPerRealPage; // Decommit the last header page (=leading page) as a guard. - pages_decommit((void*)(uintptr_t(aChunk) + (i << gPageSize2Pow)), gPageSize); - aChunk->mPageMap[i++].bits = CHUNK_MAP_DECOMMITTED; + MOZ_ASSERT(i % gPagesPerRealPage == 0); + pages_decommit((void*)(uintptr_t(aChunk) + (i << gPageSize2Pow)), + gRealPageSize); + for (; i < gChunkHeaderNumPages; i++) { + aChunk->mPageMap[i].bits = CHUNK_MAP_DECOMMITTED; + } // If MALLOC_DECOMMIT is enabled then commit only the pages we're about to // use. Otherwise commit all of them. #ifdef MALLOC_DECOMMIT - size_t n_fresh_pages = + // The number of usable pages in the chunk, in other words, the total number + // of pages in the chunk, minus the number of pages in the chunk header + // (including the guard page at the beginning of the chunk), and the number of + // pages for the guard page at the end of the chunk. + size_t chunk_usable_pages = + gChunkNumPages - gChunkHeaderNumPages - gPagesPerRealPage; + size_t n_fresh_pages = PAGES_PER_REAL_PAGE_CEILING( aMinCommittedPages + - ExtraCommitPages( - aMinCommittedPages, - gChunkNumPages - gChunkHeaderNumPages - aMinCommittedPages - 1); + ExtraCommitPages(aMinCommittedPages, + chunk_usable_pages - aMinCommittedPages)); #else - size_t n_fresh_pages = gChunkNumPages - 1 - gChunkHeaderNumPages; + size_t n_fresh_pages = + gChunkNumPages - gPagesPerRealPage - gChunkHeaderNumPages; #endif // The committed pages are marked as Fresh. Our caller, SplitRun will update @@ -1644,12 +1663,13 @@ void arena_t::InitChunk(arena_chunk_t* aChunk, size_t aMinCommittedPages) { #ifndef MALLOC_DECOMMIT // If MALLOC_DECOMMIT isn't defined then all the pages are fresh and setup in // the loop above. 
- MOZ_ASSERT(i == gChunkNumPages - 1); + MOZ_ASSERT(i == gChunkNumPages - gPagesPerRealPage); #endif // If MALLOC_DECOMMIT is defined, then this will decommit the remainder of the // chunk plus the last page which is a guard page, if it is not defined it // will only decommit the guard page. + MOZ_ASSERT(i % gPagesPerRealPage == 0); pages_decommit((void*)(uintptr_t(aChunk) + (i << gPageSize2Pow)), (gChunkNumPages - i) << gPageSize2Pow); for (; i < gChunkNumPages; i++) { @@ -1658,11 +1678,13 @@ void arena_t::InitChunk(arena_chunk_t* aChunk, size_t aMinCommittedPages) { // aMinCommittedPages will create a valid run. MOZ_ASSERT(aMinCommittedPages > 0); - MOZ_ASSERT(aMinCommittedPages <= gChunkNumPages - gChunkHeaderNumPages - 1); + MOZ_ASSERT(aMinCommittedPages <= + gChunkNumPages - gChunkHeaderNumPages - gPagesPerRealPage); // Create the run. aChunk->mPageMap[gChunkHeaderNumPages].bits |= gMaxLargeClass; - aChunk->mPageMap[gChunkNumPages - 2].bits |= gMaxLargeClass; + aChunk->mPageMap[gChunkNumPages - gPagesPerRealPage - 1].bits |= + gMaxLargeClass; mRunsAvail.Insert(&aChunk->mPageMap[gChunkHeaderNumPages]); } @@ -1687,9 +1709,10 @@ bool arena_t::RemoveChunk(arena_chunk_t* aChunk) { // Count the number of madvised/fresh pages and update the stats. size_t madvised = 0; size_t fresh = 0; - for (size_t i = gChunkHeaderNumPages; i < gChunkNumPages - 1; i++) { - // There must not be any pages that are not fresh, madvised, decommitted - // or dirty. + for (size_t i = gChunkHeaderNumPages; i < gChunkNumPages - gPagesPerRealPage; + i++) { + // There must not be any pages that are not fresh, madvised, decommitted or + // dirty. 
MOZ_ASSERT(aChunk->mPageMap[i].bits & (CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED | CHUNK_MAP_DIRTY)); MOZ_ASSERT((aChunk->mPageMap[i].bits & CHUNK_MAP_BUSY) == 0); @@ -1711,7 +1734,7 @@ bool arena_t::RemoveChunk(arena_chunk_t* aChunk) { #endif mStats.mapped -= kChunkSize; - mStats.committed -= gChunkHeaderNumPages - 1; + mStats.committed -= gChunkHeaderNumPages - gPagesPerRealPage; return true; } @@ -2024,7 +2047,7 @@ ArenaPurgeResult arena_t::Purge(PurgeCondition aCond, PurgeStats& aStats) { if (!continue_purge_chunk || !continue_purge_arena) { // We're going to stop purging here so update the chunk's bookkeeping. - purge_info.FinishPurgingInChunk(true); + purge_info.FinishPurgingInChunk(true, continue_purge_chunk); purge_info.mArena.mIsPurgePending = false; } } // MaybeMutexAutoLock @@ -2086,7 +2109,7 @@ bool arena_t::PurgeInfo::FindDirtyPages(bool aPurgedOnce) { if (mChunk->mNumDirty == 0 || mChunk->mDying) { // Add the chunk to the mChunksMAdvised list if it's had at least one // madvise. - FinishPurgingInChunk(aPurgedOnce); + FinishPurgingInChunk(aPurgedOnce, false); return false; } @@ -2101,8 +2124,12 @@ bool arena_t::PurgeInfo::FindDirtyPages(bool aPurgedOnce) { // On the other hand: // * Now accessing those pages will require either pages_commit() or a page // fault to ensure they're available. 
- MOZ_ALWAYS_TRUE(ScanForFirstDirtyPage()); - MOZ_ALWAYS_TRUE(ScanForLastDirtyPage()); + do { + if (!ScanForFirstDirtyPage()) { + FinishPurgingInChunk(aPurgedOnce, false); + return false; + } + } while (!ScanForLastDirtyPage()); MOZ_ASSERT(mFreeRunInd >= gChunkHeaderNumPages); MOZ_ASSERT(mFreeRunInd <= mDirtyInd); @@ -2111,34 +2138,31 @@ bool arena_t::PurgeInfo::FindDirtyPages(bool aPurgedOnce) { MOZ_ASSERT(mDirtyLen != 0); MOZ_ASSERT(mDirtyLen <= mFreeRunLen); MOZ_ASSERT(mDirtyInd + mDirtyLen <= mFreeRunInd + mFreeRunLen); + MOZ_ASSERT(mDirtyInd % gPagesPerRealPage == 0); + MOZ_ASSERT(mDirtyLen % gPagesPerRealPage == 0); // Count the number of dirty pages and clear their bits. mDirtyNPages = 0; for (size_t i = 0; i < mDirtyLen; i++) { size_t& bits = mChunk->mPageMap[mDirtyInd + i].bits; - - // We must not find any busy pages because this chunk shouldn't be in the - // dirty list. - MOZ_ASSERT(!(bits & CHUNK_MAP_BUSY)); - if (bits & CHUNK_MAP_DIRTY) { - MOZ_ASSERT((bits & CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED) == 0); mDirtyNPages++; bits ^= CHUNK_MAP_DIRTY; } } + MOZ_ASSERT(mDirtyNPages > 0); MOZ_ASSERT(mDirtyNPages <= mChunk->mNumDirty); MOZ_ASSERT(mDirtyNPages <= mDirtyLen); + mChunk->mNumDirty -= mDirtyNPages; + mArena.mNumDirty -= mDirtyNPages; + // Mark the run as busy so that another thread freeing memory won't try to // coalesce it. mChunk->mPageMap[mFreeRunInd].bits |= CHUNK_MAP_BUSY; mChunk->mPageMap[FreeRunLastInd()].bits |= CHUNK_MAP_BUSY; - mChunk->mNumDirty -= mDirtyNPages; - mArena.mNumDirty -= mDirtyNPages; - // Before we unlock ensure that no other thread can allocate from these // pages. if (mArena.mSpare != mChunk) { @@ -2152,8 +2176,8 @@ bool arena_t::PurgeInfo::ScanForFirstDirtyPage() { // Scan in two nested loops. The outer loop iterates over runs, and the inner // loop iterates over pages within unallocated runs. 
size_t run_pages; - for (size_t run_idx = mChunk->mDirtyRunHint; run_idx < gChunkNumPages; - run_idx += run_pages) { + for (size_t run_idx = mChunk->mDirtyRunHint; + run_idx < gChunkNumPages - gPagesPerRealPage; run_idx += run_pages) { size_t run_bits = mChunk->mPageMap[run_idx].bits; // We must not find any busy pages because this chunk shouldn't be in // the dirty list. @@ -2186,16 +2210,23 @@ bool arena_t::PurgeInfo::ScanForFirstDirtyPage() { // Scan for dirty pages. for (size_t page_idx = run_idx; page_idx < run_idx + run_pages; page_idx++) { - size_t page_bits = mChunk->mPageMap[page_idx].bits; + size_t& page_bits = mChunk->mPageMap[page_idx].bits; // We must not find any busy pages because this chunk shouldn't be in // the dirty list. MOZ_ASSERT((page_bits & CHUNK_MAP_BUSY) == 0); if (page_bits & CHUNK_MAP_DIRTY) { MOZ_ASSERT((page_bits & CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED) == 0); - mDirtyInd = page_idx; + MOZ_ASSERT(mChunk->mDirtyRunHint <= run_idx); mChunk->mDirtyRunHint = run_idx; - return true; + + if ((page_idx & (gPagesPerRealPage - 1)) == 0) { + mDirtyInd = page_idx; + return true; + } + + // This dirty page isn't aligned and can't be purged. + mPurgeStats.pages_unpurgable++; } } } @@ -2204,15 +2235,26 @@ bool arena_t::PurgeInfo::ScanForFirstDirtyPage() { } bool arena_t::PurgeInfo::ScanForLastDirtyPage() { - for (size_t i = mFreeRunInd + mFreeRunLen - 1; i >= mFreeRunInd; i--) { - size_t bits = mChunk->mPageMap[i].bits; - MOZ_ASSERT((bits & CHUNK_MAP_BUSY) == 0); + mDirtyLen = 0; + for (size_t i = FreeRunLastInd(); i >= mDirtyInd; i--) { + size_t& bits = mChunk->mPageMap[i].bits; if (bits & CHUNK_MAP_DIRTY) { - mDirtyLen = i - mDirtyInd + 1; - return true; + // We must not find any busy pages because this chunk shouldn't be in the + // dirty list. 
+ MOZ_ASSERT(!(bits & CHUNK_MAP_BUSY)); + + if ((i & (gPagesPerRealPage - 1)) == gPagesPerRealPage - 1) { + mDirtyLen = i - mDirtyInd + 1; + return true; + } + + // This dirty page's end isn't aligned with a real page's end. + mPurgeStats.pages_unpurgable++; } } + // Advance the dirty page hint so that the next scan will make progress. + mChunk->mDirtyRunHint = FreeRunLastInd() + 1; return false; } @@ -2277,7 +2319,8 @@ std::pair<bool, arena_chunk_t*> arena_t::PurgeInfo::UpdatePagesAndCounts() { // A dying chunk doesn't need to be coaleased, it will already have one // large run. MOZ_ASSERT(mFreeRunInd == gChunkHeaderNumPages && - mFreeRunLen == gChunkNumPages - gChunkHeaderNumPages - 1); + mFreeRunLen == + gChunkNumPages - gChunkHeaderNumPages - gPagesPerRealPage); return std::make_pair(false, mChunk); } @@ -2300,7 +2343,8 @@ std::pair<bool, arena_chunk_t*> arena_t::PurgeInfo::UpdatePagesAndCounts() { return std::make_pair(mChunk->mNumDirty != 0, chunk_to_release); } -void arena_t::PurgeInfo::FinishPurgingInChunk(bool aAddToMAdvised) { +void arena_t::PurgeInfo::FinishPurgingInChunk(bool aAddToMAdvised, + bool aAddToDirty) { // If there's no more purge activity for this chunk then finish up while // we still have the lock. MOZ_ASSERT(mChunk->mIsPurging); @@ -2317,7 +2361,7 @@ void arena_t::PurgeInfo::FinishPurgingInChunk(bool aAddToMAdvised) { return; } - if (mChunk->mNumDirty != 0) { + if (mChunk->mNumDirty != 0 && aAddToDirty) { // Put the semi-processed chunk on the front of the queue so that it is // the first chunk processed next time. mArena.mChunksDirty.pushFront(mChunk); @@ -2346,7 +2390,7 @@ size_t arena_t::TryCoalesce(arena_chunk_t* aChunk, size_t run_ind, MOZ_ASSERT(size == run_pages << gPageSize2Pow); // Try to coalesce forward. 
- if (run_ind + run_pages < gChunkNumPages - 1 && + if (run_ind + run_pages < gChunkNumPages - gPagesPerRealPage && (aChunk->mPageMap[run_ind + run_pages].bits & (CHUNK_MAP_ALLOCATED | CHUNK_MAP_BUSY)) == 0) { size_t nrun_size = @@ -3698,7 +3742,7 @@ void* arena_t::PallocHuge(size_t aSize, size_t aAlignment, bool aZero) { // We're going to configure guard pages in the region between the // page-aligned size and the chunk-aligned size, so if those are the same // then we need to force that region into existence. - csize = CHUNK_CEILING(aSize + gPageSize); + csize = CHUNK_CEILING(aSize + gRealPageSize); if (csize < aSize) { // size is large enough to cause size_t wrap-around. return nullptr; @@ -3716,7 +3760,8 @@ void* arena_t::PallocHuge(size_t aSize, size_t aAlignment, bool aZero) { ExtentAlloc::dealloc(node); return nullptr; } - psize = PAGE_CEILING(aSize); + psize = REAL_PAGE_CEILING(aSize); + MOZ_ASSERT(psize < csize); #ifdef MOZ_DEBUG if (aZero) { chunk_assert_zero(ret, psize); @@ -3772,7 +3817,7 @@ void* arena_t::RallocHuge(void* aPtr, size_t aSize, size_t aOldSize) { // Avoid moving the allocation if the size class would not change. if (aOldSize > gMaxLargeClass && CHUNK_CEILING(aSize + gPageSize) == CHUNK_CEILING(aOldSize + gPageSize)) { - size_t psize = PAGE_CEILING(aSize); + size_t psize = REAL_PAGE_CEILING(aSize); if (aSize < aOldSize) { MaybePoison((void*)((uintptr_t)aPtr + aSize), aOldSize - aSize); } @@ -3859,7 +3904,7 @@ static void huge_dalloc(void* aPtr, arena_t* aArena) { MOZ_RELEASE_ASSERT(node->mArenaId == node->mArena->mId); huge.Remove(node); - mapped = CHUNK_CEILING(node->mSize + gPageSize); + mapped = CHUNK_CEILING(node->mSize + gRealPageSize); huge_allocated -= node->mSize; huge_mapped -= mapped; huge_operations++; @@ -3893,14 +3938,15 @@ static bool malloc_init_hard() { // We assume that the page size is a power of 2. 
MOZ_ASSERT(IsPowerOfTwo(page_size)); #ifdef MALLOC_STATIC_PAGESIZE - if (gPageSize % page_size) { + if (gRealPageSize % page_size) { _malloc_message( _getprogname(), "Compile-time page size does not divide the runtime one.\n"); MOZ_CRASH(); } #else - gRealPageSize = gPageSize = page_size; + gPageSize = page_size; + gRealPageSize = page_size; #endif // Get runtime configuration. @@ -3964,16 +4010,30 @@ static bool malloc_init_hard() { break; # ifndef MALLOC_STATIC_PAGESIZE case 'P': - MOZ_ASSERT(gPageSize >= 4_KiB); + MOZ_ASSERT(gPageSize >= 1_KiB); MOZ_ASSERT(gPageSize <= 64_KiB); prefix_arg = prefix_arg ? prefix_arg : 1; gPageSize <<= prefix_arg; // We know that if the shift causes gPageSize to be zero then it's // because it shifted all the bits off. We didn't start with zero. // Therefore if gPageSize is out of bounds we set it to 64KiB. - if (gPageSize < 4_KiB || gPageSize > 64_KiB) { + if (gPageSize < 1_KiB || gPageSize > 64_KiB) { gPageSize = 64_KiB; } + // We also limit gPageSize to be no larger than gRealPageSize, there's + // no reason to support this. + if (gPageSize > gRealPageSize) { + gPageSize = gRealPageSize; + } + break; + case 'p': + MOZ_ASSERT(gPageSize >= 1_KiB); + MOZ_ASSERT(gPageSize <= 64_KiB); + prefix_arg = prefix_arg ? prefix_arg : 1; + gPageSize >>= prefix_arg; + if (gPageSize < 1_KiB) { + gPageSize = 1_KiB; + } break; # endif #endif @@ -3997,6 +4057,7 @@ static bool malloc_init_hard() { } } + MOZ_ASSERT(gPageSize <= gRealPageSize); #ifndef MALLOC_STATIC_PAGESIZE DefineGlobals(); #endif @@ -4436,6 +4497,10 @@ static size_t hard_purge_chunk(arena_chunk_t* aChunk) { // We could use mincore to find out which pages are actually // present, but it's not clear that's better. if (npages > 0) { + // i and npages should be aligned because they needed to be for the + // purge code that set CHUNK_MAP_MADVISED. 
+ MOZ_ASSERT((i % gPagesPerRealPage) == 0); + MOZ_ASSERT((npages % gPagesPerRealPage) == 0); pages_decommit(((char*)aChunk) + (i << gPageSize2Pow), npages << gPageSize2Pow); (void)pages_commit(((char*)aChunk) + (i << gPageSize2Pow), diff --git a/memory/build/mozjemalloc_profiling.h b/memory/build/mozjemalloc_profiling.h @@ -26,6 +26,10 @@ struct PurgeStats { // The total number of pages that were cleaned (includes previously an pages). size_t pages_total = 0; + // The number of pages that can't be purged because of logical/hardware + // page alignment. + size_t pages_unpurgable = 0; + size_t system_calls = 0; size_t chunks = 0; diff --git a/tools/profiler/core/memory_markers.cpp b/tools/profiler/core/memory_markers.cpp @@ -29,6 +29,8 @@ struct PurgeArenaMarker : mozilla::BaseMarkerType<PurgeArenaMarker> { MS::Format::Integer}, {"pages_clean", MS::InputType::Uint32, "Number of clean pages amoung dirty pages cleaned", MS::Format::Integer}, + {"pages_unpurgable", MS::InputType::Uint32, + "Number of dirty pages skipped due to alignment", MS::Format::Integer}, {"syscalls", MS::InputType::Uint32, "Number of system calls", MS::Format::Integer}, {"chunks", MS::InputType::Uint32, "Number of chunks processed", @@ -38,8 +40,8 @@ struct PurgeArenaMarker : mozilla::BaseMarkerType<PurgeArenaMarker> { static void StreamJSONMarkerData( mozilla::baseprofiler::SpliceableJSONWriter& aWriter, uint32_t aId, const String8View& aLabel, const String8View& aCaller, - uint32_t aPagesDirty, uint32_t aPagesTotal, uint32_t aSyscalls, - uint32_t aChunks, const String8View& aResult) { + uint32_t aPagesDirty, uint32_t aPagesTotal, uint32_t aPagesUnpurgable, + uint32_t aSyscalls, uint32_t aChunks, const String8View& aResult) { aWriter.IntProperty("id", aId); aWriter.StringProperty("label", aLabel); aWriter.StringProperty("caller", aCaller); @@ -48,6 +50,9 @@ struct PurgeArenaMarker : mozilla::BaseMarkerType<PurgeArenaMarker> { if (pages_clean) {
aWriter.IntProperty("pages_clean", aPagesTotal - aPagesDirty); } + if (aPagesUnpurgable) { + aWriter.IntProperty("pages_unpurgable", aPagesUnpurgable); + } aWriter.IntProperty("syscalls", aSyscalls); aWriter.IntProperty("chunks", aChunks); aWriter.StringProperty("result", aResult); @@ -84,8 +89,9 @@ class GeckoProfilerMallocCallbacks : public MallocProfilerCallbacks { PurgeArenaMarker, aStats.arena_id, ProfilerString8View::WrapNullTerminatedString(aStats.arena_label), ProfilerString8View::WrapNullTerminatedString(aStats.caller), - aStats.pages_dirty, aStats.pages_total, aStats.system_calls, - aStats.chunks, ProfilerString8View::WrapNullTerminatedString(result)); + aStats.pages_dirty, aStats.pages_total, aStats.pages_unpurgable, + aStats.system_calls, aStats.chunks, + ProfilerString8View::WrapNullTerminatedString(result)); } }; } // namespace profiler