[ tor-browser ].git.dasho

commit 4f2aa402bb8f89811d987e0a31d36314906de64e
parent 3f0330154030da93b334cf851336bbff937dbc35
Author: Andrew Osmond <aosmond@gmail.com>
Date:   Wed, 29 Oct 2025 17:48:51 +0000

Bug 1996880 - Rework GPUProcessManager::EnsureGPUReady to better handles failures. r=gfx-reviewers,lsalzman

This patch reduces the complexity in EnsureGPUReady to account for
different failure scenarios that are not properly handled with the
current logic.

We can fail to initialize a GPU process because:
1) We are already in shutdown.
2) The initial launch request itself fails.
3) The launch response fails.
4) The GPUChild actor fails its first sync IPDL call.

We were failing to account for the fact that 4) can both fail, and not
disable the GPU process to allow for a retry of the launch.

Now instead we simplify the logic to:
1) Early exit for the most common success cases.
2) Only check for shutdown once after that.
3) Reduce the necessary logic, trusting (with asserts) that the
   expected methods will be called to set our state correctly.
4) Make disabling WebRender without restarting the GPU process perform
   the necessary synchronization inline, since it is going to happen
   shortly anyways due to the config change and compositor resets.

Differential Revision: https://phabricator.services.mozilla.com/D270555

Diffstat:
M gfx/ipc/GPUChild.cpp  | 5 ++---
M gfx/ipc/GPUChild.h  | 6 ++----
M gfx/ipc/GPUProcessManager.cpp  | 107 ++++++++++++++++++++++++++++---------------------------------------------------
M gfx/ipc/GPUProcessManager.h  | 2 +-

4 files changed, 43 insertions(+), 77 deletions(-)
diff --git a/gfx/ipc/GPUChild.cpp b/gfx/ipc/GPUChild.cpp
@@ -85,12 +85,12 @@ void GPUChild::OnVarChanged(const nsTArray<GfxVarUpdate>& aVar) {
   SendUpdateVar(aVar);
 }
 
-bool GPUChild::EnsureGPUReady() {
+bool GPUChild::EnsureGPUReady(bool aForceSync /* = false */) {
   // On our initial process launch, we want to block on the GetDeviceStatus
   // message. Additionally, we may have updated our compositor configuration
   // through the gfxVars after fallback, in which case we want to ensure the
   // GPU process has handled any updates before creating compositor sessions.
-  if (mGPUReady && !mWaitForVarUpdate) {
+  if (mGPUReady && !aForceSync) {
     return true;
   }
 
@@ -107,7 +107,6 @@ bool GPUChild::EnsureGPUReady() {
     mGPUReady = true;
   }
 
-  mWaitForVarUpdate = false;
   return true;
 }
 
diff --git a/gfx/ipc/GPUChild.h b/gfx/ipc/GPUChild.h
@@ -35,10 +35,9 @@ class GPUChild final : public mozilla::ipc::CrashReporterHelper<GPUChild>,
 
   void Init();
 
-  bool IsGPUReady() const { return mGPUReady && !mWaitForVarUpdate; }
+  bool IsGPUReady() const { return mGPUReady; }
 
-  bool EnsureGPUReady();
-  void MarkWaitForVarUpdate() { mWaitForVarUpdate = true; }
+  bool EnsureGPUReady(bool aForceSync = false);
 
   // Notifies that an unexpected GPU process shutdown has been noticed by a
   // different IPDL actor, and the GPU process is being torn down as a result.
@@ -112,7 +111,6 @@ class GPUChild final : public mozilla::ipc::CrashReporterHelper<GPUChild>,
   GPUProcessHost* mHost;
   UniquePtr<MemoryReportRequestHost> mMemoryReportRequest;
   bool mGPUReady;
-  bool mWaitForVarUpdate = false;
   bool mUnexpectedShutdown = false;
   // Whether a paired minidump has already been generated, meaning we do not
   // need to create a crash report in ActorDestroy().
diff --git a/gfx/ipc/GPUProcessManager.cpp b/gfx/ipc/GPUProcessManager.cpp
@@ -260,13 +260,13 @@ bool GPUProcessManager::IsProcessStable(const TimeStamp& aNow) {
   return mProcessStable;
 }
 
-bool GPUProcessManager::LaunchGPUProcess() {
+nsresult GPUProcessManager::LaunchGPUProcess() {
   if (mProcess) {
-    return true;
+    return NS_OK;
   }
 
   if (AppShutdown::IsInOrBeyond(ShutdownPhase::XPCOMShutdown)) {
-    return false;
+    return NS_ERROR_ILLEGAL_DURING_SHUTDOWN;
   }
 
   // Start listening for pref changes so we can
@@ -291,9 +291,10 @@ bool GPUProcessManager::LaunchGPUProcess() {
   mProcess = new GPUProcessHost(this);
   if (!mProcess->Launch(std::move(extraArgs))) {
     DisableGPUProcess("Failed to launch GPU process");
+    return NS_ERROR_FAILURE;
   }
 
-  return true;
+  return NS_OK;
 }
 
 bool GPUProcessManager::IsGPUProcessLaunching() {
@@ -402,74 +403,42 @@ bool GPUProcessManager::IsGPUReady() const {
 nsresult GPUProcessManager::EnsureGPUReady() {
   MOZ_ASSERT(NS_IsMainThread());
 
-  // We only wait to fail with NS_ERROR_ILLEGAL_DURING_SHUTDOWN if we would
-  // cause a state change or if we are in the middle of relaunching the GPU
-  // process.
-  bool inShutdown = AppShutdown::IsInOrBeyond(ShutdownPhase::XPCOMShutdown);
-
-  while (true) {
-    // Launch the GPU process if it is enabled but hasn't been (re-)launched
-    // yet.
-    if (!mProcess && gfxConfig::IsEnabled(Feature::GPU_PROCESS)) {
-      if (NS_WARN_IF(inShutdown)) {
-        return NS_ERROR_ILLEGAL_DURING_SHUTDOWN;
-      }
-
-      if (!LaunchGPUProcess()) {
-        MOZ_DIAGNOSTIC_ASSERT(!gfxConfig::IsEnabled(Feature::GPU_PROCESS));
-        return NS_OK;
-      }
-    }
+  // Common case is we already have a GPU process.
+  if (mProcess && mProcess->IsConnected() && mGPUChild) {
+    MOZ_DIAGNOSTIC_ASSERT(mGPUChild->IsGPUReady());
+    return NS_OK;
+  }
 
-    if (mProcess && !mProcess->IsConnected()) {
-      if (NS_WARN_IF(inShutdown)) {
-        return NS_ERROR_ILLEGAL_DURING_SHUTDOWN;
-      }
-
-      if (!mProcess->WaitForLaunch()) {
-        // If this fails, we should have fired OnProcessLaunchComplete and
-        // removed the process. The algorithm either allows us another attempt
-        // or it will have disabled the GPU process.
-        MOZ_ASSERT(!mProcess);
-        MOZ_ASSERT(!mGPUChild);
-        continue;
-      }
-    }
+  // Next most common case is that we are compositing in the parent process.
+  if (!gfxConfig::IsEnabled(Feature::GPU_PROCESS)) {
+    MOZ_DIAGNOSTIC_ASSERT(!mProcess);
+    MOZ_DIAGNOSTIC_ASSERT(!mGPUChild);
+    return NS_OK;
+  }
 
-    // If we don't have a connected process by this stage, we must have
-    // explicitly disabled the GPU process.
-    if (!mGPUChild) {
-      break;
-    }
+  // We aren't ready and in shutdown, we should just abort.
+  if (AppShutdown::IsInOrBeyond(ShutdownPhase::XPCOMShutdown)) {
+    return NS_ERROR_ILLEGAL_DURING_SHUTDOWN;
+  }
 
-    // We already call this in OnProcessLaunchComplete that is called during
-    // WaitForLaunch, but there could be more gfxVar/gfxConfig updates that need
-    // to be sent over, if we are calling this well after the process has
-    // launched. This is because changes can occur due to device resets without
-    // a process crash.
-    if (mGPUChild->EnsureGPUReady()) {
+  do {
+    // Launch the GPU process if it is enabled but hasn't been (re-)launched
+    // yet, and wait for it to complete the handshake. As part of WaitForLaunch,
+    // we know that OnProcessLaunchComplete has been called. If it succeeds,
+    // we know that mGPUChild has been set and we already waited for it to be
+    // ready. If it fails, then we know that the GPU process must have been
+    // destroyed and/or disabled.
+    nsresult rv = LaunchGPUProcess();
+    if (NS_SUCCEEDED(rv) && mProcess->WaitForLaunch() && mGPUChild) {
+      MOZ_DIAGNOSTIC_ASSERT(mGPUChild->IsGPUReady());
       return NS_OK;
     }
 
-    // If the initialization above fails, we likely have a GPU process teardown
-    // waiting in our message queue (or will soon). OnProcessUnexpectedShutdown
-    // will explicitly teardown the process and prevent any pending events from
-    // triggering our fallback logic again. It will allow a number of attempts
-    // in the same configuration in case we are failing for intermittent
-    // reasons, before first falling back from acceleration, and eventually
-    // disabling the GPU process altogether.
-    OnProcessUnexpectedShutdown(mProcess);
-  }
-
-  MOZ_DIAGNOSTIC_ASSERT(!gfxConfig::IsEnabled(Feature::GPU_PROCESS));
+    MOZ_RELEASE_ASSERT(rv != NS_ERROR_ILLEGAL_DURING_SHUTDOWN);
+    MOZ_RELEASE_ASSERT(!mProcess);
+    MOZ_RELEASE_ASSERT(!mGPUChild);
+  } while (gfxConfig::IsEnabled(Feature::GPU_PROCESS));
 
-  // This is the first time we are trying to use the in-process compositor.
-  if (mTotalProcessAttempts == 0) {
-    if (NS_WARN_IF(inShutdown)) {
-      return NS_ERROR_ILLEGAL_DURING_SHUTDOWN;
-    }
-    ResetProcessStable();
-  }
   return NS_OK;
 }
 
@@ -779,12 +748,12 @@ bool GPUProcessManager::DisableWebRenderConfig(wr::WebRenderError aError,
 
   // If we still have the GPU process, and we fallback to a new configuration
   // that prefers to have the GPU process, reset the counter. Because we
-  // updated the gfxVars, we want to flag the GPUChild to wait for the update
-  // to be processed before creating new compositor sessions, otherwise we risk
-  // them being out of sync with the content/parent processes.
+  // updated the gfxVars, we call GPUChild::EnsureGPUReady to force us to wait
+  // for the update to be processed before creating new compositor sessions.
+  // Otherwise we risk them being out of sync with the content/parent processes.
   if (wantRestart && mProcess && mGPUChild) {
     mUnstableProcessAttempts = 1;
-    mGPUChild->MarkWaitForVarUpdate();
+    mGPUChild->EnsureGPUReady(/* aForceSync */ true);
   }
 
   return true;
diff --git a/gfx/ipc/GPUProcessManager.h b/gfx/ipc/GPUProcessManager.h
@@ -92,7 +92,7 @@ class GPUProcessManager final : public GPUProcessHost::Listener {
   ~GPUProcessManager();
 
   // If not using a GPU process, launch a new GPU process asynchronously.
-  bool LaunchGPUProcess();
+  nsresult LaunchGPUProcess();
   bool IsGPUProcessLaunching();
 
   // Ensure that GPU-bound methods can be used. If no GPU process is being

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	gfx/ipc/GPUChild.cpp	\|	5	++---
M	gfx/ipc/GPUChild.h	\|	6	++----
M	gfx/ipc/GPUProcessManager.cpp	\|	107	++++++++++++++++++++++++++++---------------------------------------------------
M	gfx/ipc/GPUProcessManager.h	\|	2	+-