commit 2e959490d411967d2f1f87e272e052c8325f4dfa
parent 1358572ccf4313afe5b8b3b45df9f4b26ba6b6ff
Author: Robin Steuber <bytesized@mozilla.com>
Date: Wed, 8 Oct 2025 21:04:33 +0000
Bug 1984458 - Make `UpdateService` check for persistent access failures r=cdupuis,nrishel
`UpdateService` already handles most types of errors with retry and fallback logic, but cases where `update.status` is permanently not accessible can lead to updates being delayed indefinitely.
This change adds logic to check for this situation. The check increments a counter stored as a user pref when attempts to read/write `update.status` fails due to file access or locking. When the number of failures exceeds a configurable max and the `update.status` file's last modified time is greater than a maximum age, the user will be notified that there is something wrong with their installation, and they should manually reinstall.
This additionally propogates errors a bit further through the update system than they previously were. This has two purposes: (1) Allow us to notice when state writing fails so we can notify the user, and (2) allow AUS to notice the update failure so that it can react to it properly instead of continuing, often screwing up our attempts to show an update failed notification.
Differential Revision: https://phabricator.services.mozilla.com/D247857
Diffstat:
3 files changed, 152 insertions(+), 13 deletions(-)
diff --git a/toolkit/mozapps/update/UpdateService.sys.mjs b/toolkit/mozapps/update/UpdateService.sys.mjs
@@ -84,6 +84,11 @@ const PREF_APP_UPDATE_ELEVATE_NEVER = "app.update.elevate.never";
const PREF_APP_UPDATE_ELEVATE_VERSION = "app.update.elevate.version";
const PREF_APP_UPDATE_ELEVATE_ATTEMPTS = "app.update.elevate.attempts";
const PREF_APP_UPDATE_ELEVATE_MAXATTEMPTS = "app.update.elevate.maxAttempts";
+const PREF_APP_UPDATE_LOCKEDOUT_COUNT = "app.update.lockedOut.count";
+const PREF_APP_UPDATE_LOCKEDOUT_DEBOUNCETIME =
+ "app.update.lockedOut.debounceTimeMs";
+const PREF_APP_UPDATE_LOCKEDOUT_MAXCOUNT = "app.update.lockedOut.maxCount";
+const PREF_APP_UPDATE_LOCKEDOUT_MAXAGE = "app.update.lockedOut.maxAgeMs";
const PREF_APP_UPDATE_LANGPACK_ENABLED = "app.update.langpack.enabled";
const PREF_APP_UPDATE_LANGPACK_TIMEOUT = "app.update.langpack.timeout";
const PREF_APP_UPDATE_NOTIFYDURINGDOWNLOAD = "app.update.notifyDuringDownload";
@@ -306,6 +311,8 @@ let gOnlyDownloadUpdatesThisSession = false;
// This will be the backing for `nsIApplicationUpdateService.currentState`
var gUpdateState = Ci.nsIApplicationUpdateService.STATE_IDLE;
+let gLastLockoutDebouncedAt = 0;
+
/**
* Simple container and constructor for a Promise and its resolve function.
*/
@@ -1043,6 +1050,92 @@ function getDownloadingUpdateDir() {
}
/**
+ * If there is a problem with accessing the status file, it may be a transient
+ * issue, such as another process checking for updates holding a lock,
+ * or it may be a persistent problem that needs attention.
+ * This function tries to determine if we should prompt the user to fix the
+ * issue.
+ *
+ * @param file
+ * An nsIFile object for the file with the issue
+ * @param ex
+ * The Exception object that was thrown when attempting to access the
+ * file.
+ */
+function onStateAccessFailure(file, ex) {
+ LOG("onStateAccessFailure. Ex: " + ex);
+ if (
+ ex.result == Cr.NS_ERROR_FILE_ACCESS_DENIED ||
+ ex.result == Cr.NS_ERROR_FILE_IS_LOCKED
+ ) {
+ // Looks like we can't access the file. If it hasn't changed in
+ // a long time, notify the user that we are persistently unable to update.
+ const oneMinMs = 60 * 1000;
+ const oneDayMs = 24 * 60 * oneMinMs;
+ const now = Date.now();
+
+ // A single update check can attempt to read and write the update state
+ // multiple times. We want `lockoutCount` to count the number of times that
+ // an update check fails, not the number of times that we fail to access the
+ // state. To deal with this, we will implement a debouncing period after
+ // each failure.
+ let debounceTimeMs = Services.prefs.getIntPref(
+ PREF_APP_UPDATE_LOCKEDOUT_DEBOUNCETIME,
+ 5 * oneMinMs
+ );
+ debounceTimeMs = Math.min(debounceTimeMs, oneDayMs);
+ const debounceEnd = gLastLockoutDebouncedAt + debounceTimeMs;
+ if (now < debounceEnd) {
+ LOG(`onStateAccessFailure: debounced! (${debounceEnd - now}ms left)`);
+ return;
+ }
+ gLastLockoutDebouncedAt = now;
+
+ // Not really the age, just the interval since last modified.
+ const fileAgeMs = now - file.lastModifiedTime;
+
+ let lockoutCount = Services.prefs.getIntPref(
+ PREF_APP_UPDATE_LOCKEDOUT_COUNT,
+ 0
+ );
+ lockoutCount += 1;
+ Services.prefs.setIntPref(PREF_APP_UPDATE_LOCKEDOUT_COUNT, lockoutCount);
+
+ let maxLockoutCount = Services.prefs.getIntPref(
+ PREF_APP_UPDATE_LOCKEDOUT_MAXCOUNT,
+ 4
+ );
+ maxLockoutCount = Math.min(maxLockoutCount, 20);
+ LOG(
+ `onStateAccessFailure: lockoutCount is ${lockoutCount}, maxLockoutCount is ${maxLockoutCount}`
+ );
+
+ let maxFileAgeMs = Services.prefs.getIntPref(
+ PREF_APP_UPDATE_LOCKEDOUT_MAXAGE,
+ oneDayMs * 2
+ );
+ maxFileAgeMs = Math.min(maxFileAgeMs, oneDayMs * 14);
+ LOG(
+ `onStateAccessFailure: fileAgeMs = ${fileAgeMs} maxFileAgeMs = ${maxFileAgeMs}`
+ );
+
+ if (lockoutCount >= maxLockoutCount && fileAgeMs >= maxFileAgeMs) {
+ Glean.update.stateWriteFailure.add();
+ // Create an empty Update object for messaging.
+ let update = new Update(null);
+ LOG("onStateAccessFailure: reporting permission issue");
+ Services.obs.notifyObservers(update, "update-error", "bad-perms");
+ Services.prefs.setIntPref(PREF_APP_UPDATE_LOCKEDOUT_COUNT, 0);
+ }
+ }
+}
+
+function onStateAccessSuccess() {
+ LOG("onStateAccessSuccess");
+ Services.prefs.setIntPref(PREF_APP_UPDATE_LOCKEDOUT_COUNT, 0);
+}
+
+/**
* Reads the update state from the update.status file in the specified
* directory.
* @param dir
@@ -1061,16 +1154,31 @@ function readStatusFile(dir) {
* Writes the current update operation/state to a file in the patch
* directory, indicating to the patching system that operations need
* to be performed.
+ *
+ * This function does not throw on errors. It just returns the error.
+ *
* @param dir
* The patch directory where the update.status file should be
* written.
* @param state
* The state value to write.
+ * @returns `null` on success or a JS exception on failure.
*/
function writeStatusFile(dir, state) {
let statusFile = dir.clone();
statusFile.append(FILE_UPDATE_STATUS);
- writeStringToFile(statusFile, state);
+ try {
+ writeStringToFile(statusFile, state);
+ LOG("writeStatusFile - status: " + state + ", path: " + statusFile.path);
+ try {
+ onStateAccessSuccess();
+ } catch {}
+ } catch (ex) {
+ LOG("writeStatusFile failed: " + ex);
+ onStateAccessFailure(statusFile, ex);
+ return ex;
+ }
+ return null;
}
/**
@@ -1081,6 +1189,9 @@ function writeStatusFile(dir, state) {
* the update should be applied. Note that this won't provide protection from
* downgrade of the application for the nightly user case where the application
* version doesn't change.
+ *
+ * This function fails silently.
+ *
* @param dir
* The patch directory where the update.version file should be
* written.
@@ -1091,7 +1202,11 @@ function writeStatusFile(dir, state) {
function writeVersionFile(dir, version) {
let versionFile = dir.clone();
versionFile.append(FILE_UPDATE_VERSION);
- writeStringToFile(versionFile, version);
+ try {
+ writeStringToFile(versionFile, version);
+ } catch (ex) {
+ LOG("writeVersionFile failed: " + ex);
+ }
}
/**
@@ -1425,19 +1540,20 @@ async function cleanupActiveUpdates() {
* written to the file. This function only works with ASCII text.
* @param file An nsIFile indicating what file to write to.
* @param text A string containing the text to write to the file.
- * @return true on success, false on failure.
+ * @throws Errors from file stream will be propagated.
*/
function writeStringToFile(file, text) {
+ let fos = FileUtils.openSafeFileOutputStream(file);
+ text += "\n";
+ fos.write(text, text.length);
+ // Don't use `FileUtils.closeSafeFileOutputStream` because it swallows errors,
+ // which we don't want to do here.
try {
- let fos = FileUtils.openSafeFileOutputStream(file);
- text += "\n";
- fos.write(text, text.length);
- FileUtils.closeSafeFileOutputStream(fos);
- } catch (e) {
- LOG(`writeStringToFile - Failed to write to file: "${file}". Error: ${e}"`);
- return false;
+ fos.QueryInterface(Ci.nsISafeOutputStream);
+ fos.finish();
+ } finally {
+ fos.close();
}
- return true;
}
function readStringFromInputStream(inputStream) {
@@ -6400,8 +6516,14 @@ class Downloader {
}
if (!lazy.UM.internal.readyUpdate) {
- LOG("Downloader:downloadUpdate - Setting status to downloading");
- writeStatusFile(getReadyUpdateDir(), STATE_DOWNLOADING);
+ const error = writeStatusFile(getReadyUpdateDir(), STATE_DOWNLOADING);
+ if (error) {
+ LOG("Downloader:downloadUpdate - Failed to set status to downloading");
+ await cleanupActiveUpdates();
+ return Ci.nsIApplicationUpdateService
+ .DOWNLOAD_FAILURE_CANNOT_WRITE_STATE;
+ }
+ LOG("Downloader:downloadUpdate - Set status to downloading");
}
if (this._patch.state != STATE_DOWNLOADING) {
LOG("Downloader:downloadUpdate - Setting state to downloading");
diff --git a/toolkit/mozapps/update/metrics.yaml b/toolkit/mozapps/update/metrics.yaml
@@ -467,6 +467,20 @@ update:
expires: never
telemetry_mirror: h#UPDATE_SERVICE_MANUALLY_UNINSTALLED_SUBSEQUENT
+ state_write_failure:
+ type: counter
+ description: >
+ Update: Count of the number of times we have shown a notification to the
+ user that they need to manually update because we cannot write to the
+ update status file.
+ bugs:
+ - https://bugzilla.mozilla.org/show_bug.cgi?id=1984458
+ data_reviews:
+ - https://bugzilla.mozilla.org/show_bug.cgi?id=1984458
+ notification_emails:
+ - application-update-telemetry-alerts@mozilla.com
+ expires: never
+
unable_to_apply_external:
type: counter
description: >
diff --git a/toolkit/mozapps/update/nsIUpdateService.idl b/toolkit/mozapps/update/nsIUpdateService.idl
@@ -490,6 +490,9 @@ interface nsIApplicationUpdateService : nsISupports
// The update download cannot be started for a reason that doesn't have a more
// specific failure code.
const long DOWNLOAD_FAILURE_GENERIC = 3;
+ // The update download cannot be started because the update state could not
+ // be written to the update directory.
+ const long DOWNLOAD_FAILURE_CANNOT_WRITE_STATE = 4;
/**
* Starts downloading the update passed. Once the update is downloaded, it