tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 61483b26f03028514ff769c6aab9dcbbe652a7e2
parent 01f977e353a442ef5b29b4eef91a74ca93a353ae
Author: Harshit <hsohaney@mozilla.com>
Date:   Wed, 17 Dec 2025 19:36:34 +0000

Bug 2005776 - Add exponential backoff to backup retries. r=kpatenio,cdupuis

Differential Revision: https://phabricator.services.mozilla.com/D276543

Diffstat:
Mbrowser/app/profile/firefox.js | 2+-
Mbrowser/components/backup/BackupService.sys.mjs | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Mbrowser/components/backup/tests/xpcshell/test_BackupService_retryHeuristic.js | 213+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
3 files changed, 221 insertions(+), 107 deletions(-)

diff --git a/browser/app/profile/firefox.js b/browser/app/profile/firefox.js @@ -3487,7 +3487,7 @@ pref("browser.backup.template.fallback-download.aurora", "https://www.firefox.co pref("browser.backup.template.fallback-download.nightly", "https://www.firefox.com/channel/desktop/?utm_medium=firefox-desktop&utm_source=html-backup"); pref("browser.backup.template.fallback-download.esr", " https://www.firefox.com/download/all/desktop-esr/?utm_medium=firefox-desktop&utm_source=html-backup"); pref("browser.backup.errorCode", 0); -pref("browser.backup.backup-retry-limit", 100); +pref("browser.backup.backup-retry-limit", 10); pref("browser.backup.disabled-on-idle-backup-retry", false); // Limit of number of unremovable staging directories and archives that are // permitted before backup will stop making additional backups. Unremovable diff --git a/browser/components/backup/BackupService.sys.mjs b/browser/components/backup/BackupService.sys.mjs @@ -641,6 +641,26 @@ export class BackupService extends EventTarget { static #errorRetries = 0; /** + * Time to wait (in seconds) until the next backup attempt. + * + * This uses exponential backoff based on the number of consecutive + * failed backup attempts since the last successful backup. + * + * Backoff formula: + * 2^(retryCount) * 60 + * + * Example: + * If 2 backup attempts have failed since the last successful backup, + * the next attempt will occur after: + * + * 2^2 * 60 = 240 seconds (4 minutes) + * + * This differs from minimumTimeBetweenBackupsSeconds, which is used to determine + * the time between successful backups. + */ + static backoffSeconds = () => Math.pow(2, BackupService.#errorRetries) * 60; + + /** * @typedef {object} EnabledStatus * @property {boolean} enabled * True if the feature is enabled. @@ -4434,35 +4454,88 @@ export class BackupService extends EventTarget { } /** - * Calls BackupService.createBackup at the next moment when the event queue - * is not busy with higher priority events. This is intentionally broken out - * into its own method to make it easier to stub out in tests. + * Decide whether we should attempt a backup now. * - * @param {object} [options] - * @param {boolean} [options.deletePreviousBackup] - * @param {string} [options.reason] + * @returns {boolean} */ - createBackupOnIdleDispatch({ deletePreviousBackup = true, reason }) { + shouldAttemptBackup() { let now = Math.floor(Date.now() / 1000); - let errorStateDebugInfo = Services.prefs.getStringPref( + const debugInfoStr = Services.prefs.getStringPref( BACKUP_DEBUG_INFO_PREF_NAME, "" ); - // we retry failing backups every minimumTimeBetweenBackupsSeconds if - // isRetryDisabledOnIdle is true. If isRetryDisabledOnIdle is false, - // we retry on next idle until we hit backupRetryLimit and switch isRetryDisabledOnIdle to true - if ( - lazy.isRetryDisabledOnIdle && - errorStateDebugInfo && - now - JSON.parse(errorStateDebugInfo).lastBackupAttempt < - lazy.minimumTimeBetweenBackupsSeconds - ) { + let parsed = null; + if (debugInfoStr) { + try { + parsed = JSON.parse(debugInfoStr); + } catch (e) { + lazy.logConsole.warn( + "Invalid backup debug-info pref; ignoring and allowing backup attempt.", + e + ); + parsed = null; + } + } + + const lastBackupAttempt = parsed?.lastBackupAttempt; + const hasErroredLastAttempt = Number.isFinite(lastBackupAttempt); + + if (!hasErroredLastAttempt) { + lazy.logConsole.debug( + `There have been no errored last attempts, let's do a backup` + ); + return true; + } + + const secondsSinceLastAttempt = now - lastBackupAttempt; + + if (lazy.isRetryDisabledOnIdle) { + // Let's add a buffer before restarting the retries. Dividing by 2 + // since currently minimumTimeBetweenBackupsSeconds is set to 24 hours + // We want to approximately keep a backup for each day, so let's retry + // in about 12 hours again. + if (secondsSinceLastAttempt < lazy.minimumTimeBetweenBackupsSeconds / 2) { + lazy.logConsole.debug( + `Retrying is disabled, we have to wait for ${lazy.minimumTimeBetweenBackupsSeconds / 2}s to retry` + ); + return false; + } + // Looks like we've waited enough, reset the retry states and try to create + // a backup again. + BackupService.#errorRetries = 0; + Services.prefs.clearUserPref(DISABLED_ON_IDLE_RETRY_PREF_NAME); + + return true; + } + + // Exponential backoff guard, avoids throttling the same error again and again + if (secondsSinceLastAttempt < BackupService.backoffSeconds()) { lazy.logConsole.debug( - `We've already retried in the last ${lazy.minimumTimeBetweenBackupsSeconds}s. Waiting for next valid idleDispatch to try again.` + `backoff: elapsed ${secondsSinceLastAttempt}s < backoff ${BackupService.backoffSeconds()}s` ); + return false; + } + + return true; + } + + /** + * Calls BackupService.createBackup at the next moment when the event queue + * is not busy with higher priority events. This is intentionally broken out + * into its own method to make it easier to stub out in tests. + * + * @param {object} [options] + * @param {boolean} [options.deletePreviousBackup] + * @param {string} [options.reason] + * + * @returns {Promise} A backup promise to hold onto + */ + createBackupOnIdleDispatch({ deletePreviousBackup = true, reason }) { + if (!this.shouldAttemptBackup()) { return Promise.resolve(); } + // Determine path to old backup file const oldBackupFile = this.#_state.lastBackupFileName; const isScheduledBackupsEnabled = lazy.scheduledBackupsPref; @@ -4488,8 +4561,10 @@ export class BackupService extends EventTarget { BackupService.#errorRetries += 1; if (BackupService.#errorRetries > lazy.backupRetryLimit) { - // We've had too many errors with retries, let's only backup on next timestamp Services.prefs.setBoolPref(DISABLED_ON_IDLE_RETRY_PREF_NAME, true); + // Next retry will be 24 hours later (backoffSeconds = 2^(11) * 60s), + // let's just restart our backoff heuristic + BackupService.#errorRetries = 0; Glean.browserBackup.backupThrottled.record(); } } finally { diff --git a/browser/components/backup/tests/xpcshell/test_BackupService_retryHeuristic.js b/browser/components/backup/tests/xpcshell/test_BackupService_retryHeuristic.js @@ -6,6 +6,7 @@ https://creativecommons.org/publicdomain/zero/1.0/ */ ChromeUtils.defineESModuleGetters(this, { BackupError: "resource:///modules/backup/BackupError.mjs", ERRORS: "chrome://browser/content/backup/backup-constants.mjs", + TestUtils: "resource://testing-common/TestUtils.sys.mjs", }); const BACKUP_RETRY_LIMIT_PREF_NAME = "browser.backup.backup-retry-limit"; @@ -18,6 +19,81 @@ const SCHEDULED_BACKUPS_ENABLED_PREF_NAME = "browser.backup.scheduled.enabled"; const BACKUP_DEBUG_INFO_PREF_NAME = "browser.backup.backup-debug-info"; const BACKUP_DEFAULT_LOCATION_PREF_NAME = "browser.backup.location"; +const RETRIES_FOR_TEST = 4; + +async function create_backup_failure_expected_calls( + bs, + callCount, + assertionMsg +) { + assertionMsg = assertionMsg + ? assertionMsg + : `createBackup should be called ${callCount} times`; + + let originalBackoffTime = BackupService.backoffSeconds(); + + bs.createBackupOnIdleDispatch({}); + + // testing that callCount remains the same, skip all the other checks + if (callCount == bs.createBackup.callCount) { + Assert.equal(bs.createBackup.callCount, callCount, assertionMsg); + + return; + } + + // Wait for in progress states to change + // so that the errorRetries can be updated + + await bsInProgressStateUpdate(bs, true); + await bsInProgressStateUpdate(bs, false); + + // propagate prefs + await TestUtils.waitForTick(); + + // have we called createBackup more times than allowed retries? + // if so, the retries should reset and retrying should + // disable calling createBackup again + if (callCount == RETRIES_FOR_TEST + 1) { + Assert.equal( + Glean.browserBackup.backupThrottled.testGetValue().length, + 1, + "backupThrottled telemetry was sent" + ); + + Assert.ok( + Services.prefs.getBoolPref(DISABLED_ON_IDLE_RETRY_PREF_NAME), + "Disable on idle is now enabled - no more retries allowed" + ); + } + // we expect createBackup to be called, but it shouldn't succeed + else { + Assert.equal( + BackupService.backoffSeconds(), + 2 * originalBackoffTime, + "Backoff time should have doubled" + ); + + Assert.ok( + !Services.prefs.getBoolPref(DISABLED_ON_IDLE_RETRY_PREF_NAME), + "Disable on idle is disabled - which means that we can do more retries!" + ); + + Assert.equal( + Glean.browserBackup.backupThrottled.testGetValue(), + null, + "backupThrottled telemetry was not sent yet" + ); + } + + Assert.equal(bs.createBackup.callCount, callCount, assertionMsg); + + Assert.equal( + Services.prefs.getIntPref(BACKUP_ERROR_CODE_PREF_NAME), + ERRORS.UNKNOWN, + "Error code has been set" + ); +} + function bsInProgressStateUpdate(bs, isBackupInProgress) { // Check if already in desired state if (bs.state.backupInProgress === isBackupInProgress) { @@ -47,7 +123,7 @@ add_setup(async () => { TEST_PROFILE_PATH ); Services.prefs.setBoolPref(SCHEDULED_BACKUPS_ENABLED_PREF_NAME, true); - Services.prefs.setIntPref(BACKUP_RETRY_LIMIT_PREF_NAME, 2); + Services.prefs.setIntPref(BACKUP_RETRY_LIMIT_PREF_NAME, RETRIES_FOR_TEST); Services.prefs.setBoolPref(DISABLED_ON_IDLE_RETRY_PREF_NAME, false); setupProfile(); @@ -62,13 +138,13 @@ add_setup(async () => { }); }); -add_task(async function test_retry_limit() { +add_task(async function test_retries_no_backoff() { Services.fog.testResetFOG(); let bs = new BackupService(); let sandbox = sinon.createSandbox(); // Make createBackup fail intentionally - const createBackupFailureStub = sandbox + sandbox .stub(bs, "resolveArchiveDestFolderPath") .rejects(new BackupError("forced failure", ERRORS.UNKNOWN)); @@ -83,62 +159,53 @@ add_task(async function test_retry_limit() { // ensure that there is no error code set Services.prefs.setIntPref(BACKUP_ERROR_CODE_PREF_NAME, ERRORS.NONE); - bs.createBackupOnIdleDispatch({}); - - // #backupInProgress is set to true - await bsInProgressStateUpdate(bs, true); - // #backupInProgress is set to false - await bsInProgressStateUpdate(bs, false); - - // wait a tick for the prefs to update - await new Promise(executeSoon); - - Assert.equal( - bs.createBackup.callCount, - i + 1, - "createBackup was called on idle" - ); - Assert.equal( - Services.prefs.getIntPref(BACKUP_ERROR_CODE_PREF_NAME), - ERRORS.UNKNOWN, - "Error code has been set" + // Set the lastBackupAttempt to the current backoff threshold, to avoid hitting + // the exponential backoff clause for this test. + Services.prefs.setStringPref( + BACKUP_DEBUG_INFO_PREF_NAME, + JSON.stringify({ + lastBackupAttempt: + Math.floor(Date.now() / 1000) - (BackupService.backoffSeconds() + 1), + errorCode: ERRORS.UNKNOWN, + lastRunStep: 0, + }) ); - if (i < n) { - Assert.equal( - Glean.browserBackup.backupThrottled.testGetValue(), - null, - "backupThrottled telemetry was not sent yet" - ); - } else { - // On this call, createBackup _was_ called, but the next call will be - // ignored. However, the telemetry ping is sent now. - Assert.equal( - Glean.browserBackup.backupThrottled.testGetValue().length, - 1, - "backupThrottled telemetry was sent" - ); - } + await create_backup_failure_expected_calls(bs, i + 1); } // check if it switched to no longer creating backups on idle - const previousCalls = bs.createBackup.callCount; + await create_backup_failure_expected_calls( + bs, + bs.createBackup.callCount, + "createBackup was not called since we hit the retry limit" + ); - bs.createBackupOnIdleDispatch({}); + sandbox.restore(); +}); - // wait a tick for the pref to update - await new Promise(executeSoon); +add_task(async function test_exponential_backoff() { + Services.fog.testResetFOG(); - Assert.equal( - bs.createBackup.callCount, - previousCalls, - "createBackup was not called again after hitting the retry limit" - ); - Assert.ok( - Services.prefs.getBoolPref(DISABLED_ON_IDLE_RETRY_PREF_NAME), - "Disable on idle has been enabled" + let bs = new BackupService(); + let sandbox = sinon.createSandbox(); + const createBackupFailureStub = sandbox + .stub(bs, "resolveArchiveDestFolderPath") + .rejects(new BackupError("forced failure", ERRORS.UNKNOWN)); + + sandbox.stub(ChromeUtils, "idleDispatch").callsFake(callback => callback()); + sandbox.spy(bs, "createBackup"); + + Services.prefs.setIntPref(BACKUP_ERROR_CODE_PREF_NAME, ERRORS.NONE); + Services.prefs.setStringPref( + BACKUP_DEBUG_INFO_PREF_NAME, + JSON.stringify({ + lastBackupAttempt: + Math.floor(Date.now() / 1000) - (BackupService.backoffSeconds() + 1), + errorCode: ERRORS.UNKNOWN, + lastRunStep: 0, + }) ); - Services.fog.testResetFOG(); Services.prefs.setIntPref(MINIMUM_TIME_BETWEEN_BACKUPS_SECONDS_PREF_NAME, 0); registerCleanupFunction(() => { Services.prefs.clearUserPref( @@ -146,36 +213,12 @@ add_task(async function test_retry_limit() { ); }); - // add a buffer between lastAttempt and now - // eslint-disable-next-line mozilla/no-arbitrary-setTimeout - await new Promise(resolve => setTimeout(resolve, 10)); + await create_backup_failure_expected_calls(bs, 1); - bs.createBackupOnIdleDispatch({}); - - // #backupInProgress is set to true - await bsInProgressStateUpdate(bs, true); - - Assert.equal( - bs.createBackup.callCount, - previousCalls + 1, - "createBackup was called again" - ); - - Assert.equal( - Glean.browserBackup.backupThrottled.testGetValue(), - null, - "backupThrottled telemetry was not sent after resuming backups" - ); - - // #backupInProgress is set to false - await bsInProgressStateUpdate(bs, false); - - // let's restore the failing function in createBackup + // Remove the stub, ensure that a success leads to the prefs + // and retries resetting createBackupFailureStub.restore(); - // Now, on idleDispatch, we don't expect any failures - - // add a buffer between lastAttempt and now // eslint-disable-next-line mozilla/no-arbitrary-setTimeout await new Promise(resolve => setTimeout(resolve, 10)); @@ -184,30 +227,26 @@ add_task(async function test_retry_limit() { "testBackup_profile" ); - // calling createBackup directly to avoid race conditions with reliably - // awaiting createBackup to finish from a call to idleDispatch. This case covers - // the use case of reseting the prefs if createBackup is called and does not fail. await bs.createBackup({ profilePath: testProfilePath, }); - // the error states should have reset - Assert.ok( - !Services.prefs.getBoolPref(DISABLED_ON_IDLE_RETRY_PREF_NAME), - "Retry on idle is enabled now" - ); - Assert.equal( Services.prefs.getIntPref(BACKUP_ERROR_CODE_PREF_NAME), ERRORS.NONE, "The error code is reset to NONE" ); + Assert.equal( + 60, + BackupService.backoffSeconds(), + "The exponential backoff is reset to 1 minute (60s)" + ); + Assert.ok( !Services.prefs.getStringPref(BACKUP_DEBUG_INFO_PREF_NAME, null), "Error debug info has been cleared" ); - // check if changing the time since last backup calls createBackup sandbox.restore(); });