tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit d0f27a2fe758a1098430a3e670e709e13d15a55c
parent 8d845f554d7c762d506142b182eefb0333f214f5
Author: Marco Bonardo <mbonardo@mozilla.com>
Date:   Tue,  2 Dec 2025 13:10:54 +0000

Bug 1992920 - Limit semantic history search to a list of supported locales. r=jteow,urlbar-reviewers,cgopal

Differential Revision: https://phabricator.services.mozilla.com/D272336

Diffstat:
Mbrowser/app/profile/firefox.js | 1+
Mbrowser/components/urlbar/UrlbarProvidersManager.sys.mjs | 10++++++++++
Mbrowser/components/urlbar/tests/unit/test_UrlbarProviderSemanticHistorySearch.js | 13+++++++++++++
Mtoolkit/components/ml/tests/browser/browser_ml_semantic_history_search_perf.js | 11+++++++++++
Mtoolkit/components/nimbus/FeatureManifest.yaml | 13+++++++++++++
Mtoolkit/components/places/PlacesSemanticHistoryManager.sys.mjs | 90++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
Mtoolkit/components/places/tests/unit/test_PlacesSemanticHistoryManager.js | 126+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
7 files changed, 244 insertions(+), 20 deletions(-)

diff --git a/browser/app/profile/firefox.js b/browser/app/profile/firefox.js @@ -688,6 +688,7 @@ pref("places.semanticHistory.featureGate", true); #else pref("places.semanticHistory.featureGate", false); #endif +pref("places.semanticHistory.supportedRegions", "[[\"AU\",[\"en-*\"]],[\"CA\",[\"en-*\"]],[\"GB\",[\"en-*\"]],[\"IE\",[\"en-*\"]],[\"NZ\",[\"en-*\"]],[\"PH\",[\"en-*\"]],[\"US\",[\"en-*\"]]]"); // Minimum length threshold for semantic history search pref("browser.urlbar.suggest.semanticHistory.minLength", 5); diff --git a/browser/components/urlbar/UrlbarProvidersManager.sys.mjs b/browser/components/urlbar/UrlbarProvidersManager.sys.mjs @@ -18,6 +18,7 @@ const lazy = {}; ChromeUtils.defineESModuleGetters(lazy, { ObjectUtils: "resource://gre/modules/ObjectUtils.sys.mjs", PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs", + Region: "resource://gre/modules/Region.sys.mjs", SkippableTimer: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs", UrlbarMuxer: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs", UrlbarPrefs: "moz-src:///browser/components/urlbar/UrlbarPrefs.sys.mjs", @@ -463,6 +464,15 @@ export class ProvidersManager { // history and bookmarks even if search engines are not available. } + // Some providers depend on Region/Locale info and must access Region.home + // synchronously, so we ensure Region is initialized. + try { + await lazy.Region.init(); + } catch (ex) { + // We continue anyway, region will be null and providers should handle + // that gracefully. 
+ } + if (query.canceled) { return; } diff --git a/browser/components/urlbar/tests/unit/test_UrlbarProviderSemanticHistorySearch.js b/browser/components/urlbar/tests/unit/test_UrlbarProviderSemanticHistorySearch.js @@ -17,6 +17,13 @@ const { UrlbarProviderSemanticHistorySearch } = ChromeUtils.importESModule( const { getPlacesSemanticHistoryManager } = ChromeUtils.importESModule( "resource://gre/modules/PlacesSemanticHistoryManager.sys.mjs" ); +ChromeUtils.defineLazyGetter(this, "QuickSuggestTestUtils", () => { + const { QuickSuggestTestUtils: module } = ChromeUtils.importESModule( + "resource://testing-common/QuickSuggestTestUtils.sys.mjs" + ); + module.init(this); + return module; +}); let semanticManager = getPlacesSemanticHistoryManager(); let hasSufficientEntriesStub = sinon @@ -46,6 +53,12 @@ add_task(async function setup() { Services.prefs .getDefaultBranch("") .setIntPref("browser.urlbar.suggest.semanticHistory.minLength", 5); + + let cleanup = await QuickSuggestTestUtils.setRegionAndLocale({ + region: "US", + locale: "en-US", + }); + registerCleanupFunction(cleanup); }); add_task(async function test_startQuery_adds_results() { diff --git a/toolkit/components/ml/tests/browser/browser_ml_semantic_history_search_perf.js b/toolkit/components/ml/tests/browser/browser_ml_semantic_history_search_perf.js @@ -2,6 +2,10 @@ * http://creativecommons.org/publicdomain/zero/1.0/ */ "use strict"; +const { sinon } = ChromeUtils.importESModule( + "resource://testing-common/Sinon.sys.mjs" +); + const UPDATE_TASK_LATENCY = "update-task-latency"; const SEARCH_LATENCY = "search-latency"; const INFERENCE_LATENCY = "inference-latency"; @@ -247,6 +251,13 @@ async function prepareSemanticSearchTest({ return { skip: true }; } + // Skip featureGate, Region and other non critical checks. 
+ let canUseSemanticStub = sinon.stub(semanticManager, "canUseSemanticSearch"); + canUseSemanticStub.get(() => true); + registerCleanupFunction(() => { + canUseSemanticStub.restore(); + }); + semanticManager.embedder.options = CUSTOM_EMBEDDER_OPTIONS; await semanticManager.embedder.ensureEngine(); diff --git a/toolkit/components/nimbus/FeatureManifest.yaml b/toolkit/components/nimbus/FeatureManifest.yaml @@ -598,6 +598,19 @@ urlbar: description: >- semantic similarity distance threshold (0.0 - 1.0) defaulted at 0.6. + semanticHistorySupportedRegions: + type: string + setPref: + branch: user + pref: places.semanticHistory.supportedRegions + description: >- + map-like JSON of regions and locales for which the feature is supported. + When `semanticHistoryEnable` is false, this list is ignored and the + feature is disabled everywhere. When true, the feature is enabled only + in the regions/locales listed here. + The format is like: '[["AU",["en-*"]],["CA",["en-*"]], …]' where `-*` + can be used as a wildcard to match any locale that starts with the given + prefix. 
showDebuggingIcons: type: boolean setPref: diff --git a/toolkit/components/places/PlacesSemanticHistoryManager.sys.mjs b/toolkit/components/places/PlacesSemanticHistoryManager.sys.mjs @@ -11,15 +11,17 @@ * @import {OpenedConnection} from "resource://gre/modules/Sqlite.sys.mjs" */ -const lazy = {}; +import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs"; +const lazy = {}; ChromeUtils.defineESModuleGetters(lazy, { - DeferredTask: "resource://gre/modules/DeferredTask.sys.mjs", AsyncShutdown: "resource://gre/modules/AsyncShutdown.sys.mjs", - PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs", + DeferredTask: "resource://gre/modules/DeferredTask.sys.mjs", + EmbeddingsGenerator: "chrome://global/content/ml/EmbeddingsGenerator.sys.mjs", PlacesSemanticHistoryDatabase: "resource://gre/modules/PlacesSemanticHistoryDatabase.sys.mjs", - EmbeddingsGenerator: "chrome://global/content/ml/EmbeddingsGenerator.sys.mjs", + PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs", + Region: "resource://gre/modules/Region.sys.mjs", }); ChromeUtils.defineLazyGetter(lazy, "logger", function () { @@ -33,6 +35,44 @@ ChromeUtils.defineLazyGetter(lazy, "PAGES_FRECENCY_FIELD", () => { : "frecency"; }); +// This list is based on the current model capabilities. It is a Map-like list +// of regions where English is predominant, and a common language is latin-based. +// Each country code is assigned to an array of supported BCP 47 language tags, +// a tag can end with "-*" to match any variants (match at the start). +// The list of supported region and locales is loaded from the +// places.semanticHistory.supportedRegions string pref, and this is used as a +// fallback if we fail to parse the pref. 
+/** @type {[string, string[]][]} */ +const ENABLED_REGIONS_DEFAULT = [ + ["AU", ["en-*"]], + ["CA", ["en-*"]], + ["GB", ["en-*"]], + ["IE", ["en-*"]], + ["NZ", ["en-*"]], + ["PH", ["en-*"]], + ["US", ["en-*"]], +]; +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "supportedRegions", + "places.semanticHistory.supportedRegions", + JSON.stringify(ENABLED_REGIONS_DEFAULT), + null, + val => { + try { + return new Map(JSON.parse(val)); + } catch (ex) { + // Supposing a user may empty the pref to disable the feature, as they + // don't know it should be a JSON string, we'll treat that as an empty + // Map, so the feature is disabled. + if (val === "") { + return new Map(); + } + return new Map(ENABLED_REGIONS_DEFAULT); + } + } +); + // Time between deferred task executions. const DEFERRED_TASK_INTERVAL_MS = 3000; // Maximum time to wait for an idle before the task is executed anyway. @@ -94,6 +134,7 @@ class PlacesSemanticHistoryManager { * @param {number} [options.changeThresholdCount=3] - Threshold of changed rows to trigger updates. * @param {number} [options.distanceThreshold=0.6] - Cosine distance threshold to determine similarity. * @param {boolean} [options.testFlag=false] - Flag for test behavior. + * @param {number} [options.deferredTaskInterval=DEFERRED_TASK_INTERVAL_MS] - Interval for deferred task execution. 
*/ constructor({ backend = "static-embeddings", @@ -103,6 +144,7 @@ class PlacesSemanticHistoryManager { changeThresholdCount = 3, distanceThreshold = 0.6, testFlag = false, + deferredTaskInterval = DEFERRED_TASK_INTERVAL_MS, } = {}) { this.QueryInterface = ChromeUtils.generateQI([ "nsIObserver", @@ -149,6 +191,7 @@ class PlacesSemanticHistoryManager { this.#changeThresholdCount = changeThresholdCount; this.#distanceThreshold = distanceThreshold; this.testFlag = testFlag; + this.#deferredTaskInterval = deferredTaskInterval; this.#updateTaskLatency = []; lazy.logger.trace("PlaceSemanticManager constructor"); @@ -289,13 +332,41 @@ class PlacesSemanticHistoryManager { * else false */ get canUseSemanticSearch() { + // This requires Region to have been initialized somewhere else + // asynchronously, so consumer is responsible for that, otherwise it may + // be null. return ( this.qualifiedForSemanticSearch && Services.prefs.getBoolPref("browser.ml.enable", true) && - Services.prefs.getBoolPref("places.semanticHistory.featureGate", false) + Services.prefs.getBoolPref("places.semanticHistory.featureGate", false) && + this.#isSupportedLocale(Services.locale.appLocaleAsBCP47) ); } + /** + * Check if the given locale is supported for Semantic History Search. + * + * @param {string} appLocale BCP 47 language tag. + * @returns {boolean} Whether the locale is supported. + */ + #isSupportedLocale(appLocale) { + // Per BCP-47 comparisons must be performed in a case-insensitive manner. + appLocale = appLocale.toLowerCase(); + let supportedLocales = lazy.supportedRegions.get(lazy.Region.home) ?? 
[]; + for (let localePattern of supportedLocales) { + localePattern = localePattern.toLowerCase(); + if ( + localePattern.endsWith("*") && + appLocale.startsWith(localePattern.slice(0, -1)) + ) { + return true; + } else if (localePattern == appLocale) { + return true; + } + } + return false; + } + handlePlacesEvents(events) { for (const { type } of events) { switch (type) { @@ -330,15 +401,6 @@ class PlacesSemanticHistoryManager { } /** - * Sets the DeferredTask interval for testing purposes. - * - * @param {number} val minimum milliseconds between deferred task executions. - */ - setDeferredTaskIntervalForTests(val) { - this.#deferredTaskInterval = val; - } - - /** * Creates or updates the DeferredTask for managing updates to the semantic DB. */ #createOrUpdateTask() { diff --git a/toolkit/components/places/tests/unit/test_PlacesSemanticHistoryManager.js b/toolkit/components/places/tests/unit/test_PlacesSemanticHistoryManager.js @@ -10,6 +10,14 @@ ChromeUtils.defineESModuleGetters(this, { "resource://gre/modules/PlacesSemanticHistoryManager.sys.mjs", }); +ChromeUtils.defineLazyGetter(this, "QuickSuggestTestUtils", () => { + const { QuickSuggestTestUtils: module } = ChromeUtils.importESModule( + "resource://testing-common/QuickSuggestTestUtils.sys.mjs" + ); + module.init(this); + return module; +}); + // Must be supported, and a multiple of 8. see EmbeddingsGenerator.sys.mjs for // a list of supported values. 
const EMBEDDING_SIZE = 32; @@ -61,6 +69,11 @@ class MockMLEngine { add_setup(async function () { Services.fog.initializeFOG(); + let cleanup = await QuickSuggestTestUtils.setRegionAndLocale({ + region: "US", + locale: "en-US", + }); + registerCleanupFunction(cleanup); }); add_task(async function test_tensorToSQLBindable() { @@ -164,6 +177,105 @@ add_task(async function test_canUseSemanticSearch_not_qualified() { ); }); +add_task(async function test_canUseSemanticSearch_region_locale() { + const semanticManager = createPlacesSemanticHistoryManager(); + + Services.prefs.setBoolPref("browser.ml.enable", true); + Services.prefs.setBoolPref("places.semanticHistory.featureGate", true); + + semanticManager.qualifiedForSemanticSearch = true; + semanticManager.enoughEntries = true; + + let tests = [ + { region: "US", locale: "en-US", supported: true }, + { region: "FR", locale: "fr-FR", supported: false }, + { + region: "IT", + locale: "it-IT", + supported: true, + setPref: '[["IT",["it-*"]]]', + }, + { + region: "US", + locale: "en-US", + supported: false, + setPref: '[["IT",["it-*"]]]', + }, + { + region: "US", + locale: "en-US", + supported: false, + setPref: "[]", // empty list means disable + }, + { + region: "US", + locale: "en-US", + supported: false, + setPref: "", // empty string means disable + }, + { + region: "US", + locale: "en-US", + supported: true, + setPref: "invalid json", // invalid, should use default. 
+ }, + { + region: "US", + locale: "en", // wrong locale format + supported: false, + setPref: '[["US",["en-*"]]]', + }, + { + region: "US", + locale: "en-US", + supported: true, + setPref: '[["US",["en-US"]]]', + }, + { + region: "US", + locale: "es-MX", + supported: false, + setPref: '[["US",["en-US"]]]', + }, + { + region: "US", + locale: "es-MX", + supported: true, + setPref: '[["US",["en-*","es-*"]]]', + }, + { + region: "US", + locale: "en-US", + supported: true, + setPref: '[["US",["en-*","es-*"]]]', + }, + ]; + for (let { region, locale, supported, setPref } of tests) { + if (setPref !== undefined) { + info("Setting `supportedRegions` pref to " + setPref); + Services.prefs.setCharPref( + "places.semanticHistory.supportedRegions", + setPref + ); + } + await QuickSuggestTestUtils.withRegionAndLocale({ + region, + locale, + skipSuggestReset: true, + callback() { + Assert.equal( + semanticManager.canUseSemanticSearch, + supported, + `Check region ${region} and locale ${locale}` + ); + }, + }); + if (setPref !== undefined) { + Services.prefs.clearUserPref("places.semanticHistory.supportedRegions"); + } + } +}); + add_task(async function test_removeDatabaseFilesOnDisable() { // Ensure Places has been initialized. Assert.equal( @@ -269,9 +381,9 @@ add_task(async function test_chunksTelemetry() { ); Services.prefs.setBoolPref("places.semanticHistory.featureGate", true); - let semanticManager = createPlacesSemanticHistoryManager(); - // Ensure only one task execution for measuremant purposes. - semanticManager.setDeferredTaskIntervalForTests(3000); + let semanticManager = createPlacesSemanticHistoryManager({ + deferredTaskInterval: 2000, // lower time to avoid timeouts. 
+ }); await semanticManager.getConnection(); semanticManager.embedder.setEngine(new MockMLEngine()); await TestUtils.topicObserved( @@ -317,9 +429,9 @@ add_task(async function test_duplicate_urlhash() { ); }); - let semanticManager = createPlacesSemanticHistoryManager(); - // Ensure only one task execution for measuremant purposes. - semanticManager.setDeferredTaskIntervalForTests(3000); + let semanticManager = createPlacesSemanticHistoryManager({ + deferredTaskInterval: 2000, // lower time to avoid timeouts. + }); let conn = await semanticManager.getConnection(); semanticManager.embedder.setEngine(new MockMLEngine()); await TestUtils.topicObserved( @@ -358,6 +470,7 @@ add_task(async function test_rowid_relations() { let semanticManager = createPlacesSemanticHistoryManager({ changeThresholdCount: 1, + deferredTaskInterval: 2000, // lower time to avoid timeouts. }); // Ensure we start from an empty database. await semanticManager.semanticDB.removeDatabaseFiles(); @@ -426,6 +539,7 @@ add_task(async function test_rowid_conflict() { let semanticManager = createPlacesSemanticHistoryManager({ changeThresholdCount: 1, + deferredTaskInterval: 2000, // lower time to avoid timeouts. }); // Ensure we start from an empty database. await semanticManager.semanticDB.removeDatabaseFiles();