tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 19a8af49bf9706cf23e5f5735e8b17ce132e6e38
parent 5927356099fe73690a3f1bef2636e80a867d4b88
Author: agoloman <agoloman@mozilla.com>
Date:   Sat, 22 Nov 2025 00:52:32 +0200

Revert "Bug 2000945 - Move query intent detection to AI-window r=Mardak,ai-models-reviewers" for causing bc failures @browser_all_files_referenced.js.

This reverts commit 22284e85a1de490c37bb63364be4a3cb03c6657a.

Diffstat:
D browser/components/aiwindow/models/IntentClassifier.sys.mjs | 235 -------------------------------------------------------------------------------
M browser/components/aiwindow/models/moz.build | 6 ------
D browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js | 303 -------------------------------------------------------------------------------
D browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml | 5 -----
4 files changed, 0 insertions(+), 549 deletions(-)

diff --git a/browser/components/aiwindow/models/IntentClassifier.sys.mjs b/browser/components/aiwindow/models/IntentClassifier.sys.mjs @@ -1,235 +0,0 @@ -/** - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - -import { createEngine } from "chrome://global/content/ml/EngineProcess.sys.mjs"; - -const FORCED_CHAT_PHRASES = [ - "amuse me", - "are we alone", - "are you alive", - "are you gpt", - "are you human", - "are you real", - "bark like dog", - "cheer me up", - "comfort me", - "count numbers", - "curse me", - "do aliens exist", - "do we matter", - "do you dream", - "do you think", - "does fate exist", - "dream meaning", - "drop wisdom", - "encourage me", - "entertain me", - "explain yourself", - "flip coin", - "give blessing", - "give wisdom", - "good morning", - "good night", - "guess number", - "hallo", - "hello", - "hey", - "hi", - "hola", - "how are you", - "inspire me", - "invent a word", - "invent holiday", - "invent joke", - "is god real", - "life advice", - "life purpose", - "list animals", - "list capitals", - "list colors", - "list countries", - "list elements", - "list fruits", - "list metals", - "list oceans", - "list planets", - "list shapes", - "meaning of life", - "meow like cat", - "motivate me", - "now you are", - "play a game", - "pretend alien", - "pretend child", - "pretend detective", - "pretend ghost", - "pretend pirate", - "pretend robot", - "pretend superhero", - "pretend teacher", - "pretend wizard", - "random fact", - "random number", - "roll dice", - "goodbye", - "simulate chat", - "simulate future", - "simulate past", - "sing like robot", - "sing lullaby", - "sing rap", - "sup", - "surprise me", - "teach me", - "tell bedtime story", - "tell fortune", - "tell joke", - "tell prophecy", - "tell riddle", - "tell story", - "what is art", - "what is beauty", - "what is death", - "what 
is freedom", - "what is justice", - "what is love", - "what is mind", - "what is reality", - "what is right", - "what is self", - "what is soul", - "what is time", - "what is truth", - "what is wrong", - "what model are you", - "what version", - "what’s up", - "which model are you", - "who am i", - "who are you", - "who made you", - "why are we", - "write a poem", - "write a song", - "write haiku", - "write quote", - "your model is", -]; - -export function normalizeTextForChatAllowlist(s) { - return s.toLowerCase().normalize("NFKC").replace(/\s+/g, " ").trim(); -} - -// Split on non-word chars; letters/numbers/_ are "word" characters -export function tokenizeTextForChatAllowlist(s) { - return normalizeTextForChatAllowlist(s) - .split(/[^\p{L}\p{N}_]+/u) - .filter(Boolean); -} - -export function buildChatAllowlist(phrases) { - const byLen = new Map(); // len -> Set("tok tok ...") - for (const p of phrases) { - const key = tokenizeTextForChatAllowlist(p).join(" "); - if (!key) { - continue; - } - const k = key.split(" ").length; - if (!byLen.has(k)) { - byLen.set(k, new Set()); - } - byLen.get(k).add(key); - } - return byLen; -} - -// Factory: returns a fast checker for “does query contain any isolated phrase?” -export function makeIsolatedPhraseChecker(phrases) { - const byLen = buildChatAllowlist(phrases); - const cache = new Map(); - - return function containsIsolatedPhrase(query) { - const qNorm = normalizeTextForChatAllowlist(query); - if (cache.has(qNorm)) { - return cache.get(qNorm); - } - - const toks = qNorm.split(/[^\p{L}\p{N}_]+/u).filter(Boolean); - for (const [k, set] of byLen) { - for (let i = 0; i + k <= toks.length; i++) { - if (set.has(toks.slice(i, i + k).join(" "))) { - cache.set(qNorm, true); - return true; - } - } - } - cache.set(qNorm, false); - return false; - }; -} - -/** - * Intent Classifier Engine - */ -export const IntentClassifier = { - /** - * Exposing createEngine for testing purposes. 
- */ - - _createEngine: createEngine, - - /** - * Initialize forced-chat checker at module load. - * Keeping it as a property ensures easy stubbing in tests. - */ - - _isForcedChat: makeIsolatedPhraseChecker(FORCED_CHAT_PHRASES), - - /** - * Gets the intent of the prompt using a text classification model. - * - * @param {string} prompt - * @returns {string} "search" | "chat" - */ - - async getPromptIntent(query) { - try { - const cleanedQuery = this._preprocessQuery(query); - if (this._isForcedChat(cleanedQuery)) { - return "chat"; - } - const engine = await this._createEngine({ - featureId: "smart-intent", - modelId: "mozilla/mobilebert-query-intent-detection", - modelRevision: "v0.2.0", - taskName: "text-classification", - }); - const threshold = 0.8; - const resp = await engine.run({ args: [[cleanedQuery]] }); - // resp example: [{ label: "chat", score: 0.95 }, { label: "search", score: 0.04 }] - if ( - resp[0].label.toLowerCase() === "chat" && - resp[0].score >= threshold - ) { - return "chat"; - } - return "search"; - } catch (error) { - console.error("Error using intent detection model:", error); - throw error; - } - }, - - // Helper function for preprocessing text input - _preprocessQuery(query) { - if (typeof query !== "string") { - throw new TypeError( - `Expected a string for query preprocessing, but received ${typeof query}` - ); - } - return query.replace(/\?/g, "").trim(); - }, -}; diff --git a/browser/components/aiwindow/models/moz.build b/browser/components/aiwindow/models/moz.build @@ -4,9 +4,3 @@ with Files("**"): BUG_COMPONENT = ("Core", "Machine Learning: General") - -MOZ_SRC_FILES += [ - "IntentClassifier.sys.mjs", -] - -XPCSHELL_TESTS_MANIFESTS += ["tests/xpcshell/xpcshell.toml"] diff --git a/browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js b/browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js @@ -1,303 +0,0 @@ -/* Any copyright is dedicated to the Public Domain. 
- * http://creativecommons.org/publicdomain/zero/1.0/ */ - -const { - IntentClassifier, - normalizeTextForChatAllowlist, - tokenizeTextForChatAllowlist, - buildChatAllowlist, - makeIsolatedPhraseChecker, -} = ChromeUtils.importESModule( - "moz-src:///browser/components/aiwindow/models/IntentClassifier.sys.mjs" -); - -const { sinon } = ChromeUtils.importESModule( - "resource://testing-common/Sinon.sys.mjs" -); - -add_task(async function test_getPromptIntent_basic() { - const sb = sinon.createSandbox(); - try { - const cases = [ - { prompt: "please search for news on firefox", expected: "search" }, - { - prompt: "Can you FIND me the docs for PageAssist?", - expected: "search", - }, // case-insensitive - { prompt: "look up the best pizza in SF", expected: "search" }, - { prompt: "hello there, how are you?", expected: "chat" }, - { prompt: "tell me a joke", expected: "chat" }, - ]; - - const fakeEngine = { - run({ args: [[query]] }) { - const searchKeywords = [ - "search", - "find", - "look", - "query", - "locate", - "explore", - ]; - const formattedPrompt = query.toLowerCase(); - const isSearch = searchKeywords.some(keyword => - formattedPrompt.includes(keyword) - ); - - // Simulate model confidence scores - if (isSearch) { - return [ - { label: "search", score: 0.95 }, - { label: "chat", score: 0.05 }, - ]; - } - return [ - { label: "chat", score: 0.95 }, - { label: "search", score: 0.05 }, - ]; - }, - }; - - sb.stub(IntentClassifier, "_createEngine").resolves(fakeEngine); - - for (const { prompt, expected } of cases) { - const intent = await IntentClassifier.getPromptIntent(prompt); - Assert.equal( - intent, - expected, - `getPromptIntent("${prompt}") should return "${expected}"` - ); - } - } finally { - sb.restore(); - } -}); - -add_task(async function test_preprocessQuery_removes_question_marks() { - // Call the real helper on the classifier - const cases = [ - { input: "hello?", expected: "hello" }, - { input: "?prompt", expected: "prompt" }, - { input: 
"multiple???", expected: "multiple" }, - { input: "mid?dle", expected: "middle" }, - { input: "question? ", expected: "question" }, - { input: " no? spaces? ", expected: "no spaces" }, - { input: "???", expected: "" }, - { input: "clean input", expected: "clean input" }, - ]; - - for (const { input, expected } of cases) { - const result = IntentClassifier._preprocessQuery(input); - Assert.equal( - result, - expected, - `Expected "${input}" to preprocess to "${expected}", got "${result}"` - ); - } -}); - -add_task(function test_normalizeTextForChatAllowlist_basic() { - // lowercasing + trimming + collapsing internal spaces - Assert.equal( - normalizeTextForChatAllowlist(" HeLLo There "), - "hello there", - "Should lowercase, trim, and collapse spaces" - ); - - // NFKC normalization: compatibility forms → canonical - // Fullwidth characters normalize: e.g., 'TEST' → 'test' - Assert.equal( - normalizeTextForChatAllowlist("TEST 123"), - "test 123", - "Should NFKC-normalize fullwidth letters/digits" - ); - - // Multiple whitespace kinds (NBSP, tabs, newlines) collapse - Assert.equal( - normalizeTextForChatAllowlist("a\u00A0b\tc\nd"), - "a b c d", - "Should collapse all whitespace kinds to single spaces" - ); -}); - -add_task(function test_tokenizeTextForChatAllowlist_unicode_and_boundaries() { - // Splits on non-word chars, keeps letters/digits/underscore - Assert.deepEqual( - tokenizeTextForChatAllowlist("hello, world! 
42_times"), - ["hello", "world", "42_times"], - "Should split on punctuation and keep underscores" - ); - - // Unicode letters should be treated as word chars (\p{L}) - Assert.deepEqual( - tokenizeTextForChatAllowlist("mañana—café!"), - ["mañana", "café"], - "Should keep Unicode letters and split on punctuation (em dash, bang)" - ); - - // Apostrophes split (non-word), as intended - Assert.deepEqual( - tokenizeTextForChatAllowlist("what's up"), - ["what", "s", "up"], - "Apostrophes are separators, so tokens split around them" - ); -}); - -add_task(function test_buildChatAllowlist_grouping_and_normalization() { - const phrases = [ - "sup", - "hi there", // 2 tokens - "what's up", // becomes "what s up" (3 tokens) - " foo bar ", // leading/trailing + multiple spaces - "", // empty should be skipped - "___", // token of underscores counts as 1 token - ]; - const sets = buildChatAllowlist(phrases); - - // Expect keys for lengths: 1, 2, 3 - Assert.ok(sets.has(1), "Should have set for single-token phrases"); - Assert.ok(sets.has(2), "Should have set for two-token phrases"); - Assert.ok(sets.has(3), "Should have set for three-token phrases"); - - // 1-token set contains: "sup", "___" - Assert.ok(sets.get(1).has("sup"), "Single-token set should contain 'sup'"); - Assert.ok(sets.get(1).has("___"), "Single-token set should contain '___'"); - - // 2-token set contains normalized "hi there" and "foo bar" - Assert.ok( - sets.get(2).has("hi there"), - "Two-token set should contain 'hi there'" - ); - Assert.ok( - sets.get(2).has("foo bar"), - "Two-token set should contain normalized 'foo bar'" - ); - - // 3-token set contains "what s up" (note apostrophe split) - Assert.ok( - sets.get(3).has("what s up"), - "Three-token set should contain 'what s up'" - ); - - // Empty phrase skipped: nothing added for length 0 - for (const [k, set] of sets) { - Assert.ok( - k > 0 && set.size >= 1, - "No empty keys, each set has at least one entry" - ); - } -}); - -add_task(function 
test_isolated_phrase_checker_single_word_boundaries() { - const phrases = ["sup", "hello", "___"]; - const isForced = makeIsolatedPhraseChecker(phrases); - - // Positive: exact token present - Assert.ok( - isForced("sup bro"), - "Should match 'sup' as an isolated token at start" - ); - Assert.ok( - isForced("hey, hello there"), - "Should match 'hello' surrounded by punctuation" - ); - Assert.ok(isForced("foo ___ bar"), "Should match token with underscores"); - - // Negative: partial-word should NOT match - Assert.ok( - !isForced("supposingly, this should not match"), - "No partial-word match for 'sup'" - ); - Assert.ok(!isForced("supper time"), "No partial-word match inside 'supper'"); - Assert.ok(!isForced("shelloworld"), "No partial-word match for 'hello'"); -}); - -add_task(function test_isolated_phrase_checker_multiword_and_punctuation() { - // Multiword phrases; apostrophes become token splits -> "what's up" => "what s up" - const phrases = ["hi there", "what's up"]; - const isForced = makeIsolatedPhraseChecker(phrases); - - // Positive: punctuation between words should still match (token split) - Assert.ok( - isForced("hi—there!"), - "Em dash between words should match 'hi there'" - ); - Assert.ok( - isForced("well, hi there!!"), - "Punctuation around phrase should match" - ); - Assert.ok( - isForced("so, what’s up today?"), - "Curly apostrophe splits to tokens; should match 'what s up'" - ); - - // Negative: glued words should not match - Assert.ok( - !isForced("hithere"), - "Concatenated words should not match 'hi there'" - ); - Assert.ok( - !isForced("whatssup"), - "Should not match 'what s up' without separators" - ); -}); - -add_task(function test_isolated_phrase_checker_spacing_and_unicode_norm() { - const phrases = ["good morning", "hello"]; - const isForced = makeIsolatedPhraseChecker(phrases); - - // Multiple spaces collapse - Assert.ok( - isForced("good morning everyone"), - "Multiple spaces between tokens should still match" - ); - - // Fullwidth / 
NFKC normalization (TEST) and basic usage - Assert.ok( - isForced(" HELLO "), - "Case and surrounding spaces should normalize and match 'hello'" - ); - - // Non-breaking spaces and tabs - Assert.ok( - isForced("good\u00A0morning\tteam"), - "NBSP and tabs normalize and match" - ); -}); - -add_task(function test_isolated_phrase_checker_no_match_cases() { - const phrases = ["hi there", "sup"]; - const isForced = makeIsolatedPhraseChecker(phrases); - - Assert.ok(!isForced(""), "Empty string should not match"); - Assert.ok( - !isForced("nothing to see here"), - "Unrelated text should not match" - ); - Assert.ok( - !isForced("support"), - "Partial token with 'sup' prefix should not match" - ); -}); - -add_task(function test_isolated_phrase_checker_caching_stability() { - const phrases = ["hello", "hi there"]; - const isForced = makeIsolatedPhraseChecker(phrases); - - // Repeated calls with the same input should return identical results (cache sanity) - const q1 = "Hello there!"; - const first = isForced(q1); - const second = isForced(q1); - Assert.equal( - first, - second, - "Same query should yield identical result across calls (cache-stable)" - ); - - // Different whitespace should normalize to the same outcome - Assert.equal( - isForced(" hello there "), - isForced("hello there"), - "Whitespace variations should not affect result" - ); -}); diff --git a/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml b/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml @@ -1,5 +0,0 @@ -[DEFAULT] -run-if = ["os != 'android'"] -firefox-appdir = "browser" - -["test_intent_classifier.js"]