commit 19a8af49bf9706cf23e5f5735e8b17ce132e6e38
parent 5927356099fe73690a3f1bef2636e80a867d4b88
Author: agoloman <agoloman@mozilla.com>
Date: Sat, 22 Nov 2025 00:52:32 +0200
Revert "Bug 2000945 - Move query intent detection to AI-window r=Mardak,ai-models-reviewers" for causing bc failures @browser_all_files_referenced.js.
This reverts commit 22284e85a1de490c37bb63364be4a3cb03c6657a.
Diffstat:
4 files changed, 0 insertions(+), 549 deletions(-)
diff --git a/browser/components/aiwindow/models/IntentClassifier.sys.mjs b/browser/components/aiwindow/models/IntentClassifier.sys.mjs
@@ -1,235 +0,0 @@
-/**
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-import { createEngine } from "chrome://global/content/ml/EngineProcess.sys.mjs";
-
-const FORCED_CHAT_PHRASES = [
- "amuse me",
- "are we alone",
- "are you alive",
- "are you gpt",
- "are you human",
- "are you real",
- "bark like dog",
- "cheer me up",
- "comfort me",
- "count numbers",
- "curse me",
- "do aliens exist",
- "do we matter",
- "do you dream",
- "do you think",
- "does fate exist",
- "dream meaning",
- "drop wisdom",
- "encourage me",
- "entertain me",
- "explain yourself",
- "flip coin",
- "give blessing",
- "give wisdom",
- "good morning",
- "good night",
- "guess number",
- "hallo",
- "hello",
- "hey",
- "hi",
- "hola",
- "how are you",
- "inspire me",
- "invent a word",
- "invent holiday",
- "invent joke",
- "is god real",
- "life advice",
- "life purpose",
- "list animals",
- "list capitals",
- "list colors",
- "list countries",
- "list elements",
- "list fruits",
- "list metals",
- "list oceans",
- "list planets",
- "list shapes",
- "meaning of life",
- "meow like cat",
- "motivate me",
- "now you are",
- "play a game",
- "pretend alien",
- "pretend child",
- "pretend detective",
- "pretend ghost",
- "pretend pirate",
- "pretend robot",
- "pretend superhero",
- "pretend teacher",
- "pretend wizard",
- "random fact",
- "random number",
- "roll dice",
- "goodbye",
- "simulate chat",
- "simulate future",
- "simulate past",
- "sing like robot",
- "sing lullaby",
- "sing rap",
- "sup",
- "surprise me",
- "teach me",
- "tell bedtime story",
- "tell fortune",
- "tell joke",
- "tell prophecy",
- "tell riddle",
- "tell story",
- "what is art",
- "what is beauty",
- "what is death",
- "what is freedom",
- "what is justice",
- "what is love",
- "what is mind",
- "what is reality",
- "what is right",
- "what is self",
- "what is soul",
- "what is time",
- "what is truth",
- "what is wrong",
- "what model are you",
- "what version",
- "what’s up",
- "which model are you",
- "who am i",
- "who are you",
- "who made you",
- "why are we",
- "write a poem",
- "write a song",
- "write haiku",
- "write quote",
- "your model is",
-];
-
-export function normalizeTextForChatAllowlist(s) {
- return s.toLowerCase().normalize("NFKC").replace(/\s+/g, " ").trim();
-}
-
-// Split on non-word chars; letters/numbers/_ are "word" characters
-export function tokenizeTextForChatAllowlist(s) {
- return normalizeTextForChatAllowlist(s)
- .split(/[^\p{L}\p{N}_]+/u)
- .filter(Boolean);
-}
-
-export function buildChatAllowlist(phrases) {
- const byLen = new Map(); // len -> Set("tok tok ...")
- for (const p of phrases) {
- const key = tokenizeTextForChatAllowlist(p).join(" ");
- if (!key) {
- continue;
- }
- const k = key.split(" ").length;
- if (!byLen.has(k)) {
- byLen.set(k, new Set());
- }
- byLen.get(k).add(key);
- }
- return byLen;
-}
-
-// Factory: returns a fast checker for “does query contain any isolated phrase?”
-export function makeIsolatedPhraseChecker(phrases) {
- const byLen = buildChatAllowlist(phrases);
- const cache = new Map();
-
- return function containsIsolatedPhrase(query) {
- const qNorm = normalizeTextForChatAllowlist(query);
- if (cache.has(qNorm)) {
- return cache.get(qNorm);
- }
-
- const toks = qNorm.split(/[^\p{L}\p{N}_]+/u).filter(Boolean);
- for (const [k, set] of byLen) {
- for (let i = 0; i + k <= toks.length; i++) {
- if (set.has(toks.slice(i, i + k).join(" "))) {
- cache.set(qNorm, true);
- return true;
- }
- }
- }
- cache.set(qNorm, false);
- return false;
- };
-}
-
-/**
- * Intent Classifier Engine
- */
-export const IntentClassifier = {
- /**
- * Exposing createEngine for testing purposes.
- */
-
- _createEngine: createEngine,
-
- /**
- * Initialize forced-chat checker at module load.
- * Keeping it as a property ensures easy stubbing in tests.
- */
-
- _isForcedChat: makeIsolatedPhraseChecker(FORCED_CHAT_PHRASES),
-
- /**
- * Gets the intent of the prompt using a text classification model.
- *
- * @param {string} prompt
- * @returns {string} "search" | "chat"
- */
-
- async getPromptIntent(query) {
- try {
- const cleanedQuery = this._preprocessQuery(query);
- if (this._isForcedChat(cleanedQuery)) {
- return "chat";
- }
- const engine = await this._createEngine({
- featureId: "smart-intent",
- modelId: "mozilla/mobilebert-query-intent-detection",
- modelRevision: "v0.2.0",
- taskName: "text-classification",
- });
- const threshold = 0.8;
- const resp = await engine.run({ args: [[cleanedQuery]] });
- // resp example: [{ label: "chat", score: 0.95 }, { label: "search", score: 0.04 }]
- if (
- resp[0].label.toLowerCase() === "chat" &&
- resp[0].score >= threshold
- ) {
- return "chat";
- }
- return "search";
- } catch (error) {
- console.error("Error using intent detection model:", error);
- throw error;
- }
- },
-
- // Helper function for preprocessing text input
- _preprocessQuery(query) {
- if (typeof query !== "string") {
- throw new TypeError(
- `Expected a string for query preprocessing, but received ${typeof query}`
- );
- }
- return query.replace(/\?/g, "").trim();
- },
-};
diff --git a/browser/components/aiwindow/models/moz.build b/browser/components/aiwindow/models/moz.build
@@ -4,9 +4,3 @@
with Files("**"):
BUG_COMPONENT = ("Core", "Machine Learning: General")
-
-MOZ_SRC_FILES += [
- "IntentClassifier.sys.mjs",
-]
-
-XPCSHELL_TESTS_MANIFESTS += ["tests/xpcshell/xpcshell.toml"]
diff --git a/browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js b/browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js
@@ -1,303 +0,0 @@
-/* Any copyright is dedicated to the Public Domain.
- * http://creativecommons.org/publicdomain/zero/1.0/ */
-
-const {
- IntentClassifier,
- normalizeTextForChatAllowlist,
- tokenizeTextForChatAllowlist,
- buildChatAllowlist,
- makeIsolatedPhraseChecker,
-} = ChromeUtils.importESModule(
- "moz-src:///browser/components/aiwindow/models/IntentClassifier.sys.mjs"
-);
-
-const { sinon } = ChromeUtils.importESModule(
- "resource://testing-common/Sinon.sys.mjs"
-);
-
-add_task(async function test_getPromptIntent_basic() {
- const sb = sinon.createSandbox();
- try {
- const cases = [
- { prompt: "please search for news on firefox", expected: "search" },
- {
- prompt: "Can you FIND me the docs for PageAssist?",
- expected: "search",
- }, // case-insensitive
- { prompt: "look up the best pizza in SF", expected: "search" },
- { prompt: "hello there, how are you?", expected: "chat" },
- { prompt: "tell me a joke", expected: "chat" },
- ];
-
- const fakeEngine = {
- run({ args: [[query]] }) {
- const searchKeywords = [
- "search",
- "find",
- "look",
- "query",
- "locate",
- "explore",
- ];
- const formattedPrompt = query.toLowerCase();
- const isSearch = searchKeywords.some(keyword =>
- formattedPrompt.includes(keyword)
- );
-
- // Simulate model confidence scores
- if (isSearch) {
- return [
- { label: "search", score: 0.95 },
- { label: "chat", score: 0.05 },
- ];
- }
- return [
- { label: "chat", score: 0.95 },
- { label: "search", score: 0.05 },
- ];
- },
- };
-
- sb.stub(IntentClassifier, "_createEngine").resolves(fakeEngine);
-
- for (const { prompt, expected } of cases) {
- const intent = await IntentClassifier.getPromptIntent(prompt);
- Assert.equal(
- intent,
- expected,
- `getPromptIntent("${prompt}") should return "${expected}"`
- );
- }
- } finally {
- sb.restore();
- }
-});
-
-add_task(async function test_preprocessQuery_removes_question_marks() {
- // Call the real helper on the classifier
- const cases = [
- { input: "hello?", expected: "hello" },
- { input: "?prompt", expected: "prompt" },
- { input: "multiple???", expected: "multiple" },
- { input: "mid?dle", expected: "middle" },
- { input: "question? ", expected: "question" },
- { input: " no? spaces? ", expected: "no spaces" },
- { input: "???", expected: "" },
- { input: "clean input", expected: "clean input" },
- ];
-
- for (const { input, expected } of cases) {
- const result = IntentClassifier._preprocessQuery(input);
- Assert.equal(
- result,
- expected,
- `Expected "${input}" to preprocess to "${expected}", got "${result}"`
- );
- }
-});
-
-add_task(function test_normalizeTextForChatAllowlist_basic() {
- // lowercasing + trimming + collapsing internal spaces
- Assert.equal(
- normalizeTextForChatAllowlist(" HeLLo There "),
- "hello there",
- "Should lowercase, trim, and collapse spaces"
- );
-
- // NFKC normalization: compatibility forms → canonical
- // Fullwidth characters normalize: e.g., 'TEST' → 'test'
- Assert.equal(
- normalizeTextForChatAllowlist("TEST 123"),
- "test 123",
- "Should NFKC-normalize fullwidth letters/digits"
- );
-
- // Multiple whitespace kinds (NBSP, tabs, newlines) collapse
- Assert.equal(
- normalizeTextForChatAllowlist("a\u00A0b\tc\nd"),
- "a b c d",
- "Should collapse all whitespace kinds to single spaces"
- );
-});
-
-add_task(function test_tokenizeTextForChatAllowlist_unicode_and_boundaries() {
- // Splits on non-word chars, keeps letters/digits/underscore
- Assert.deepEqual(
- tokenizeTextForChatAllowlist("hello, world! 42_times"),
- ["hello", "world", "42_times"],
- "Should split on punctuation and keep underscores"
- );
-
- // Unicode letters should be treated as word chars (\p{L})
- Assert.deepEqual(
- tokenizeTextForChatAllowlist("mañana—café!"),
- ["mañana", "café"],
- "Should keep Unicode letters and split on punctuation (em dash, bang)"
- );
-
- // Apostrophes split (non-word), as intended
- Assert.deepEqual(
- tokenizeTextForChatAllowlist("what's up"),
- ["what", "s", "up"],
- "Apostrophes are separators, so tokens split around them"
- );
-});
-
-add_task(function test_buildChatAllowlist_grouping_and_normalization() {
- const phrases = [
- "sup",
- "hi there", // 2 tokens
- "what's up", // becomes "what s up" (3 tokens)
- " foo bar ", // leading/trailing + multiple spaces
- "", // empty should be skipped
- "___", // token of underscores counts as 1 token
- ];
- const sets = buildChatAllowlist(phrases);
-
- // Expect keys for lengths: 1, 2, 3
- Assert.ok(sets.has(1), "Should have set for single-token phrases");
- Assert.ok(sets.has(2), "Should have set for two-token phrases");
- Assert.ok(sets.has(3), "Should have set for three-token phrases");
-
- // 1-token set contains: "sup", "___"
- Assert.ok(sets.get(1).has("sup"), "Single-token set should contain 'sup'");
- Assert.ok(sets.get(1).has("___"), "Single-token set should contain '___'");
-
- // 2-token set contains normalized "hi there" and "foo bar"
- Assert.ok(
- sets.get(2).has("hi there"),
- "Two-token set should contain 'hi there'"
- );
- Assert.ok(
- sets.get(2).has("foo bar"),
- "Two-token set should contain normalized 'foo bar'"
- );
-
- // 3-token set contains "what s up" (note apostrophe split)
- Assert.ok(
- sets.get(3).has("what s up"),
- "Three-token set should contain 'what s up'"
- );
-
- // Empty phrase skipped: nothing added for length 0
- for (const [k, set] of sets) {
- Assert.ok(
- k > 0 && set.size >= 1,
- "No empty keys, each set has at least one entry"
- );
- }
-});
-
-add_task(function test_isolated_phrase_checker_single_word_boundaries() {
- const phrases = ["sup", "hello", "___"];
- const isForced = makeIsolatedPhraseChecker(phrases);
-
- // Positive: exact token present
- Assert.ok(
- isForced("sup bro"),
- "Should match 'sup' as an isolated token at start"
- );
- Assert.ok(
- isForced("hey, hello there"),
- "Should match 'hello' surrounded by punctuation"
- );
- Assert.ok(isForced("foo ___ bar"), "Should match token with underscores");
-
- // Negative: partial-word should NOT match
- Assert.ok(
- !isForced("supposingly, this should not match"),
- "No partial-word match for 'sup'"
- );
- Assert.ok(!isForced("supper time"), "No partial-word match inside 'supper'");
- Assert.ok(!isForced("shelloworld"), "No partial-word match for 'hello'");
-});
-
-add_task(function test_isolated_phrase_checker_multiword_and_punctuation() {
- // Multiword phrases; apostrophes become token splits -> "what's up" => "what s up"
- const phrases = ["hi there", "what's up"];
- const isForced = makeIsolatedPhraseChecker(phrases);
-
- // Positive: punctuation between words should still match (token split)
- Assert.ok(
- isForced("hi—there!"),
- "Em dash between words should match 'hi there'"
- );
- Assert.ok(
- isForced("well, hi there!!"),
- "Punctuation around phrase should match"
- );
- Assert.ok(
- isForced("so, what’s up today?"),
- "Curly apostrophe splits to tokens; should match 'what s up'"
- );
-
- // Negative: glued words should not match
- Assert.ok(
- !isForced("hithere"),
- "Concatenated words should not match 'hi there'"
- );
- Assert.ok(
- !isForced("whatssup"),
- "Should not match 'what s up' without separators"
- );
-});
-
-add_task(function test_isolated_phrase_checker_spacing_and_unicode_norm() {
- const phrases = ["good morning", "hello"];
- const isForced = makeIsolatedPhraseChecker(phrases);
-
- // Multiple spaces collapse
- Assert.ok(
- isForced("good morning everyone"),
- "Multiple spaces between tokens should still match"
- );
-
- // Fullwidth / NFKC normalization (TEST) and basic usage
- Assert.ok(
- isForced(" HELLO "),
- "Case and surrounding spaces should normalize and match 'hello'"
- );
-
- // Non-breaking spaces and tabs
- Assert.ok(
- isForced("good\u00A0morning\tteam"),
- "NBSP and tabs normalize and match"
- );
-});
-
-add_task(function test_isolated_phrase_checker_no_match_cases() {
- const phrases = ["hi there", "sup"];
- const isForced = makeIsolatedPhraseChecker(phrases);
-
- Assert.ok(!isForced(""), "Empty string should not match");
- Assert.ok(
- !isForced("nothing to see here"),
- "Unrelated text should not match"
- );
- Assert.ok(
- !isForced("support"),
- "Partial token with 'sup' prefix should not match"
- );
-});
-
-add_task(function test_isolated_phrase_checker_caching_stability() {
- const phrases = ["hello", "hi there"];
- const isForced = makeIsolatedPhraseChecker(phrases);
-
- // Repeated calls with the same input should return identical results (cache sanity)
- const q1 = "Hello there!";
- const first = isForced(q1);
- const second = isForced(q1);
- Assert.equal(
- first,
- second,
- "Same query should yield identical result across calls (cache-stable)"
- );
-
- // Different whitespace should normalize to the same outcome
- Assert.equal(
- isForced(" hello there "),
- isForced("hello there"),
- "Whitespace variations should not affect result"
- );
-});
diff --git a/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml b/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml
@@ -1,5 +0,0 @@
-[DEFAULT]
-run-if = ["os != 'android'"]
-firefox-appdir = "browser"
-
-["test_intent_classifier.js"]