commit ae1760503c4d56d0bd0ccedaad01ded1835cbffc
parent 1c9d86edc5d91b67ba8c858a053de40e1b98dc95
Author: Tzu-An Liu <tliu@mozilla.com>
Date: Tue, 25 Nov 2025 18:19:31 +0000
Bug 2000945 - Move query intent detection to AI-window r=Mardak,ai-models-reviewers
Differential Revision: https://phabricator.services.mozilla.com/D273259
Diffstat:
5 files changed, 546 insertions(+), 3 deletions(-)
diff --git a/browser/base/content/test/static/browser_all_files_referenced.js b/browser/base/content/test/static/browser_all_files_referenced.js
@@ -336,6 +336,11 @@ var allowlist = [
{
file: "moz-src:///browser/components/aiwindow/models/InsightsHistorySource.sys.mjs",
},
+
+ // Bug 2000945 - Move query intent detection to AI-window r?mardak (backed out due to unused file)
+ {
+ file: "moz-src:///browser/components/aiwindow/models/IntentClassifier.sys.mjs",
+ },
];
if (AppConstants.NIGHTLY_BUILD) {
diff --git a/browser/components/aiwindow/models/IntentClassifier.sys.mjs b/browser/components/aiwindow/models/IntentClassifier.sys.mjs
@@ -0,0 +1,235 @@
+/**
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+import { createEngine } from "chrome://global/content/ml/EngineProcess.sys.mjs";
+
+/**
+ * Phrases that force the "chat" intent without running the model.
+ *
+ * The list is handed to makeIsolatedPhraseChecker, which normalizes and
+ * tokenizes each phrase the same way it does the query, and reports a hit
+ * only when a phrase appears in the query as a run of whole tokens.
+ */
+const FORCED_CHAT_PHRASES = [
+ "amuse me",
+ "are we alone",
+ "are you alive",
+ "are you gpt",
+ "are you human",
+ "are you real",
+ "bark like dog",
+ "cheer me up",
+ "comfort me",
+ "count numbers",
+ "curse me",
+ "do aliens exist",
+ "do we matter",
+ "do you dream",
+ "do you think",
+ "does fate exist",
+ "dream meaning",
+ "drop wisdom",
+ "encourage me",
+ "entertain me",
+ "explain yourself",
+ "flip coin",
+ "give blessing",
+ "give wisdom",
+ "good morning",
+ "good night",
+ "guess number",
+ "hallo",
+ "hello",
+ "hey",
+ "hi",
+ "hola",
+ "how are you",
+ "inspire me",
+ "invent a word",
+ "invent holiday",
+ "invent joke",
+ "is god real",
+ "life advice",
+ "life purpose",
+ "list animals",
+ "list capitals",
+ "list colors",
+ "list countries",
+ "list elements",
+ "list fruits",
+ "list metals",
+ "list oceans",
+ "list planets",
+ "list shapes",
+ "meaning of life",
+ "meow like cat",
+ "motivate me",
+ "now you are",
+ "play a game",
+ "pretend alien",
+ "pretend child",
+ "pretend detective",
+ "pretend ghost",
+ "pretend pirate",
+ "pretend robot",
+ "pretend superhero",
+ "pretend teacher",
+ "pretend wizard",
+ "random fact",
+ "random number",
+ "roll dice",
+ "goodbye",
+ "simulate chat",
+ "simulate future",
+ "simulate past",
+ "sing like robot",
+ "sing lullaby",
+ "sing rap",
+ "sup",
+ "surprise me",
+ "teach me",
+ "tell bedtime story",
+ "tell fortune",
+ "tell joke",
+ "tell prophecy",
+ "tell riddle",
+ "tell story",
+ "what is art",
+ "what is beauty",
+ "what is death",
+ "what is freedom",
+ "what is justice",
+ "what is love",
+ "what is mind",
+ "what is reality",
+ "what is right",
+ "what is self",
+ "what is soul",
+ "what is time",
+ "what is truth",
+ "what is wrong",
+ "what model are you",
+ "what version",
+ "what’s up",
+ "which model are you",
+ "who am i",
+ "who are you",
+ "who made you",
+ "why are we",
+ "write a poem",
+ "write a song",
+ "write haiku",
+ "write quote",
+ "your model is",
+];
+
+/**
+ * Canonicalize text for forced-chat phrase matching.
+ *
+ * Applies Unicode NFKC normalization, lowercases the result, collapses every
+ * run of whitespace into a single space and trims both ends.
+ *
+ * NFKC must run before lowercasing: compatibility decomposition can itself
+ * produce uppercase letters (U+2121 TELEPHONE SIGN expands to "TEL", U+210C
+ * expands to "H"), and lowercasing first would leave those untouched so they
+ * could never match the lowercase allowlist.
+ *
+ * @param {string} s Text to normalize.
+ * @returns {string} The normalized text.
+ */
+export function normalizeTextForChatAllowlist(s) {
+  return s.normalize("NFKC").toLowerCase().replace(/\s+/g, " ").trim();
+}
+
+/**
+ * Break text into word tokens for allowlist matching.
+ *
+ * The input is normalized first, then cut at every run of characters that
+ * are not Unicode letters, Unicode numbers or "_". Empty pieces (produced by
+ * leading or trailing separators) are discarded.
+ *
+ * @param {string} s Text to tokenize.
+ * @returns {string[]} The tokens, in the order they appear in the text.
+ */
+export function tokenizeTextForChatAllowlist(s) {
+  const normalized = normalizeTextForChatAllowlist(s);
+  const pieces = normalized.split(/[^\p{L}\p{N}_]+/u);
+  return pieces.filter(piece => piece !== "");
+}
+
+/**
+ * Index phrases by how many tokens they contain.
+ *
+ * Each phrase is tokenized and stored as its tokens joined by single spaces.
+ * Phrases that yield no tokens are skipped.
+ *
+ * @param {Iterable<string>} phrases Phrases to index.
+ * @returns {Map<number, Set<string>>} Token count -> set of space-joined
+ *   phrases having that many tokens.
+ */
+export function buildChatAllowlist(phrases) {
+  const phrasesByTokenCount = new Map();
+  for (const phrase of phrases) {
+    const tokens = tokenizeTextForChatAllowlist(phrase);
+    if (tokens.length === 0) {
+      continue;
+    }
+    // Tokens never contain a space, so the joined key has exactly
+    // tokens.length space-separated words.
+    const tokenCount = tokens.length;
+    let bucket = phrasesByTokenCount.get(tokenCount);
+    if (bucket === undefined) {
+      bucket = new Set();
+      phrasesByTokenCount.set(tokenCount, bucket);
+    }
+    bucket.add(tokens.join(" "));
+  }
+  return phrasesByTokenCount;
+}
+
+/**
+ * Factory: returns a checker for "does the query contain any of the phrases
+ * as a run of whole tokens?".
+ *
+ * Phrases are indexed once by token count. For each query, every window of
+ * k consecutive tokens is looked up in the set of k-token phrases, so a
+ * phrase only matches on token boundaries and never inside a longer word.
+ *
+ * Results are memoized per normalized query. The memo is bounded: the checker
+ * is keyed by arbitrary user text and may live as long as the module, so an
+ * unbounded map would grow forever. Once full, the oldest entry is evicted.
+ *
+ * @param {Iterable<string>} phrases Phrases to look for.
+ * @param {number} [maxCacheEntries=1000] Maximum number of memoized queries;
+ *   0 or less disables memoization.
+ * @returns {function(string): boolean} Checker taking the query text.
+ */
+export function makeIsolatedPhraseChecker(phrases, maxCacheEntries = 1000) {
+  const byLen = buildChatAllowlist(phrases);
+  const cache = new Map();
+
+  // Store a result and hand it back, evicting the oldest entry when full.
+  // A Map iterates in insertion order, so its first key is the oldest one.
+  function remember(key, result) {
+    if (maxCacheEntries > 0) {
+      if (cache.size >= maxCacheEntries) {
+        cache.delete(cache.keys().next().value);
+      }
+      cache.set(key, result);
+    }
+    return result;
+  }
+
+  return function containsIsolatedPhrase(query) {
+    const qNorm = normalizeTextForChatAllowlist(query);
+    if (cache.has(qNorm)) {
+      return cache.get(qNorm);
+    }
+
+    // qNorm is already normalized, so split it directly instead of going
+    // through tokenizeTextForChatAllowlist and normalizing a second time.
+    const toks = qNorm.split(/[^\p{L}\p{N}_]+/u).filter(Boolean);
+    for (const [k, set] of byLen) {
+      for (let i = 0; i + k <= toks.length; i++) {
+        if (set.has(toks.slice(i, i + k).join(" "))) {
+          return remember(qNorm, true);
+        }
+      }
+    }
+    return remember(qNorm, false);
+  };
+}
+
+/**
+ * Intent Classifier Engine
+ */
+export const IntentClassifier = {
+  /**
+   * Engine factory, exposed as a property for testing purposes.
+   */
+  _createEngine: createEngine,
+
+  /**
+   * Forced-chat checker, built once when this object is created.
+   * Keeping it as a property makes it easy to stub in tests.
+   */
+  _isForcedChat: makeIsolatedPhraseChecker(FORCED_CHAT_PHRASES),
+
+  /**
+   * Gets the intent of a query.
+   *
+   * A query containing a forced-chat phrase is answered "chat" without
+   * running the model. Otherwise the text-classification model is run and
+   * "chat" is returned only when the first result is labelled "chat" with a
+   * score of at least 0.8; every other outcome is "search".
+   *
+   * Any error raised along the way is logged and then re-thrown.
+   *
+   * @param {string} query
+   * @returns {Promise<string>} "search" | "chat"
+   */
+  async getPromptIntent(query) {
+    const minChatScore = 0.8;
+    const engineOptions = {
+      featureId: "smart-intent",
+      modelId: "mozilla/mobilebert-query-intent-detection",
+      modelRevision: "v0.2.0",
+      taskName: "text-classification",
+    };
+
+    try {
+      const cleanedQuery = this._preprocessQuery(query);
+      if (this._isForcedChat(cleanedQuery)) {
+        return "chat";
+      }
+
+      const engine = await this._createEngine(engineOptions);
+      const resp = await engine.run({ args: [[cleanedQuery]] });
+      // resp example: [{ label: "chat", score: 0.95 }, { label: "search", score: 0.04 }]
+      const top = resp[0];
+      const isConfidentChat =
+        top.label.toLowerCase() === "chat" && top.score >= minChatScore;
+      return isConfidentChat ? "chat" : "search";
+    } catch (error) {
+      console.error("Error using intent detection model:", error);
+      throw error;
+    }
+  },
+
+  /**
+   * Strips every "?" from the query and trims surrounding whitespace.
+   *
+   * @param {string} query
+   * @returns {string} The cleaned query.
+   * @throws {TypeError} If query is not a string.
+   */
+  _preprocessQuery(query) {
+    if (typeof query === "string") {
+      return query.replace(/\?/g, "").trim();
+    }
+    throw new TypeError(
+      `Expected a string for query preprocessing, but received ${typeof query}`
+    );
+  },
+};
diff --git a/browser/components/aiwindow/models/moz.build b/browser/components/aiwindow/models/moz.build
@@ -5,9 +5,7 @@
with Files("**"):
BUG_COMPONENT = ("Core", "Machine Learning: General")
-MOZ_SRC_FILES += [
- "InsightsHistorySource.sys.mjs",
-]
+MOZ_SRC_FILES += ["InsightsHistorySource.sys.mjs", "IntentClassifier.sys.mjs"]
XPCSHELL_TESTS_MANIFESTS += [
"tests/xpcshell/xpcshell.toml",
diff --git a/browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js b/browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js
@@ -0,0 +1,303 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+const {
+ IntentClassifier,
+ normalizeTextForChatAllowlist,
+ tokenizeTextForChatAllowlist,
+ buildChatAllowlist,
+ makeIsolatedPhraseChecker,
+} = ChromeUtils.importESModule(
+ "moz-src:///browser/components/aiwindow/models/IntentClassifier.sys.mjs"
+);
+
+const { sinon } = ChromeUtils.importESModule(
+ "resource://testing-common/Sinon.sys.mjs"
+);
+
+add_task(async function test_getPromptIntent_basic() {
+  // Stand-in for the ML engine: a query mentioning any search-like keyword is
+  // reported as "search" with high confidence, anything else as "chat".
+  const searchKeywords = [
+    "search",
+    "find",
+    "look",
+    "query",
+    "locate",
+    "explore",
+  ];
+  const fakeEngine = {
+    run({ args: [[query]] }) {
+      const formattedPrompt = query.toLowerCase();
+      const isSearch = searchKeywords.some(keyword =>
+        formattedPrompt.includes(keyword)
+      );
+      const [winner, loser] = isSearch
+        ? ["search", "chat"]
+        : ["chat", "search"];
+
+      // Simulate model confidence scores
+      return [
+        { label: winner, score: 0.95 },
+        { label: loser, score: 0.05 },
+      ];
+    },
+  };
+
+  const cases = [
+    { prompt: "please search for news on firefox", expected: "search" },
+    // case-insensitive
+    { prompt: "Can you FIND me the docs for PageAssist?", expected: "search" },
+    { prompt: "look up the best pizza in SF", expected: "search" },
+    { prompt: "hello there, how are you?", expected: "chat" },
+    { prompt: "tell me a joke", expected: "chat" },
+  ];
+
+  const sb = sinon.createSandbox();
+  try {
+    sb.stub(IntentClassifier, "_createEngine").resolves(fakeEngine);
+
+    for (const testCase of cases) {
+      const { prompt, expected } = testCase;
+      Assert.equal(
+        await IntentClassifier.getPromptIntent(prompt),
+        expected,
+        `getPromptIntent("${prompt}") should return "${expected}"`
+      );
+    }
+  } finally {
+    sb.restore();
+  }
+});
+
+add_task(async function test_preprocessQuery_removes_question_marks() {
+  // [raw input, expected output] pairs run through the classifier's real helper
+  const expectations = [
+    ["hello?", "hello"],
+    ["?prompt", "prompt"],
+    ["multiple???", "multiple"],
+    ["mid?dle", "middle"],
+    ["question? ", "question"],
+    [" no? spaces? ", "no spaces"],
+    ["???", ""],
+    ["clean input", "clean input"],
+  ];
+
+  expectations.forEach(([input, expected]) => {
+    const result = IntentClassifier._preprocessQuery(input);
+    Assert.equal(
+      result,
+      expected,
+      `Expected "${input}" to preprocess to "${expected}", got "${result}"`
+    );
+  });
+});
+
+add_task(function test_normalizeTextForChatAllowlist_basic() {
+  // Lowercasing + trimming + collapsing runs of internal spaces. The input
+  // deliberately carries repeated spaces so the collapse is really exercised.
+  Assert.equal(
+    normalizeTextForChatAllowlist("  HeLLo   There  "),
+    "hello there",
+    "Should lowercase, trim, and collapse spaces"
+  );
+
+  // NFKC normalization: compatibility forms → canonical.
+  // The input is fullwidth "TEST", an ideographic space (U+3000) and fullwidth
+  // "123". It is written with \u escapes so that no editor or tool can
+  // silently fold it to plain ASCII and turn this into a no-op check.
+  Assert.equal(
+    normalizeTextForChatAllowlist(
+      "\uFF34\uFF25\uFF33\uFF34\u3000\uFF11\uFF12\uFF13"
+    ),
+    "test 123",
+    "Should NFKC-normalize fullwidth letters/digits"
+  );
+
+  // Multiple whitespace kinds (NBSP, tabs, newlines) collapse
+  Assert.equal(
+    normalizeTextForChatAllowlist("a\u00A0b\tc\nd"),
+    "a b c d",
+    "Should collapse all whitespace kinds to single spaces"
+  );
+});
+
+add_task(function test_tokenizeTextForChatAllowlist_unicode_and_boundaries() {
+  const scenarios = [
+    {
+      // Punctuation separates tokens; letters, digits and "_" stay together
+      text: "hello, world! 42_times",
+      tokens: ["hello", "world", "42_times"],
+      why: "Should split on punctuation and keep underscores",
+    },
+    {
+      // Non-ASCII letters count as word characters (\p{L})
+      text: "mañana—café!",
+      tokens: ["mañana", "café"],
+      why: "Should keep Unicode letters and split on punctuation (em dash, bang)",
+    },
+    {
+      // An apostrophe is not a word character, so it splits the contraction
+      text: "what's up",
+      tokens: ["what", "s", "up"],
+      why: "Apostrophes are separators, so tokens split around them",
+    },
+  ];
+
+  for (const { text, tokens, why } of scenarios) {
+    Assert.deepEqual(tokenizeTextForChatAllowlist(text), tokens, why);
+  }
+});
+
+add_task(function test_buildChatAllowlist_grouping_and_normalization() {
+  const byTokenCount = buildChatAllowlist([
+    "sup",
+    "hi there", // two tokens
+    "what's up", // the apostrophe splits it: "what s up", three tokens
+    " foo bar ", // surrounding whitespace is dropped
+    "", // yields no tokens, so it is ignored
+    "___", // underscores are word characters: a single token
+  ]);
+
+  // A bucket must exist for each of the token counts 1, 2 and 3
+  const expectedBuckets = [
+    [1, "Should have set for single-token phrases"],
+    [2, "Should have set for two-token phrases"],
+    [3, "Should have set for three-token phrases"],
+  ];
+  for (const [tokenCount, message] of expectedBuckets) {
+    Assert.ok(byTokenCount.has(tokenCount), message);
+  }
+
+  // Bucket contents, keyed by token count
+  const singles = byTokenCount.get(1);
+  Assert.ok(singles.has("sup"), "Single-token set should contain 'sup'");
+  Assert.ok(singles.has("___"), "Single-token set should contain '___'");
+
+  const pairs = byTokenCount.get(2);
+  Assert.ok(pairs.has("hi there"), "Two-token set should contain 'hi there'");
+  Assert.ok(
+    pairs.has("foo bar"),
+    "Two-token set should contain normalized 'foo bar'"
+  );
+
+  const triples = byTokenCount.get(3);
+  Assert.ok(
+    triples.has("what s up"),
+    "Three-token set should contain 'what s up'"
+  );
+
+  // The empty phrase must not have produced a zero-length or empty bucket
+  byTokenCount.forEach((bucket, tokenCount) => {
+    Assert.ok(
+      tokenCount > 0 && bucket.size >= 1,
+      "No empty keys, each set has at least one entry"
+    );
+  });
+});
+
+add_task(function test_isolated_phrase_checker_single_word_boundaries() {
+  const isForced = makeIsolatedPhraseChecker(["sup", "hello", "___"]);
+
+  // Queries where a phrase stands alone as a token: must match
+  const shouldMatch = [
+    ["sup bro", "Should match 'sup' as an isolated token at start"],
+    ["hey, hello there", "Should match 'hello' surrounded by punctuation"],
+    ["foo ___ bar", "Should match token with underscores"],
+  ];
+  for (const [query, message] of shouldMatch) {
+    Assert.ok(isForced(query), message);
+  }
+
+  // Queries where a phrase is only part of a longer word: must not match
+  const shouldNotMatch = [
+    ["supposingly, this should not match", "No partial-word match for 'sup'"],
+    ["supper time", "No partial-word match inside 'supper'"],
+    ["shelloworld", "No partial-word match for 'hello'"],
+  ];
+  for (const [query, message] of shouldNotMatch) {
+    Assert.ok(!isForced(query), message);
+  }
+});
+
+add_task(function test_isolated_phrase_checker_multiword_and_punctuation() {
+  // Apostrophes split tokens, so "what's up" is stored as "what s up"
+  const isForced = makeIsolatedPhraseChecker(["hi there", "what's up"]);
+
+  // Punctuation between or around the words still leaves the same tokens
+  const shouldMatch = [
+    ["hi—there!", "Em dash between words should match 'hi there'"],
+    ["well, hi there!!", "Punctuation around phrase should match"],
+    [
+      "so, what’s up today?",
+      "Curly apostrophe splits to tokens; should match 'what s up'",
+    ],
+  ];
+  for (const [query, message] of shouldMatch) {
+    Assert.ok(isForced(query), message);
+  }
+
+  // Words glued together form different tokens and must not match
+  const shouldNotMatch = [
+    ["hithere", "Concatenated words should not match 'hi there'"],
+    ["whatssup", "Should not match 'what s up' without separators"],
+  ];
+  for (const [query, message] of shouldNotMatch) {
+    Assert.ok(!isForced(query), message);
+  }
+});
+
+add_task(function test_isolated_phrase_checker_spacing_and_unicode_norm() {
+  const phrases = ["good morning", "hello"];
+  const isForced = makeIsolatedPhraseChecker(phrases);
+
+  // Multiple spaces collapse. The input really contains repeated spaces so
+  // the collapse is exercised rather than assumed.
+  Assert.ok(
+    isForced("good   morning   everyone"),
+    "Multiple spaces between tokens should still match"
+  );
+
+  // Case folding and surrounding whitespace
+  Assert.ok(
+    isForced("  HELLO  "),
+    "Case and surrounding spaces should normalize and match 'hello'"
+  );
+
+  // Fullwidth / NFKC normalization: fullwidth "HELLO" written with \u escapes
+  // so that no editor or tool can silently fold it to plain ASCII.
+  Assert.ok(
+    isForced("\uFF28\uFF25\uFF2C\uFF2C\uFF2F"),
+    "Fullwidth letters should NFKC-normalize and match 'hello'"
+  );
+
+  // Non-breaking spaces and tabs
+  Assert.ok(
+    isForced("good\u00A0morning\tteam"),
+    "NBSP and tabs normalize and match"
+  );
+});
+
+add_task(function test_isolated_phrase_checker_no_match_cases() {
+  const isForced = makeIsolatedPhraseChecker(["hi there", "sup"]);
+
+  // None of these contains an allowlisted phrase as whole tokens
+  const shouldNotMatch = [
+    ["", "Empty string should not match"],
+    ["nothing to see here", "Unrelated text should not match"],
+    ["support", "Partial token with 'sup' prefix should not match"],
+  ];
+
+  shouldNotMatch.forEach(([query, message]) => {
+    Assert.ok(!isForced(query), message);
+  });
+});
+
+add_task(function test_isolated_phrase_checker_caching_stability() {
+  const isForced = makeIsolatedPhraseChecker(["hello", "hi there"]);
+
+  // Asking twice about the same text must give the same answer; the second
+  // call is expected to be served from the checker's cache.
+  const repeatedQuery = "Hello there!";
+  const initialAnswer = isForced(repeatedQuery);
+  const repeatedAnswer = isForced(repeatedQuery);
+  Assert.equal(
+    initialAnswer,
+    repeatedAnswer,
+    "Same query should yield identical result across calls (cache-stable)"
+  );
+
+  // Padding the text with whitespace must not change the answer
+  const paddedAnswer = isForced(" hello there ");
+  const plainAnswer = isForced("hello there");
+  Assert.equal(
+    paddedAnswer,
+    plainAnswer,
+    "Whitespace variations should not affect result"
+  );
+});
diff --git a/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml b/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml
@@ -5,3 +5,5 @@ firefox-appdir = "browser"
support-files = []
["test_InsightsHistorySource.js"]
+
+["test_intent_classifier.js"]