[ tor-browser ].git.dasho

commit 19a8af49bf9706cf23e5f5735e8b17ce132e6e38
parent 5927356099fe73690a3f1bef2636e80a867d4b88
Author: agoloman <agoloman@mozilla.com>
Date:   Sat, 22 Nov 2025 00:52:32 +0200

Revert "Bug 2000945 - Move query intent detection to AI-window r=Mardak,ai-models-reviewers" for causing bc failures @browser_all_files_referenced.js.

This reverts commit 22284e85a1de490c37bb63364be4a3cb03c6657a.

Diffstat:
D browser/components/aiwindow/models/IntentClassifier.sys.mjs  | 235 -------------------------------------------------------------------------------
M browser/components/aiwindow/models/moz.build  | 6 ------
D browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js  | 303 -------------------------------------------------------------------------------
D browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml  | 5 -----

4 files changed, 0 insertions(+), 549 deletions(-)
diff --git a/browser/components/aiwindow/models/IntentClassifier.sys.mjs b/browser/components/aiwindow/models/IntentClassifier.sys.mjs
@@ -1,235 +0,0 @@
-/**
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-import { createEngine } from "chrome://global/content/ml/EngineProcess.sys.mjs";
-
-const FORCED_CHAT_PHRASES = [
-  "amuse me",
-  "are we alone",
-  "are you alive",
-  "are you gpt",
-  "are you human",
-  "are you real",
-  "bark like dog",
-  "cheer me up",
-  "comfort me",
-  "count numbers",
-  "curse me",
-  "do aliens exist",
-  "do we matter",
-  "do you dream",
-  "do you think",
-  "does fate exist",
-  "dream meaning",
-  "drop wisdom",
-  "encourage me",
-  "entertain me",
-  "explain yourself",
-  "flip coin",
-  "give blessing",
-  "give wisdom",
-  "good morning",
-  "good night",
-  "guess number",
-  "hallo",
-  "hello",
-  "hey",
-  "hi",
-  "hola",
-  "how are you",
-  "inspire me",
-  "invent a word",
-  "invent holiday",
-  "invent joke",
-  "is god real",
-  "life advice",
-  "life purpose",
-  "list animals",
-  "list capitals",
-  "list colors",
-  "list countries",
-  "list elements",
-  "list fruits",
-  "list metals",
-  "list oceans",
-  "list planets",
-  "list shapes",
-  "meaning of life",
-  "meow like cat",
-  "motivate me",
-  "now you are",
-  "play a game",
-  "pretend alien",
-  "pretend child",
-  "pretend detective",
-  "pretend ghost",
-  "pretend pirate",
-  "pretend robot",
-  "pretend superhero",
-  "pretend teacher",
-  "pretend wizard",
-  "random fact",
-  "random number",
-  "roll dice",
-  "goodbye",
-  "simulate chat",
-  "simulate future",
-  "simulate past",
-  "sing like robot",
-  "sing lullaby",
-  "sing rap",
-  "sup",
-  "surprise me",
-  "teach me",
-  "tell bedtime story",
-  "tell fortune",
-  "tell joke",
-  "tell prophecy",
-  "tell riddle",
-  "tell story",
-  "what is art",
-  "what is beauty",
-  "what is death",
-  "what is freedom",
-  "what is justice",
-  "what is love",
-  "what is mind",
-  "what is reality",
-  "what is right",
-  "what is self",
-  "what is soul",
-  "what is time",
-  "what is truth",
-  "what is wrong",
-  "what model are you",
-  "what version",
-  "what’s up",
-  "which model are you",
-  "who am i",
-  "who are you",
-  "who made you",
-  "why are we",
-  "write a poem",
-  "write a song",
-  "write haiku",
-  "write quote",
-  "your model is",
-];
-
-export function normalizeTextForChatAllowlist(s) {
-  return s.toLowerCase().normalize("NFKC").replace(/\s+/g, " ").trim();
-}
-
-// Split on non-word chars; letters/numbers/_ are "word" characters
-export function tokenizeTextForChatAllowlist(s) {
-  return normalizeTextForChatAllowlist(s)
-    .split(/[^\p{L}\p{N}_]+/u)
-    .filter(Boolean);
-}
-
-export function buildChatAllowlist(phrases) {
-  const byLen = new Map(); // len -> Set("tok tok ...")
-  for (const p of phrases) {
-    const key = tokenizeTextForChatAllowlist(p).join(" ");
-    if (!key) {
-      continue;
-    }
-    const k = key.split(" ").length;
-    if (!byLen.has(k)) {
-      byLen.set(k, new Set());
-    }
-    byLen.get(k).add(key);
-  }
-  return byLen;
-}
-
-// Factory: returns a fast checker for “does query contain any isolated phrase?”
-export function makeIsolatedPhraseChecker(phrases) {
-  const byLen = buildChatAllowlist(phrases);
-  const cache = new Map();
-
-  return function containsIsolatedPhrase(query) {
-    const qNorm = normalizeTextForChatAllowlist(query);
-    if (cache.has(qNorm)) {
-      return cache.get(qNorm);
-    }
-
-    const toks = qNorm.split(/[^\p{L}\p{N}_]+/u).filter(Boolean);
-    for (const [k, set] of byLen) {
-      for (let i = 0; i + k <= toks.length; i++) {
-        if (set.has(toks.slice(i, i + k).join(" "))) {
-          cache.set(qNorm, true);
-          return true;
-        }
-      }
-    }
-    cache.set(qNorm, false);
-    return false;
-  };
-}
-
-/**
- * Intent Classifier Engine
- */
-export const IntentClassifier = {
-  /**
-   * Exposing createEngine for testing purposes.
-   */
-
-  _createEngine: createEngine,
-
-  /**
-   * Initialize forced-chat checker at module load.
-   * Keeping it as a property ensures easy stubbing in tests.
-   */
-
-  _isForcedChat: makeIsolatedPhraseChecker(FORCED_CHAT_PHRASES),
-
-  /**
-   * Gets the intent of the prompt using a text classification model.
-   *
-   * @param {string} prompt
-   * @returns {string} "search" | "chat"
-   */
-
-  async getPromptIntent(query) {
-    try {
-      const cleanedQuery = this._preprocessQuery(query);
-      if (this._isForcedChat(cleanedQuery)) {
-        return "chat";
-      }
-      const engine = await this._createEngine({
-        featureId: "smart-intent",
-        modelId: "mozilla/mobilebert-query-intent-detection",
-        modelRevision: "v0.2.0",
-        taskName: "text-classification",
-      });
-      const threshold = 0.8;
-      const resp = await engine.run({ args: [[cleanedQuery]] });
-      // resp example: [{ label: "chat", score: 0.95 }, { label: "search", score: 0.04 }]
-      if (
-        resp[0].label.toLowerCase() === "chat" &&
-        resp[0].score >= threshold
-      ) {
-        return "chat";
-      }
-      return "search";
-    } catch (error) {
-      console.error("Error using intent detection model:", error);
-      throw error;
-    }
-  },
-
-  // Helper function for preprocessing text input
-  _preprocessQuery(query) {
-    if (typeof query !== "string") {
-      throw new TypeError(
-        `Expected a string for query preprocessing, but received ${typeof query}`
-      );
-    }
-    return query.replace(/\?/g, "").trim();
-  },
-};
diff --git a/browser/components/aiwindow/models/moz.build b/browser/components/aiwindow/models/moz.build
@@ -4,9 +4,3 @@
 
 with Files("**"):
     BUG_COMPONENT = ("Core", "Machine Learning: General")
-
-MOZ_SRC_FILES += [
-    "IntentClassifier.sys.mjs",
-]
-
-XPCSHELL_TESTS_MANIFESTS += ["tests/xpcshell/xpcshell.toml"]
diff --git a/browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js b/browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js
@@ -1,303 +0,0 @@
-/* Any copyright is dedicated to the Public Domain.
- * http://creativecommons.org/publicdomain/zero/1.0/ */
-
-const {
-  IntentClassifier,
-  normalizeTextForChatAllowlist,
-  tokenizeTextForChatAllowlist,
-  buildChatAllowlist,
-  makeIsolatedPhraseChecker,
-} = ChromeUtils.importESModule(
-  "moz-src:///browser/components/aiwindow/models/IntentClassifier.sys.mjs"
-);
-
-const { sinon } = ChromeUtils.importESModule(
-  "resource://testing-common/Sinon.sys.mjs"
-);
-
-add_task(async function test_getPromptIntent_basic() {
-  const sb = sinon.createSandbox();
-  try {
-    const cases = [
-      { prompt: "please search for news on firefox", expected: "search" },
-      {
-        prompt: "Can you FIND me the docs for PageAssist?",
-        expected: "search",
-      }, // case-insensitive
-      { prompt: "look up the best pizza in SF", expected: "search" },
-      { prompt: "hello there, how are you?", expected: "chat" },
-      { prompt: "tell me a joke", expected: "chat" },
-    ];
-
-    const fakeEngine = {
-      run({ args: [[query]] }) {
-        const searchKeywords = [
-          "search",
-          "find",
-          "look",
-          "query",
-          "locate",
-          "explore",
-        ];
-        const formattedPrompt = query.toLowerCase();
-        const isSearch = searchKeywords.some(keyword =>
-          formattedPrompt.includes(keyword)
-        );
-
-        // Simulate model confidence scores
-        if (isSearch) {
-          return [
-            { label: "search", score: 0.95 },
-            { label: "chat", score: 0.05 },
-          ];
-        }
-        return [
-          { label: "chat", score: 0.95 },
-          { label: "search", score: 0.05 },
-        ];
-      },
-    };
-
-    sb.stub(IntentClassifier, "_createEngine").resolves(fakeEngine);
-
-    for (const { prompt, expected } of cases) {
-      const intent = await IntentClassifier.getPromptIntent(prompt);
-      Assert.equal(
-        intent,
-        expected,
-        `getPromptIntent("${prompt}") should return "${expected}"`
-      );
-    }
-  } finally {
-    sb.restore();
-  }
-});
-
-add_task(async function test_preprocessQuery_removes_question_marks() {
-  // Call the real helper on the classifier
-  const cases = [
-    { input: "hello?", expected: "hello" },
-    { input: "?prompt", expected: "prompt" },
-    { input: "multiple???", expected: "multiple" },
-    { input: "mid?dle", expected: "middle" },
-    { input: "question? ", expected: "question" },
-    { input: " no?  spaces? ", expected: "no  spaces" },
-    { input: "???", expected: "" },
-    { input: "clean input", expected: "clean input" },
-  ];
-
-  for (const { input, expected } of cases) {
-    const result = IntentClassifier._preprocessQuery(input);
-    Assert.equal(
-      result,
-      expected,
-      `Expected "${input}" to preprocess to "${expected}", got "${result}"`
-    );
-  }
-});
-
-add_task(function test_normalizeTextForChatAllowlist_basic() {
-  // lowercasing + trimming + collapsing internal spaces
-  Assert.equal(
-    normalizeTextForChatAllowlist("  HeLLo   There  "),
-    "hello there",
-    "Should lowercase, trim, and collapse spaces"
-  );
-
-  // NFKC normalization: compatibility forms → canonical
-  // Fullwidth characters normalize: e.g., 'ＴＥＳＴ' → 'test'
-  Assert.equal(
-    normalizeTextForChatAllowlist("ＴＥＳＴ  １２３"),
-    "test 123",
-    "Should NFKC-normalize fullwidth letters/digits"
-  );
-
-  // Multiple whitespace kinds (NBSP, tabs, newlines) collapse
-  Assert.equal(
-    normalizeTextForChatAllowlist("a\u00A0b\tc\nd"),
-    "a b c d",
-    "Should collapse all whitespace kinds to single spaces"
-  );
-});
-
-add_task(function test_tokenizeTextForChatAllowlist_unicode_and_boundaries() {
-  // Splits on non-word chars, keeps letters/digits/underscore
-  Assert.deepEqual(
-    tokenizeTextForChatAllowlist("hello, world! 42_times"),
-    ["hello", "world", "42_times"],
-    "Should split on punctuation and keep underscores"
-  );
-
-  // Unicode letters should be treated as word chars (\p{L})
-  Assert.deepEqual(
-    tokenizeTextForChatAllowlist("mañana—café!"),
-    ["mañana", "café"],
-    "Should keep Unicode letters and split on punctuation (em dash, bang)"
-  );
-
-  // Apostrophes split (non-word), as intended
-  Assert.deepEqual(
-    tokenizeTextForChatAllowlist("what's up"),
-    ["what", "s", "up"],
-    "Apostrophes are separators, so tokens split around them"
-  );
-});
-
-add_task(function test_buildChatAllowlist_grouping_and_normalization() {
-  const phrases = [
-    "sup",
-    "hi there", // 2 tokens
-    "what's up", // becomes "what s up" (3 tokens)
-    " foo   bar  ", // leading/trailing + multiple spaces
-    "", // empty should be skipped
-    "___", // token of underscores counts as 1 token
-  ];
-  const sets = buildChatAllowlist(phrases);
-
-  // Expect keys for lengths: 1, 2, 3
-  Assert.ok(sets.has(1), "Should have set for single-token phrases");
-  Assert.ok(sets.has(2), "Should have set for two-token phrases");
-  Assert.ok(sets.has(3), "Should have set for three-token phrases");
-
-  // 1-token set contains: "sup", "___"
-  Assert.ok(sets.get(1).has("sup"), "Single-token set should contain 'sup'");
-  Assert.ok(sets.get(1).has("___"), "Single-token set should contain '___'");
-
-  // 2-token set contains normalized "hi there" and "foo bar"
-  Assert.ok(
-    sets.get(2).has("hi there"),
-    "Two-token set should contain 'hi there'"
-  );
-  Assert.ok(
-    sets.get(2).has("foo bar"),
-    "Two-token set should contain normalized 'foo bar'"
-  );
-
-  // 3-token set contains "what s up" (note apostrophe split)
-  Assert.ok(
-    sets.get(3).has("what s up"),
-    "Three-token set should contain 'what s up'"
-  );
-
-  // Empty phrase skipped: nothing added for length 0
-  for (const [k, set] of sets) {
-    Assert.ok(
-      k > 0 && set.size >= 1,
-      "No empty keys, each set has at least one entry"
-    );
-  }
-});
-
-add_task(function test_isolated_phrase_checker_single_word_boundaries() {
-  const phrases = ["sup", "hello", "___"];
-  const isForced = makeIsolatedPhraseChecker(phrases);
-
-  // Positive: exact token present
-  Assert.ok(
-    isForced("sup bro"),
-    "Should match 'sup' as an isolated token at start"
-  );
-  Assert.ok(
-    isForced("hey, hello there"),
-    "Should match 'hello' surrounded by punctuation"
-  );
-  Assert.ok(isForced("foo ___ bar"), "Should match token with underscores");
-
-  // Negative: partial-word should NOT match
-  Assert.ok(
-    !isForced("supposingly, this should not match"),
-    "No partial-word match for 'sup'"
-  );
-  Assert.ok(!isForced("supper time"), "No partial-word match inside 'supper'");
-  Assert.ok(!isForced("shelloworld"), "No partial-word match for 'hello'");
-});
-
-add_task(function test_isolated_phrase_checker_multiword_and_punctuation() {
-  // Multiword phrases; apostrophes become token splits -> "what's up" => "what s up"
-  const phrases = ["hi there", "what's up"];
-  const isForced = makeIsolatedPhraseChecker(phrases);
-
-  // Positive: punctuation between words should still match (token split)
-  Assert.ok(
-    isForced("hi—there!"),
-    "Em dash between words should match 'hi there'"
-  );
-  Assert.ok(
-    isForced("well, hi there!!"),
-    "Punctuation around phrase should match"
-  );
-  Assert.ok(
-    isForced("so, what’s up today?"),
-    "Curly apostrophe splits to tokens; should match 'what s up'"
-  );
-
-  // Negative: glued words should not match
-  Assert.ok(
-    !isForced("hithere"),
-    "Concatenated words should not match 'hi there'"
-  );
-  Assert.ok(
-    !isForced("whatssup"),
-    "Should not match 'what s up' without separators"
-  );
-});
-
-add_task(function test_isolated_phrase_checker_spacing_and_unicode_norm() {
-  const phrases = ["good morning", "hello"];
-  const isForced = makeIsolatedPhraseChecker(phrases);
-
-  // Multiple spaces collapse
-  Assert.ok(
-    isForced("good     morning everyone"),
-    "Multiple spaces between tokens should still match"
-  );
-
-  // Fullwidth / NFKC normalization (ＴＥＳＴ) and basic usage
-  Assert.ok(
-    isForced("  HELLO  "),
-    "Case and surrounding spaces should normalize and match 'hello'"
-  );
-
-  // Non-breaking spaces and tabs
-  Assert.ok(
-    isForced("good\u00A0morning\tteam"),
-    "NBSP and tabs normalize and match"
-  );
-});
-
-add_task(function test_isolated_phrase_checker_no_match_cases() {
-  const phrases = ["hi there", "sup"];
-  const isForced = makeIsolatedPhraseChecker(phrases);
-
-  Assert.ok(!isForced(""), "Empty string should not match");
-  Assert.ok(
-    !isForced("nothing to see here"),
-    "Unrelated text should not match"
-  );
-  Assert.ok(
-    !isForced("support"),
-    "Partial token with 'sup' prefix should not match"
-  );
-});
-
-add_task(function test_isolated_phrase_checker_caching_stability() {
-  const phrases = ["hello", "hi there"];
-  const isForced = makeIsolatedPhraseChecker(phrases);
-
-  // Repeated calls with the same input should return identical results (cache sanity)
-  const q1 = "Hello there!";
-  const first = isForced(q1);
-  const second = isForced(q1);
-  Assert.equal(
-    first,
-    second,
-    "Same query should yield identical result across calls (cache-stable)"
-  );
-
-  // Different whitespace should normalize to the same outcome
-  Assert.equal(
-    isForced("  hello   there "),
-    isForced("hello there"),
-    "Whitespace variations should not affect result"
-  );
-});
diff --git a/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml b/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml
@@ -1,5 +0,0 @@
-[DEFAULT]
-run-if = ["os != 'android'"]
-firefox-appdir = "browser"
-
-["test_intent_classifier.js"]

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log | Files | Refs | README | LICENSE

D	browser/components/aiwindow/models/IntentClassifier.sys.mjs	|	235	-------------------------------------------------------------------------------
M	browser/components/aiwindow/models/moz.build	|	6	------
D	browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js	|	303	-------------------------------------------------------------------------------
D	browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml	|	5	-----