tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

test_intent_classifier.js (9256B)


      1 /* Any copyright is dedicated to the Public Domain.
      2 * http://creativecommons.org/publicdomain/zero/1.0/ */
      3 
      4 const {
      5  IntentClassifier,
      6  normalizeTextForChatAllowlist,
      7  tokenizeTextForChatAllowlist,
      8  buildChatAllowlist,
      9  makeIsolatedPhraseChecker,
     10 } = ChromeUtils.importESModule(
     11  "moz-src:///browser/components/aiwindow/models/IntentClassifier.sys.mjs"
     12 );
     13 
     14 const { sinon } = ChromeUtils.importESModule(
     15  "resource://testing-common/Sinon.sys.mjs"
     16 );
     17 
     // Exercises IntentClassifier.getPromptIntent() against a stubbed engine so no
     // real model is created. The fake engine lists "search" first (score 0.95)
     // when the query contains one of a few search-like keywords, and lists "chat"
     // first otherwise; each prompt must come back with the expected label.
     add_task(async function test_getPromptIntent_basic() {
       const searchKeywords = [
         "search",
         "find",
         "look",
         "query",
         "locate",
         "explore",
       ];

       // Builds a two-entry result mimicking model confidence scores.
       const rank = (winner, loser) => [
         { label: winner, score: 0.95 },
         { label: loser, score: 0.05 },
       ];

       const fakeEngine = {
         run({ args }) {
           // The query text is the first element of the first argument list.
           const [[text]] = args;
           const lowered = text.toLowerCase();
           const looksLikeSearch = searchKeywords.some(word =>
             lowered.includes(word)
           );
           return looksLikeSearch ? rank("search", "chat") : rank("chat", "search");
         },
       };

       const cases = [
         { prompt: "please search for news on firefox", expected: "search" },
         // Keyword matching is case-insensitive.
         {
           prompt: "Can you FIND me the docs for PageAssist?",
           expected: "search",
         },
         { prompt: "look up the best pizza in SF", expected: "search" },
         { prompt: "hello there, how are you?", expected: "chat" },
         { prompt: "tell me a joke", expected: "chat" },
       ];

       const sb = sinon.createSandbox();
       try {
         sb.stub(IntentClassifier, "_createEngine").resolves(fakeEngine);

         for (const testCase of cases) {
           const { prompt, expected } = testCase;
           const intent = await IntentClassifier.getPromptIntent(prompt);
           Assert.equal(
             intent,
             expected,
             `getPromptIntent("${prompt}") should return "${expected}"`
           );
         }
       } finally {
         // Always undo the stub, even when an assertion throws.
         sb.restore();
       }
     });
     75 
     // Runs the real IntentClassifier._preprocessQuery() helper over a table of
     // [input, expected] pairs. The expectations encode: every "?" is removed,
     // leading/trailing whitespace is trimmed, inner whitespace is left as-is.
     add_task(async function test_preprocessQuery_removes_question_marks() {
       const table = [
         ["hello?", "hello"],
         ["?prompt", "prompt"],
         ["multiple???", "multiple"],
         ["mid?dle", "middle"],
         ["question? ", "question"],
         [" no?  spaces? ", "no  spaces"],
         ["???", ""],
         ["clean input", "clean input"],
       ];

       for (const [input, expected] of table) {
         const result = IntentClassifier._preprocessQuery(input);
         Assert.equal(
           result,
           expected,
           `Expected "${input}" to preprocess to "${expected}", got "${result}"`
         );
       }
     });
     98 
     // Checks normalizeTextForChatAllowlist(): lowercasing, trimming, whitespace
     // collapsing, and NFKC folding of compatibility characters.
     add_task(function test_normalizeTextForChatAllowlist_basic() {
       // lowercasing + trimming + collapsing internal spaces
       Assert.equal(
         normalizeTextForChatAllowlist("  HeLLo   There  "),
         "hello there",
         "Should lowercase, trim, and collapse spaces"
       );

       // NFKC normalization: compatibility forms → canonical.
       // The input is FULLWIDTH "TEST" (U+FF34 U+FF25 U+FF33 U+FF34), two ASCII
       // spaces, then FULLWIDTH "123" (U+FF11..U+FF13). It is written with \u
       // escapes so the fullwidth code points cannot be silently folded to
       // ASCII by an editor or re-encoding step, which would leave this case
       // testing nothing beyond lowercasing.
       Assert.equal(
         normalizeTextForChatAllowlist(
           "\uFF34\uFF25\uFF33\uFF34  \uFF11\uFF12\uFF13"
         ),
         "test 123",
         "Should NFKC-normalize fullwidth letters/digits"
       );

       // Multiple whitespace kinds (NBSP, tabs, newlines) collapse
       Assert.equal(
         normalizeTextForChatAllowlist("a\u00A0b\tc\nd"),
         "a b c d",
         "Should collapse all whitespace kinds to single spaces"
       );
     });
    122 
    // Table-driven check of tokenizeTextForChatAllowlist(): each row gives the
    // raw text, the token list the test expects, and the assertion message.
    add_task(function test_tokenizeTextForChatAllowlist_unicode_and_boundaries() {
      const rows = [
        {
          // Punctuation and spaces separate tokens; letters, digits and
          // underscores stay inside a token.
          text: "hello, world! 42_times",
          tokens: ["hello", "world", "42_times"],
          message: "Should split on punctuation and keep underscores",
        },
        {
          // Non-ASCII letters are expected to count as word characters.
          text: "mañana—café!",
          tokens: ["mañana", "café"],
          message:
            "Should keep Unicode letters and split on punctuation (em dash, bang)",
        },
        {
          // An apostrophe is expected to act as a separator.
          text: "what's up",
          tokens: ["what", "s", "up"],
          message: "Apostrophes are separators, so tokens split around them",
        },
      ];

      for (const { text, tokens, message } of rows) {
        Assert.deepEqual(tokenizeTextForChatAllowlist(text), tokens, message);
      }
    });
    145 
    // Checks that buildChatAllowlist() returns a keyed collection (queried via
    // has/get and iterated as [key, set] pairs) that groups normalized phrases
    // by their token count.
    add_task(function test_buildChatAllowlist_grouping_and_normalization() {
      const sets = buildChatAllowlist([
        "sup",
        "hi there", // 2 tokens
        "what's up", // expected to be stored as "what s up" (3 tokens)
        " foo   bar  ", // padded, with repeated inner spaces
        "", // empty phrase: should contribute nothing
        "___", // underscores alone count as a single token
      ]);

      // A group must exist for each of the token counts 1, 2 and 3.
      const groupChecks = [
        [1, "Should have set for single-token phrases"],
        [2, "Should have set for two-token phrases"],
        [3, "Should have set for three-token phrases"],
      ];
      for (const [tokenCount, message] of groupChecks) {
        Assert.ok(sets.has(tokenCount), message);
      }

      // Each phrase must appear, in normalized form, in the matching group.
      const membershipChecks = [
        [1, "sup", "Single-token set should contain 'sup'"],
        [1, "___", "Single-token set should contain '___'"],
        [2, "hi there", "Two-token set should contain 'hi there'"],
        [2, "foo bar", "Two-token set should contain normalized 'foo bar'"],
        [3, "what s up", "Three-token set should contain 'what s up'"],
      ];
      for (const [tokenCount, phrase, message] of membershipChecks) {
        Assert.ok(sets.get(tokenCount).has(phrase), message);
      }

      // The empty phrase must not have produced a zero-length group, and no
      // group may be empty.
      for (const [tokenCount, group] of sets) {
        Assert.ok(
          tokenCount > 0 && group.size >= 1,
          "No empty keys, each set has at least one entry"
        );
      }
    });
    190 
    // Single-word phrases must match only as whole tokens: present as an
    // isolated token => match, embedded inside a longer word => no match.
    add_task(function test_isolated_phrase_checker_single_word_boundaries() {
      const isForced = makeIsolatedPhraseChecker(["sup", "hello", "___"]);

      // Queries in which one of the phrases appears as a standalone token.
      const shouldMatch = [
        ["sup bro", "Should match 'sup' as an isolated token at start"],
        ["hey, hello there", "Should match 'hello' surrounded by punctuation"],
        ["foo ___ bar", "Should match token with underscores"],
      ];
      for (const [query, message] of shouldMatch) {
        Assert.ok(isForced(query), message);
      }

      // Queries in which a phrase only occurs as part of a longer word.
      const shouldNotMatch = [
        ["supposingly, this should not match", "No partial-word match for 'sup'"],
        ["supper time", "No partial-word match inside 'supper'"],
        ["shelloworld", "No partial-word match for 'hello'"],
      ];
      for (const [query, message] of shouldNotMatch) {
        Assert.ok(!isForced(query), message);
      }
    });
    214 
    // Multi-word phrases: punctuation between or around the words must still
    // match, while the same letters glued together must not. The phrase
    // "what's up" uses a straight apostrophe and the query a curly one; both
    // are expected to split into the tokens "what s up".
    add_task(function test_isolated_phrase_checker_multiword_and_punctuation() {
      const isForced = makeIsolatedPhraseChecker(["hi there", "what's up"]);

      const expectations = [
        // Positive: separators other than spaces still delimit tokens.
        {
          query: "hi—there!",
          matches: true,
          message: "Em dash between words should match 'hi there'",
        },
        {
          query: "well, hi there!!",
          matches: true,
          message: "Punctuation around phrase should match",
        },
        {
          query: "so, what’s up today?",
          matches: true,
          message: "Curly apostrophe splits to tokens; should match 'what s up'",
        },
        // Negative: no separator between the words means no match.
        {
          query: "hithere",
          matches: false,
          message: "Concatenated words should not match 'hi there'",
        },
        {
          query: "whatssup",
          matches: false,
          message: "Should not match 'what s up' without separators",
        },
      ];

      for (const { query, matches, message } of expectations) {
        const outcome = isForced(query);
        Assert.ok(matches ? outcome : !outcome, message);
      }
    });
    244 
    // Checks that the phrase checker normalizes whitespace, case and Unicode
    // compatibility forms in the query before matching.
    add_task(function test_isolated_phrase_checker_spacing_and_unicode_norm() {
      const phrases = ["good morning", "hello"];
      const isForced = makeIsolatedPhraseChecker(phrases);

      // Multiple spaces collapse
      Assert.ok(
        isForced("good     morning everyone"),
        "Multiple spaces between tokens should still match"
      );

      // Case folding and surrounding whitespace
      Assert.ok(
        isForced("  HELLO  "),
        "Case and surrounding spaces should normalize and match 'hello'"
      );

      // Fullwidth / NFKC normalization: FULLWIDTH "HELLO" (U+FF28 U+FF25 U+FF2C
      // U+FF2C U+FF2F), written with \u escapes so the code points cannot be
      // folded to ASCII by an editor or re-encoding step.
      // NOTE(review): assumes the checker applies the same NFKC normalization as
      // normalizeTextForChatAllowlist — confirm against the implementation.
      Assert.ok(
        isForced("\uFF28\uFF25\uFF2C\uFF2C\uFF2F"),
        "Fullwidth letters should NFKC-normalize and match 'hello'"
      );

      // Non-breaking spaces and tabs
      Assert.ok(
        isForced("good\u00A0morning\tteam"),
        "NBSP and tabs normalize and match"
      );
    });
    267 
    // Queries that must never be reported as containing an allowlisted phrase:
    // the empty string, unrelated text, and a word that merely starts with a
    // phrase.
    add_task(function test_isolated_phrase_checker_no_match_cases() {
      const isForced = makeIsolatedPhraseChecker(["hi there", "sup"]);

      const rejected = [
        ["", "Empty string should not match"],
        ["nothing to see here", "Unrelated text should not match"],
        ["support", "Partial token with 'sup' prefix should not match"],
      ];

      for (const [query, message] of rejected) {
        Assert.ok(!isForced(query), message);
      }
    });
    282 
    // Checks that the phrase checker gives stable answers: repeating a query
    // and varying its whitespace must not change the outcome. The expected
    // outcome itself is asserted as well, so that a checker which always
    // answers "no match" cannot pass on consistency alone.
    // NOTE(review): whether the checker caches results is not visible in this
    // file; the repeated call is what would exercise such a cache.
    add_task(function test_isolated_phrase_checker_caching_stability() {
      const phrases = ["hello", "hi there"];
      const isForced = makeIsolatedPhraseChecker(phrases);

      // Repeated calls with the same input should return identical results
      const q1 = "Hello there!";
      const first = isForced(q1);
      const second = isForced(q1);
      Assert.ok(first, "Query containing isolated 'hello' should match");
      Assert.equal(
        first,
        second,
        "Same query should yield identical result across calls (cache-stable)"
      );

      // Different whitespace should normalize to the same outcome
      const padded = isForced("  hello   there ");
      const compact = isForced("hello there");
      Assert.ok(compact, "Query 'hello there' should match 'hello'");
      Assert.equal(
        padded,
        compact,
        "Whitespace variations should not affect result"
      );
    });
    303 });