test_intent_classifier.js (9256B)
/* Any copyright is dedicated to the Public Domain.
 * http://creativecommons.org/publicdomain/zero/1.0/ */

const {
  IntentClassifier,
  normalizeTextForChatAllowlist,
  tokenizeTextForChatAllowlist,
  buildChatAllowlist,
  makeIsolatedPhraseChecker,
} = ChromeUtils.importESModule(
  "moz-src:///browser/components/aiwindow/models/IntentClassifier.sys.mjs"
);

const { sinon } = ChromeUtils.importESModule(
  "resource://testing-common/Sinon.sys.mjs"
);

/**
 * Stubs IntentClassifier._createEngine with a keyword-matching fake engine
 * and checks that getPromptIntent() reports "search" or "chat" for each
 * sample prompt. The sinon sandbox is always restored, even on failure.
 */
add_task(async function test_getPromptIntent_basic() {
  const sb = sinon.createSandbox();
  try {
    const cases = [
      { prompt: "please search for news on firefox", expected: "search" },
      {
        prompt: "Can you FIND me the docs for PageAssist?",
        expected: "search",
      }, // case-insensitive
      { prompt: "look up the best pizza in SF", expected: "search" },
      { prompt: "hello there, how are you?", expected: "chat" },
      { prompt: "tell me a joke", expected: "chat" },
    ];

    const fakeEngine = {
      // The fake reads the query from args[0][0], lowercases it, and labels
      // it "search" when it contains any of the keywords below.
      run({ args: [[query]] }) {
        const searchKeywords = [
          "search",
          "find",
          "look",
          "query",
          "locate",
          "explore",
        ];
        const formattedPrompt = query.toLowerCase();
        const isSearch = searchKeywords.some(keyword =>
          formattedPrompt.includes(keyword)
        );

        // Simulate model confidence scores
        if (isSearch) {
          return [
            { label: "search", score: 0.95 },
            { label: "chat", score: 0.05 },
          ];
        }
        return [
          { label: "chat", score: 0.95 },
          { label: "search", score: 0.05 },
        ];
      },
    };

    sb.stub(IntentClassifier, "_createEngine").resolves(fakeEngine);

    for (const { prompt, expected } of cases) {
      const intent = await IntentClassifier.getPromptIntent(prompt);
      Assert.equal(
        intent,
        expected,
        `getPromptIntent("${prompt}") should return "${expected}"`
      );
    }
  } finally {
    sb.restore();
  }
});

/**
 * IntentClassifier._preprocessQuery() should drop every question mark and
 * return the remaining text without leading/trailing whitespace, as pinned
 * by the cases below.
 */
add_task(function test_preprocessQuery_removes_question_marks() {
  // Call the real helper on the classifier
  const cases = [
    { input: "hello?", expected: "hello" },
    { input: "?prompt", expected: "prompt" },
    { input: "multiple???", expected: "multiple" },
    { input: "mid?dle", expected: "middle" },
    { input: "question? ", expected: "question" },
    { input: " no? spaces? ", expected: "no spaces" },
    { input: "???", expected: "" },
    { input: "clean input", expected: "clean input" },
  ];

  for (const { input, expected } of cases) {
    const result = IntentClassifier._preprocessQuery(input);
    Assert.equal(
      result,
      expected,
      `Expected "${input}" to preprocess to "${expected}", got "${result}"`
    );
  }
});

/**
 * normalizeTextForChatAllowlist() should lowercase, trim, collapse
 * whitespace runs, and fold compatibility characters to their plain forms.
 */
add_task(function test_normalizeTextForChatAllowlist_basic() {
  // lowercasing + trimming + collapsing internal spaces
  // (the doubled/tripled spaces in the input are deliberate)
  Assert.equal(
    normalizeTextForChatAllowlist("  HeLLo   There  "),
    "hello there",
    "Should lowercase, trim, and collapse spaces"
  );

  // NFKC normalization: compatibility forms -> canonical.
  // The input is fullwidth "TEST 123" (U+FF34 U+FF25 U+FF33 U+FF34, then
  // U+FF11..U+FF13). It is written with \u escapes so the fullwidth code
  // points cannot be silently folded to ASCII by an editor or tool, which
  // would leave the NFKC path untested.
  Assert.equal(
    normalizeTextForChatAllowlist(
      "\uFF34\uFF25\uFF33\uFF34 \uFF11\uFF12\uFF13"
    ),
    "test 123",
    "Should NFKC-normalize fullwidth letters/digits"
  );

  // Multiple whitespace kinds (NBSP, tabs, newlines) collapse
  Assert.equal(
    normalizeTextForChatAllowlist("a\u00A0b\tc\nd"),
    "a b c d",
    "Should collapse all whitespace kinds to single spaces"
  );
});

/**
 * tokenizeTextForChatAllowlist() should split on punctuation while keeping
 * letters (including non-ASCII ones), digits, and underscores in tokens.
 */
add_task(function test_tokenizeTextForChatAllowlist_unicode_and_boundaries() {
  // Splits on non-word chars, keeps letters/digits/underscore
  Assert.deepEqual(
    tokenizeTextForChatAllowlist("hello, world! 42_times"),
    ["hello", "world", "42_times"],
    "Should split on punctuation and keep underscores"
  );

  // Unicode letters should be treated as word chars (\p{L})
  Assert.deepEqual(
    tokenizeTextForChatAllowlist("mañana—café!"),
    ["mañana", "café"],
    "Should keep Unicode letters and split on punctuation (em dash, bang)"
  );

  // Apostrophes split (non-word), as intended
  Assert.deepEqual(
    tokenizeTextForChatAllowlist("what's up"),
    ["what", "s", "up"],
    "Apostrophes are separators, so tokens split around them"
  );
});

/**
 * buildChatAllowlist() should return a map keyed by token count whose values
 * are sets of normalized phrases; empty phrases must not produce an entry.
 */
add_task(function test_buildChatAllowlist_grouping_and_normalization() {
  const phrases = [
    "sup",
    "hi there", // 2 tokens
    "what's up", // becomes "what s up" (3 tokens)
    "  foo   bar  ", // leading/trailing + multiple spaces
    "", // empty should be skipped
    "___", // token of underscores counts as 1 token
  ];
  const sets = buildChatAllowlist(phrases);

  // Expect keys for lengths: 1, 2, 3
  Assert.ok(sets.has(1), "Should have set for single-token phrases");
  Assert.ok(sets.has(2), "Should have set for two-token phrases");
  Assert.ok(sets.has(3), "Should have set for three-token phrases");

  // 1-token set contains: "sup", "___"
  Assert.ok(sets.get(1).has("sup"), "Single-token set should contain 'sup'");
  Assert.ok(sets.get(1).has("___"), "Single-token set should contain '___'");

  // 2-token set contains normalized "hi there" and "foo bar"
  Assert.ok(
    sets.get(2).has("hi there"),
    "Two-token set should contain 'hi there'"
  );
  Assert.ok(
    sets.get(2).has("foo bar"),
    "Two-token set should contain normalized 'foo bar'"
  );

  // 3-token set contains "what s up" (note apostrophe split)
  Assert.ok(
    sets.get(3).has("what s up"),
    "Three-token set should contain 'what s up'"
  );

  // Empty phrase skipped: nothing added for length 0
  for (const [tokenCount, phraseSet] of sets) {
    Assert.ok(
      tokenCount > 0 && phraseSet.size >= 1,
      "No empty keys, each set has at least one entry"
    );
  }
});

/**
 * A checker built from single-token phrases should match those tokens only
 * when they stand alone, never as a fragment of a longer word.
 */
add_task(function test_isolated_phrase_checker_single_word_boundaries() {
  const phrases = ["sup", "hello", "___"];
  const isForced = makeIsolatedPhraseChecker(phrases);

  // Positive: exact token present
  Assert.ok(
    isForced("sup bro"),
    "Should match 'sup' as an isolated token at start"
  );
  Assert.ok(
    isForced("hey, hello there"),
    "Should match 'hello' surrounded by punctuation"
  );
  Assert.ok(isForced("foo ___ bar"), "Should match token with underscores");

  // Negative: partial-word should NOT match
  Assert.ok(
    !isForced("supposingly, this should not match"),
    "No partial-word match for 'sup'"
  );
  Assert.ok(!isForced("supper time"), "No partial-word match inside 'supper'");
  Assert.ok(!isForced("shelloworld"), "No partial-word match for 'hello'");
});

/**
 * Multi-word phrases should match when their tokens appear consecutively,
 * whatever punctuation separates them, and should not match glued words.
 */
add_task(function test_isolated_phrase_checker_multiword_and_punctuation() {
  // Multiword phrases; apostrophes become token splits -> "what's up" => "what s up"
  const phrases = ["hi there", "what's up"];
  const isForced = makeIsolatedPhraseChecker(phrases);

  // Positive: punctuation between words should still match (token split)
  Assert.ok(
    isForced("hi—there!"),
    "Em dash between words should match 'hi there'"
  );
  Assert.ok(
    isForced("well, hi there!!"),
    "Punctuation around phrase should match"
  );
  Assert.ok(
    isForced("so, what’s up today?"),
    "Curly apostrophe splits to tokens; should match 'what s up'"
  );

  // Negative: glued words should not match
  Assert.ok(
    !isForced("hithere"),
    "Concatenated words should not match 'hi there'"
  );
  Assert.ok(
    !isForced("whatssup"),
    "Should not match 'what s up' without separators"
  );
});

/**
 * Matching should be insensitive to letter case, to runs of spaces, and to
 * non-ASCII whitespace such as NBSP and tabs.
 */
add_task(function test_isolated_phrase_checker_spacing_and_unicode_norm() {
  const phrases = ["good morning", "hello"];
  const isForced = makeIsolatedPhraseChecker(phrases);

  // Multiple spaces collapse (the run of spaces in the input is deliberate)
  Assert.ok(
    isForced("good    morning everyone"),
    "Multiple spaces between tokens should still match"
  );

  // Case folding and surrounding whitespace (basic usage).
  // NOTE(review): no fullwidth input is exercised against the checker here;
  // fullwidth/NFKC folding is asserted directly on
  // normalizeTextForChatAllowlist in this file.
  Assert.ok(
    isForced("  HELLO  "),
    "Case and surrounding spaces should normalize and match 'hello'"
  );

  // Non-breaking spaces and tabs
  Assert.ok(
    isForced("good\u00A0morning\tteam"),
    "NBSP and tabs normalize and match"
  );
});

/**
 * Inputs with no allowlisted phrase, including the empty string and words
 * that merely start with an allowlisted token, should not match.
 */
add_task(function test_isolated_phrase_checker_no_match_cases() {
  const phrases = ["hi there", "sup"];
  const isForced = makeIsolatedPhraseChecker(phrases);

  Assert.ok(!isForced(""), "Empty string should not match");
  Assert.ok(
    !isForced("nothing to see here"),
    "Unrelated text should not match"
  );
  Assert.ok(
    !isForced("support"),
    "Partial token with 'sup' prefix should not match"
  );
});

/**
 * Repeated and whitespace-varied queries should yield the same answer from
 * a single checker instance.
 */
add_task(function test_isolated_phrase_checker_caching_stability() {
  const phrases = ["hello", "hi there"];
  const isForced = makeIsolatedPhraseChecker(phrases);

  // Repeated calls with the same input should return identical results (cache sanity)
  const q1 = "Hello there!";
  const first = isForced(q1);
  const second = isForced(q1);
  Assert.equal(
    first,
    second,
    "Same query should yield identical result across calls (cache-stable)"
  );

  // Different whitespace should normalize to the same outcome
  // (padding and the internal run of spaces are deliberate)
  Assert.equal(
    isForced("  hello   there  "),
    isForced("hello there"),
    "Whitespace variations should not affect result"
  );
});