IntentClassifier.sys.mjs (5403B)
/**
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

import { createEngine } from "chrome://global/content/ml/EngineProcess.sys.mjs";

// Minimum score the model must report for its top label before a prompt is
// classified as "chat"; anything below falls back to "search".
const CHAT_SCORE_THRESHOLD = 0.8;

// Default upper bound on the number of memoized verdicts kept by a checker
// returned from makeIsolatedPhraseChecker().
const DEFAULT_PHRASE_CACHE_LIMIT = 1000;

// Runs of anything that is not a letter, a number or "_" separate tokens.
const TOKEN_SEPARATOR_RE = /[^\p{L}\p{N}_]+/u;

// Phrases that force the "chat" intent without consulting the model.
const FORCED_CHAT_PHRASES = [
  "amuse me",
  "are we alone",
  "are you alive",
  "are you gpt",
  "are you human",
  "are you real",
  "bark like dog",
  "cheer me up",
  "comfort me",
  "count numbers",
  "curse me",
  "do aliens exist",
  "do we matter",
  "do you dream",
  "do you think",
  "does fate exist",
  "dream meaning",
  "drop wisdom",
  "encourage me",
  "entertain me",
  "explain yourself",
  "flip coin",
  "give blessing",
  "give wisdom",
  "good morning",
  "good night",
  "goodbye",
  "guess number",
  "hallo",
  "hello",
  "hey",
  "hi",
  "hola",
  "how are you",
  "inspire me",
  "invent a word",
  "invent holiday",
  "invent joke",
  "is god real",
  "life advice",
  "life purpose",
  "list animals",
  "list capitals",
  "list colors",
  "list countries",
  "list elements",
  "list fruits",
  "list metals",
  "list oceans",
  "list planets",
  "list shapes",
  "meaning of life",
  "meow like cat",
  "motivate me",
  "now you are",
  "play a game",
  "pretend alien",
  "pretend child",
  "pretend detective",
  "pretend ghost",
  "pretend pirate",
  "pretend robot",
  "pretend superhero",
  "pretend teacher",
  "pretend wizard",
  "random fact",
  "random number",
  "roll dice",
  "simulate chat",
  "simulate future",
  "simulate past",
  "sing like robot",
  "sing lullaby",
  "sing rap",
  "sup",
  "surprise me",
  "teach me",
  "tell bedtime story",
  "tell fortune",
  "tell joke",
  "tell prophecy",
  "tell riddle",
  "tell story",
  "what is art",
  "what is beauty",
  "what is death",
  "what is freedom",
  "what is justice",
  "what is love",
  "what is mind",
  "what is reality",
  "what is right",
  "what is self",
  "what is soul",
  "what is time",
  "what is truth",
  "what is wrong",
  "what model are you",
  "what version",
  "what’s up",
  "which model are you",
  "who am i",
  "who are you",
  "who made you",
  "why are we",
  "write a poem",
  "write a song",
  "write haiku",
  "write quote",
  "your model is",
];

/**
 * Splits an already-normalized string into its non-empty tokens.
 *
 * @param {string} normalized
 *   Output of normalizeTextForChatAllowlist().
 * @returns {string[]}
 */
function splitNormalizedText(normalized) {
  return normalized.split(TOKEN_SEPARATOR_RE).filter(Boolean);
}

/**
 * Canonicalizes text for allowlist matching: NFKC-normalizes, lower-cases,
 * collapses every whitespace run to a single space and trims both ends.
 *
 * NFKC runs before lower-casing because compatibility normalization can
 * itself produce upper-case letters (e.g. "™" becomes "TM"); lower-casing
 * first would let those through untouched.
 *
 * @param {string} s
 * @returns {string}
 */
export function normalizeTextForChatAllowlist(s) {
  return s.normalize("NFKC").toLowerCase().replace(/\s+/g, " ").trim();
}

/**
 * Normalizes the text and splits it on non-word characters;
 * letters/numbers/_ are "word" characters.
 *
 * @param {string} s
 * @returns {string[]} The non-empty tokens, in order of appearance.
 */
export function tokenizeTextForChatAllowlist(s) {
  return splitNormalizedText(normalizeTextForChatAllowlist(s));
}

/**
 * Groups phrases by their token count so that a query only needs to be
 * compared against windows of matching length.
 *
 * @param {Iterable<string>} phrases
 * @returns {Map<number, Set<string>>}
 *   Token count -> set of phrases, each stored as its tokens joined by a
 *   single space. Phrases that yield no tokens are skipped.
 */
export function buildChatAllowlist(phrases) {
  const byLen = new Map(); // len -> Set("tok tok ...")
  for (const phrase of phrases) {
    const tokens = tokenizeTextForChatAllowlist(phrase);
    if (!tokens.length) {
      continue;
    }
    let bucket = byLen.get(tokens.length);
    if (!bucket) {
      bucket = new Set();
      byLen.set(tokens.length, bucket);
    }
    bucket.add(tokens.join(" "));
  }
  return byLen;
}

/**
 * Factory: returns a fast checker for "does query contain any isolated
 * phrase?". A phrase matches only as a run of whole tokens, never as a
 * substring of a longer word.
 *
 * Verdicts are memoized per normalized query. The cache is bounded so a
 * long-lived checker cannot grow without limit; once full, the oldest entry
 * is evicted. Eviction never changes a verdict, only whether it has to be
 * recomputed.
 *
 * @param {Iterable<string>} phrases
 *   Phrases to look for.
 * @param {object} [options]
 * @param {number} [options.maxCacheEntries]
 *   Maximum number of memoized verdicts; 0 or less disables caching.
 * @returns {function(string): boolean}
 */
export function makeIsolatedPhraseChecker(
  phrases,
  { maxCacheEntries = DEFAULT_PHRASE_CACHE_LIMIT } = {}
) {
  const byLen = buildChatAllowlist(phrases);
  const cache = new Map();

  const remember = (key, verdict) => {
    if (maxCacheEntries > 0) {
      if (cache.size >= maxCacheEntries) {
        // A Map iterates in insertion order, so its first key is the oldest.
        cache.delete(cache.keys().next().value);
      }
      cache.set(key, verdict);
    }
    return verdict;
  };

  return function containsIsolatedPhrase(query) {
    const qNorm = normalizeTextForChatAllowlist(query);
    const cached = cache.get(qNorm);
    if (cached !== undefined) {
      return cached;
    }

    const toks = splitNormalizedText(qNorm);
    for (const [k, set] of byLen) {
      // Slide a window of k tokens across the query.
      for (let i = 0; i + k <= toks.length; i++) {
        if (set.has(toks.slice(i, i + k).join(" "))) {
          return remember(qNorm, true);
        }
      }
    }
    return remember(qNorm, false);
  };
}

/**
 * Intent Classifier Engine
 */
export const IntentClassifier = {
  /**
   * Exposing createEngine for testing purposes.
   */
  _createEngine: createEngine,

  /**
   * Initialize forced-chat checker at module load.
   * Keeping it as a property ensures easy stubbing in tests.
   */
  _isForcedChat: makeIsolatedPhraseChecker(FORCED_CHAT_PHRASES),

  /**
   * Gets the intent of the prompt using a text classification model.
   *
   * Queries containing a forced-chat phrase are answered with "chat" without
   * creating the engine. Otherwise the result is "chat" only when the first
   * entry of the model response is labelled "chat" with a score of at least
   * CHAT_SCORE_THRESHOLD, and "search" in every other case.
   *
   * @param {string} query
   * @returns {Promise<string>} "search" | "chat"
   * @throws {TypeError} If query is not a string.
   * @throws Any error raised while creating or running the engine; it is
   *   logged and then rethrown unchanged.
   */
  async getPromptIntent(query) {
    try {
      const cleanedQuery = this._preprocessQuery(query);
      if (this._isForcedChat(cleanedQuery)) {
        return "chat";
      }
      const engine = await this._createEngine({
        featureId: "smart-intent",
        modelId: "mozilla/mobilebert-query-intent-detection",
        modelRevision: "v0.2.0",
        taskName: "text-classification",
      });
      const resp = await engine.run({ args: [[cleanedQuery]] });
      // resp example: [{ label: "chat", score: 0.95 }, { label: "search", score: 0.04 }]
      const top = resp[0];
      const isConfidentChat =
        top.label.toLowerCase() === "chat" && top.score >= CHAT_SCORE_THRESHOLD;
      return isConfidentChat ? "chat" : "search";
    } catch (error) {
      console.error("Error using intent detection model:", error);
      throw error;
    }
  },

  /**
   * Helper function for preprocessing text input: removes every "?" and
   * trims surrounding whitespace.
   *
   * @param {string} query
   * @returns {string}
   * @throws {TypeError} If query is not a string.
   */
  _preprocessQuery(query) {
    if (typeof query !== "string") {
      throw new TypeError(
        `Expected a string for query preprocessing, but received ${typeof query}`
      );
    }
    return query.replaceAll("?", "").trim();
  },
};