tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

IntentClassifier.sys.mjs (5403B)


      1 /**
      2 * This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
      5 */
      6 
      7 import { createEngine } from "chrome://global/content/ml/EngineProcess.sys.mjs";
      8 
// Phrases that short-circuit intent detection. This list is compiled into the
// checker stored on IntentClassifier._isForcedChat; when a query contains one
// of these phrases as a run of whole tokens (after normalization and
// tokenization), getPromptIntent returns "chat" without running the model.
// Entries are written lower-case; punctuation inside an entry (e.g. the
// apostrophe in "what’s up") acts as a token separator when the list is built.
const FORCED_CHAT_PHRASES = [
 "amuse me",
 "are we alone",
 "are you alive",
 "are you gpt",
 "are you human",
 "are you real",
 "bark like dog",
 "cheer me up",
 "comfort me",
 "count numbers",
 "curse me",
 "do aliens exist",
 "do we matter",
 "do you dream",
 "do you think",
 "does fate exist",
 "dream meaning",
 "drop wisdom",
 "encourage me",
 "entertain me",
 "explain yourself",
 "flip coin",
 "give blessing",
 "give wisdom",
 "good morning",
 "good night",
 "guess number",
 "hallo",
 "hello",
 "hey",
 "hi",
 "hola",
 "how are you",
 "inspire me",
 "invent a word",
 "invent holiday",
 "invent joke",
 "is god real",
 "life advice",
 "life purpose",
 "list animals",
 "list capitals",
 "list colors",
 "list countries",
 "list elements",
 "list fruits",
 "list metals",
 "list oceans",
 "list planets",
 "list shapes",
 "meaning of life",
 "meow like cat",
 "motivate me",
 "now you are",
 "play a game",
 "pretend alien",
 "pretend child",
 "pretend detective",
 "pretend ghost",
 "pretend pirate",
 "pretend robot",
 "pretend superhero",
 "pretend teacher",
 "pretend wizard",
 "random fact",
 "random number",
 "roll dice",
 "goodbye",
 "simulate chat",
 "simulate future",
 "simulate past",
 "sing like robot",
 "sing lullaby",
 "sing rap",
 "sup",
 "surprise me",
 "teach me",
 "tell bedtime story",
 "tell fortune",
 "tell joke",
 "tell prophecy",
 "tell riddle",
 "tell story",
 "what is art",
 "what is beauty",
 "what is death",
 "what is freedom",
 "what is justice",
 "what is love",
 "what is mind",
 "what is reality",
 "what is right",
 "what is self",
 "what is soul",
 "what is time",
 "what is truth",
 "what is wrong",
 "what model are you",
 "what version",
 "what’s up",
 "which model are you",
 "who am i",
 "who are you",
 "who made you",
 "why are we",
 "write a poem",
 "write a song",
 "write haiku",
 "write quote",
 "your model is",
];
    121 
    122 export function normalizeTextForChatAllowlist(s) {
    123  return s.toLowerCase().normalize("NFKC").replace(/\s+/g, " ").trim();
    124 }
    125 
    126 // Split on non-word chars; letters/numbers/_ are "word" characters
    127 export function tokenizeTextForChatAllowlist(s) {
    128  return normalizeTextForChatAllowlist(s)
    129    .split(/[^\p{L}\p{N}_]+/u)
    130    .filter(Boolean);
    131 }
    132 
    133 export function buildChatAllowlist(phrases) {
    134  const byLen = new Map(); // len -> Set("tok tok ...")
    135  for (const p of phrases) {
    136    const key = tokenizeTextForChatAllowlist(p).join(" ");
    137    if (!key) {
    138      continue;
    139    }
    140    const k = key.split(" ").length;
    141    if (!byLen.has(k)) {
    142      byLen.set(k, new Set());
    143    }
    144    byLen.get(k).add(key);
    145  }
    146  return byLen;
    147 }
    148 
    149 // Factory: returns a fast checker for “does query contain any isolated phrase?”
    150 export function makeIsolatedPhraseChecker(phrases) {
    151  const byLen = buildChatAllowlist(phrases);
    152  const cache = new Map();
    153 
    154  return function containsIsolatedPhrase(query) {
    155    const qNorm = normalizeTextForChatAllowlist(query);
    156    if (cache.has(qNorm)) {
    157      return cache.get(qNorm);
    158    }
    159 
    160    const toks = qNorm.split(/[^\p{L}\p{N}_]+/u).filter(Boolean);
    161    for (const [k, set] of byLen) {
    162      for (let i = 0; i + k <= toks.length; i++) {
    163        if (set.has(toks.slice(i, i + k).join(" "))) {
    164          cache.set(qNorm, true);
    165          return true;
    166        }
    167      }
    168    }
    169    cache.set(qNorm, false);
    170    return false;
    171  };
    172 }
    173 
    174 /**
    175 * Intent Classifier Engine
    176 */
    177 export const IntentClassifier = {
    178  /**
    179   * Exposing createEngine for testing purposes.
    180   */
    181 
    182  _createEngine: createEngine,
    183 
    184  /**
    185   * Initialize forced-chat checker at module load.
    186   * Keeping it as a property ensures easy stubbing in tests.
    187   */
    188 
    189  _isForcedChat: makeIsolatedPhraseChecker(FORCED_CHAT_PHRASES),
    190 
    191  /**
    192   * Gets the intent of the prompt using a text classification model.
    193   *
    194   * @param {string} prompt
    195   * @returns {string} "search" | "chat"
    196   */
    197 
    198  async getPromptIntent(query) {
    199    try {
    200      const cleanedQuery = this._preprocessQuery(query);
    201      if (this._isForcedChat(cleanedQuery)) {
    202        return "chat";
    203      }
    204      const engine = await this._createEngine({
    205        featureId: "smart-intent",
    206        modelId: "mozilla/mobilebert-query-intent-detection",
    207        modelRevision: "v0.2.0",
    208        taskName: "text-classification",
    209      });
    210      const threshold = 0.8;
    211      const resp = await engine.run({ args: [[cleanedQuery]] });
    212      // resp example: [{ label: "chat", score: 0.95 }, { label: "search", score: 0.04 }]
    213      if (
    214        resp[0].label.toLowerCase() === "chat" &&
    215        resp[0].score >= threshold
    216      ) {
    217        return "chat";
    218      }
    219      return "search";
    220    } catch (error) {
    221      console.error("Error using intent detection model:", error);
    222      throw error;
    223    }
    224  },
    225 
    226  // Helper function for preprocessing text input
    227  _preprocessQuery(query) {
    228    if (typeof query !== "string") {
    229      throw new TypeError(
    230        `Expected a string for query preprocessing, but received ${typeof query}`
    231      );
    232    }
    233    return query.replace(/\?/g, "").trim();
    234  },
    235 };