tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Memories.sys.mjs (17006B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
      4 
      5 /**
      6 * This module defines functions to generate, deduplicate, and filter memories.
      7 *
      8 * The primary method in this module is `generateMemories`, which orchestrates the entire pipeline:
      9 * 1. Generates initial memories from a specified user data source
     10 * 2. Deduplicates the newly generated memories against all existing memories
     11 * 3. Filters out memories with sensitive content (i.e. financial, medical, etc.)
     12 * 4. Returns the final list of memories objects
     13 *
     14 * `generateMemories` requires 3 arguments:
     15 * 1. `engine`: an instance of `openAIEngine` to call the LLM API
     16 * 2. `sources`: an object mapping user data source types to aggregated records (i.e., {history: [domainItems, titleItems, searchItems]})
     17 * 3. `existingMemoriesList`: an array of existing memory summary strings to deduplicate against
     18 *
     19 * Example Usage:
     20 * const engine = await openAIEngine.build(MODEL_FEATURES.MEMORIES, DEFAULT_ENGINE_ID, SERVICE_TYPES.MEMORIES);
     21 * const sources = {history: [domainItems, titleItems, searchItems]};
     22 * const existingMemoriesList = [...]; // Array of existing memory summary strings; this should be fetched from memory storage
     23 * const newMemories = await generateMemories(engine, sources, existingMemoriesList);
     24 *
     25 */
     26 
     27 import { renderPrompt, openAIEngine } from "../Utils.sys.mjs";
     28 
     29 import {
     30  HISTORY,
     31  CONVERSATION,
     32  ALL_SOURCES,
     33  CATEGORIES,
     34  CATEGORIES_LIST,
     35  INTENTS,
     36  INTENTS_LIST,
     37 } from "./MemoriesConstants.sys.mjs";
     38 
     39 import {
     40  initialMemoriesGenerationSystemPrompt,
     41  initialMemoriesGenerationPrompt,
     42  memoriesDeduplicationSystemPrompt,
     43  memoriesDeduplicationPrompt,
     44  memoriesSensitivityFilterSystemPrompt,
     45  memoriesSensitivityFilterPrompt,
     46 } from "moz-src:///browser/components/aiwindow/models/prompts/MemoriesPrompts.sys.mjs";
     47 
     48 import {
     49  INITIAL_MEMORIES_SCHEMA,
     50  MEMORIES_DEDUPLICATION_SCHEMA,
     51  MEMORIES_NON_SENSITIVE_SCHEMA,
     52 } from "moz-src:///browser/components/aiwindow/models/memories/MemoriesSchemas.sys.mjs";
     53 
     54 /**
     55 * Generates, deduplicates, and filters memories end-to-end
     56 *
     57 * This is the main pipeline function.
     58 *
     59 * @param {OpenAIEngine} engine                 openAIEngine instance to call LLM API
     60 * @param {object} sources                      User data source type to aggregrated records (i.e., {history: [domainItems, titleItems, searchItems]})
     61 * @param {Array<string>} existingMemoriesList  List of existing memory summary strings to deduplicate against
     62 * @returns {Promise<Array<Map<{
     63 *  category: string,
     64 *  intent: string,
     65 *  memory_summary: string,
     66 *  score: number,
     67 * }>>>}                                        Promise resolving the final list of generated, deduplicated, and filtered memory objects
     68 */
     69 export async function generateMemories(engine, sources, existingMemoriesList) {
     70  // Step 1: Generate initial memories
     71  const initialMemories = await generateInitialMemoriesList(engine, sources);
     72  // If we don't generate any new memories, just return an empty list immediately instead of doing the rest of the steps
     73  if (!initialMemories || initialMemories.length === 0) {
     74    return [];
     75  }
     76 
     77  // Step 2: Deduplicate against existing memories
     78  const initialMemoriesSummaries = initialMemories.map(
     79    memory => memory.memory_summary
     80  );
     81  const dedupedMemoriesSummaries = await deduplicateMemories(
     82    engine,
     83    existingMemoriesList,
     84    initialMemoriesSummaries
     85  );
     86  // If we don't have any deduped memories, no new memories were generated or we ran into an unexpected JSON parse error, so return an empty list
     87  if (!dedupedMemoriesSummaries || dedupedMemoriesSummaries.length === 0) {
     88    return [];
     89  }
     90 
     91  // Step 3: Filter out sensitive memories
     92  const nonSensitiveMemoriesSummaries = await filterSensitiveMemories(
     93    engine,
     94    dedupedMemoriesSummaries
     95  );
     96 
     97  // Step 4: Map back to full memory objects and return
     98  return await mapFilteredMemoriesToInitialList(
     99    initialMemories,
    100    nonSensitiveMemoriesSummaries
    101  );
    102 }
    103 
    104 /**
    105 * Formats a list of strings into a prompt-friendly bullet list
    106 *
    107 * @param {List<string>} list
    108 * @returns {string}
    109 */
    110 export function formatListForPrompt(list) {
    111  return list.map(item => `- "${item}"`).join("\n");
    112 }
    113 
    114 /**
    115 * Utility function to cleanly get bullet-formatted category and memory lists
    116 *
    117 * @param {string} attributeName  "categories" or "intents"
    118 * @returns {string}              Formatted list string
    119 */
    120 export function getFormattedMemoryAttributeList(attributeName) {
    121  if (attributeName === CATEGORIES) {
    122    return formatListForPrompt(CATEGORIES_LIST);
    123  } else if (attributeName === INTENTS) {
    124    return formatListForPrompt(INTENTS_LIST);
    125  }
    126  throw new Error(`Unsupported memory attribute name: ${attributeName}`);
    127 }
    128 
    129 /**
    130 * Extracts a JSON as a map from an LLM response (handles markdown-formatted code blocks)
    131 *
    132 * @param {any} response  LLM response
    133 * @param {any} fallback  Fallback value if parsing fails to protect downstream code
    134 * @returns {Map}         Parsed JSON object
    135 */
    136 export function parseAndExtractJSON(response, fallback) {
    137  const rawContent = response?.finalOutput ?? "";
    138  const markdownMatch = rawContent.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
    139  const payload = markdownMatch ? markdownMatch[1] : rawContent;
    140  try {
    141    return JSON.parse(payload);
    142  } catch (e) {
    143    // If we can't parse a JSON from the LLM response, return a tailored fallback value to prevent downstream code failures
    144    if (e instanceof SyntaxError) {
    145      console.warn(
    146        `Could not parse JSON from LLM response; using fallback (${fallback}): ${e.message}`
    147      );
    148      return fallback;
    149    }
    150    throw new Error(
    151      `Unexpected error parsing JSON from LLM response: ${e.message}`
    152    );
    153  }
    154 }
    155 
    156 /**
    157 * Renders recent history records into CSV tables for prompt input
    158 *
    159 * @param {Array<Array<string>>} domainItems   List of aggregated domain items
    160 * @param {Array<Array<string>>} titleItems    List of aggregated title items
    161 * @param {Array<object>} searchItems          List of aggregated search items
    162 * @returns {Promise<string>}   Promise resolving recent browser history rendered as CSV tables
    163 */
    164 export async function renderRecentHistoryForPrompt(
    165  domainItems,
    166  titleItems,
    167  searchItems
    168 ) {
    169  let finalCSV = "";
    170 
    171  if (domainItems.length) {
    172    let domainRecordsTable = ["Domain,Importance Score"];
    173    for (const domainItem of domainItems) {
    174      domainRecordsTable.push(domainItem.join(","));
    175    }
    176    finalCSV += "# Domains\n" + domainRecordsTable.join("\n") + "\n\n";
    177  }
    178 
    179  if (titleItems.length) {
    180    let titleRecordsTable = ["Title,Importance Score"];
    181    for (const titleItem of titleItems) {
    182      titleRecordsTable.push(titleItem.join(","));
    183    }
    184    finalCSV += "# Titles\n" + titleRecordsTable.join("\n") + "\n\n";
    185  }
    186 
    187  if (searchItems.length) {
    188    let searchRecordsTable = ["Search,Importance Score"];
    189    for (const searchItem of searchItems) {
    190      searchRecordsTable.push(`${searchItem.q},${searchItem.r}`);
    191    }
    192    finalCSV += "# Searches\n" + searchRecordsTable.join("\n");
    193  }
    194 
    195  return finalCSV.trim();
    196 }
    197 
    198 export async function renderRecentConversationForPrompt(conversationMessages) {
    199  let finalCSV = "";
    200  if (conversationMessages.length) {
    201    let conversationRecordsTable = ["Message"];
    202    for (const message of conversationMessages) {
    203      conversationRecordsTable.push(`${message.content}`);
    204    }
    205    finalCSV += "# Chat History\n" + conversationRecordsTable.join("\n");
    206  }
    207  return finalCSV.trim();
    208 }
    209 
    210 /**
    211 * Builds the initial memories generation prompt, pulling profile information based on given source
    212 *
    213 * @param {object} sources      User data source type to aggregrated records (i.e., {history: [domainItems, titleItems, searchItems]})
    214 * @returns {Promise<string>}   Promise resolving the generated memories generation prompt with profile records injected
    215 */
    216 export async function buildInitialMemoriesGenerationPrompt(sources) {
    217  if (ALL_SOURCES.intersection(new Set(Object.keys(sources))).size === 0) {
    218    throw new Error(
    219      `No valid sources provided to build memories generation prompt: ${Object.keys(sources).join(", ")}`
    220    );
    221  }
    222 
    223  let profileRecordsRenderedStr = "";
    224 
    225  // Allow for multiple sources in the future
    226  if (sources.hasOwnProperty(HISTORY)) {
    227    const [domainItems, titleItems, searchItems] = sources[HISTORY];
    228    profileRecordsRenderedStr += await renderRecentHistoryForPrompt(
    229      domainItems,
    230      titleItems,
    231      searchItems
    232    );
    233  }
    234  if (sources.hasOwnProperty(CONVERSATION)) {
    235    profileRecordsRenderedStr += await renderRecentConversationForPrompt(
    236      sources[CONVERSATION]
    237    );
    238  }
    239 
    240  return await renderPrompt(initialMemoriesGenerationPrompt, {
    241    categoriesList: getFormattedMemoryAttributeList(CATEGORIES),
    242    intentsList: getFormattedMemoryAttributeList(INTENTS),
    243    profileRecordsRenderedStr,
    244  });
    245 }
    246 
    247 /**
    248 * Builds the memories deduplication prompt
    249 *
    250 * @param {Array<string>} existingMemoriesList  List of existing memories
    251 * @param {Array<string>} newMemoriesList       List of newly generated memories
    252 * @returns {Promise<string>}                   Promise resolving the generated deduplication prompt with existing and new memories lists injected
    253 */
    254 export async function buildMemoriesDeduplicationPrompt(
    255  existingMemoriesList,
    256  newMemoriesList
    257 ) {
    258  const existingMemoriesListStr = formatListForPrompt(existingMemoriesList);
    259  const newMemoriesListStr = formatListForPrompt(newMemoriesList);
    260 
    261  return await renderPrompt(memoriesDeduplicationPrompt, {
    262    existingMemoriesList: existingMemoriesListStr,
    263    newMemoriesList: newMemoriesListStr,
    264  });
    265 }
    266 
    267 /**
    268 * Builds the memories sensitivity filter prompt
    269 *
    270 * @param {Array<string>} memoriesList  List of memories to filter
    271 * @returns {Promise<string>}           Promise resolving the generated sensitivity filter prompt with memories list injected
    272 */
    273 export async function buildMemoriesSensitivityFilterPrompt(memoriesList) {
    274  const memoriesListStr = formatListForPrompt(memoriesList);
    275 
    276  return await renderPrompt(memoriesSensitivityFilterPrompt, {
    277    memoriesList: memoriesListStr,
    278  });
    279 }
    280 
    281 /**
    282 * Sanitizes a single memory object from LLM output, checking required fields and normalizing score
    283 *
    284 * @param {*} memory               Raw memory object from LLM
    285 * @returns {Map<{
    286 *  category: string|null,
    287 *  intent: string|null,
    288 *  memory_summary: string|null,
    289 *  score: number,
    290 * }>|null}                         Sanitized memory or null if invalid
    291 */
    292 function sanitizeMemory(memory) {
    293  // Shortcut to return nothing if memory is bad
    294  if (!memory || typeof memory !== "object") {
    295    return null;
    296  }
    297 
    298  // Check that the candidate memory object has all the required string fields
    299  for (const field of ["category", "intent", "memory_summary"]) {
    300    if (!(field in memory) && typeof memory[field] !== "string") {
    301      return null;
    302    }
    303  }
    304 
    305  // Clamp score to [1,5]; treat missing/invalid as 1
    306  let score = Number.isFinite(memory.score) ? Math.round(memory.score) : 1;
    307  if (score < 1) {
    308    score = 1;
    309  } else if (score > 5) {
    310    score = 5;
    311  }
    312 
    313  return {
    314    category: memory.category,
    315    intent: memory.intent,
    316    memory_summary: memory.memory_summary,
    317    score,
    318  };
    319 }
    320 
    321 /**
    322 * Normalizes and validates parsed LLM output into a list of memories to handle LLM output variability
    323 *
    324 * @param {*} parsed                JSON-parsed LLM output
    325 * @returns {Array<Map<{
    326 *  category: string,
    327 *  intent: string,
    328 *  memory_summary: string,
    329 *  score: number,
    330 * }>>}                             List of sanitized memories
    331 */
    332 function normalizeMemoryList(parsed) {
    333  let list = parsed;
    334  if (!Array.isArray(list)) {
    335    // If list isn't an array, check that it's an object with a nested "items" array
    336    if (list && Array.isArray(list.items)) {
    337      list = list.items;
    338    } else if (list && typeof list === "object") {
    339      // If list isn't an array, check that it's a least a single object, so check that list has memory-like keys
    340      const looksLikeMemory =
    341        "category" in list || "intent" in list || "memory_summary" in list;
    342      if (looksLikeMemory) {
    343        list = [list];
    344      }
    345    }
    346  }
    347  if (!Array.isArray(list)) {
    348    return [];
    349  }
    350 
    351  return list.map(sanitizeMemory).filter(Boolean);
    352 }
    353 
    354 /**
    355 * Prompts an LLM to generate an initial, unfiltered list of candidate memories from user data
    356 *
    357 * @param {openAIEngine} engine     openAIEngine instance to call LLM API
    358 * @param {object} sources          User data source type to aggregrated records (i.e., {history: [domainItems, titleItems, searchItems]})
    359 * @returns {Promise<Array<Map<{
    360 *  category: string,
    361 *  intent: string,
    362 *  memory_summary: string,
    363 *  score: number,
    364 * }>>>}                            Promise resolving the list of generated memories
    365 */
    366 export async function generateInitialMemoriesList(engine, sources) {
    367  const promptText = await buildInitialMemoriesGenerationPrompt(sources);
    368  const response = await engine.run({
    369    args: [
    370      {
    371        role: "system",
    372        content: initialMemoriesGenerationSystemPrompt,
    373      },
    374      { role: "user", content: promptText },
    375    ],
    376    responseFormat: { type: "json_schema", schema: INITIAL_MEMORIES_SCHEMA },
    377    fxAccountToken: await openAIEngine.getFxAccountToken(),
    378  });
    379 
    380  const parsed = parseAndExtractJSON(response, []);
    381  return normalizeMemoryList(parsed);
    382 }
    383 
    384 /**
    385 * Prompts an LLM to deduplicate new memories against existing ones
    386 *
    387 * @param {OpenAIEngine} engine                 openAIEngine instance to call LLM API
    388 * @param {Array<string>} existingMemoriesList  List of existing memory summary strings
    389 * @param {Array<string>} newMemoriesList       List of new memory summary strings to deduplicate
    390 * @returns {Promise<Array<string>>}            Promise resolving the final list of deduplicated memory summary strings
    391 */
    392 export async function deduplicateMemories(
    393  engine,
    394  existingMemoriesList,
    395  newMemoriesList
    396 ) {
    397  const dedupPrompt = await buildMemoriesDeduplicationPrompt(
    398    existingMemoriesList,
    399    newMemoriesList
    400  );
    401 
    402  const response = await engine.run({
    403    args: [
    404      {
    405        role: "system",
    406        content: memoriesDeduplicationSystemPrompt,
    407      },
    408      { role: "user", content: dedupPrompt },
    409    ],
    410    responseFormat: {
    411      type: "json_schema",
    412      schema: MEMORIES_DEDUPLICATION_SCHEMA,
    413    },
    414    fxAccountToken: await openAIEngine.getFxAccountToken(),
    415  });
    416 
    417  const parsed = parseAndExtractJSON(response, { unique_memories: [] });
    418 
    419  // Able to extract a JSON, so the fallback wasn't used, but the LLM didn't follow the schema
    420  if (
    421    parsed.unique_memories === undefined ||
    422    !Array.isArray(parsed.unique_memories)
    423  ) {
    424    return [];
    425  }
    426 
    427  // Make sure we filter out any invalid main_memory entries before returning
    428  return parsed.unique_memories
    429    .filter(
    430      item =>
    431        item.main_memory !== undefined && typeof item.main_memory === "string"
    432    )
    433    .map(item => item.main_memory);
    434 }
    435 
    436 /**
    437 * Prompts an LLM to filter out sensitive memories from an memories list
    438 *
    439 * @param {OpenAIEngine} engine         openAIEngine instance to call LLM API
    440 * @param {Array<string>} memoriesList  List of memory summary strings to filter
    441 * @returns {Promise<Array<string>>}    Promise resolving the final list of non-sensitive memory summary strings
    442 */
    443 export async function filterSensitiveMemories(engine, memoriesList) {
    444  const sensitivityFilterPrompt =
    445    await buildMemoriesSensitivityFilterPrompt(memoriesList);
    446  const response = await engine.run({
    447    args: [
    448      {
    449        role: "system",
    450        content: memoriesSensitivityFilterSystemPrompt,
    451      },
    452      { role: "user", content: sensitivityFilterPrompt },
    453    ],
    454    responseFormat: {
    455      type: "json_schema",
    456      schema: MEMORIES_NON_SENSITIVE_SCHEMA,
    457    },
    458    fxAccountToken: await openAIEngine.getFxAccountToken(),
    459  });
    460 
    461  const parsed = parseAndExtractJSON(response, { non_sensitive_memories: [] });
    462 
    463  // Able to extract a JSON, so the fallback wasn't used, but the LLM didn't follow the schema
    464  if (
    465    parsed.non_sensitive_memories === undefined ||
    466    !Array.isArray(parsed.non_sensitive_memories)
    467  ) {
    468    return [];
    469  }
    470 
    471  // Make sure we filter out any invalid entries before returning
    472  return parsed.non_sensitive_memories.filter(item => typeof item === "string");
    473 }
    474 
    475 /**
    476 *
    477 * @param {Map<string, any>} initialMemories    List of original, unfiltered memory objects
    478 * @param {Array<string>} filteredMemoriesList  List of deduplicated and sensitivity-filtered memory summary strings
    479 * @returns {Promise<Map<string, any>>}         Promise resolving the final list of memory objects
    480 */
    481 export async function mapFilteredMemoriesToInitialList(
    482  initialMemories,
    483  filteredMemoriesList
    484 ) {
    485  return initialMemories.filter(memory =>
    486    filteredMemoriesList.includes(memory.memory_summary)
    487  );
    488 }