tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

MemoriesManager.sys.mjs (18005B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
      4 
      5 import {
      6  getRecentHistory,
      7  sessionizeVisits,
      8  generateProfileInputs,
      9  aggregateSessions,
     10  topkAggregates,
     11 } from "moz-src:///browser/components/aiwindow/models/memories/MemoriesHistorySource.sys.mjs";
     12 import { getRecentChats } from "./MemoriesChatSource.sys.mjs";
     13 import {
     14  DEFAULT_ENGINE_ID,
     15  MODEL_FEATURES,
     16  openAIEngine,
     17  renderPrompt,
     18  SERVICE_TYPES,
     19 } from "moz-src:///browser/components/aiwindow/models/Utils.sys.mjs";
     20 import { MemoryStore } from "moz-src:///browser/components/aiwindow/services/MemoryStore.sys.mjs";
     21 import {
     22  CATEGORIES,
     23  INTENTS,
     24  HISTORY as SOURCE_HISTORY,
     25  CONVERSATION as SOURCE_CONVERSATION,
     26 } from "moz-src:///browser/components/aiwindow/models/memories/MemoriesConstants.sys.mjs";
     27 import {
     28  getFormattedMemoryAttributeList,
     29  parseAndExtractJSON,
     30  generateMemories,
     31 } from "moz-src:///browser/components/aiwindow/models/memories/Memories.sys.mjs";
     32 import {
     33  messageMemoryClassificationSystemPrompt,
     34  messageMemoryClassificationPrompt,
     35 } from "moz-src:///browser/components/aiwindow/models/prompts/MemoriesPrompts.sys.mjs";
     36 import { MEMORIES_MESSAGE_CLASSIFY_SCHEMA } from "moz-src:///browser/components/aiwindow/models/memories/MemoriesSchemas.sys.mjs";
     37 
     38 const K_DOMAINS_FULL = 100; // k_domains for the full (first-run) history pass
     39 const K_TITLES_FULL = 60; // k_titles for the full (first-run) history pass
     40 const K_SEARCHES_FULL = 10; // k_searches for the full (first-run) history pass
     41 const K_DOMAINS_DELTA = 30; // k_domains for delta runs; also the getAggregatedBrowserHistory default
     42 const K_TITLES_DELTA = 60; // k_titles for delta runs; also the getAggregatedBrowserHistory default
     43 const K_SEARCHES_DELTA = 10; // k_searches for delta runs; also the getAggregatedBrowserHistory default
     44 
     45 const DEFAULT_HISTORY_FULL_LOOKUP_DAYS = 60; // `days` option passed to getRecentHistory on a full run
     46 const DEFAULT_HISTORY_FULL_MAX_RESULTS = 3000; // `maxResults` option on a full run
     47 const DEFAULT_HISTORY_DELTA_MAX_RESULTS = 500; // `maxResults` option on a delta run
     48 const DEFAULT_CHAT_FULL_MAX_RESULTS = 50; // 2nd argument to getRecentChats (used for both full and delta runs)
     49 const DEFAULT_CHAT_HALF_LIFE_DAYS_FULL_RESULTS = 7; // 3rd argument to getRecentChats (used for both full and delta runs)
     50 
     51 const LAST_HISTORY_MEMORY_TS_ATTRIBUTE = "last_history_memory_ts"; // MemoryStore meta key updated by saveMemories for history memories
     52 const LAST_CONVERSATION_MEMORY_TS_ATTRIBUTE = "last_chat_memory_ts"; // MemoryStore meta key updated by saveMemories for conversation memories
     53 /**
     54 * MemoriesManager class
     55 */
     56 export class MemoriesManager {
     57  static #openAIEnginePromise = null;
     58 
     59  // Exposed to be stubbed for testing
     60  static _getRecentChats = getRecentChats;
     61 
     62  /**
     63   * Creates and returns an class-level openAIEngine instance if one has not already been created.
     64   * This current pulls from the general browser.aiwindow.* prefs, but will likely pull from memories-specific ones in the future
     65   *
     66   * @returns {Promise<openAIEngine>}  openAIEngine instance
     67   */
     68  static async ensureOpenAIEngine() {
     69    if (!this.#openAIEnginePromise) {
     70      this.#openAIEnginePromise = await openAIEngine.build(
     71        MODEL_FEATURES.MEMORIES,
     72        DEFAULT_ENGINE_ID,
     73        SERVICE_TYPES.MEMORIES
     74      );
     75    }
     76    return this.#openAIEnginePromise;
     77  }
     78 
     79  /**
     80   * Generates, saves, and returns memories from pre-computed sources
     81   *
     82   * @param {object} sources      User data source type to aggregrated records (i.e., {history: [domainItems, titleItems, searchItems]})
     83   * @param {string} sourceName   Specific source type from which memories are generated ("history" or "conversation")
     84   * @returns {Promise<Memory[]>}
     85   *          A promise that resolves to the list of persisted memories
     86   *          (newly created or updated), sorted and shaped as returned by
     87   *          {@link MemoryStore.addMemory}.
     88   */
     89  static async generateAndSaveMemoriesFromSources(sources, sourceName) {
     90    const now = Date.now();
     91    const existingMemories = await this.getAllMemories();
     92    const existingMemoriesSummaries = existingMemories.map(
     93      i => i.memory_summary
     94    );
     95    const engine = await this.ensureOpenAIEngine();
     96    const memories = await generateMemories(
     97      engine,
     98      sources,
     99      existingMemoriesSummaries
    100    );
    101    const { persistedMemories } = await this.saveMemories(
    102      memories,
    103      sourceName,
    104      now
    105    );
    106    return persistedMemories;
    107  }
    108 
    109  /**
    110   * Generates and persists memories derived from the user's recent browsing history.
    111   *
    112   * This method:
    113   *  1. Reads {@link last_history_memory_ts} via {@link getLastHistoryMemoryTimestamp}.
    114   *  2. Decides between:
    115   *     - Full processing (first run, no prior timestamp):
    116   *         * Uses a days-based cutoff (DEFAULT_HISTORY_FULL_LOOKUP_DAYS).
    117   *         * Uses max-results cap (DEFAULT_HISTORY_FULL_MAX_RESULTS).
    118   *         * Uses full top-k settings (K_DOMAINS_FULL, K_TITLES_FULL, K_SEARCHES_FULL).
    119   *     - Delta processing (subsequent runs, prior timestamp present):
    120   *         * Uses an absolute cutoff via `sinceMicros = lastTsMs * 1000`.
    121   *         * Uses a smaller max-results cap (DEFAULT_HISTORY_DELTA_MAX_RESULTS).
    122   *         * Uses delta top-k settings (K_DOMAINS_DELTA, K_TITLES_DELTA, K_SEARCHES_DELTA).
    123   *  3. Calls {@link getAggregatedBrowserHistory} with the computed options to obtain
    124   *     domain, title, and search aggregates.
    125   *  4. Calls {@link generateAndSaveMemoriesFromSources} with retrieved history to generate and save new memories.
    126   *
    127   * @returns {Promise<Memory[]>}
    128   *          A promise that resolves to the list of persisted history memories
    129   *          (newly created or updated), sorted and shaped as returned by
    130   *          {@link MemoryStore.addMemory}.
    131   */
    132  static async generateMemoriesFromBrowsingHistory() {
    133    const now = Date.now();
    134    // get last history memory timestamp in ms
    135    const lastTsMs = await this.getLastHistoryMemoryTimestamp();
    136    const isDelta = typeof lastTsMs === "number" && lastTsMs > 0;
    137    // set up the options based on delta or full (first) run
    138    let recentHistoryOpts = {};
    139    let topkAggregatesOpts;
    140    if (isDelta) {
    141      recentHistoryOpts = {
    142        sinceMicros: lastTsMs * 1000,
    143        maxResults: DEFAULT_HISTORY_DELTA_MAX_RESULTS,
    144      };
    145      topkAggregatesOpts = {
    146        k_domains: K_DOMAINS_DELTA,
    147        k_titles: K_TITLES_DELTA,
    148        k_searches: K_SEARCHES_DELTA,
    149        now,
    150      };
    151    } else {
    152      recentHistoryOpts = {
    153        days: DEFAULT_HISTORY_FULL_LOOKUP_DAYS,
    154        maxResults: DEFAULT_HISTORY_FULL_MAX_RESULTS,
    155      };
    156      topkAggregatesOpts = {
    157        k_domains: K_DOMAINS_FULL,
    158        k_titles: K_TITLES_FULL,
    159        k_searches: K_SEARCHES_FULL,
    160        now,
    161      };
    162    }
    163 
    164    const [domainItems, titleItems, searchItems] =
    165      await this.getAggregatedBrowserHistory(
    166        recentHistoryOpts,
    167        topkAggregatesOpts
    168      );
    169    const sources = { history: [domainItems, titleItems, searchItems] };
    170    return await this.generateAndSaveMemoriesFromSources(
    171      sources,
    172      SOURCE_HISTORY
    173    );
    174  }
    175 
    176  /**
    177   * Generates and persists memories derived from the user's recent chat history.
    178   *
    179   * This method:
    180   *  1. Reads {@link last_chat_memory_ts} via {@link getLastConversationMemoryTimestamp}.
    181   *  2. Decides between:
    182   *     - Full processing (first run, no prior timestamp):
    183   *         * Pulls all messages from the beginning of time.
    184   *     - Delta processing (subsequent runs, prior timestamp present):
    185   *         * Pulls all messages since the last timestamp.
    186   *  3. Calls {@link getRecentChats} with the computed options to obtain messages.
    187   *  4. Calls {@link generateAndSaveMemoriesFromSources} with messages to generate and save new memories.
    188   *
    189   * @returns {Promise<Memory[]>}
    190   *          A promise that resolves to the list of persisted conversation memories
    191   *          (newly created or updated), sorted and shaped as returned by
    192   *          {@link MemoryStore.addMemory}.
    193   */
    194  static async generateMemoriesFromConversationHistory() {
    195    // get last chat memory timestamp in ms
    196    const lastTsMs = await this.getLastConversationMemoryTimestamp();
    197    const isDelta = typeof lastTsMs === "number" && lastTsMs > 0;
    198 
    199    let startTime = 0;
    200 
    201    // If this is a subsequent run, set startTime to lastTsMs, the last time we generated chat-based memories
    202    if (isDelta) {
    203      startTime = lastTsMs;
    204    }
    205 
    206    const chatMessages = await this._getRecentChats(
    207      startTime,
    208      DEFAULT_CHAT_FULL_MAX_RESULTS,
    209      DEFAULT_CHAT_HALF_LIFE_DAYS_FULL_RESULTS
    210    );
    211    const sources = { conversation: chatMessages };
    212    return await this.generateAndSaveMemoriesFromSources(
    213      sources,
    214      SOURCE_CONVERSATION
    215    );
    216  }
    217 
    218  /**
    219   * Retrieves and aggregates recent browser history into top-k domain, title, and search aggregates.
    220   *
    221   * @param {object} [recentHistoryOpts={}]
    222   * @param {number} [recentHistoryOpts.sinceMicros=null]
    223   *        Optional absolute cutoff in microseconds since epoch (Places
    224   *        visit_date). If provided, this is used directly as the cutoff:
    225   *        only visits with `visit_date >= sinceMicros` are returned.
    226   *
    227   *        This is the recommended way to implement incremental reads:
    228   *        store the max `visitDateMicros` from the previous run and pass
    229   *        it (or max + 1) back in as `sinceMicros`.
    230   *
    231   * @param {number} [recentHistoryOpts.days=DEFAULT_DAYS]
    232   *        How far back to look if `sinceMicros` is not provided.
    233   *        The cutoff is computed as:
    234   *          cutoff = now() - days * MS_PER_DAY
    235   *
    236   *        Ignored when `sinceMicros` is non-null.
    237   *
    238   * @param {number} [recentHistoryOpts.maxResults=DEFAULT_MAX_RESULTS]
    239   *        Maximum number of rows to return from the SQL query (after
    240   *        sorting by most recent visit). Note that this caps the number
    241   *        of visits, not distinct URLs.
    242   * @param {object} [topkAggregatesOpts]
    243   * @param {number} [topkAggregatesOpts.k_domains=30]    Max number of domain aggregates to return
    244   * @param {number} [topkAggregatesOpts.k_titles=60]     Max number of title aggregates to return
    245   * @param {number} [topkAggregatesOpts.k_searches=10]   Max number of search aggregates to return
    246   * @param {number} [topkAggregatesOpts.now]             Current time; seconds or ms, normalized internally.}
    247   * @returns {Promise<[Array, Array, Array]>}            Top-k domain, title, and search aggregates
    248   */
    249  static async getAggregatedBrowserHistory(
    250    recentHistoryOpts = {},
    251    topkAggregatesOpts = {
    252      k_domains: K_DOMAINS_DELTA,
    253      k_titles: K_TITLES_DELTA,
    254      k_searches: K_SEARCHES_DELTA,
    255      now: undefined,
    256    }
    257  ) {
    258    const recentVisitRecords = await getRecentHistory(recentHistoryOpts);
    259    const sessionized = sessionizeVisits(recentVisitRecords);
    260    const profilePreparedInputs = generateProfileInputs(sessionized);
    261    const [domainAgg, titleAgg, searchAgg] = aggregateSessions(
    262      profilePreparedInputs
    263    );
    264 
    265    return await topkAggregates(
    266      domainAgg,
    267      titleAgg,
    268      searchAgg,
    269      topkAggregatesOpts
    270    );
    271  }
    272 
    273  /**
    274   * Retrieves all stored memories.
    275   * This is a quick-access wrapper around MemoryStore.getMemories() with no additional processing.
    276   *
    277   * @param {object} [opts={}]
    278   * @param {boolean} [opts.includeSoftDeleted=false]
    279   *        Whether to include soft-deleted memories.
    280   * @returns {Promise<Array<Map<{
    281   *  memory_summary: string,
    282   *  category: string,
    283   *  intent: string,
    284   *  score: number,
    285   * }>>>}                                    List of memories
    286   */
    287  static async getAllMemories(opts = { includeSoftDeleted: false }) {
    288    return await MemoryStore.getMemories(opts);
    289  }
    290 
    291  /**
    292   * Returns the last timestamp (in ms since Unix epoch) when a history-based
    293   * memory was generated, as persisted in MemoryStore.meta.
    294   *
    295   * If the store has never been updated, this returns 0.
    296   *
    297   * @returns {Promise<number>}  Milliseconds since Unix epoch
    298   */
    299  static async getLastHistoryMemoryTimestamp() {
    300    const meta = await MemoryStore.getMeta();
    301    return meta.last_history_memory_ts || 0;
    302  }
    303 
    304  /**
    305   * Returns the last timestamp (in ms since Unix epoch) when a chat-based
    306   * memory was generated, as persisted in MemoryStore.meta.
    307   *
    308   * If the store has never been updated, this returns 0.
    309   *
    310   * @returns {Promise<number>}  Milliseconds since Unix epoch
    311   */
    312  static async getLastConversationMemoryTimestamp() {
    313    const meta = await MemoryStore.getMeta();
    314    return meta.last_chat_memory_ts || 0;
    315  }
    316 
    317  /**
    318   * Persist a list of generated memories and update the appropriate meta timestamp.
    319   *
    320   * @param {Array<object>|null|undefined} generatedMemories
    321   *        Array of MemoryPartial-like objects to persist.
    322   * @param {"history"|"conversation"} source
    323   *        Source of these memories; controls which meta timestamp to update.
    324   * @param {number} [nowMs=Date.now()]
    325   *        Optional "now" timestamp in ms, for meta update fallback.
    326   *
    327   * @returns {Promise<{ persistedMemories: Array<object>, newTimestampMs: number | null }>}
    328   */
    329  static async saveMemories(generatedMemories, source, nowMs = Date.now()) {
    330    const persistedMemories = [];
    331 
    332    if (Array.isArray(generatedMemories)) {
    333      for (const memoryPartial of generatedMemories) {
    334        const stored = await MemoryStore.addMemory(memoryPartial);
    335        persistedMemories.push(stored);
    336      }
    337    }
    338 
    339    // Decide which meta field to update
    340    let metaKey;
    341    if (source === SOURCE_HISTORY) {
    342      metaKey = LAST_HISTORY_MEMORY_TS_ATTRIBUTE;
    343    } else if (source === SOURCE_CONVERSATION) {
    344      metaKey = LAST_CONVERSATION_MEMORY_TS_ATTRIBUTE;
    345    } else {
    346      // Unknown source: don't update meta, just return persisted results.
    347      return {
    348        persistedMemories,
    349        newTimestampMs: null,
    350      };
    351    }
    352 
    353    // Compute new timestamp: prefer max(updated_at) if present, otherwise fall back to nowMs.
    354    let newTsMs = nowMs;
    355    if (persistedMemories.length) {
    356      const maxUpdated = persistedMemories.reduce(
    357        (max, i) => Math.max(max, i.updated_at ?? 0),
    358        0
    359      );
    360      if (maxUpdated > 0) {
    361        newTsMs = maxUpdated;
    362      }
    363    }
    364 
    365    await MemoryStore.updateMeta({
    366      [metaKey]: newTsMs,
    367    });
    368 
    369    return {
    370      persistedMemories,
    371      newTimestampMs: newTsMs,
    372    };
    373  }
    374 
    375  /**
    376   * Soft deletes a memory by its ID.
    377   * Soft deletion sets the memory's `is_deleted` flag to true. This prevents memory getter functions
    378   * from returning the memory when using default parameters. It does not delete the memory from storage.
    379   *
    380   * From the user's perspective, soft-deleted memories will not be used in assistant responses but will still exist in storage.
    381   *
    382   * @param {string} memoryId        ID of the memory to soft-delete
    383   * @returns {Promise<Memory|null>} The soft-deleted memory, or null if not found
    384   */
    385  static async softDeleteMemoryById(memoryId) {
    386    return await MemoryStore.softDeleteMemory(memoryId);
    387  }
    388 
    389  /**
    390   * Hard deletes a memory by its ID.
    391   * Hard deletion permenantly removes the memory from storage entirely. This method should be used
    392   * by UI to allow users to delete memories they no longer want stored.
    393   *
    394   * @param {string} memoryId        ID of the memory to hard-delete
    395   * @returns {Promise<boolean>}      True if the memory was found and deleted, false otherwise
    396   */
    397  static async hardDeleteMemoryById(memoryId) {
    398    return await MemoryStore.hardDeleteMemory(memoryId);
    399  }
    400 
    401  /**
    402   * Builds the prompt to classify a user message into memory categories and intents.
    403   *
    404   * @param {string} message          User message to classify
    405   * @returns {Promise<string>}       Prompt string to send to LLM for classifying the message
    406   */
    407  static async buildMessageMemoryClassificationPrompt(message) {
    408    const categories = getFormattedMemoryAttributeList(CATEGORIES);
    409    const intents = getFormattedMemoryAttributeList(INTENTS);
    410 
    411    return await renderPrompt(messageMemoryClassificationPrompt, {
    412      message,
    413      categories,
    414      intents,
    415    });
    416  }
    417 
    418  /**
    419   * Classifies a user message into memory categories and intents.
    420   *
    421   * @param {string} message                                                        User message to classify
    422   * @returns {Promise<Map<{categories: Array<string>, intents: Array<string>}>>}}  Categories and intents into which the message was classified
    423   */
    424  static async memoryClassifyMessage(message) {
    425    const messageClassifPrompt =
    426      await this.buildMessageMemoryClassificationPrompt(message);
    427 
    428    const engine = await this.ensureOpenAIEngine();
    429 
    430    const response = await engine.run({
    431      args: [
    432        { role: "system", content: messageMemoryClassificationSystemPrompt },
    433        { role: "user", content: messageClassifPrompt },
    434      ],
    435      responseFormat: {
    436        type: "json_schema",
    437        schema: MEMORIES_MESSAGE_CLASSIFY_SCHEMA,
    438      },
    439      fxAccountToken: await openAIEngine.getFxAccountToken(),
    440    });
    441 
    442    const parsed = parseAndExtractJSON(response, {
    443      categories: [],
    444      intents: [],
    445    });
    446    if (!parsed.categories || !parsed.intents) {
    447      return { categories: [], intents: [] };
    448    }
    449 
    450    return parsed;
    451  }
    452 
    453  /**
    454   * Fetches relevant memories for a given user message.
    455   *
    456   * @param {string} message                  User message to find relevant memories for
    457   * @returns {Promise<Array<Map<{
    458   *  memory_summary: string,
    459   *  category: string,
    460   *  intent: string,
    461   *  score: number,
    462   * }>>>}                                    List of relevant memories
    463   */
    464  static async getRelevantMemories(message) {
    465    const existingMemories = await MemoriesManager.getAllMemories();
    466    // Shortcut: if there aren't any existing memories, return empty list immediately
    467    if (existingMemories.length === 0) {
    468      return [];
    469    }
    470 
    471    const messageClassification =
    472      await MemoriesManager.memoryClassifyMessage(message);
    473    // Shortcut: if the message's category and/or intent is null, return empty list immediately
    474    if (!messageClassification.categories || !messageClassification.intents) {
    475      return [];
    476    }
    477 
    478    // Filter existing memories to those that match the message's category
    479    const candidateRelevantMemories = existingMemories.filter(memory => {
    480      return messageClassification.categories.includes(memory.category);
    481    });
    482 
    483    return candidateRelevantMemories;
    484  }
    485 }