tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 4dc5bc79f2ae965a9ec9b6330bcf644c3063d34d
parent 86dc5318b583376277f455a1468b7f2508a6dbf1
Author: Chidam Gopal <cgopal@mozilla.com>
Date:   Tue,  9 Dec 2025 21:40:20 +0000

Bug 2005046 - Insight updation for Day 0 and incremental updates r=cdipersio,ai-models-reviewers

Differential Revision: https://phabricator.services.mozilla.com/D275669

Diffstat:
Mbrowser/components/aiwindow/models/InsightsConstants.sys.mjs | 1+
Mbrowser/components/aiwindow/models/InsightsManager.sys.mjs | 178++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Mbrowser/components/aiwindow/models/tests/xpcshell/test_InsightsManager.js | 156++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 328 insertions(+), 7 deletions(-)

diff --git a/browser/components/aiwindow/models/InsightsConstants.sys.mjs b/browser/components/aiwindow/models/InsightsConstants.sys.mjs @@ -3,6 +3,7 @@ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ export const HISTORY = "history"; +export const CONVERSATION = "conversation"; /** * Insight categories diff --git a/browser/components/aiwindow/models/InsightsManager.sys.mjs b/browser/components/aiwindow/models/InsightsManager.sys.mjs @@ -17,10 +17,13 @@ import { InsightStore } from "moz-src:///browser/components/aiwindow/services/In import { CATEGORIES, INTENTS, + HISTORY as SOURCE_HISTORY, + CONVERSATION as SOURCE_CONVERSATION, } from "moz-src:///browser/components/aiwindow/models/InsightsConstants.sys.mjs"; import { getFormattedInsightAttributeList, parseAndExtractJSON, + generateInsights, } from "moz-src:///browser/components/aiwindow/models/Insights.sys.mjs"; import { messageInsightClassificationSystemPrompt, @@ -28,10 +31,19 @@ import { } from "moz-src:///browser/components/aiwindow/models/prompts/insightsPrompts.sys.mjs"; import { INSIGHTS_MESSAGE_CLASSIFY_SCHEMA } from "moz-src:///browser/components/aiwindow/models/InsightsSchemas.sys.mjs"; -const K_DOMAINS = 30; -const K_TITLES = 60; -const K_SEARCHES = 10; +const K_DOMAINS_FULL = 100; +const K_TITLES_FULL = 60; +const K_SEARCHES_FULL = 10; +const K_DOMAINS_DELTA = 30; +const K_TITLES_DELTA = 60; +const K_SEARCHES_DELTA = 10; +const DEFAULT_HISTORY_FULL_LOOKUP_DAYS = 60; +const DEFAULT_HISTORY_FULL_MAX_RESULTS = 3000; +const DEFAULT_HISTORY_DELTA_MAX_RESULTS = 500; + +const LAST_HISTORY_INSIGHTS_TS_ATTRIBUTE = "last_history_insight_ts"; +const LAST_CONVERSATION_INSIGHTS_TS_ATTRIBUTE = "last_chat_insight_ts"; /** * InsightsManager class */ @@ -52,6 +64,89 @@ export class InsightsManager { } /** + * Generates and persists insights derived from the user's recent browsing history. + * + * This method: + * 1. Reads {@link last_history_insight_ts} via {@link getLastHistoryInsightsTimestamp}. + * 2. 
Decides between: + * - Full processing (first run, no prior timestamp): + * * Uses a days-based cutoff (DEFAULT_HISTORY_FULL_LOOKUP_DAYS). + * * Uses max-results cap (DEFAULT_HISTORY_FULL_MAX_RESULTS). + * * Uses full top-k settings (K_DOMAINS_FULL, K_TITLES_FULL, K_SEARCHES_FULL). + * - Delta processing (subsequent runs, prior timestamp present): + * * Uses an absolute cutoff via `sinceMicros = lastTsMs * 1000`. + * * Uses a smaller max-results cap (DEFAULT_HISTORY_DELTA_MAX_RESULTS). + * * Uses delta top-k settings (K_DOMAINS_DELTA, K_TITLES_DELTA, K_SEARCHES_DELTA). + * 3. Calls {@link getAggregatedBrowserHistory} with the computed options to obtain + * domain, title, and search aggregates. + * 4. Fetches existing insights via {@link getAllInsights}. + * 5. Ensures a shared OpenAI engine via {@link ensureOpenAIEngine} and calls + * {@link generateInsights} to produce new/updated insights. + * 6. Persists those insights via {@link saveInsights}, which also updates + * `last_history_insight_ts` in {@link InsightStore.updateMeta}. + * + * @returns {Promise<Insight[]>} + * A promise that resolves to the list of persisted history insights + * (newly created or updated), sorted and shaped as returned by + * {@link InsightStore.addInsight}. 
+ */ + static async generateInsightsFromBrowsingHistory() { + const now = Date.now(); + // get last history insight timestamp in ms + const lastTsMs = await this.getLastHistoryInsightsTimestamp(); + const isDelta = typeof lastTsMs === "number" && lastTsMs > 0; + // set up the options based on delta or full (first) run + let recentHistoryOpts = {}; + let topkAggregatesOpts; + if (isDelta) { + recentHistoryOpts = { + sinceMicros: lastTsMs * 1000, + maxResults: DEFAULT_HISTORY_DELTA_MAX_RESULTS, + }; + topkAggregatesOpts = { + k_domains: K_DOMAINS_DELTA, + k_titles: K_TITLES_DELTA, + k_searches: K_SEARCHES_DELTA, + now, + }; + } else { + recentHistoryOpts = { + days: DEFAULT_HISTORY_FULL_LOOKUP_DAYS, + maxResults: DEFAULT_HISTORY_FULL_MAX_RESULTS, + }; + topkAggregatesOpts = { + k_domains: K_DOMAINS_FULL, + k_titles: K_TITLES_FULL, + k_searches: K_SEARCHES_FULL, + now, + }; + } + + const [domainItems, titleItems, searchItems] = + await this.getAggregatedBrowserHistory( + recentHistoryOpts, + topkAggregatesOpts + ); + const sources = { history: [domainItems, titleItems, searchItems] }; + const existingInsights = await this.getAllInsights(); + const existingInsightsSummaries = existingInsights.map( + i => i.insight_summary + ); + const engine = await this.ensureOpenAIEngine(); + const insights = await generateInsights( + engine, + sources, + existingInsightsSummaries + ); + const { persistedInsights } = await this.saveInsights( + insights, + SOURCE_HISTORY, + now + ); + return persistedInsights; + } + + /** * Retrieves and aggregates recent browser history into top-k domain, title, and search aggregates. 
* * @param {object} [recentHistoryOpts={}] @@ -85,9 +180,9 @@ export class InsightsManager { static async getAggregatedBrowserHistory( recentHistoryOpts = {}, topkAggregatesOpts = { - k_domains: K_DOMAINS, - k_titles: K_TITLES, - k_searches: K_SEARCHES, + k_domains: K_DOMAINS_DELTA, + k_titles: K_TITLES_DELTA, + k_searches: K_SEARCHES_DELTA, now: undefined, } ) { @@ -122,6 +217,77 @@ export class InsightsManager { } /** + * Returns the timestamp (in ms since Unix epoch) that saveInsights last + * recorded for the history source, read from the same meta key it writes. + * + * If that key is absent (or falsy) in the meta, this returns 0. + * + * @returns {Promise<number>} Milliseconds since Unix epoch, or 0 + */ + static async getLastHistoryInsightsTimestamp() { + const meta = await InsightStore.getMeta(); + return meta[LAST_HISTORY_INSIGHTS_TS_ATTRIBUTE] || 0; + } + + /** + * Persist a list of generated insights and update the appropriate meta timestamp. + * + * @param {Array<object>|null|undefined} generatedInsights + * Array of InsightPartial-like objects to persist. + * @param {"history"|"conversation"} source + * Source of these insights; controls which meta timestamp to update. + * @param {number} [nowMs=Date.now()] + * Optional "now" timestamp in ms, for meta update fallback. + * + * @returns {Promise<{ persistedInsights: Array<object>, newTimestampMs: number | null }>} + */ + static async saveInsights(generatedInsights, source, nowMs = Date.now()) { + const persistedInsights = []; + + if (Array.isArray(generatedInsights)) { + for (const insightPartial of generatedInsights) { + const stored = await InsightStore.addInsight(insightPartial); + persistedInsights.push(stored); + } + } + + // Decide which meta field to update + let metaKey; + if (source === SOURCE_HISTORY) { + metaKey = LAST_HISTORY_INSIGHTS_TS_ATTRIBUTE; + } else if (source === SOURCE_CONVERSATION) { + metaKey = LAST_CONVERSATION_INSIGHTS_TS_ATTRIBUTE; + } else { + // Unknown source: don't update meta, just return persisted results.
+ return { + persistedInsights, + newTimestampMs: null, + }; + } + + // Compute new timestamp: prefer max(updated_at) if present, otherwise fall back to nowMs. + let newTsMs = nowMs; + if (persistedInsights.length) { + const maxUpdated = persistedInsights.reduce( + (max, i) => Math.max(max, i.updated_at ?? 0), + 0 + ); + if (maxUpdated > 0) { + newTsMs = maxUpdated; + } + } + + await InsightStore.updateMeta({ + [metaKey]: newTsMs, + }); + + return { + persistedInsights, + newTimestampMs: newTsMs, + }; + } + + /** * Builds the prompt to classify a user message into insight categories and intents. * * @param {string} message User message to classify diff --git a/browser/components/aiwindow/models/tests/xpcshell/test_InsightsManager.js b/browser/components/aiwindow/models/tests/xpcshell/test_InsightsManager.js @@ -12,7 +12,12 @@ const { sinon } = ChromeUtils.importESModule( const { InsightsManager } = ChromeUtils.importESModule( "moz-src:///browser/components/aiwindow/models/InsightsManager.sys.mjs" ); -const { CATEGORIES, INTENTS } = ChromeUtils.importESModule( +const { + CATEGORIES, + INTENTS, + HISTORY: SOURCE_HISTORY, + CONVERSATION: SOURCE_CONVERSATION, +} = ChromeUtils.importESModule( "moz-src:///browser/components/aiwindow/models/InsightsConstants.sys.mjs" ); const { getFormattedInsightAttributeList } = ChromeUtils.importESModule( @@ -504,3 +509,152 @@ add_task( } } ); + +/** + * Tests saveInsights correctly persists history insights and updates last_history_insight_ts. 
+ */ +add_task(async function test_saveInsights_history_updates_meta() { + const sb = sinon.createSandbox(); + try { + const now = Date.now(); + + const generatedInsights = [ + { + insight_summary: "foo", + category: "A", + intent: "X", + score: 1, + updated_at: now - 1000, + }, + { + insight_summary: "bar", + category: "B", + intent: "Y", + score: 2, + updated_at: now + 500, + }, + ]; + + const storedInsights = generatedInsights.map((generatedInsight, idx) => ({ + id: `id-${idx}`, + ...generatedInsight, + })); + + const addInsightStub = sb + .stub(InsightStore, "addInsight") + .callsFake(async partial => { + // simple mapping: return first / second stored insight based on summary + return storedInsights.find( + s => s.insight_summary === partial.insight_summary + ); + }); + + const updateMetaStub = sb.stub(InsightStore, "updateMeta").resolves(); + + const { persistedInsights, newTimestampMs } = + await InsightsManager.saveInsights( + generatedInsights, + SOURCE_HISTORY, + now + ); + + Assert.equal( + addInsightStub.callCount, + generatedInsights.length, + "addInsight should be called once per generated insight" + ); + Assert.deepEqual( + persistedInsights.map(i => i.id), + storedInsights.map(i => i.id), + "Persisted insights should match stored insights" + ); + + Assert.ok( + updateMetaStub.calledOnce, + "updateMeta should be called once for history source" + ); + const metaArg = updateMetaStub.firstCall.args[0]; + Assert.ok( + "last_history_insight_ts" in metaArg, + "updateMeta should update last_history_insight_ts for history source" + ); + Assert.equal( + metaArg.last_history_insight_ts, + storedInsights[1].updated_at, + "last_history_insight_ts should be set to max(updated_at) among persisted insights" + ); + Assert.equal( + newTimestampMs, + storedInsights[1].updated_at, + "Returned newTimestampMs should match the updated meta timestamp" + ); + } finally { + sb.restore(); + } +}); + +/** + * Tests saveInsights correctly persists conversation insights and 
updates last_chat_insight_ts. + */ +add_task(async function test_saveInsights_conversation_updates_meta() { + const sb = sinon.createSandbox(); + try { + const now = Date.now(); + + const generatedInsights = [ + { + insight_summary: "chat-insight", + category: "Chat", + intent: "Talk", + score: 1, + updated_at: now, + }, + ]; + const storedInsight = { id: "chat-1", ...generatedInsights[0] }; + + const addInsightStub = sb + .stub(InsightStore, "addInsight") + .resolves(storedInsight); + const updateMetaStub = sb.stub(InsightStore, "updateMeta").resolves(); + + const { persistedInsights, newTimestampMs } = + await InsightsManager.saveInsights( + generatedInsights, + SOURCE_CONVERSATION, + now + ); + + Assert.equal( + addInsightStub.callCount, + 1, + "addInsight should be called once for conversation insight" + ); + Assert.equal( + persistedInsights[0].id, + storedInsight.id, + "Persisted insight should match stored insight" + ); + + Assert.ok( + updateMetaStub.calledOnce, + "updateMeta should be called once for conversation source" + ); + const metaArg = updateMetaStub.firstCall.args[0]; + Assert.ok( + "last_chat_insight_ts" in metaArg, + "updateMeta should update last_chat_insight_ts for conversation source" + ); + Assert.equal( + metaArg.last_chat_insight_ts, + storedInsight.updated_at, + "last_chat_insight_ts should be set to insight.updated_at" + ); + Assert.equal( + newTimestampMs, + storedInsight.updated_at, + "Returned newTimestampMs should match the updated meta timestamp" + ); + } finally { + sb.restore(); + } +});