tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 9fd16992d574b9769fa1aacb3e8ae1ae7dc0c457
parent e335f068da4a8db0cac9961e9acbc6359068ab7c
Author: Christopher DiPersio <cdipersio@mozilla.com>
Date:   Mon,  8 Dec 2025 17:35:54 +0000

Bug 2003330 - Implement initial insights list creation r=cgopal,tarek,ai-models-reviewers

Differential Revision: https://phabricator.services.mozilla.com/D274964

Diffstat:
M browser/base/content/test/static/browser_all_files_referenced.js | 13 +++++++++++++
A browser/components/aiwindow/models/Insights.sys.mjs | 465 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A browser/components/aiwindow/models/InsightsConstants.sys.mjs | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
A browser/components/aiwindow/models/InsightsSchemas.sys.mjs | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M browser/components/aiwindow/models/moz.build | 3 +++
A browser/components/aiwindow/models/prompts/insightsPrompts.sys.mjs | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M browser/components/aiwindow/models/prompts/moz.build | 1 +
A browser/components/aiwindow/models/tests/xpcshell/test_Insights.js | 1286 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml | 2 ++
9 files changed, 2109 insertions(+), 0 deletions(-)

diff --git a/browser/base/content/test/static/browser_all_files_referenced.js b/browser/base/content/test/static/browser_all_files_referenced.js @@ -362,6 +362,19 @@ var allowlist = [ { file: "moz-src:///browser/components/aiwindow/models/Tools.sys.mjs", }, + // Bug 2003330 - Implement initial insights list creation + { + file: "moz-src:///browser/components/aiwindow/models/Insights.sys.mjs", + }, + { + file: "moz-src:///browser/components/aiwindow/models/InsightsConstants.sys.mjs", + }, + { + file: "moz-src:///browser/components/aiwindow/models/prompts/insightsPrompts.sys.mjs", + }, + { + file: "moz-src:///browser/components/aiwindow/models/InsightsSchemas.sys.mjs", + }, ]; if (AppConstants.NIGHTLY_BUILD) { diff --git a/browser/components/aiwindow/models/Insights.sys.mjs b/browser/components/aiwindow/models/Insights.sys.mjs @@ -0,0 +1,465 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +/** + * This module defines functions to generate, deduplicate, and filter insights. + * + * The primary method in this module is `generateInsights`, which orchestrates the entire pipeline: + * 1. Generates initial insights from a specified user data user + * 2. Deduplicates the newly generated insights against all existing insights + * 3. Filters out insights with sensitive content (i.e. financial, medical, etc.) + * 4. Returns the final list of insights objects + * + * `generateInsights` requires 3 arguments: + * 1. `engine`: an instance of `openAIEngine` to call the LLM API + * 2. `sources`: an object mapping user data source types to aggregated records (i.e., {history: [domainItems, titleItems, searchItems]}) + * 3. 
`existingInsightsList`: an array of existing insight summary strings to deduplicate against + * + * Example Usage: + * const engine = await openAIEngine.build(); + * const sources = {history: [domainItems, titleItems, searchItems]}; + * const existingInsightsList = [...]; // Array of existing insight summary strings; this should be fetched from insight storage + * const newInsights = await generateInsights(engine, sources, existingInsightsList); + * + */ + +import { renderPrompt } from "./Utils.sys.mjs"; + +import { + HISTORY, + CATEGORIES, + CATEGORIES_LIST, + INTENTS, + INTENTS_LIST, +} from "./InsightsConstants.sys.mjs"; + +import { + initialInsightsGenerationSystemPrompt, + initialInsightsGenerationPrompt, + insightsDeduplicationSystemPrompt, + insightsDeduplicationPrompt, + insightSensitivityFilterSystemPrompt, + insightsSensitivityFilterPrompt, +} from "moz-src:///browser/components/aiwindow/models/prompts/insightsPrompts.sys.mjs"; + +import { + INITIAL_INSIGHTS_SCHEMA, + INSIGHTS_DEDUPLICATION_SCHEMA, + INSIGHTS_NON_SENSITIVE_SCHEMA, +} from "moz-src:///browser/components/aiwindow/models/InsightsSchemas.sys.mjs"; + +/** + * Generates, deduplicates, and filters insights end-to-end + * + * This is the main pipeline function. 
+ * + * @param {OpenAIEngine} engine openAIEngine instance to call LLM API + * @param {object} sources User data source type to aggregrated records (i.e., {history: [domainItems, titleItems, searchItems]}) + * @param {Array<string>} existingInsightsList List of existing insight summary strings to deduplicate against + * @returns {Promise<Array<Map<{ + * category: string, + * intent: string, + * insight_summary: string, + * score: number, + * }>>>} Promise resolving the final list of generated, deduplicated, and filtered insight objects + */ +export async function generateInsights(engine, sources, existingInsightsList) { + // Step 1: Generate initial insights + const initialInsights = await generateInitialInsightsList(engine, sources); + // If we don't generate any new insights, just return an empty list immediately instead of doing the rest of the steps + if (!initialInsights || initialInsights.length === 0) { + return []; + } + + // Step 2: Deduplicate against existing insights + const initialInsightsSummaries = initialInsights.map( + insight => insight.insight_summary + ); + const dedupedInsightsSummaries = await deduplicateInsights( + engine, + existingInsightsList, + initialInsightsSummaries + ); + // If we don't have any deduped insights, no new insights were generated or we ran into an unexpected JSON parse error, so return an empty list + if (!dedupedInsightsSummaries || dedupedInsightsSummaries.length === 0) { + return []; + } + + // Step 3: Filter out sensitive insights + const nonSensitiveInsightsSummaries = await filterSensitiveInsights( + engine, + dedupedInsightsSummaries + ); + + // Step 4: Map back to full insight objects and return + return await mapFilteredInsightsToInitialList( + initialInsights, + nonSensitiveInsightsSummaries + ); +} + +/** + * Formats a list of strings into a prompt-friendly bullet list + * + * @param {List<string>} list + * @returns {string} + */ +export function formatListForPrompt(list) { + return list.map(item => `- 
"${item}"`).join("\n"); +} + +/** + * Utility function to cleanly get bullet-formatted category and insight lists + * + * @param {string} attributeName "categories" or "intents" + * @returns {string} Formatted list string + */ +export function getFormattedInsightAttributeList(attributeName) { + if (attributeName === CATEGORIES) { + return formatListForPrompt(CATEGORIES_LIST); + } else if (attributeName === INTENTS) { + return formatListForPrompt(INTENTS_LIST); + } + throw new Error(`Unsupported insight attribute name: ${attributeName}`); +} + +/** + * Extracts a JSON as a map from an LLM response (handles markdown-formatted code blocks) + * + * @param {any} response LLM response + * @param {any} fallback Fallback value if parsing fails to protect downstream code + * @returns {Map} Parsed JSON object + */ +function parseAndExtractJSON(response, fallback) { + const rawContent = response?.finalOutput ?? ""; + const markdownMatch = rawContent.match(/```(?:json)?\s*([\s\S]*?)\s*```/i); + const payload = markdownMatch ? 
markdownMatch[1] : rawContent; + try { + return JSON.parse(payload); + } catch (e) { + // If we can't parse a JSON from the LLM response, return a tailored fallback value to prevent downstream code failures + if (e instanceof SyntaxError) { + console.warn( + `Could not parse JSON from LLM response; using fallback (${fallback}): ${e.message}` + ); + return fallback; + } + throw new Error( + `Unexpected error parsing JSON from LLM response: ${e.message}` + ); + } +} + +/** + * Renders recent history records into CSV tables for prompt input + * + * @param {Array<Array<string>>} domainItems List of aggregated domain items + * @param {Array<Array<string>>} titleItems List of aggregated title items + * @param {Array<object>} searchItems List of aggregated search items + * @returns {Promise<string>} Promise resolving recent browser history rendered as CSV tables + */ +export async function renderRecentHistoryForPrompt( + domainItems, + titleItems, + searchItems +) { + let finalCSV = ""; + + if (domainItems.length) { + let domainRecordsTable = ["Domain,Importance Score"]; + for (const domainItem of domainItems) { + domainRecordsTable.push(domainItem.join(",")); + } + finalCSV += "# Domains\n" + domainRecordsTable.join("\n") + "\n\n"; + } + + if (titleItems.length) { + let titleRecordsTable = ["Title,Importance Score"]; + for (const titleItem of titleItems) { + titleRecordsTable.push(titleItem.join(",")); + } + finalCSV += "# Titles\n" + titleRecordsTable.join("\n") + "\n\n"; + } + + if (searchItems.length) { + let searchRecordsTable = ["Search,Importance Score"]; + for (const searchItem of searchItems) { + searchRecordsTable.push(`${searchItem.q},${searchItem.r}`); + } + finalCSV += "# Searches\n" + searchRecordsTable.join("\n"); + } + + return finalCSV.trim(); +} + +/** + * Builds the initial insights generation prompt, pulling profile information based on given source + * + * @param {object} sources User data source type to aggregrated records (i.e., {history: 
[domainItems, titleItems, searchItems]}) + * @returns {Promise<string>} Promise resolving the generated insights generation prompt with profile records injected + */ +export async function buildInitialInsightsGenerationPrompt(sources) { + let profileRecordsRenderedStr = ""; + + // Allow for multiple sources in the future + if (sources.hasOwnProperty(HISTORY)) { + const [domainItems, titleItems, searchItems] = sources[HISTORY]; + profileRecordsRenderedStr += await renderRecentHistoryForPrompt( + domainItems, + titleItems, + searchItems + ); + } + + if (!profileRecordsRenderedStr) { + throw new Error( + `No valid sources provided to build insights generation prompt: ${Object.keys(sources).join(", ")}` + ); + } + + return await renderPrompt(initialInsightsGenerationPrompt, { + categoriesList: getFormattedInsightAttributeList(CATEGORIES), + intentsList: getFormattedInsightAttributeList(INTENTS), + profileRecordsRenderedStr, + }); +} + +/** + * Builds the insights deduplication prompt + * + * @param {Array<string>} existingInsightsList List of existing insights + * @param {Array<string>} newInsightsList List of newly generated insights + * @returns {Promise<string>} Promise resolving the generated deduplication prompt with existing and new insights lists injected + */ +export async function buildInsightsDeduplicationPrompt( + existingInsightsList, + newInsightsList +) { + const existingInsightsListStr = formatListForPrompt(existingInsightsList); + const newInsightsListStr = formatListForPrompt(newInsightsList); + + return await renderPrompt(insightsDeduplicationPrompt, { + existingInsightsList: existingInsightsListStr, + newInsightsList: newInsightsListStr, + }); +} + +/** + * Builds the insights sensitivity filter prompt + * + * @param {Array<string>} insightsList List of insights to filter + * @returns {Promise<string>} Promise resolving the generated sensitivity filter prompt with insights list injected + */ +export async function 
buildInsightsSensitivityFilterPrompt(insightsList) { + const insightsListStr = formatListForPrompt(insightsList); + + return await renderPrompt(insightsSensitivityFilterPrompt, { + insightsList: insightsListStr, + }); +} + +/** + * Sanitizes a single insight object from LLM output, checking required fields and normalizing score + * + * @param {*} insight Raw insight object from LLM + * @returns {Map<{ + * category: string|null, + * intent: string|null, + * insight_summary: string|null, + * score: number, + * }>|null} Sanitized insight or null if invalid + */ +function sanitizeInsight(insight) { + // Shortcut to return nothing if insight is bad + if (!insight || typeof insight !== "object") { + return null; + } + + // Check that the candidate insight object has all the required string fields + for (const field of ["category", "intent", "insight_summary"]) { + if (!(field in insight) && typeof insight[field] !== "string") { + return null; + } + } + + // Clamp score to [1,5]; treat missing/invalid as 1 + let score = Number.isFinite(insight.score) ? 
Math.round(insight.score) : 1; + if (score < 1) { + score = 1; + } else if (score > 5) { + score = 5; + } + + return { + category: insight.category, + intent: insight.intent, + insight_summary: insight.insight_summary, + score, + }; +} + +/** + * Normalizes and validates parsed LLM output into a list of insights to handle LLM output variability + * + * @param {*} parsed JSON-parsed LLM output + * @returns {Array<Map<{ + * category: string, + * intent: string, + * insight_summary: string, + * score: number, + * }>>} List of sanitized insights + */ +function normalizeInsightList(parsed) { + let list = parsed; + if (!Array.isArray(list)) { + // If list isn't an array, check that it's an object with a nested "items" array + if (list && Array.isArray(list.items)) { + list = list.items; + } else if (list && typeof list === "object") { + // If list isn't an array, check that it's a least a single object, so check that list has insight-like keys + const looksLikeInsight = + "category" in list || "intent" in list || "insight_summary" in list; + if (looksLikeInsight) { + list = [list]; + } + } + } + if (!Array.isArray(list)) { + return []; + } + + return list.map(sanitizeInsight).filter(Boolean); +} + +/** + * Prompts an LLM to generate an initial, unfiltered list of candidate insights from user data + * + * @param {openAIEngine} engine openAIEngine instance to call LLM API + * @param {object} sources User data source type to aggregrated records (i.e., {history: [domainItems, titleItems, searchItems]}) + * @returns {Promise<Array<Map<{ + * category: string, + * intent: string, + * insight_summary: string, + * score: number, + * }>>>} Promise resolving the list of generated insights + */ +export async function generateInitialInsightsList(engine, sources) { + const promptText = await buildInitialInsightsGenerationPrompt(sources); + const response = await engine.run({ + args: [ + { + role: "system", + content: initialInsightsGenerationSystemPrompt, + }, + { role: "user", 
content: promptText }, + ], + responseFormat: { type: "json_schema", schema: INITIAL_INSIGHTS_SCHEMA }, + }); + + const parsed = parseAndExtractJSON(response, []); + return normalizeInsightList(parsed); +} + +/** + * Prompts an LLM to deduplicate new insights against existing ones + * + * @param {OpenAIEngine} engine openAIEngine instance to call LLM API + * @param {Array<string>} existingInsightsList List of existing insight summary strings + * @param {Array<string>} newInsightsList List of new insight summary strings to deduplicate + * @returns {Promise<Array<string>>} Promise resolving the final list of deduplicated insight summary strings + */ +export async function deduplicateInsights( + engine, + existingInsightsList, + newInsightsList +) { + const dedupPrompt = await buildInsightsDeduplicationPrompt( + existingInsightsList, + newInsightsList + ); + const response = await engine.run({ + args: [ + { + role: "system", + content: insightsDeduplicationSystemPrompt, + }, + { role: "user", content: dedupPrompt }, + ], + responseFormat: { + type: "json_schema", + schema: INSIGHTS_DEDUPLICATION_SCHEMA, + }, + }); + + const parsed = parseAndExtractJSON(response, { unique_insights: [] }); + + // Able to extract a JSON, so the fallback wasn't used, but the LLM didn't follow the schema + if ( + parsed.unique_insights === undefined || + !Array.isArray(parsed.unique_insights) + ) { + return []; + } + + // Make sure we filter out any invalid main_insight entries before returning + return parsed.unique_insights + .filter( + item => + item.main_insight !== undefined && typeof item.main_insight === "string" + ) + .map(item => item.main_insight); +} + +/** + * Prompts an LLM to filter out sensitive insights from an insights list + * + * @param {OpenAIEngine} engine openAIEngine instance to call LLM API + * @param {Array<string>} insightsList List of insight summary strings to filter + * @returns {Promise<Array<string>>} Promise resolving the final list of non-sensitive insight 
summary strings + */ +export async function filterSensitiveInsights(engine, insightsList) { + const sensitivityFilterPrompt = + await buildInsightsSensitivityFilterPrompt(insightsList); + const response = await engine.run({ + args: [ + { + role: "system", + content: insightSensitivityFilterSystemPrompt, + }, + { role: "user", content: sensitivityFilterPrompt }, + ], + responseFormat: { + type: "json_schema", + schema: INSIGHTS_NON_SENSITIVE_SCHEMA, + }, + }); + + const parsed = parseAndExtractJSON(response, { non_sensitive_insights: [] }); + + // Able to extract a JSON, so the fallback wasn't used, but the LLM didn't follow the schema + if ( + parsed.non_sensitive_insights === undefined || + !Array.isArray(parsed.non_sensitive_insights) + ) { + return []; + } + + // Make sure we filter out any invalid entries before returning + return parsed.non_sensitive_insights.filter(item => typeof item === "string"); +} + +/** + * + * @param {Map<string, any>} initialInsights List of original, unfiltered insight objects + * @param {Array<string>} filteredInsightsList List of deduplicated and sensitivity-filtered insight summary strings + * @returns {Promise<Map<string, any>>} Promise resolving the final list of insight objects + */ +export async function mapFilteredInsightsToInitialList( + initialInsights, + filteredInsightsList +) { + return initialInsights.filter(insight => + filteredInsightsList.includes(insight.insight_summary) + ); +} diff --git a/browser/components/aiwindow/models/InsightsConstants.sys.mjs b/browser/components/aiwindow/models/InsightsConstants.sys.mjs @@ -0,0 +1,51 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ + +export const HISTORY = "history"; + +/** + * Insight categories + */ +export const CATEGORIES = "categories"; +export const CATEGORIES_LIST = [ + "Arts & Entertainment", + "Autos & Vehicles", + "Beauty & Fitness", + "Books & Literature", + "Business & Industrial", + "Computers & Electronics", + "Food & Drink", + "Games", + "Hobbies & Leisure", + "Home & Garden", + "Internet & Telecom", + "Jobs & Education", + "Law & Government", + "News", + "Online Communities", + "People & Society", + "Pets & Animals", + "Real Estate", + "Reference", + "Science", + "Shopping", + "Sports", + "Travel & Transportation", +]; + +/** + * Insight intents + */ +export const INTENTS = "intents"; +export const INTENTS_LIST = [ + "Research / Learn", + "Compare / Evaluate", + "Plan / Organize", + "Buy / Acquire", + "Create / Produce", + "Communicate / Share", + "Monitor / Track", + "Entertain / Relax", + "Resume / Revisit", +]; diff --git a/browser/components/aiwindow/models/InsightsSchemas.sys.mjs b/browser/components/aiwindow/models/InsightsSchemas.sys.mjs @@ -0,0 +1,114 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ + +import { CATEGORIES_LIST, INTENTS_LIST } from "./InsightsConstants.sys.mjs"; + +/** + * JSON Schema for initial insights generation + */ +export const INITIAL_INSIGHTS_SCHEMA = { + type: "array", + minItems: 1, + items: { + type: "object", + additionalProperties: false, + required: [ + "category", + "intent", + "insight_summary", + "score", + "why", + "evidence", + ], + properties: { + category: { + type: ["string", "null"], + enum: [...CATEGORIES_LIST, null], + }, + intent: { + type: ["string", "null"], + enum: [...INTENTS_LIST, null], + }, + insight_summary: { type: ["string", "null"] }, + score: { type: "integer" }, + + why: { type: "string", minLength: 12, maxLength: 200 }, + + evidence: { + type: "array", + minItems: 1, + maxItems: 4, + items: { + type: "object", + required: ["type", "value"], + additionalProperties: false, + properties: { + type: { + type: "string", + enum: ["domain", "title", "search", "chat", "user"], + }, + value: { type: "string" }, + weight: { type: "number", minimum: 0, maximum: 1 }, + session_ids: { + type: "array", + items: { type: ["integer", "string"] }, + }, + }, + }, + }, + }, + }, +}; + +/** + * JSON Schema for insights deduplication + */ +export const INSIGHTS_DEDUPLICATION_SCHEMA = { + type: "array", + minItems: 1, + items: { + type: "object", + additionalProperties: false, + required: ["unique_insights"], + properties: { + unique_insights: { + type: "array", + minItems: 1, + items: { + type: "object", + additionalProperties: false, + required: ["main_insight", "duplicates"], + properties: { + main_insight: { type: "string" }, + duplicates: { + type: "array", + minItems: 1, + items: { type: "string" }, + }, + }, + }, + }, + }, + }, +}; + +/** + * JSON schema for filtering sensitive insights + */ +export const INSIGHTS_NON_SENSITIVE_SCHEMA = { + type: "array", + minItems: 1, + items: { + type: "object", + additionalProperties: false, + required: ["non_sensitive_insights"], + properties: { + non_sensitive_insights: { + 
type: "array", + minItems: 1, + items: { type: "string" }, + }, + }, + }, +}; diff --git a/browser/components/aiwindow/models/moz.build b/browser/components/aiwindow/models/moz.build @@ -11,7 +11,10 @@ DIRS += [ MOZ_SRC_FILES += [ "ChatUtils.sys.mjs", + "Insights.sys.mjs", + "InsightsConstants.sys.mjs", "InsightsHistorySource.sys.mjs", + "InsightsSchemas.sys.mjs", "IntentClassifier.sys.mjs", "SearchBrowsingHistory.sys.mjs", "Tools.sys.mjs", diff --git a/browser/components/aiwindow/models/prompts/insightsPrompts.sys.mjs b/browser/components/aiwindow/models/prompts/insightsPrompts.sys.mjs @@ -0,0 +1,174 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +export const initialInsightsGenerationSystemPromptMetadata = { + version: "0.1", +}; + +export const initialInsightsGenerationSystemPrompt = + "You are a privacy respecting data analyst who tries to generate useful insights about user preferences EXCLUDING personal, medical, health, financial, political, religion, private and any sensitive activities of users. Return ONLY valid JSON."; + +export const initialInsightsGenerationPromptMetadata = { + version: "0.1", +}; + +export const initialInsightsGenerationPrompt = ` +# Overview +You are an expert at extracting insights from user browser data. An insight is a short, concise statement about user interests or behaviors (products, brands, behaviors) that can help personalize their experience. + +You will receive CSV tables and/or JSON objects of data representing the user's browsing history, search history, and chat history. Use ONLY this data to generate insights. Each table has a header row that defines the schema. + +# Instructions +- Extract up as many insights as you can. +- Each insight must be supported by 1-4 pieces of evidence from the user records. ONLY USE VERBATIM STRINGS FROM THE USER RECORDS! 
+- Insights are user preferences (products, brands, behaviors) useful for future personalization. +- Do not imagine actions without evidence. Prefer "shops for / plans / looked for" over "bought / booked / watched" unless explicit. +- Do not include personal names unless widely public (avoid PII). +- Base insights on patterns, not single instances. + +## Exemplars +Below are examples of high quality insights (for reference only; do NOT copy): +- "Prefers LLBean & Nordstrom formalwear collections" +- "Compares white jeans under $80 at Target" +- "Streams new-release movies via Fandango" +- "Cooks Mediterranean seafood from TasteAtlas recipes" +- "Tracks minimalist fashion drops at Uniqlo" + +## Category rules +Every insight requires a category. Choose ONLY one from this list; if none fits, use null: +{categoriesList} + +## Intent rules +Every insight requires an intent. Choose ONLY one from this list; if none fits, use null: +{intentsList} + +# Output Schema + +Return ONLY a JSON array of objects, no prose, no code fences. Each object must have: +\`\`\`json +[ + { + "why": "<12-40 words that briefly explains the rationale, referencing the cited evidence (no new claims or invented entities).>", + "category": "<one of the categories or null>", + "intent": "<one of the intents or null>", + "insight_summary": "<4-10 words, crisp and specific or null>", + "score": <integer 1-5>, + "evidence": [ + { + "type": "<one of ["domain","title","search","chat","user"]>", + "value": "<a **verbatim** string copied from profile_records (for domain/title/search) or a short user/chat quote>", + "session_ids": ["<optional array of session ids (if available from inputs)>"], + "weight": <float 0-1 indicating contribution strength> + }, + ... + ] + } +] +\`\`\` + +## Scoring priorities +- Base "score" on *strength + recency*; boost multi-source corroboration. +- Source priority: user (highest) > chat > search > history (lowest). 
+- Typical caps: recent history ≤1; search up to 2; multi-source 2-3; recent chat 4; explicit user 5. +- Do not assign 5 unless pattern is strong and recent. + +# Inputs +Analyze the records below to generate as many unique, non-sensitive, specific user insights as possible. Each set of records is a CSV table with header row that defines the schema or JSON object. + +{profileRecordsRenderedStr} + +** CREATE ALL POSSIBLE UNIQUE INSIGHTS WITHOUT VIOLATING THE RULES ABOVE **`.trim(); + +export const insightsDeduplicationSystemPromptMetadata = { + version: "0.1", +}; + +export const insightsDeduplicationSystemPrompt = + "You are an expert at identifying duplicate statements. Return ONLY valid JSON."; + +export const insightsDeduplicationPromptMetadata = { + version: "0.1", +}; + +export const insightsDeduplicationPrompt = ` +You are an expert at identifying duplicate statements. + +Examine the following list of statements and find the unique ones. If you identify a set of statements that express the same general idea, pick the most general one from the set as the "main insight" and mark the rest as duplicates of it. + +There are 2 lists of statements: Existing Statements and New Statements. If you find a duplicate between the 2, **ALWAYS** pick the Existing Statement as the "main insight". + +If all statements are unique, simply return them all. + +## Existing Statements: +{existingInsightsList} + +## New Statements: +{newInsightsList} + +Return ONLY JSON per the schema below. +\`\`\`json +{ + "unique_insights": [ + { + "main_insight": "<the main unique insight statement>", + "duplicates": [ + "<duplicate_statement_1>", + "<duplicate_statement_2>", + ... + ] + }, + ... + ] +} +\`\`\``.trim(); + +export const insightSensitivityFilterSystemPromptMetadata = { + version: "0.1", +}; + +export const insightSensitivityFilterSystemPrompt = + "You are an expert at identifying sensitive statements and content. 
Return ONLY valid JSON."; + +export const insightsSensitivityFilterPromptMetadata = { + version: "0.1", +}; + +export const insightsSensitivityFilterPrompt = ` +You are an expert at identifying sensitive statements and content. + +Examine the following list of statements and filter out any that contain sensitive information or content. +Sensitive information includes, but is not limited to: + +- Medical/Health: diagnoses, symptoms, treatments, conditions, mental health, pregnancy, fertility, contraception. +- Finance: income/salary/compensation, bank/credit card details, credit score, loans/mortgage, taxes/benefits, debt/collections, investments/brokerage. +- Legal: lawsuits, settlements, subpoenas/warrants, arrests/convictions, immigration status/visas/asylum, divorce/custody, NDAs. +- Politics/Demographics/PII: political leaning/affiliation, religion, race/ethnicity, gender/sexual orientation, addresses/phones/emails/IDs. + +Below are exemplars of sensitive statements: +- "Researches treatment about arthritis" +- "Searches about pregnancy tests online" +- "Pediatrician in San Francisco" +- "Political leaning towards a party" +- "Research about ethnicity demographics in a city" +- "Negotiates debt settlement with bank" +- "Prepares documents for divorce hearing" +- "Tracks mortgage refinance rates" +- "Applies for work visa extension" +- "Marie, female from Ohio looking for rental apartments" + +If all statements are not sensitive, simply return them all. + +Here are the statements to analyze: +{insightsList} + +Return ONLY JSON per the schema below. +\`\`\`json +{ + "non_sensitive_insights": [ + "<insight_statement_1>", + "<insight_statement_2>", + ... 
+ ] +} +\`\`\``.trim(); diff --git a/browser/components/aiwindow/models/prompts/moz.build b/browser/components/aiwindow/models/prompts/moz.build @@ -7,4 +7,5 @@ with Files("**"): MOZ_SRC_FILES += [ "assistantPrompts.sys.mjs", + "insightsPrompts.sys.mjs", ] diff --git a/browser/components/aiwindow/models/tests/xpcshell/test_Insights.js b/browser/components/aiwindow/models/tests/xpcshell/test_Insights.js @@ -0,0 +1,1286 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +const { + getRecentHistory, + generateProfileInputs, + aggregateSessions, + topkAggregates, +} = ChromeUtils.importESModule( + "moz-src:///browser/components/aiwindow/models/InsightsHistorySource.sys.mjs" +); + +const { openAIEngine } = ChromeUtils.importESModule( + "moz-src:///browser/components/aiwindow/models/Utils.sys.mjs" +); +const { sinon } = ChromeUtils.importESModule( + "resource://testing-common/Sinon.sys.mjs" +); + +const { CATEGORIES, INTENTS } = ChromeUtils.importESModule( + "moz-src:///browser/components/aiwindow/models/InsightsConstants.sys.mjs" +); + +const { + formatListForPrompt, + getFormattedInsightAttributeList, + renderRecentHistoryForPrompt, + mapFilteredInsightsToInitialList, + buildInitialInsightsGenerationPrompt, + buildInsightsDeduplicationPrompt, + buildInsightsSensitivityFilterPrompt, + generateInitialInsightsList, + deduplicateInsights, + filterSensitiveInsights, +} = ChromeUtils.importESModule( + "moz-src:///browser/components/aiwindow/models/Insights.sys.mjs" +); + +/** + * Constants for preference keys and test values + */ +const PREF_API_KEY = "browser.aiwindow.apiKey"; +const PREF_ENDPOINT = "browser.aiwindow.endpoint"; +const PREF_MODEL = "browser.aiwindow.model"; + +const API_KEY = "fake-key"; +const ENDPOINT = "https://api.fake-endpoint.com/v1"; +const MODEL = "fake-model"; + +const EXISTING_INSIGHTS = 
[ + "Loves outdoor activities", + "Enjoys cooking recipes", + "Like sci-fi media", +]; +const NEW_INSIGHTS = [ + "Loves hiking and camping", + "Reads science fiction novels", + "Likes both dogs and cats", + "Likes risky stock bets", +]; + +add_setup(async function () { + // Setup prefs used across multiple tests + Services.prefs.setStringPref(PREF_API_KEY, API_KEY); + Services.prefs.setStringPref(PREF_ENDPOINT, ENDPOINT); + Services.prefs.setStringPref(PREF_MODEL, MODEL); + + // Clear prefs after testing + registerCleanupFunction(() => { + for (let pref of [PREF_API_KEY, PREF_ENDPOINT, PREF_MODEL]) { + if (Services.prefs.prefHasUserValue(pref)) { + Services.prefs.clearUserPref(pref); + } + } + }); +}); + +/** + * Builds fake browsing history data for testing + */ +async function buildFakeBrowserHistory() { + const now = Date.now(); + + const seeded = [ + { + url: "https://www.google.com/search?q=firefox+history", + title: "Google Search: firefox history", + visits: [{ date: new Date(now - 5 * 60 * 1000) }], + }, + { + url: "https://news.ycombinator.com/", + title: "Hacker News", + visits: [{ date: new Date(now - 15 * 60 * 1000) }], + }, + { + url: "https://mozilla.org/en-US/", + title: "Internet for people, not profit — Mozilla", + visits: [{ date: new Date(now - 25 * 60 * 1000) }], + }, + ]; + await PlacesUtils.history.clear(); + await PlacesUtils.history.insertMany(seeded); +} + +async function getBrowserHistoryAggregates() { + const profileRecords = await getRecentHistory(); + const profilePreparedInputs = await generateProfileInputs(profileRecords); + const [domainAgg, titleAgg, searchAgg] = aggregateSessions( + profilePreparedInputs + ); + + //const [domainItems, titleItems, searchItems] = + return await topkAggregates(domainAgg, titleAgg, searchAgg); +} + +/** + * Tests building the prompt for initial insights generation + */ +add_task(async function test_buildInitialInsightsGenerationPrompt() { + // Check that history is rendered correctly into CSV tables + 
await buildFakeBrowserHistory(); + const [domainItems, titleItems, searchItems] = + await getBrowserHistoryAggregates(); + const renderedBrowserHistory = await renderRecentHistoryForPrompt( + domainItems, + titleItems, + searchItems + ); + Assert.equal( + renderedBrowserHistory, + `# Domains +Domain,Importance Score +www.google.com,100 +news.ycombinator.com,100 +mozilla.org,100 + +# Titles +Title,Importance Score +Google Search: firefox history,100 +Hacker News,100 +Internet for people, not profit — Mozilla,100 + +# Searches +Search,Importance Score +Google Search: firefox history,1`.trim() + ); + + // Check that the full prompt is built correctly with injected categories, intents, and browsing history + const sources = { history: [domainItems, titleItems, searchItems] }; + const initialInsightsPrompt = + await buildInitialInsightsGenerationPrompt(sources); + Assert.ok( + initialInsightsPrompt.includes( + "You are an expert at extracting insights from user browser data." + ), + "Initial insights generation prompt should pull from the correct base" + ); + Assert.ok( + initialInsightsPrompt.includes( + getFormattedInsightAttributeList(CATEGORIES) + ), + "Prompt should include formatted categories list" + ); + Assert.ok( + initialInsightsPrompt.includes(getFormattedInsightAttributeList(INTENTS)), + "Prompt should include formatted intents list" + ); + Assert.ok( + initialInsightsPrompt.includes(renderedBrowserHistory), + "Prompt should include rendered browsing history" + ); +}); + +/** + * Tests rendering history as CSV when only search data is present + */ +add_task(async function test_buildRecentHistoryCSV_only_search() { + const now = Date.now(); + const seeded = [ + { + url: "https://www.google.com/search?q=firefox+history", + title: "Google Search: firefox history", + visits: [{ date: new Date(now - 5 * 60 * 1000) }], + }, + ]; + await PlacesUtils.history.clear(); + await PlacesUtils.history.insertMany(seeded); + + const [domainItems, titleItems, searchItems] = 
+ await getBrowserHistoryAggregates(); + const renderedBrowserHistory = await renderRecentHistoryForPrompt( + domainItems, + titleItems, + searchItems + ); + Assert.equal( + renderedBrowserHistory, + `# Domains +Domain,Importance Score +www.google.com,100 + +# Titles +Title,Importance Score +Google Search: firefox history,100 + +# Searches +Search,Importance Score +Google Search: firefox history,1`.trim() + ); +}); + +/** + * Tests rendering history as CSV when only history data is present + */ +add_task(async function test_buildRecentHistoryCSV_only_browsing_history() { + const now = Date.now(); + const seeded = [ + { + url: "https://news.ycombinator.com/", + title: "Hacker News", + visits: [{ date: new Date(now - 15 * 60 * 1000) }], + }, + { + url: "https://mozilla.org/en-US/", + title: "Internet for people, not profit — Mozilla", + visits: [{ date: new Date(now - 25 * 60 * 1000) }], + }, + ]; + await PlacesUtils.history.clear(); + await PlacesUtils.history.insertMany(seeded); + + const [domainItems, titleItems, searchItems] = + await getBrowserHistoryAggregates(); + const renderedBrowserHistory = await renderRecentHistoryForPrompt( + domainItems, + titleItems, + searchItems + ); + Assert.equal( + renderedBrowserHistory, + `# Domains +Domain,Importance Score +news.ycombinator.com,100 +mozilla.org,100 + +# Titles +Title,Importance Score +Hacker News,100 +Internet for people, not profit — Mozilla,100`.trim() + ); +}); + +/** + * Tests building the prompt for insights deduplication + */ +add_task(async function test_buildInsightsDeduplicationPrompt() { + const insightsDeduplicationPrompt = await buildInsightsDeduplicationPrompt( + EXISTING_INSIGHTS, + NEW_INSIGHTS + ); + Assert.ok( + insightsDeduplicationPrompt.includes( + "You are an expert at identifying duplicate statements." 
+ ), + "Insights deduplication prompt should pull from the correct base" + ); + Assert.ok( + insightsDeduplicationPrompt.includes( + formatListForPrompt(EXISTING_INSIGHTS) + ), + "Deduplication prompt should include existing insights list" + ); + Assert.ok( + insightsDeduplicationPrompt.includes(formatListForPrompt(NEW_INSIGHTS)), + "Deduplication prompt should include new insights list" + ); +}); + +/** + * Tests building the prompt for insights sensitivity filtering + */ +add_task(async function test_buildInsightsSensitivityFilterPrompt() { + /** Insights sensitivity filter prompt */ + const insightsSensitivityFilterPrompt = + await buildInsightsSensitivityFilterPrompt(NEW_INSIGHTS); + Assert.ok( + insightsSensitivityFilterPrompt.includes( + "You are an expert at identifying sensitive statements and content." + ), + "Insights sensitivity filter prompt should pull from the correct base" + ); + Assert.ok( + insightsSensitivityFilterPrompt.includes(formatListForPrompt(NEW_INSIGHTS)), + "Sensitivity filter prompt should include insights list" + ); +}); + +/** + * Tests successful initial insights generation + */ +add_task(async function test_generateInitialInsightsList_happy_path() { + const sb = sinon.createSandbox(); + try { + /** + * The fake engine returns canned LLM response. + * The main `generateInitialInsightsList` function should modify this heavily, cutting it back to only the required fields. 
+ */ + const fakeEngine = { + run() { + return { + finalOutput: `[ + { + "why": "User has recently searched for Firefox history and visited mozilla.org.", + "category": "Internet & Telecom", + "intent": "Research / Learn", + "insight_summary": "Searches for Firefox information", + "score": 7, + "evidence": [ + { + "type": "search", + "value": "Google Search: firefox history" + }, + { + "type": "domain", + "value": "mozilla.org" + } + ] + }, + { + "why": "User buys dog food online regularly from multiple sources.", + "category": "Pets & Animals", + "intent": "Buy / Acquire", + "insight_summary": "Purchases dog food online", + "score": -1, + "evidence": [ + { + "type": "domain", + "value": "example.com" + } + ] + } +]`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").returns(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const [domainItems, titleItems, searchItems] = + await getBrowserHistoryAggregates(); + const sources = { history: [domainItems, titleItems, searchItems] }; + const insightsList = await generateInitialInsightsList(engine, sources); + + // Check top level structure + Assert.ok( + Array.isArray(insightsList), + "Should return an array of insights" + ); + Assert.equal(insightsList.length, 2, "Array should contain 2 insights"); + + // Check first insight structure and content + const firstInsight = insightsList[0]; + Assert.equal( + typeof firstInsight, + "object", + "First insight should be an object/map" + ); + Assert.equal( + Object.keys(firstInsight).length, + 4, + "First insight should have 4 keys" + ); + Assert.equal( + firstInsight.category, + "Internet & Telecom", + "First insight should have expected category (Internet & Telecom)" + ); + Assert.equal( + firstInsight.intent, + "Research / Learn", + "First insight should have expected intent (Research / Learn)" + ); + Assert.equal( + 
firstInsight.insight_summary, + "Searches for Firefox information", + "First insight should have expected summary" + ); + Assert.equal( + firstInsight.score, + 5, + "First insight should have expected score, clamping 7 to 5" + ); + + // Check that the second insight's score was clamped to the minimum + const secondInsight = insightsList[1]; + Assert.equal( + secondInsight.score, + 1, + "Second insight should have expected score, clamping -1 to 1" + ); + } finally { + sb.restore(); + } +}); + +/** + * Tests failed initial insights generation - Empty output + */ +add_task( + async function test_generateInitialInsightsList_sad_path_empty_output() { + const sb = sinon.createSandbox(); + try { + // LLM returns an empty insights list + const fakeEngine = { + run() { + return { + finalOutput: `[]`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").returns(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const [domainItems, titleItems, searchItems] = + await getBrowserHistoryAggregates(); + const sources = { history: [domainItems, titleItems, searchItems] }; + const insightsList = await generateInitialInsightsList(engine, sources); + + Assert.equal(Array.isArray(insightsList), true, "Should return an array"); + Assert.equal(insightsList.length, 0, "Array should contain 0 insights"); + } finally { + sb.restore(); + } + } +); + +/** + * Tests failed initial insights generation - Output not array + */ +add_task( + async function test_generateInitialInsightsList_sad_path_output_not_array() { + const sb = sinon.createSandbox(); + try { + // LLM doesn't return an array + const fakeEngine = { + run() { + return { + finalOutput: `testing`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").returns(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, 
"_createEngine should be called once"); + + const [domainItems, titleItems, searchItems] = + await getBrowserHistoryAggregates(); + const sources = { history: [domainItems, titleItems, searchItems] }; + const insightsList = await generateInitialInsightsList(engine, sources); + + Assert.equal(Array.isArray(insightsList), true, "Should return an array"); + Assert.equal(insightsList.length, 0, "Array should contain 0 insights"); + } finally { + sb.restore(); + } + } +); + +/** + * Tests failed initial insights generation - Output not array of maps + */ +add_task( + async function test_generateInitialInsightsList_sad_path_output_not_array_of_maps() { + const sb = sinon.createSandbox(); + try { + // LLM doesn't return an array of maps + const fakeEngine = { + run() { + return { + finalOutput: `["testing1", "testing2", ["testing3"]]`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").returns(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const [domainItems, titleItems, searchItems] = + await getBrowserHistoryAggregates(); + const sources = { history: [domainItems, titleItems, searchItems] }; + const insightsList = await generateInitialInsightsList(engine, sources); + + Assert.equal(Array.isArray(insightsList), true, "Should return an array"); + Assert.equal(insightsList.length, 0, "Array should contain 0 insights"); + } finally { + sb.restore(); + } + } +); + +/** + * Tests failed initial insights generation - Some correct insights + */ +add_task( + async function test_generateInitialInsightsList_sad_path_some_correct_insights() { + const sb = sinon.createSandbox(); + try { + // LLM returns an insights list where 1 is fully correct and 1 is missing required keys (category in this case) + const fakeEngine = { + run() { + return { + finalOutput: `[ + { + "why": "User has recently searched for Firefox history and visited mozilla.org.", 
+ "intent": "Research / Learn", + "insight_summary": "Searches for Firefox information", + "score": 7, + "evidence": [ + { + "type": "search", + "value": "Google Search: firefox history" + }, + { + "type": "domain", + "value": "mozilla.org" + } + ] + }, + { + "why": "User buys dog food online regularly from multiple sources.", + "category": "Pets & Animals", + "intent": "Buy / Acquire", + "insight_summary": "Purchases dog food online", + "score": -1, + "evidence": [ + { + "type": "domain", + "value": "example.com" + } + ] + } +]`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").returns(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const [domainItems, titleItems, searchItems] = + await getBrowserHistoryAggregates(); + const sources = { history: [domainItems, titleItems, searchItems] }; + const insightsList = await generateInitialInsightsList(engine, sources); + + Assert.equal( + Array.isArray(insightsList), + true, + "Should return an array of insights" + ); + Assert.equal(insightsList.length, 1, "Array should contain 1 insight"); + Assert.equal( + insightsList[0].insight_summary, + "Purchases dog food online", + "Insight summary should match the valid insight" + ); + } finally { + sb.restore(); + } + } +); + +/** + * Tests successful insights deduplication + */ +add_task(async function test_deduplicateInsightsList_happy_path() { + const sb = sinon.createSandbox(); + try { + /** + * The fake engine that returns a canned LLM response for deduplication. + * The `deduplicateInsights` function should return an array containing only the `main_insight` values. 
+ */ + const fakeEngine = { + run() { + return { + finalOutput: `{ + "unique_insights": [ + { + "main_insight": "Loves outdoor activities", + "duplicates": ["Loves hiking and camping"] + }, + { + "main_insight": "Enjoys cooking recipes", + "duplicates": [] + }, + { + "main_insight": "Like sci-fi media", + "duplicates": ["Reads science fiction novels"] + }, + { + "main_insight": "Likes both dogs and cats", + "duplicates": [] + }, + { + "main_insight": "Likes risky stock bets", + "duplicates": [] + } + ] + }`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const dedupedInsightsList = await deduplicateInsights( + engine, + EXISTING_INSIGHTS, + NEW_INSIGHTS + ); + + // Check that the deduplicated list contains only unique insights (`main_insight` values) + Assert.equal( + dedupedInsightsList.length, + 5, + "Deduplicated insights list should contain 5 unique insights" + ); + Assert.ok( + dedupedInsightsList.includes("Loves outdoor activities"), + "Deduplicated insights should include 'Loves outdoor activities'" + ); + Assert.ok( + dedupedInsightsList.includes("Enjoys cooking recipes"), + "Deduplicated insights should include 'Enjoys cooking recipes'" + ); + Assert.ok( + dedupedInsightsList.includes("Like sci-fi media"), + "Deduplicated insights should include 'Like sci-fi media'" + ); + Assert.ok( + dedupedInsightsList.includes("Likes both dogs and cats"), + "Deduplicated insights should include 'Likes both dogs and cats'" + ); + Assert.ok( + dedupedInsightsList.includes("Likes risky stock bets"), + "Deduplicated insights should include 'Likes risky stock bets'" + ); + } finally { + sb.restore(); + } +}); + +/** + * Tests failed insights deduplication - Empty output + */ +add_task(async function test_deduplicateInsightsList_sad_path_empty_output() { + const sb = 
sinon.createSandbox(); + try { + // LLM returns the correct schema but with an empty unique_insights array + const fakeEngine = { + run() { + return { + finalOutput: `{ + "unique_insights": [] + }`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const dedupedInsightsList = await deduplicateInsights( + engine, + EXISTING_INSIGHTS, + NEW_INSIGHTS + ); + + Assert.ok(Array.isArray(dedupedInsightsList), "Should return an array"); + Assert.equal(dedupedInsightsList.length, 0, "Should return an empty array"); + } finally { + sb.restore(); + } +}); + +/** + * Tests failed insights deduplication - Wrong top-level data type + */ +add_task( + async function test_deduplicateInsightsList_sad_path_wrong_top_level_data_type() { + const sb = sinon.createSandbox(); + try { + // LLM returns an incorrect data type + const fakeEngine = { + run() { + return { + finalOutput: `testing`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const dedupedInsightsList = await deduplicateInsights( + engine, + EXISTING_INSIGHTS, + NEW_INSIGHTS + ); + + Assert.ok(Array.isArray(dedupedInsightsList), "Should return an array"); + Assert.equal( + dedupedInsightsList.length, + 0, + "Should return an empty array" + ); + } finally { + sb.restore(); + } + } +); + +/** + * Tests failed insights deduplication - Wrong inner data type + */ +add_task( + async function test_deduplicateInsightsList_sad_path_wrong_inner_data_type() { + const sb = sinon.createSandbox(); + try { + // LLM returns a map with the right top-level key, but the inner structure is wrong + const fakeEngine = { + run() { + return { + finalOutput: `{ + 
"unique_insights": "testing" + }`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const dedupedInsightsList = await deduplicateInsights( + engine, + EXISTING_INSIGHTS, + NEW_INSIGHTS + ); + + Assert.ok(Array.isArray(dedupedInsightsList), "Should return an array"); + Assert.equal( + dedupedInsightsList.length, + 0, + "Should return an empty array" + ); + } finally { + sb.restore(); + } + } +); + +/** + * Tests failed insights deduplication - Wrong inner array structure + */ +add_task( + async function test_deduplicateInsightsList_sad_path_wrong_inner_array_structure() { + const sb = sinon.createSandbox(); + try { + // LLM returns a map of nested arrays, but the array structure is wrong + const fakeEngine = { + run() { + return { + finalOutput: `{ + "unique_insights": ["testing1", "testing2"] + }`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const dedupedInsightsList = await deduplicateInsights( + engine, + EXISTING_INSIGHTS, + NEW_INSIGHTS + ); + + Assert.ok(Array.isArray(dedupedInsightsList), "Should return an array"); + Assert.equal( + dedupedInsightsList.length, + 0, + "Should return an empty array" + ); + } finally { + sb.restore(); + } + } +); + +/** + * Tests failed insights deduplication - Incorrect top-level schema key + */ +add_task( + async function test_deduplicateInsightsList_sad_path_bad_top_level_key() { + const sb = sinon.createSandbox(); + try { + // LLm returns correct output except that the top-level key is wrong + const fakeEngine = { + run() { + return { + finalOutput: `{ + "correct_insights": [ + { + "main_insight": "Loves outdoor activities", + 
"duplicates": ["Loves hiking and camping"] + }, + { + "main_insight": "Enjoys cooking recipes", + "duplicates": [] + }, + { + "main_insight": "Like sci-fi media", + "duplicates": ["Reads science fiction novels"] + }, + { + "main_insight": "Likes both dogs and cats", + "duplicates": [] + }, + { + "main_insight": "Likes risky stock bets", + "duplicates": [] + } + ] + }`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const dedupedInsightsList = await deduplicateInsights( + engine, + EXISTING_INSIGHTS, + NEW_INSIGHTS + ); + + Assert.ok(Array.isArray(dedupedInsightsList), "Should return an array"); + Assert.equal( + dedupedInsightsList.length, + 0, + "Should return an empty array" + ); + } finally { + sb.restore(); + } + } +); + +/** + * Tests failed insights deduplication - Some correct inner schema + */ +add_task( + async function test_deduplicateInsightsList_sad_path_bad_some_correct_inner_schema() { + const sb = sinon.createSandbox(); + try { + // LLm returns correct output except that 1 of the inner maps is wrong and 1 main_insight is the wrong data type + const fakeEngine = { + run() { + return { + finalOutput: `{ + "unique_insights": [ + { + "primary_insight": "Loves outdoor activities", + "duplicates": ["Loves hiking and camping"] + }, + { + "main_insight": "Enjoys cooking recipes", + "duplicates": [] + }, + { + "main_insight": 12345, + "duplicates": [] + } + ] + }`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const dedupedInsightsList = await deduplicateInsights( + engine, + EXISTING_INSIGHTS, + NEW_INSIGHTS + ); + + Assert.ok(Array.isArray(dedupedInsightsList), 
"Should return an array"); + Assert.equal( + dedupedInsightsList.length, + 1, + "Should return an array with one valid insight" + ); + Assert.equal( + dedupedInsightsList[0], + "Enjoys cooking recipes", + "Should return the single valid insight" + ); + } finally { + sb.restore(); + } + } +); + +/** + * Tests successful insights sensitivity filtering + */ +add_task(async function test_filterSensitiveInsights_happy_path() { + const sb = sinon.createSandbox(); + try { + /** + * The fake engine that returns a canned LLM response for deduplication. + * The `filterSensitiveInsights` function should return the inner array from `non_sensitive_insights`. + */ + const fakeEngine = { + run() { + return { + finalOutput: `{ + "non_sensitive_insights": [ + "Loves hiking and camping", + "Reads science fiction novels", + "Likes both dogs and cats" + ] +}`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const nonSensitiveInsightsList = await filterSensitiveInsights( + engine, + NEW_INSIGHTS + ); + + // Check that the non-sensitive insights list contains only non-sensitive insights + Assert.equal( + nonSensitiveInsightsList.length, + 3, + "Non-sensitive insights list should contain 3 insights" + ); + Assert.ok( + nonSensitiveInsightsList.includes("Loves hiking and camping"), + "Non-sensitive insights should include 'Loves hiking and camping'" + ); + Assert.ok( + nonSensitiveInsightsList.includes("Reads science fiction novels"), + "Non-sensitive insights should include 'Reads science fiction novels'" + ); + Assert.ok( + nonSensitiveInsightsList.includes("Likes both dogs and cats"), + "Non-sensitive insights should include 'Likes both dogs and cats'" + ); + } finally { + sb.restore(); + } +}); + +/** + * Tests failed insights sensitivity filtering - Empty output + */ +add_task(async 
function test_filterSensitiveInsights_sad_path_empty_output() { + const sb = sinon.createSandbox(); + try { + // LLM returns an empty non_sensitive_insights array + const fakeEngine = { + run() { + return { + finalOutput: `{ + "non_sensitive_insights": [] +}`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const nonSensitiveInsightsList = await filterSensitiveInsights( + engine, + NEW_INSIGHTS + ); + + Assert.ok( + Array.isArray(nonSensitiveInsightsList), + "Should return an array" + ); + Assert.equal( + nonSensitiveInsightsList.length, + 0, + "Should return an empty array" + ); + } finally { + sb.restore(); + } +}); + +/** + * Tests failed insights sensitivity filtering - Wrong data type + */ +add_task( + async function test_filterSensitiveInsights_sad_path_wrong_data_type() { + const sb = sinon.createSandbox(); + try { + // LLM returns the wrong outer data type + const fakeEngine = { + run() { + return { + finalOutput: `testing`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const nonSensitiveInsightsList = await filterSensitiveInsights( + engine, + NEW_INSIGHTS + ); + + Assert.ok( + Array.isArray(nonSensitiveInsightsList), + "Should return an array" + ); + Assert.equal( + nonSensitiveInsightsList.length, + 0, + "Should return an empty array" + ); + } finally { + sb.restore(); + } + } +); + +/** + * Tests failed insights sensitivity filtering - Wrong inner data type + */ +add_task( + async function test_filterSensitiveInsights_sad_path_wrong_inner_data_type() { + const sb = sinon.createSandbox(); + try { + // LLM returns a map with the non_sensitive_insights key, but 
its value's data type is wrong + const fakeEngine = { + run() { + return { + finalOutput: `{ + "non_sensitive_insights": "testing" +}`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const nonSensitiveInsightsList = await filterSensitiveInsights( + engine, + NEW_INSIGHTS + ); + + Assert.ok( + Array.isArray(nonSensitiveInsightsList), + "Should return an array" + ); + Assert.equal( + nonSensitiveInsightsList.length, + 0, + "Should return an empty array" + ); + } finally { + sb.restore(); + } + } +); + +/** + * Tests failed insights sensitivity filtering - Wrong outer schema + */ +add_task( + async function test_filterSensitiveInsights_sad_path_wrong_outer_schema() { + const sb = sinon.createSandbox(); + try { + // LLM returns a map but with the wrong top-level key + const fakeEngine = { + run() { + return { + finalOutput: `{ + "these_are_non_sensitive_insights": [ + "testing1", "testing2", "testing3" + ] +}`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const nonSensitiveInsightsList = await filterSensitiveInsights( + engine, + NEW_INSIGHTS + ); + + Assert.ok( + Array.isArray(nonSensitiveInsightsList), + "Should return an array" + ); + Assert.equal( + nonSensitiveInsightsList.length, + 0, + "Should return an empty array" + ); + } finally { + sb.restore(); + } + } +); + +/** + * Tests failed insights sensitivity filtering - Some correct inner schema + */ +add_task( + async function test_filterSensitiveInsights_sad_path_some_correct_inner_schema() { + const sb = sinon.createSandbox(); + try { + // LLM returns a map with the non_sensitive_insights key, but the inner schema 
has a mix of correct and incorrect data types + const fakeEngine = { + run() { + return { + finalOutput: `{ + "non_sensitive_insights": [ + "correct", + 12345, + {"bad": "schema"} + ] +}`, + }; + }, + }; + + // Check that the stub was called + const stub = sb.stub(openAIEngine, "_createEngine").resolves(fakeEngine); + const engine = await openAIEngine.build(); + Assert.ok(stub.calledOnce, "_createEngine should be called once"); + + const nonSensitiveInsightsList = await filterSensitiveInsights( + engine, + NEW_INSIGHTS + ); + + Assert.ok( + Array.isArray(nonSensitiveInsightsList), + "Should return an array" + ); + Assert.equal( + nonSensitiveInsightsList.length, + 1, + "Should return an array with one valid insight" + ); + Assert.equal( + nonSensitiveInsightsList[0], + "correct", + "Should return the single valid insight" + ); + } finally { + sb.restore(); + } + } +); + +/** + * Tests mapping filtered insights back to full insight objects + */ +add_task(async function test_mapFilteredInsightsToInitialList() { + // Raw mock full insights object list + const initialInsightsList = [ + // Imagined duplicate - should have been filtered out + { + category: "Pets & Animals", + intent: "Buy / Acquire", + insight_summary: "Buys dog food online", + score: 4, + }, + // Sensitive content (stocks) - should have been filtered out + { + category: "News", + intent: "Research / Learn", + insight_summary: "Likes to invest in risky stocks", + score: 5, + }, + { + category: "Games", + intent: "Entertain / Relax", + insight_summary: "Enjoys strategy games", + score: 3, + }, + ]; + + // Mock list of good insights to keep + const filteredInsightsList = ["Enjoys strategy games"]; + + const finalInsightsList = await mapFilteredInsightsToInitialList( + initialInsightsList, + filteredInsightsList + ); + + // Check that only the non-duplicate, non-sensitive insight remains + Assert.equal( + finalInsightsList.length, + 1, + "Final insights should contain 1 insight" + ); + Assert.equal( + 
finalInsightsList[0].category, + "Games", + "Final insight should have the correct category" + ); + Assert.equal( + finalInsightsList[0].intent, + "Entertain / Relax", + "Final insight should have the correct intent" + ); + Assert.equal( + finalInsightsList[0].insight_summary, + "Enjoys strategy games", + "Final insight should match the filtered insight" + ); + Assert.equal( + finalInsightsList[0].score, + 3, + "Final insight should have the correct score" + ); +}); diff --git a/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml b/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml @@ -8,6 +8,8 @@ support-files = [] ["test_ChatUtils.js"] +["test_Insights.js"] + ["test_InsightsHistorySource.js"] ["test_SearchBrowsingHistory.js"]