tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 75e1bd656b68b0d4f09c055c1fd3fe816c54d8a9
parent 3ef513152eeba70f727d780959a658fe0495d188
Author: Tom Zhang <tzhang@mozilla.com>
Date:   Fri, 19 Dec 2025 18:18:49 +0000

Bug 2006772 - implement response parsing functions for insights and search terms r=ai-models-reviewers,tburrell

Differential Revision: https://phabricator.services.mozilla.com/D277125

Diffstat:
M browser/components/aiwindow/models/ChatUtils.sys.mjs | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M browser/components/aiwindow/models/tests/xpcshell/test_ChatUtils.js | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 246 insertions(+), 0 deletions(-)

diff --git a/browser/components/aiwindow/models/ChatUtils.sys.mjs b/browser/components/aiwindow/models/ChatUtils.sys.mjs @@ -149,3 +149,75 @@ export async function constructRelevantInsightsContextMessage(message) { // If there aren't any relevant insights, return null return null; } + +/** + * Response parsing functions to detect special tagged information like insights and search terms. + * Also returns the cleaned content after removing all the tags. + * + * @param {string} content + * @returns {Promise<object>} Resolves to { cleanContent, searchQueries, usedInsights } + */ +export async function parseContentWithTokens(content) { + const searchRegex = /§search:\s*([^§]+)§/gi; + const insightsRegex = /§existing_insight:\s*([^§]+)§/gi; + + const searchTokens = detectTokens(content, searchRegex, "query"); + const insightsTokens = detectTokens(content, insightsRegex, "insights"); + // Sort all tokens in reverse index order so removing one token does not shift the indices of those still to be removed + const allTokens = [...searchTokens, ...insightsTokens].sort( + (a, b) => b.startIndex - a.startIndex + ); + + if (allTokens.length === 0) { + return { + cleanContent: content, + searchQueries: [], + usedInsights: [], + }; + } + + // Clean content by removing tagged information; tokens are visited last-to-first, so unshift() keeps the extracted values in document order + let cleanContent = content; + const searchQueries = []; + const usedInsights = []; + + for (const token of allTokens) { + if (token.query) { + searchQueries.unshift(token.query); + } else if (token.insights) { + usedInsights.unshift(token.insights); + // TODO: do we need customEvent to dispatch used insights as we iterate? + } + cleanContent = + cleanContent.slice(0, token.startIndex) + + cleanContent.slice(token.endIndex); + } + + return { + cleanContent: cleanContent.trim(), + searchQueries, + usedInsights, + }; +} + +/** + * Given the content and the regex pattern to search, find all occurrences of matches. + * NOTE(review): the while loop relies on regexPattern having the global (g) flag; without it exec() does not advance lastIndex, so any match would make the loop run forever. 
+ * + * @param {string} content + * @param {RegExp} regexPattern + * @param {string} key + * @returns {Array<object>} + */ +export function detectTokens(content, regexPattern, key) { + const matches = []; + let match; + while ((match = regexPattern.exec(content)) !== null) { + matches.push({ + fullMatch: match[0], + [key]: match[1].trim(), + startIndex: match.index, + endIndex: match.index + match[0].length, + }); + } + return matches; +} diff --git a/browser/components/aiwindow/models/tests/xpcshell/test_ChatUtils.js b/browser/components/aiwindow/models/tests/xpcshell/test_ChatUtils.js @@ -11,6 +11,8 @@ const { getLocalIsoTime, getCurrentTabMetadata, constructRelevantInsightsContextMessage, + parseContentWithTokens, + detectTokens, } = ChromeUtils.importESModule( "moz-src:///browser/components/aiwindow/models/ChatUtils.sys.mjs" ); @@ -342,3 +344,175 @@ add_task( } } ); + +add_task(async function test_parseContentWithTokens_no_tokens() { + const content = "This is a regular message with no special tokens."; + const result = await parseContentWithTokens(content); + + Assert.equal( + result.cleanContent, + content, + "Clean content should match original when no tokens present" + ); + Assert.equal(result.searchQueries.length, 0, "Should have no search queries"); + Assert.equal(result.usedInsights.length, 0, "Should have no used insights"); +}); + +add_task(async function test_parseContentWithTokens_single_search_token() { + const content = + "You can find great coffee in the downtown area.§search: best coffee shops near me§"; + const result = await parseContentWithTokens(content); + + Assert.equal( + result.cleanContent, + "You can find great coffee in the downtown area.", + "Should remove search token from content" + ); + Assert.equal(result.searchQueries.length, 1, "Should have one search query"); + Assert.equal( + result.searchQueries[0], + "best coffee shops near me", + "Should extract correct search query" + ); + Assert.equal(result.usedInsights.length, 0, 
"Should have no used insights"); +}); + +add_task(async function test_parseContentWithTokens_single_insight_token() { + const content = + "I recommend trying herbal tea blends.§existing_insight: likes tea§"; + const result = await parseContentWithTokens(content); + + Assert.equal( + result.cleanContent, + "I recommend trying herbal tea blends.", + "Should remove insight token from content" + ); + Assert.equal(result.searchQueries.length, 0, "Should have no search queries"); + Assert.equal(result.usedInsights.length, 1, "Should have one used insight"); + Assert.equal( + result.usedInsights[0], + "likes tea", + "Should extract correct insight" + ); +}); + +add_task(async function test_parseContentWithTokens_multiple_mixed_tokens() { + const content = + "I recommend checking out organic coffee options.§existing_insight: prefers organic§ They have great flavor profiles.§search: organic coffee beans reviews§§search: best organic cafes nearby§"; + const result = await parseContentWithTokens(content); + + Assert.equal( + result.cleanContent, + "I recommend checking out organic coffee options. 
They have great flavor profiles.", + "Should remove all tokens from content" + ); + Assert.equal( + result.searchQueries.length, + 2, + "Should have two search queries" + ); + Assert.deepEqual( + result.searchQueries, + ["organic coffee beans reviews", "best organic cafes nearby"], + "Should extract search queries in correct order" + ); + Assert.equal(result.usedInsights.length, 1, "Should have one used insight"); + Assert.equal( + result.usedInsights[0], + "prefers organic", + "Should extract correct insight" + ); +}); + +add_task(async function test_parseContentWithTokens_tokens_with_whitespace() { + const content = + "You can find more details online.§search: coffee brewing methods §"; + const result = await parseContentWithTokens(content); + + Assert.equal( + result.cleanContent, + "You can find more details online.", + "Should remove token with whitespace" + ); + Assert.equal(result.searchQueries.length, 1, "Should have one search query"); + Assert.equal( + result.searchQueries[0], + "coffee brewing methods", + "Should trim whitespace from extracted query" + ); +}); + +add_task(async function test_parseContentWithTokens_adjacent_tokens() { + const content = + "Here are some great Italian dining options.§existing_insight: prefers italian food§§search: local italian restaurants§"; + const result = await parseContentWithTokens(content); + + Assert.equal( + result.cleanContent, + "Here are some great Italian dining options.", + "Should remove adjacent tokens" + ); + Assert.equal(result.searchQueries.length, 1, "Should have one search query"); + Assert.equal( + result.searchQueries[0], + "local italian restaurants", + "Should extract search query" + ); + Assert.equal(result.usedInsights.length, 1, "Should have one insight"); + Assert.equal( + result.usedInsights[0], + "prefers italian food", + "Should extract insight" + ); +}); + +add_task(function test_detectTokens_basic_pattern() { + const content = + "There are many great options available.§search: coffee shops 
near downtown§§search: best rated restaurants§"; + const searchRegex = /§search:\s*([^§]+)§/gi; + const result = detectTokens(content, searchRegex, "query"); + + Assert.equal(result.length, 2, "Should find two matches"); + Assert.equal( + result[0].query, + "coffee shops near downtown", + "First match should extract correct query" + ); + Assert.equal( + result[0].fullMatch, + "§search: coffee shops near downtown§", + "First match should include full match" + ); + Assert.equal( + result[0].startIndex, + 39, + "First match should have correct start index" + ); + Assert.equal( + result[1].query, + "best rated restaurants", + "Second match should extract correct query" + ); +}); + +add_task(function test_detectTokens_custom_key() { + const content = + "I recommend trying the Thai curry.§insight: prefers spicy food§"; + const insightRegex = /§insight:\s*([^§]+)§/gi; + const result = detectTokens(content, insightRegex, "customKey"); + + Assert.equal(result.length, 1, "Should find one match"); + Assert.equal( + result[0].customKey, + "prefers spicy food", + "Should use custom key for extracted value" + ); + Assert.ok( + result[0].hasOwnProperty("customKey"), + "Result should have the custom key property" + ); + Assert.ok( + !result[0].hasOwnProperty("query"), + "Result should not have default 'query' property" + ); +});