[ tor-browser ].git.dasho

commit 75e1bd656b68b0d4f09c055c1fd3fe816c54d8a9
parent 3ef513152eeba70f727d780959a658fe0495d188
Author: Tom Zhang <tzhang@mozilla.com>
Date:   Fri, 19 Dec 2025 18:18:49 +0000

Bug 2006772 - implement response parsing functions for insights and search terms r=ai-models-reviewers,tburrell

Differential Revision: https://phabricator.services.mozilla.com/D277125

Diffstat:
M browser/components/aiwindow/models/ChatUtils.sys.mjs  | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M browser/components/aiwindow/models/tests/xpcshell/test_ChatUtils.js  | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

2 files changed, 246 insertions(+), 0 deletions(-)
diff --git a/browser/components/aiwindow/models/ChatUtils.sys.mjs b/browser/components/aiwindow/models/ChatUtils.sys.mjs
@@ -149,3 +149,75 @@ export async function constructRelevantInsightsContextMessage(message) {
   // If there aren't any relevant insights, return null
   return null;
 }
+
+/**
+ * Response parsing function that detects specially tagged information such as insights and search terms.
+ * Also returns the cleaned content with all of the tags removed.
+ *
+ * @param {string} content
+ * @returns {Promise<object>}
+ */
+export async function parseContentWithTokens(content) {
+  const searchRegex = /§search:\s*([^§]+)§/gi;
+  const insightsRegex = /§existing_insight:\s*([^§]+)§/gi;
+
+  const searchTokens = detectTokens(content, searchRegex, "query");
+  const insightsTokens = detectTokens(content, insightsRegex, "insights");
+  // Sort all tokens by descending start index so removing one never shifts the indices of those still to be removed
+  const allTokens = [...searchTokens, ...insightsTokens].sort(
+    (a, b) => b.startIndex - a.startIndex
+  );
+
+  if (allTokens.length === 0) {
+    return {
+      cleanContent: content,
+      searchQueries: [],
+      usedInsights: [],
+    };
+  }
+
+  // Clean content by removing tagged information
+  let cleanContent = content;
+  const searchQueries = [];
+  const usedInsights = [];
+
+  for (const token of allTokens) {
+    if (token.query) {
+      searchQueries.unshift(token.query);
+    } else if (token.insights) {
+      usedInsights.unshift(token.insights);
+      // TODO: do we need customEvent to dispatch used insights as we iterate?
+    }
+    cleanContent =
+      cleanContent.slice(0, token.startIndex) +
+      cleanContent.slice(token.endIndex);
+  }
+
+  return {
+    cleanContent: cleanContent.trim(),
+    searchQueries,
+    usedInsights,
+  };
+}
+
+/**
+ * Given the content and a global (`g` flag) regex pattern to search for, find all occurrences of the pattern.
+ *
+ * @param {string} content
+ * @param {RegExp} regexPattern
+ * @param {string} key
+ * @returns {Array<object>}
+ */
+export function detectTokens(content, regexPattern, key) {
+  const matches = [];
+  let match;
+  while ((match = regexPattern.exec(content)) !== null) {
+    matches.push({
+      fullMatch: match[0],
+      [key]: match[1].trim(),
+      startIndex: match.index,
+      endIndex: match.index + match[0].length,
+    });
+  }
+  return matches;
+}
diff --git a/browser/components/aiwindow/models/tests/xpcshell/test_ChatUtils.js b/browser/components/aiwindow/models/tests/xpcshell/test_ChatUtils.js
@@ -11,6 +11,8 @@ const {
   getLocalIsoTime,
   getCurrentTabMetadata,
   constructRelevantInsightsContextMessage,
+  parseContentWithTokens,
+  detectTokens,
 } = ChromeUtils.importESModule(
   "moz-src:///browser/components/aiwindow/models/ChatUtils.sys.mjs"
 );
@@ -342,3 +344,175 @@ add_task(
     }
   }
 );
+
+add_task(async function test_parseContentWithTokens_no_tokens() {
+  const content = "This is a regular message with no special tokens.";
+  const result = await parseContentWithTokens(content);
+
+  Assert.equal(
+    result.cleanContent,
+    content,
+    "Clean content should match original when no tokens present"
+  );
+  Assert.equal(result.searchQueries.length, 0, "Should have no search queries");
+  Assert.equal(result.usedInsights.length, 0, "Should have no used insights");
+});
+
+add_task(async function test_parseContentWithTokens_single_search_token() {
+  const content =
+    "You can find great coffee in the downtown area.§search: best coffee shops near me§";
+  const result = await parseContentWithTokens(content);
+
+  Assert.equal(
+    result.cleanContent,
+    "You can find great coffee in the downtown area.",
+    "Should remove search token from content"
+  );
+  Assert.equal(result.searchQueries.length, 1, "Should have one search query");
+  Assert.equal(
+    result.searchQueries[0],
+    "best coffee shops near me",
+    "Should extract correct search query"
+  );
+  Assert.equal(result.usedInsights.length, 0, "Should have no used insights");
+});
+
+add_task(async function test_parseContentWithTokens_single_insight_token() {
+  const content =
+    "I recommend trying herbal tea blends.§existing_insight: likes tea§";
+  const result = await parseContentWithTokens(content);
+
+  Assert.equal(
+    result.cleanContent,
+    "I recommend trying herbal tea blends.",
+    "Should remove insight token from content"
+  );
+  Assert.equal(result.searchQueries.length, 0, "Should have no search queries");
+  Assert.equal(result.usedInsights.length, 1, "Should have one used insight");
+  Assert.equal(
+    result.usedInsights[0],
+    "likes tea",
+    "Should extract correct insight"
+  );
+});
+
+add_task(async function test_parseContentWithTokens_multiple_mixed_tokens() {
+  const content =
+    "I recommend checking out organic coffee options.§existing_insight: prefers organic§ They have great flavor profiles.§search: organic coffee beans reviews§§search: best organic cafes nearby§";
+  const result = await parseContentWithTokens(content);
+
+  Assert.equal(
+    result.cleanContent,
+    "I recommend checking out organic coffee options. They have great flavor profiles.",
+    "Should remove all tokens from content"
+  );
+  Assert.equal(
+    result.searchQueries.length,
+    2,
+    "Should have two search queries"
+  );
+  Assert.deepEqual(
+    result.searchQueries,
+    ["organic coffee beans reviews", "best organic cafes nearby"],
+    "Should extract search queries in correct order"
+  );
+  Assert.equal(result.usedInsights.length, 1, "Should have one used insight");
+  Assert.equal(
+    result.usedInsights[0],
+    "prefers organic",
+    "Should extract correct insight"
+  );
+});
+
+add_task(async function test_parseContentWithTokens_tokens_with_whitespace() {
+  const content =
+    "You can find more details online.§search:   coffee brewing methods   §";
+  const result = await parseContentWithTokens(content);
+
+  Assert.equal(
+    result.cleanContent,
+    "You can find more details online.",
+    "Should remove token with whitespace"
+  );
+  Assert.equal(result.searchQueries.length, 1, "Should have one search query");
+  Assert.equal(
+    result.searchQueries[0],
+    "coffee brewing methods",
+    "Should trim whitespace from extracted query"
+  );
+});
+
+add_task(async function test_parseContentWithTokens_adjacent_tokens() {
+  const content =
+    "Here are some great Italian dining options.§existing_insight: prefers italian food§§search: local italian restaurants§";
+  const result = await parseContentWithTokens(content);
+
+  Assert.equal(
+    result.cleanContent,
+    "Here are some great Italian dining options.",
+    "Should remove adjacent tokens"
+  );
+  Assert.equal(result.searchQueries.length, 1, "Should have one search query");
+  Assert.equal(
+    result.searchQueries[0],
+    "local italian restaurants",
+    "Should extract search query"
+  );
+  Assert.equal(result.usedInsights.length, 1, "Should have one insight");
+  Assert.equal(
+    result.usedInsights[0],
+    "prefers italian food",
+    "Should extract insight"
+  );
+});
+
+add_task(function test_detectTokens_basic_pattern() {
+  const content =
+    "There are many great options available.§search: coffee shops near downtown§§search: best rated restaurants§";
+  const searchRegex = /§search:\s*([^§]+)§/gi;
+  const result = detectTokens(content, searchRegex, "query");
+
+  Assert.equal(result.length, 2, "Should find two matches");
+  Assert.equal(
+    result[0].query,
+    "coffee shops near downtown",
+    "First match should extract correct query"
+  );
+  Assert.equal(
+    result[0].fullMatch,
+    "§search: coffee shops near downtown§",
+    "First match should include full match"
+  );
+  Assert.equal(
+    result[0].startIndex,
+    39,
+    "First match should have correct start index"
+  );
+  Assert.equal(
+    result[1].query,
+    "best rated restaurants",
+    "Second match should extract correct query"
+  );
+});
+
+add_task(function test_detectTokens_custom_key() {
+  const content =
+    "I recommend trying the Thai curry.§insight: prefers spicy food§";
+  const insightRegex = /§insight:\s*([^§]+)§/gi;
+  const result = detectTokens(content, insightRegex, "customKey");
+
+  Assert.equal(result.length, 1, "Should find one match");
+  Assert.equal(
+    result[0].customKey,
+    "prefers spicy food",
+    "Should use custom key for extracted value"
+  );
+  Assert.ok(
+    result[0].hasOwnProperty("customKey"),
+    "Result should have the custom key property"
+  );
+  Assert.ok(
+    !result[0].hasOwnProperty("query"),
+    "Result should not have default 'query' property"
+  );
+});

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	browser/components/aiwindow/models/ChatUtils.sys.mjs	\|	72	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	browser/components/aiwindow/models/tests/xpcshell/test_ChatUtils.js	\|	174	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++