[ tor-browser ].git.dasho

commit 738aad16509302b669354d90ac832a543ccb9691
parent 414417ee92315e4fbb07658d53e2e5d6adaa609c
Author: pstanciu <pstanciu@mozilla.com>
Date:   Mon, 29 Dec 2025 23:47:57 +0200

Revert "Bug 2006430 - Add workaround for general category queries r=tzhang,ai-models-reviewers" for causing bc failures @ browser_all_files_referenced.js

This reverts commit b9eea0033f00d7f08df55efd3b5ac7facb2ab963.

Diffstat:
M browser/components/aiwindow/models/SearchBrowsingHistory.sys.mjs  | 28 ----------------------------
D browser/components/aiwindow/models/SearchBrowsingHistoryDomainBoost.sys.mjs  | 396 -------------------------------------------------------------------------------
M browser/components/aiwindow/models/moz.build  | 1 -
D browser/components/aiwindow/models/tests/xpcshell/test_SearchBrowsingHistoryDomainBoost.js  | 53 -----------------------------------------------------
M browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml  | 2 --

5 files changed, 0 insertions(+), 480 deletions(-)
diff --git a/browser/components/aiwindow/models/SearchBrowsingHistory.sys.mjs b/browser/components/aiwindow/models/SearchBrowsingHistory.sys.mjs
@@ -11,9 +11,6 @@ ChromeUtils.defineESModuleGetters(lazy, {
   PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs",
   getPlacesSemanticHistoryManager:
     "resource://gre/modules/PlacesSemanticHistoryManager.sys.mjs",
-  // Domain fallback / workaround for general-category queries (games, movies, etc.)
-  SearchBrowsingHistoryDomainBoost:
-    "resource://gre/modules/SearchBrowsingHistoryDomainBoost.sys.mjs",
 });
 
 /**
@@ -284,31 +281,6 @@ async function searchBrowsingHistorySemantic({
   for (let row of results) {
     rows.push(await buildHistoryRow(row));
   }
-
-  // Domain fallback for general-category queries (games, movies, news, etc.)
-  // Keep semantic ranking primary, only top-up if we have room.
-  if (rows.length < historyLimit) {
-    const domains =
-      lazy.SearchBrowsingHistoryDomainBoost.matchDomains(searchTerm);
-    if (domains?.length) {
-      const domainRows =
-        await lazy.SearchBrowsingHistoryDomainBoost.searchByDomains({
-          conn,
-          domains,
-          startTs,
-          endTs,
-          historyLimit: Math.max(historyLimit * 2, 200), // extra for dedupe
-          buildHistoryRow,
-        });
-
-      return lazy.SearchBrowsingHistoryDomainBoost.mergeDedupe(
-        rows,
-        domainRows,
-        historyLimit
-      );
-    }
-  }
-
   return rows;
 }
 
diff --git a/browser/components/aiwindow/models/SearchBrowsingHistoryDomainBoost.sys.mjs b/browser/components/aiwindow/models/SearchBrowsingHistoryDomainBoost.sys.mjs
@@ -1,396 +0,0 @@
-/**
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-/**
- * SearchBrowsingHistoryDomainBoost
- *
- * Temporary heuristic for general-category queries (games, movies, news, etc.)
- * when semantic embeddings over title/description are insufficient.
- *
- * Safe to remove once richer embeddings or better intent classification lands.
- */
-
-export const CATEGORIES_JSON = {
-  language: "en",
-  categories: [
-    {
-      id: "games",
-      terms: [
-        "game",
-        "games",
-        "video game",
-        "video games",
-        "pc games",
-        "console games",
-      ],
-      domains: [
-        "steampowered.com",
-        "roblox.com",
-        "ign.com",
-        "gamespot.com",
-        "polygon.com",
-        "metacritic.com",
-        "epicgames.com",
-        "store.playstation.com",
-        "xbox.com",
-        "nintendo.com",
-      ],
-    },
-    {
-      id: "movies",
-      terms: ["movie", "movies", "film", "films", "cinema"],
-      domains: [
-        "imdb.com",
-        "rottentomatoes.com",
-        "metacritic.com",
-        "letterboxd.com",
-        "netflix.com",
-        "primevideo.com",
-        "disneyplus.com",
-        "hulu.com",
-        "max.com",
-      ],
-    },
-    {
-      id: "tv",
-      terms: ["tv show", "tv shows", "show", "shows", "series", "tv series"],
-      domains: [
-        "imdb.com",
-        "rottentomatoes.com",
-        "metacritic.com",
-        "tvmaze.com",
-        "thetvdb.com",
-        "netflix.com",
-        "primevideo.com",
-        "disneyplus.com",
-        "hulu.com",
-        "max.com",
-      ],
-    },
-    {
-      id: "books",
-      terms: ["book", "books", "novel", "novels"],
-      domains: [
-        "goodreads.com",
-        "gutenberg.org",
-        "openlibrary.org",
-        "barnesandnoble.com",
-        "indigo.ca",
-      ],
-    },
-    {
-      id: "anime",
-      terms: ["anime", "manga"],
-      domains: [
-        "myanimelist.net",
-        "anilist.co",
-        "kitsu.app",
-        "crunchyroll.com",
-      ],
-    },
-    {
-      id: "music",
-      terms: ["music", "song", "songs", "album", "albums", "lyrics"],
-      domains: [
-        "spotify.com",
-        "music.apple.com",
-        "soundcloud.com",
-        "bandcamp.com",
-        "music.youtube.com",
-      ],
-    },
-    {
-      id: "podcasts",
-      terms: ["podcast", "podcasts"],
-      domains: [
-        "podcasts.apple.com",
-        "overcast.fm",
-        "pocketcasts.com",
-        "castbox.fm",
-      ],
-    },
-    {
-      id: "papers_research",
-      terms: [
-        "paper",
-        "papers",
-        "research paper",
-        "research papers",
-        "academic paper",
-        "academic papers",
-        "journal",
-        "journals",
-        "study",
-        "studies",
-        "publication",
-        "publications",
-      ],
-      domains: [
-        "scholar.google.com",
-        "arxiv.org",
-        "semanticscholar.org",
-        "pubmed.ncbi.nlm.nih.gov",
-        "researchgate.net",
-        "ieeexplore.ieee.org",
-        "dl.acm.org",
-        "springer.com",
-        "nature.com",
-        "science.org",
-      ],
-    },
-    {
-      id: "tech_news",
-      terms: ["tech news", "technology news", "startup news"],
-      domains: [
-        "theverge.com",
-        "techcrunch.com",
-        "wired.com",
-        "arstechnica.com",
-        "engadget.com",
-      ],
-    },
-    {
-      id: "finance_news",
-      terms: ["finance news", "business news", "market news", "stock news"],
-      domains: [
-        "bloomberg.com",
-        "wsj.com",
-        "ft.com",
-        "reuters.com",
-        "cnbc.com",
-      ],
-    },
-    {
-      id: "news",
-      terms: [
-        "news",
-        "headline",
-        "headlines",
-        "breaking news",
-        "world news",
-        "latest news",
-      ],
-      domains: [
-        "reuters.com",
-        "apnews.com",
-        "bbc.com",
-        "cnn.com",
-        "nytimes.com",
-        "theguardian.com",
-        "washingtonpost.com",
-        "aljazeera.com",
-        "npr.org",
-        "wsj.com",
-        "bloomberg.com",
-        "ft.com",
-      ],
-    },
-    {
-      id: "recipes",
-      terms: [
-        "recipe",
-        "recipes",
-        "cooking",
-        "food",
-        "dinner ideas",
-        "meal prep",
-      ],
-      domains: [
-        "allrecipes.com",
-        "seriouseats.com",
-        "foodnetwork.com",
-        "bbcgoodfood.com",
-        "epicurious.com",
-        "nytcooking.com",
-      ],
-    },
-    {
-      id: "travel",
-      terms: ["travel", "hotels", "places", "destinations", "things to do"],
-      domains: [
-        "tripadvisor.com",
-        "booking.com",
-        "expedia.com",
-        "airbnb.com",
-        "lonelyplanet.com",
-      ],
-    },
-  ],
-};
-
-/**
- * Normalizes a query string into a lowercase, space-separated form suitable for matching
- * and comparison.
- *
- * @param {string} s
- * @returns {string}
- */
-function normalizeQuery(s) {
-  return (s || "")
-    .toLowerCase()
-    .replace(/[^\p{L}\p{N}]+/gu, " ")
-    .replace(/\s+/g, " ")
-    .trim();
-}
-
-/**
- * Returns the matched category domains if searchTerm looks like a general category query.
- * Uses phrase matching on normalized query string.
- *
- * @param {string} searchTerm
- * @param {object} [categoriesJson=CATEGORIES_JSON]
- * @returns {string[]|null}
- */
-export function matchDomains(searchTerm, categoriesJson = CATEGORIES_JSON) {
-  const q = ` ${normalizeQuery(searchTerm)} `;
-  if (!q.trim()) {
-    return null;
-  }
-
-  for (const cat of categoriesJson.categories) {
-    for (const t of cat.terms) {
-      // Pad with spaces to enable whole-token phrase matching via includes.
-      const tt = ` ${normalizeQuery(t)} `;
-      if (tt.trim() && q.includes(tt)) {
-        return cat.domains;
-      }
-    }
-  }
-
-  return null;
-}
-
-/**
- * Builds a SQL WHERE clause for matching `http`/`https` URLs belonging
- * to the given root domains and their `www` variants.
- *
- * @param {string[]} domains
- * @returns {{ where: string, params: object }}
- */
-function buildDomainUrlWhere(domains) {
-  const clauses = [];
-  const params = {};
-  let i = 0;
-
-  for (const raw of domains || []) {
-    const d = String(raw).toLowerCase();
-    if (!d) {
-      continue;
-    }
-
-    // - https://domain/...
-    // - https://www.domain/...
-    params[`d${i}`] = `%://${d}/%`;
-    clauses.push(`lower(url) LIKE :d${i++}`);
-
-    params[`d${i}`] = `%://www.${d}/%`;
-    clauses.push(`lower(url) LIKE :d${i++}`);
-  }
-
-  return {
-    where: clauses.length ? `(${clauses.join(" OR ")})` : "0",
-    params,
-  };
-}
-
-/**
- * Domain-filtered moz_places query (time-windowed).
- *
- * @param {object} params
- * @param {object} params.conn
- * @param {string[]} params.domains
- * @param {number|null} params.startTs
- * @param {number|null} params.endTs
- * @param {number} params.historyLimit
- * @param {Function} params.buildHistoryRow
- * @returns {Promise<object[]>}
- */
-export async function searchByDomains({
-  conn,
-  domains,
-  startTs,
-  endTs,
-  historyLimit,
-  buildHistoryRow,
-}) {
-  if (!conn || !Array.isArray(domains) || !domains.length) {
-    return [];
-  }
-
-  const { where, params } = buildDomainUrlWhere(domains);
-
-  const results = await conn.executeCached(
-    `
-      SELECT id,
-             title,
-             url,
-             NULL AS distance,
-             visit_count,
-             frecency,
-             last_visit_date,
-             preview_image_url
-      FROM moz_places
-      WHERE frecency <> 0
-        AND (:startTs IS NULL OR last_visit_date >= :startTs)
-        AND (:endTs IS NULL OR last_visit_date <= :endTs)
-        AND ${where}
-      ORDER BY last_visit_date DESC, frecency DESC
-      LIMIT :limit
-    `,
-    {
-      startTs,
-      endTs,
-      limit: historyLimit,
-      ...params,
-    }
-  );
-
-  const rows = [];
-  for (const row of results) {
-    rows.push(await buildHistoryRow(row));
-  }
-  return rows;
-}
-
-/**
- * Merge two result lists, keeping `primary` order, then topping up from `secondary`,
- * while de-duping by url (fallback to id).
- *
- * @param {object[]} primary
- * @param {object[]} secondary
- * @param {number} limit
- * @returns {object[]}
- */
-export function mergeDedupe(primary, secondary, limit) {
-  const seen = new Set();
-  const out = [];
-
-  const keyOf = r => r?.url || r?.id;
-
-  for (const r of primary || []) {
-    const k = keyOf(r);
-    if (!seen.has(k)) {
-      seen.add(k);
-      out.push(r);
-      if (out.length >= limit) {
-        return out;
-      }
-    }
-  }
-
-  for (const r of secondary || []) {
-    const k = keyOf(r);
-    if (!seen.has(k)) {
-      seen.add(k);
-      out.push(r);
-      if (out.length >= limit) {
-        return out;
-      }
-    }
-  }
-
-  return out;
-}
diff --git a/browser/components/aiwindow/models/moz.build b/browser/components/aiwindow/models/moz.build
@@ -28,7 +28,6 @@ MOZ_SRC_FILES += [
     "InsightsSchemas.sys.mjs",
     "IntentClassifier.sys.mjs",
     "SearchBrowsingHistory.sys.mjs",
-    "SearchBrowsingHistoryDomainBoost.sys.mjs",
     "TitleGeneration.sys.mjs",
     "Tools.sys.mjs",
     "Utils.sys.mjs",
diff --git a/browser/components/aiwindow/models/tests/xpcshell/test_SearchBrowsingHistoryDomainBoost.js b/browser/components/aiwindow/models/tests/xpcshell/test_SearchBrowsingHistoryDomainBoost.js
@@ -1,53 +0,0 @@
-/**
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-const { matchDomains, mergeDedupe } = ChromeUtils.importESModule(
-  "moz-src:///browser/components/aiwindow/models/SearchBrowsingHistoryDomainBoost.sys.mjs"
-);
-
-add_task(async function test_matchDomains_games_and_boundary_behavior() {
-  // Positive: should match games category
-  const domains = matchDomains("video games");
-  Assert.ok(
-    domains?.includes("steampowered.com"),
-    "Should include steampowered.com for games"
-  );
-
-  // Negative: should not match substrings inside words ("endgame" should not trigger "game")
-  const domains2 = matchDomains("endgame");
-  Assert.equal(domains2, null, "Should not match 'game' inside 'endgame'");
-});
-
-add_task(async function test_matchDomains_prefers_longer_phrases() {
-  // "tech news" should match tech_news (not generic news)
-  const domains = matchDomains("tech news");
-  Assert.ok(
-    domains?.includes("techcrunch.com"),
-    "Should match tech_news domains"
-  );
-  Assert.ok(
-    !domains.includes("reuters.com"),
-    "Should not fall back to generic news domains"
-  );
-});
-
-add_task(async function test_mergeDedupe_semantic_first_then_topup() {
-  const primary = [
-    { id: 1, url: "https://example.com/a", title: "A" },
-    { id: 2, url: "https://example.com/b", title: "B" },
-  ];
-  const secondary = [
-    { id: 3, url: "https://example.com/b", title: "B dup" }, // dup by url
-    { id: 4, url: "https://example.com/c", title: "C" },
-  ];
-
-  const out = mergeDedupe(primary, secondary, 10);
-  Assert.deepEqual(
-    out.map(r => r.url),
-    ["https://example.com/a", "https://example.com/b", "https://example.com/c"],
-    "Should keep primary order and de-dupe by url"
-  );
-});
diff --git a/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml b/browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml
@@ -28,8 +28,6 @@ support-files = []
 
 ["test_SearchBrowsingHistory.js"]
 
-["test_SearchBrowsingHistoryDomainBoost.js"]
-
 ["test_TitleGeneration.js"]
 
 ["test_Tools_GetOpenTabs.js"]

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log | Files | Refs | README | LICENSE

M	browser/components/aiwindow/models/SearchBrowsingHistory.sys.mjs	|	28	----------------------------
D	browser/components/aiwindow/models/SearchBrowsingHistoryDomainBoost.sys.mjs	|	396	-------------------------------------------------------------------------------
M	browser/components/aiwindow/models/moz.build	|	1	-
D	browser/components/aiwindow/models/tests/xpcshell/test_SearchBrowsingHistoryDomainBoost.js	|	53	-----------------------------------------------------
M	browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml	|	2	--