tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 49b7caee0c4ebc2a63194628ea1af66fffbe32ec
parent 2d8e96b913537a12a358358d56a1ec42299c4b63
Author: Chidam Gopal <cgopal@mozilla.com>
Date:   Thu,  4 Dec 2025 18:37:13 +0000

Bug 2004095 - Improve History API for insights r=cdipersio,ai-models-reviewers

Differential Revision: https://phabricator.services.mozilla.com/D275095

Diffstat:
M browser/components/aiwindow/models/InsightsHistorySource.sys.mjs | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
M browser/components/aiwindow/models/tests/xpcshell/test_InsightsHistorySource.js | 47 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 106 insertions(+), 10 deletions(-)

diff --git a/browser/components/aiwindow/models/InsightsHistorySource.sys.mjs b/browser/components/aiwindow/models/InsightsHistorySource.sys.mjs @@ -50,11 +50,50 @@ const SEARCH_ENGINE_PATTERN = new RegExp( /** * Fetch recent browsing history from Places (SQL), aggregate by URL, - * tag "search" vs "history", and filter low-visit URLs. + * tag "search" vs "history", and attach simple frequency percentiles. + * + * This API is designed to support both: + * - Initial ("Day 0") backfills over a fixed time window, and + * - Incremental reads using a visit_date watermark (`sinceMicros`). + * + * Callers can either: + * 1. Pass `sinceMicros` (microseconds since epoch, Places visit_date-style) + * to fetch visits with `visit_date >= sinceMicros`, or + * 2. Omit `sinceMicros` and let `days` define a relative cutoff window + * from "now" (e.g., last 60 days). + * + * Typical usage: + * - Day 0: getRecentHistory({ sinceMicros: 0, maxResults: 3000 }) + * // or: getRecentHistory({ days: 60, maxResults: 3000 }) + * - Incremental: + * const rows = await getRecentHistory({ sinceMicros: lastWatermark }); + * const nextWatermark = Math.max(...rows.map(r => r.visitDateMicros)); + * + * NOTE: `visitDateMicros` in the returned objects is the raw Places + * visit_date (microseconds since epoch, UTC). + * + * @param {object} [opts] + * @param {number} [opts.sinceMicros=null] + * Optional absolute cutoff in microseconds since epoch (Places + * visit_date). If provided, this is used directly as the cutoff: + * only visits with `visit_date >= sinceMicros` are returned. + * + * This is the recommended way to implement incremental reads: + * store the max `visitDateMicros` from the previous run and pass + * it (or max + 1) back in as `sinceMicros`. + * + * @param {number} [opts.days=DEFAULT_DAYS] + * How far back to look if `sinceMicros` is not provided. + * The cutoff is computed as: + * cutoff = now() - days * MS_PER_DAY + * + * Ignored when `sinceMicros` is non-null. 
+ * + * @param {number} [opts.maxResults=DEFAULT_MAX_RESULTS] + * Maximum number of rows to return from the SQL query (after + * sorting by most recent visit). Note that this caps the number + * of visits, not distinct URLs. * - * @param {object} opts - * @param {number} [opts.days=60] How far back to look - * @param {number} [opts.maxResults=3000] Max rows to return (after sort) * @returns {Promise<Array<{ * url: string, * title: string, @@ -66,14 +105,24 @@ const SEARCH_ENGINE_PATTERN = new RegExp( * }>>} */ export async function getRecentHistory(opts = {}) { - const days = opts.days ?? DEFAULT_DAYS; - const maxResults = opts.maxResults ?? DEFAULT_MAX_RESULTS; + // If provided, this is a Places visit_date-style cutoff in microseconds + // When non-null, `days` is ignored and we use `sinceMicros` directly. + const { + sinceMicros = null, + days = DEFAULT_DAYS, + maxResults = DEFAULT_MAX_RESULTS, + } = opts; // Places stores visit_date in microseconds since epoch. - const cutoffMicros = Math.max( - 0, - (Date.now() - days * MS_PER_DAY) * MICROS_PER_MS - ); + let cutoffMicros; + if (sinceMicros != null) { + cutoffMicros = Math.max(0, sinceMicros); + } else { + cutoffMicros = Math.max( + 0, + (Date.now() - days * MS_PER_DAY) * MICROS_PER_MS + ); + } const isSearchVisit = urlStr => { try { diff --git a/browser/components/aiwindow/models/tests/xpcshell/test_InsightsHistorySource.js b/browser/components/aiwindow/models/tests/xpcshell/test_InsightsHistorySource.js @@ -376,6 +376,53 @@ add_task(function test_sessionizeVisits_basic() { } }); +add_task(async function test_sinceMicros_cutoff_and_overrides_days() { + await PlacesUtils.history.clear(); + const nowMs = Date.now(); + + const early = makeVisit( + "https://early.example.com/", + "Early", + nowMs, + -60 * 60 * 1000 // 1 hour ago + ); + const late = makeVisit( + "https://late.example.com/", + "Late", + nowMs, + -5 * 60 * 1000 // 5 minutes ago + ); + + await PlacesUtils.history.insertMany([early, late]); + + // Get 
the raw visitDateMicros so we can compute a watermark between them. + const allRows = await getRecentHistory({ days: 1, maxResults: 10 }); + const byUrl = new Map(allRows.map(r => [r.url, r])); + const earlyVisit = byUrl.get(early.url); + const lateVisit = byUrl.get(late.url); + + Assert.ok(earlyVisit && lateVisit, "Both visits present in initial fetch"); + + const midMicros = + (earlyVisit.visitDateMicros + lateVisit.visitDateMicros) / 2; + + // Get visits since midMicros + const rowsSince = await getRecentHistory({ + sinceMicros: midMicros, + maxResults: 10, + }); + + const urlsSince = rowsSince.map(r => r.url); + Assert.ok( + urlsSince.includes(late.url), + "Late visit included when sinceMicros is between early and late" + ); + Assert.ok( + !urlsSince.includes(early.url), + "Early visit excluded by sinceMicros cutoff" + ); +}); + add_task(function test_sessionizeVisits_empty_and_invalid() { // Empty input -> empty output let sessionized = sessionizeVisits([]);