commit 49b7caee0c4ebc2a63194628ea1af66fffbe32ec
parent 2d8e96b913537a12a358358d56a1ec42299c4b63
Author: Chidam Gopal <cgopal@mozilla.com>
Date: Thu, 4 Dec 2025 18:37:13 +0000
Bug 2004095 - Improve History API for insights r=cdipersio,ai-models-reviewers
Differential Revision: https://phabricator.services.mozilla.com/D275095
Diffstat:
2 files changed, 106 insertions(+), 10 deletions(-)
diff --git a/browser/components/aiwindow/models/InsightsHistorySource.sys.mjs b/browser/components/aiwindow/models/InsightsHistorySource.sys.mjs
@@ -50,11 +50,50 @@ const SEARCH_ENGINE_PATTERN = new RegExp(
/**
* Fetch recent browsing history from Places (SQL), aggregate by URL,
- * tag "search" vs "history", and filter low-visit URLs.
+ * tag "search" vs "history", and attach simple frequency percentiles.
+ *
+ * This API is designed to support both:
+ * - Initial ("Day 0") backfills over a fixed time window, and
+ * - Incremental reads using a visit_date watermark (`sinceMicros`).
+ *
+ * Callers can either:
+ * 1. Pass `sinceMicros` (microseconds since epoch, Places visit_date-style)
+ * to fetch visits with `visit_date >= sinceMicros`, or
+ * 2. Omit `sinceMicros` and let `days` define a relative cutoff window
+ * from "now" (e.g., last 60 days).
+ *
+ * Typical usage:
+ * - Day 0: getRecentHistory({ sinceMicros: 0, maxResults: 3000 })
+ * // or: getRecentHistory({ days: 60, maxResults: 3000 })
+ * - Incremental:
+ * const rows = await getRecentHistory({ sinceMicros: lastWatermark });
+ * const nextWatermark = Math.max(lastWatermark, ...rows.map(r => r.visitDateMicros));
+ *
+ * NOTE: `visitDateMicros` in the returned objects is the raw Places
+ * visit_date (microseconds since epoch, UTC).
+ *
+ * @param {object} [opts]
+ * @param {?number} [opts.sinceMicros=null]
+ * Optional absolute cutoff in microseconds since epoch (Places
+ * visit_date). If non-null, it is used directly as the cutoff (negative
+ * values are clamped to 0): only visits with `visit_date >= sinceMicros` are returned.
+ *
+ * This is the recommended way to implement incremental reads:
+ * store the max `visitDateMicros` from the previous run and pass
+ * it (or max + 1) back in as `sinceMicros`.
+ *
+ * @param {number} [opts.days=DEFAULT_DAYS]
+ * How far back to look if `sinceMicros` is not provided.
+ * The cutoff is computed as:
+ * cutoffMicros = max(0, (Date.now() - days * MS_PER_DAY) * MICROS_PER_MS)
+ *
+ * Ignored when `sinceMicros` is non-null.
+ *
+ * @param {number} [opts.maxResults=DEFAULT_MAX_RESULTS]
+ * Maximum number of rows to return from the SQL query (after
+ * sorting by most recent visit). Note that this caps the number
+ * of visits, not distinct URLs.
*
- * @param {object} opts
- * @param {number} [opts.days=60] How far back to look
- * @param {number} [opts.maxResults=3000] Max rows to return (after sort)
* @returns {Promise<Array<{
* url: string,
* title: string,
@@ -66,14 +105,24 @@ const SEARCH_ENGINE_PATTERN = new RegExp(
* }>>}
*/
export async function getRecentHistory(opts = {}) {
- const days = opts.days ?? DEFAULT_DAYS;
- const maxResults = opts.maxResults ?? DEFAULT_MAX_RESULTS;
+ // `sinceMicros`, if provided, is a Places visit_date-style cutoff in microseconds.
+ // When it is non-null, `days` is ignored and `sinceMicros` is used directly.
+ const {
+ sinceMicros = null,
+ days = DEFAULT_DAYS,
+ maxResults = DEFAULT_MAX_RESULTS,
+ } = opts;
// Places stores visit_date in microseconds since epoch.
- const cutoffMicros = Math.max(
- 0,
- (Date.now() - days * MS_PER_DAY) * MICROS_PER_MS
- );
+ let cutoffMicros;
+ if (sinceMicros != null) {
+ cutoffMicros = Math.max(0, sinceMicros);
+ } else {
+ cutoffMicros = Math.max(
+ 0,
+ (Date.now() - days * MS_PER_DAY) * MICROS_PER_MS
+ );
+ }
const isSearchVisit = urlStr => {
try {
diff --git a/browser/components/aiwindow/models/tests/xpcshell/test_InsightsHistorySource.js b/browser/components/aiwindow/models/tests/xpcshell/test_InsightsHistorySource.js
@@ -376,6 +376,53 @@ add_task(function test_sessionizeVisits_basic() {
}
});
+add_task(async function test_sinceMicros_cutoff_and_overrides_days() {
+ await PlacesUtils.history.clear();
+ const nowMs = Date.now();
+
+ const early = makeVisit(
+ "https://early.example.com/",
+ "Early",
+ nowMs,
+ -60 * 60 * 1000 // offset in ms: 1 hour before nowMs
+ );
+ const late = makeVisit(
+ "https://late.example.com/",
+ "Late",
+ nowMs,
+ -5 * 60 * 1000 // offset in ms: 5 minutes before nowMs
+ );
+
+ await PlacesUtils.history.insertMany([early, late]);
+
+ // Fetch both rows via the relative `days` window to read their raw visitDateMicros, then derive a watermark between them.
+ const allRows = await getRecentHistory({ days: 1, maxResults: 10 });
+ const byUrl = new Map(allRows.map(r => [r.url, r]));
+ const earlyVisit = byUrl.get(early.url);
+ const lateVisit = byUrl.get(late.url);
+
+ Assert.ok(earlyVisit && lateVisit, "Both visits present in initial fetch");
+
+ const midMicros =
+ (earlyVisit.visitDateMicros + lateVisit.visitDateMicros) / 2; // NOTE(review): not rounded; fractional if the sum is odd
+
+ // Query by watermark only; per the getRecentHistory docs, visits with visit_date >= sinceMicros should come back.
+ const rowsSince = await getRecentHistory({
+ sinceMicros: midMicros, // NOTE(review): `days` is never passed alongside, so "overrides days" is not exercised; confirm
+ maxResults: 10,
+ });
+
+ const urlsSince = rowsSince.map(r => r.url);
+ Assert.ok(
+ urlsSince.includes(late.url),
+ "Late visit included when sinceMicros is between early and late"
+ );
+ Assert.ok(
+ !urlsSince.includes(early.url),
+ "Early visit excluded by sinceMicros cutoff"
+ );
+});
+
add_task(function test_sessionizeVisits_empty_and_invalid() {
// Empty input -> empty output
let sessionized = sessionizeVisits([]);