commit 1f301699e06a655e43be19615e628ac95912a2a7
parent 0b1b8ae99ebb40fd76e3e94ab3e6d7f3ae9b8d1d
Author: Rolf Rando <rrando@mozilla.com>
Date: Wed, 8 Oct 2025 15:21:29 +0000
Bug 1990399 - Add randomness to content items in newtab_content telemetry r=home-newtab-reviewers,nbarrett
• Add randomization to telemetry for non-spoc items, with a probability that is a function of a target epsilon value and the number of items in the user list
• Add ability to pause all items sent with the newtab_content ping after a certain configurable count that has been set. Resets every 24 hours.
• Shuffle newtab_content items for additional privacy
Differential Revision: https://phabricator.services.mozilla.com/D265928
Diffstat:
4 files changed, 388 insertions(+), 21 deletions(-)
diff --git a/browser/extensions/newtab/lib/NewTabContentPing.sys.mjs b/browser/extensions/newtab/lib/NewTabContentPing.sys.mjs
@@ -8,6 +8,7 @@ const lazy = {};
ChromeUtils.defineESModuleGetters(lazy, {
DeferredTask: "resource://gre/modules/DeferredTask.sys.mjs",
+ PersistentCache: "resource://newtab/lib/PersistentCache.sys.mjs",
});
XPCOMUtils.defineLazyPreferenceGetter(
@@ -17,10 +18,31 @@ XPCOMUtils.defineLazyPreferenceGetter(
5000
);
+const EVENT_STATS_KEY = "event_stats";
+const CACHE_KEY = "newtab_content_event_stats";
+
+const EVENT_STATS_PERIOD_MS = 60 * 60 * 24 * 1000;
+const MAX_UINT32 = 0xffffffff;
+
export class NewTabContentPing {
#eventBuffer = [];
#deferredTask = null;
#lastDelaySelection = 0;
+ #maxDailyEvents = 0;
+ #curInstanceEventsSent = 0; // Used for tests
+
+ constructor() {
+ this.#maxDailyEvents = 0;
+ this.cache = this.PersistentCache(CACHE_KEY, true);
+ }
+
+ /**
+ * Set the maximum number of events to send in a 24 hour period
+ * @param {int} maxEvents
+ */
+ setMaxEventsPerDay(maxEvents) {
+ this.#maxDailyEvents = maxEvents || 0;
+ }
/**
* Adds a event recording for Glean.newtabContent to the internal buffer.
@@ -54,8 +76,8 @@ export class NewTabContentPing {
if (!this.#deferredTask) {
this.#lastDelaySelection = this.#generateRandomSubmissionDelayMs();
- this.#deferredTask = new lazy.DeferredTask(() => {
- this.#flushEventsAndSubmit();
+ this.#deferredTask = new lazy.DeferredTask(async () => {
+ await this.#flushEventsAndSubmit();
}, this.#lastDelaySelection);
this.#deferredTask.arm();
}
@@ -71,24 +93,74 @@ export class NewTabContentPing {
}
/**
+ * Resets the impression stats object of the Newtab_content ping and returns it.
+ */
+ async resetStats() {
+ const eventStats = {
+ count: 0,
+ lastUpdated: this.Date().now(),
+ };
+ await this.cache.set(EVENT_STATS_KEY, eventStats);
+ return eventStats;
+ }
+
+ /**
+ * Randomly shuffles the elements of an array in place using the Fisher–Yates algorithm.
+ * Swap indices come from NewTabContentPing.secureRandIntInRange (crypto-backed and
+ * free of modulo bias) rather than Math.random(), because the shuffle exists to hide
+ * the original event order and should not depend on a predictable generator.
+ * @param {Array} array - The array to shuffle. This array will be modified.
+ * @returns {Array} The same array instance, shuffled randomly.
+ */
+ static shuffleArray(array) {
+ for (let i = array.length - 1; i > 0; i--) {
+ // Uniform index in [0, i]; the helper's upper bound is exclusive.
+ const j = NewTabContentPing.secureRandIntInRange(i + 1);
+ [array[i], array[j]] = [array[j], array[i]];
+ }
+ return array;
+ }
+ /**
* Called by the DeferredTask when the randomly selected delay has elapsed
* after calling scheduleSubmission.
*/
- #flushEventsAndSubmit() {
+ async #flushEventsAndSubmit() {
this.#deferredTask = null;
+ // See if we have no event stats or the stats period has cycled
+ let eventStats = await this.cache.get(EVENT_STATS_KEY, {});
+ if (
+ !eventStats?.lastUpdated ||
+ !(this.Date().now() - eventStats.lastUpdated < EVENT_STATS_PERIOD_MS)
+ ) {
+ eventStats = await this.resetStats();
+ }
+
let events = this.#eventBuffer;
this.#eventBuffer = [];
+ if (this.#maxDailyEvents > 0) {
+ if (eventStats?.count >= this.#maxDailyEvents) {
+ // Drop the events. Don't send.
+ return;
+ }
+ }
+ eventStats.count += events.length;
+ await this.cache.set(EVENT_STATS_KEY, eventStats);
- for (let [eventName, data] of events) {
+ for (let [eventName, data] of NewTabContentPing.shuffleArray(events)) {
try {
Glean.newtabContent[eventName].record(data);
} catch (e) {
console.error(e);
}
}
-
GleanPings.newtabContent.submit();
+ this.#curInstanceEventsSent += events.length;
+ }
+
+ /**
+ * Returns number of events sent through Glean in this instance of the class.
+ */
+ get testOnlyCurInstanceEventCount() {
+ return this.#curInstanceEventsSent;
}
/**
@@ -151,24 +223,49 @@ export class NewTabContentPing {
: 5000;
const RANGE = MAX_SUBMISSION_DELAY - MIN_SUBMISSION_DELAY + 1;
- const MAX_UINT32 = 0xffffffff;
+ const selection = NewTabContentPing.secureRandIntInRange(RANGE);
+ return MIN_SUBMISSION_DELAY + (selection % RANGE);
+ }
+ /**
+ * Returns a secure random number between 0 and range
+ * @param {int} range Integer value range
+ * @returns {int} Random value between 0 and range non-inclusive
+ */
+ static secureRandIntInRange(range) {
// To ensure a uniform distribution, we discard values that could introduce
// modulo bias. We divide the 2^32 range into equal-sized "buckets" and only
// accept random values that fall entirely within one of these buckets.
// This ensures each possible output in the target range is equally likely.
- const BUCKET_SIZE = Math.floor(MAX_UINT32 / RANGE);
- const MAX_ACCEPTABLE = BUCKET_SIZE * RANGE;
+
+ const BUCKET_SIZE = Math.floor(MAX_UINT32 / range);
+ const MAX_ACCEPTABLE = BUCKET_SIZE * range;
let selection;
let randomValues = new Uint32Array(1);
-
do {
crypto.getRandomValues(randomValues);
[selection] = randomValues;
} while (selection >= MAX_ACCEPTABLE);
+ return selection % range;
+ }
- return MIN_SUBMISSION_DELAY + (selection % RANGE);
+ /**
+ * Returns true with the specified probability, and false otherwise
+ * @param {Number} prob Probability
+ * @returns {boolean} Random boolean result of probability prob
+ */
+ static decideWithProbability(prob) {
+ if (prob <= 0) {
+ return false;
+ }
+ if (prob >= 1) {
+ return true;
+ }
+ const randomValues = new Uint32Array(1);
+ crypto.getRandomValues(randomValues);
+ const random = randomValues[0] / MAX_UINT32;
+ return random < prob;
}
/**
@@ -182,9 +279,9 @@ export class NewTabContentPing {
* The originally selected random delay for submitting the newtab-content
* ping.
* @throws {Error}
- * Throws if this is called when no submission has been scheduled yet.
+ * Function throws an exception if this is called when no submission has been scheduled yet.
*/
- testOnlyForceFlush() {
+ async testOnlyForceFlush() {
if (!Cu.isInAutomation) {
return 0;
}
@@ -192,9 +289,21 @@ export class NewTabContentPing {
if (this.#deferredTask) {
this.#deferredTask.disarm();
this.#deferredTask = null;
- this.#flushEventsAndSubmit();
+ await this.#flushEventsAndSubmit();
return this.#lastDelaySelection;
}
throw new Error("No submission was scheduled.");
}
}
+
+/**
+ * Creating a thin wrapper around PersistentCache, and Date.
+ * This makes it easier for us to write automated tests
+ */
+NewTabContentPing.prototype.PersistentCache = (...args) => {
+ return new lazy.PersistentCache(...args);
+};
+
+NewTabContentPing.prototype.Date = () => {
+ return Date;
+};
diff --git a/browser/extensions/newtab/lib/TelemetryFeed.sys.mjs b/browser/extensions/newtab/lib/TelemetryFeed.sys.mjs
@@ -78,6 +78,26 @@ const PREF_SYSTEM_INFERRED_PERSONALIZATION =
const PREF_SECTIONS_PERSONALIZATION_ENABLED =
"discoverystream.sections.personalization.enabled";
+const TOP_STORIES_SECTION_NAME = "top_stories_section";
+
+/**
+ Additional parameters defined in the newTabTrainHop experimenter method
+
+ trainhopConfig.newtabPrivatePing.randomContentProbabilityEpsilonMicro
+ Epsilon for randomizing content impression and click telemetry using the Randomized Response method
+ in the newtab_content ping, as an integer multiplied by 1e6
+
+ trainhopConfig.newtabPrivatePing.dailyEventCap
+ Maximum newtab_content events that can be sent in 24 hour period.
+*/
+const TRAINHOP_PREF_RANDOM_CONTENT_PROBABILITY_MICRO =
+ "randomContentProbabilityEpsilonMicro";
+
+/**
+ * Maximum newtab_content events that can be sent in 24 hour period.
+ */
+const TRAINHOP_PREF_DAILY_EVENT_CAP = "dailyEventCap";
+
// This is a mapping table between the user preferences and its encoding code
export const USER_PREFS_ENCODING = {
showSearch: 1 << 0,
@@ -134,6 +154,8 @@ export class TelemetryFeed {
this._aboutHomeSeen = false;
this._classifySite = classifySite;
this._browserOpenNewtabStart = null;
+ this._privateRandomContentTelemetryProbablityValues = {};
+
this.newtabContentPing = new lazy.NewTabContentPing();
XPCOMUtils.defineLazyPreferenceGetter(
@@ -475,7 +497,6 @@ export class TelemetryFeed {
*/
async endSession(portID) {
const session = this.sessions.get(portID);
-
if (!session) {
// It's possible the tab was never visible – in which case, there was no user session.
return;
@@ -760,6 +781,91 @@ export class TelemetryFeed {
}
}
+ /**
+ * Collects the recommendations of every feed found in the DiscoveryStream state.
+ * Each level is read with optional chaining, so a missing store, feeds container,
+ * or feed entry produces an empty list instead of throwing.
+ * @returns {Array} Flat list of all articles for the New Tab. Does not include spocs (ads)
+ */
+ getAllRecommendations() {
+ const merinoData = this.store?.getState()?.DiscoveryStream?.feeds?.data;
+ return Object.values(merinoData ?? {}).flatMap(
+ feed => feed?.data?.recommendations ?? []
+ );
+ }
+
+ /**
+ * Counts the recommendations across every feed in the DiscoveryStream state without
+ * building the flattened list. Null-safe in the same way as getAllRecommendations:
+ * a missing store, feeds container, or feed entry contributes 0.
+ * @returns {number} Number of articles for the New Tab. Does not include spocs (ads)
+ */
+ getRecommendationCount() {
+ const merinoData = this.store?.getState()?.DiscoveryStream?.feeds?.data;
+ return Object.values(merinoData ?? {}).reduce(
+ (count, feed) => count + (feed?.data?.recommendations?.length || 0),
+ 0
+ );
+ }
+
+ /**
+ * Applies k-ary randomized response to an organic content event. With probability
+ * q the reported topic and corpus_item_id (and, outside the top stories section,
+ * the section fields) are replaced by those of a recommendation drawn uniformly
+ * from the current feeds; otherwise the event is returned untouched.
+ *
+ * q = n / (e^epsilon + n - 1), where n is the number of recommendations. Because
+ * the replacement is drawn from all n items (the true one included), the true item
+ * ends up reported with probability e^epsilon / (e^epsilon + n - 1) and every other
+ * item with probability 1 / (e^epsilon + n - 1). A larger epsilon therefore means
+ * less randomization.
+ *
+ * @param {object} item Event payload about to be recorded.
+ * @returns {object} Same item, or a copy describing a random recommendation.
+ * Sponsored items are unchanged
+ */
+ randomizeOrganicContentEvent(item) {
+ if (item.is_sponsored) {
+ return item; // Don't alter spocs
+ }
+ const epsilon =
+ this._privateRandomContentTelemetryProbablityValues?.epsilon ?? 0;
+ if (!epsilon) {
+ // Randomization is not configured (epsilon missing or 0).
+ return item;
+ }
+ if (!("n" in this._privateRandomContentTelemetryProbablityValues)) {
+ // We cache the number of items in the feed rather than recounting on every event.
+ // The value is refreshed below whenever a replacement is actually drawn.
+ this._privateRandomContentTelemetryProbablityValues.n =
+ this.getRecommendationCount();
+ }
+ const { n } = this._privateRandomContentTelemetryProbablityValues;
+ if (!n || n < 10) {
+ // None or very few articles. We're in an intermediate or error state.
+ return item;
+ }
+ const cache_key = `probability_${epsilon}_${n}`; // Memoized replacement probability for this (epsilon, n)
+ if (!(cache_key in this._privateRandomContentTelemetryProbablityValues)) {
+ this._privateRandomContentTelemetryProbablityValues[cache_key] = {
+ // Probability of swapping in a uniformly random item. An overflowing
+ // Math.exp(epsilon) yields 0 here, i.e. the item is never replaced.
+ p: n / (Math.exp(epsilon) + n - 1),
+ };
+ }
+
+ const { p } =
+ this._privateRandomContentTelemetryProbablityValues[cache_key];
+ if (!lazy.NewTabContentPing.decideWithProbability(p)) {
+ return item;
+ }
+ const allRecs = this.getAllRecommendations(); // Fresh list; its size may differ from the cached n
+ if (!allRecs.length) {
+ return item;
+ }
+
+ // Update the cached number of recs for the next round of checks
+ this._privateRandomContentTelemetryProbablityValues.n = allRecs.length;
+
+ const randomIndex = lazy.NewTabContentPing.secureRandIntInRange(
+ allRecs.length
+ );
+ let randomItem = allRecs[randomIndex];
+ const resultItem = {
+ ...item,
+ topic: randomItem.topic,
+ corpus_item_id: randomItem.corpus_item_id,
+ };
+ // If we're replacing a non top stories item, then assign the appropriate
+ // section to the item
+ if (resultItem.section !== TOP_STORIES_SECTION_NAME && randomItem.section) {
+ resultItem.section = randomItem.section;
+ resultItem.section_position = randomItem.section_position;
+ }
+ return resultItem;
+ }
+
handleDiscoveryStreamUserEvent(action) {
this.handleUserEvent({
...action,
@@ -856,9 +962,11 @@ export class TelemetryFeed {
),
newtab_visit_id: session.session_id,
});
-
if (this.privatePingEnabled) {
- this.newtabContentPing.recordEvent("click", gleanData);
+ this.newtabContentPing.recordEvent(
+ "click",
+ this.randomizeOrganicContentEvent(gleanData)
+ );
}
if (shim) {
if (this.canSendUnifiedAdsSpocCallbacks) {
@@ -1186,6 +1294,17 @@ export class TelemetryFeed {
if (inferredInterests) {
privateMetrics.inferredInterests = inferredInterests;
}
+ this._privateRandomContentTelemetryProbablityValues = {
+ epsilon:
+ (prefs?.trainhopConfig?.newtabPrivatePing?.[
+ TRAINHOP_PREF_RANDOM_CONTENT_PROBABILITY_MICRO
+ ] || 0) / 1e6,
+ };
+ const impressionCap =
+ prefs?.trainhopConfig?.newtabPrivatePing?.[
+ TRAINHOP_PREF_DAILY_EVENT_CAP
+ ] || 0;
+ this.newtabContentPing.setMaxEventsPerDay(impressionCap);
// When we have a coarse interest vector we want to make sure there isn't
// anything additionaly identifable as a unique identifier. Therefore,
// when interest vectors are used we reduce our context profile somewhat.
@@ -1958,7 +2077,10 @@ export class TelemetryFeed {
newtab_visit_id: session.session_id,
});
if (this.privatePingEnabled) {
- this.newtabContentPing.recordEvent("impression", gleanData);
+ this.newtabContentPing.recordEvent(
+ "impression",
+ this.randomizeOrganicContentEvent(gleanData)
+ );
}
}
if (tile.shim) {
diff --git a/browser/extensions/newtab/test/xpcshell/test_NewTabContentPing.js b/browser/extensions/newtab/test/xpcshell/test_NewTabContentPing.js
@@ -5,6 +5,7 @@
ChromeUtils.defineESModuleGetters(this, {
NewTabContentPing: "resource://newtab/lib/NewTabContentPing.sys.mjs",
+ sinon: "resource://testing-common/Sinon.sys.mjs",
});
const MAX_SUBMISSION_DELAY = Services.prefs.getIntPref(
@@ -23,6 +24,7 @@ add_setup(() => {
*/
add_task(async function test_recordEvent_sanitizes_and_buffers() {
let ping = new NewTabContentPing();
+ ping.resetStats();
// These fields are expected to be stripped before they get recorded in the
// event.
@@ -33,6 +35,7 @@ add_task(async function test_recordEvent_sanitizes_and_buffers() {
recommended_at: "1748877997039",
received_rank: 0,
event_source: "card",
+ layout_name: "card-layout",
};
// These fields are expected to survive the sanitization.
@@ -52,7 +55,6 @@ add_task(async function test_recordEvent_sanitizes_and_buffers() {
ping.recordEvent("click", {
// These should be sanitized out.
...sanitizedFields,
-
...expectedFields,
});
@@ -64,6 +66,7 @@ add_task(async function test_recordEvent_sanitizes_and_buffers() {
await GleanPings.newtabContent.testSubmission(
() => {
+ // Test callback
let [clickEvent] = Glean.newtabContent.click.testGetValue();
Assert.ok(clickEvent, "Found click event.");
for (let fieldName of Object.keys(sanitizedFields)) {
@@ -89,8 +92,9 @@ add_task(async function test_recordEvent_sanitizes_and_buffers() {
);
}
},
- () => {
- let delay = ping.testOnlyForceFlush();
+ async () => {
+ // Submit Callback
+ let delay = await ping.testOnlyForceFlush();
Assert.greater(delay, 1000, "Picked a random value greater than 1000");
Assert.less(
delay,
@@ -100,3 +104,136 @@ add_task(async function test_recordEvent_sanitizes_and_buffers() {
}
);
});
+
+/**
+ * Tests that flushing recorded events stops sending once the configured daily
+ * maximum has been reached, that the cap survives re-creating the
+ * NewTabContentPing instance, and that sending resumes after the 24 hour
+ * stats period has elapsed.
+ */
+add_task(async function test_recordEvent_caps_events() {
+ const MAX_EVENTS = 2;
+
+ let ping = new NewTabContentPing();
+ ping.setMaxEventsPerDay(MAX_EVENTS);
+ // resetStats is async; wait for the persisted counter to be cleared so the
+ // flushes below start from a known state.
+ await ping.resetStats();
+
+ // These fields are expected to survive the sanitization.
+ let expectedFields = {
+ section: "business",
+ corpus_item_id: "7fc404a1-74ec-450b-8eef-4f52b45ec510",
+ topic: "business",
+ };
+
+ ping.recordEvent("click", {
+ ...expectedFields,
+ });
+
+ ping.recordEvent("impression", {
+ ...expectedFields,
+ });
+
+ let extraMetrics = {
+ utcOffset: "1",
+ experimentBranch: "some-branch",
+ };
+ ping.scheduleSubmission(extraMetrics);
+
+ await GleanPings.newtabContent.testSubmission(
+ () => {
+ // Test Callback
+ let [clickEvent] = Glean.newtabContent.click.testGetValue();
+ Assert.ok(clickEvent, "Found click event.");
+ let [impression] = Glean.newtabContent.impression.testGetValue();
+ Assert.ok(impression, "Found impression event.");
+ },
+ async () => {
+ // Submit Callback
+ await ping.testOnlyForceFlush();
+ }
+ );
+
+ Assert.equal(
+ ping.testOnlyCurInstanceEventCount,
+ 2,
+ "Expected number of events sent"
+ );
+
+ ping.recordEvent("section_impression", {
+ ...expectedFields,
+ });
+ ping.scheduleSubmission(extraMetrics);
+ await ping.testOnlyForceFlush();
+
+ Assert.equal(ping.testOnlyCurInstanceEventCount, 2, "No new events sent");
+
+ ping = new NewTabContentPing();
+ ping.setMaxEventsPerDay(MAX_EVENTS);
+
+ Assert.equal(ping.testOnlyCurInstanceEventCount, 0, "Event count reset");
+
+ ping.recordEvent("section_impression", {
+ ...expectedFields,
+ });
+ ping.scheduleSubmission(extraMetrics);
+ await ping.testOnlyForceFlush();
+
+ Assert.equal(
+ ping.testOnlyCurInstanceEventCount,
+ 0,
+ "No new events after re-creating NewTabContentPing class"
+ );
+
+ // Some time has passed
+ let sandbox = sinon.createSandbox();
+ try {
+ sandbox.stub(NewTabContentPing.prototype, "Date").returns({
+ now: () => Date.now() + 3600 * 25 * 1000, // 25 hours in future
+ });
+
+ ping.scheduleSubmission(extraMetrics);
+
+ ping.recordEvent("click", {
+ ...expectedFields,
+ });
+
+ await GleanPings.newtabContent.testSubmission(
+ () => {
+ // Test Callback
+ let [click] = Glean.newtabContent.click.testGetValue();
+ Assert.ok(click, "Found click event.");
+ },
+ async () => {
+ // Submit Callback
+ await ping.testOnlyForceFlush();
+ }
+ );
+ Assert.equal(
+ ping.testOnlyCurInstanceEventCount,
+ 1,
+ "Event sending restored"
+ );
+ } finally {
+ // Always undo the Date stub so a failed assertion cannot leak it into
+ // later tasks.
+ sandbox.restore();
+ }
+});
+
+add_task(function test_decideWithProbability() {
+ Assert.equal(NewTabContentPing.decideWithProbability(-0.1), false);
+ Assert.equal(NewTabContentPing.decideWithProbability(1.1), true);
+});
+
+add_task(function test_shuffleArray() {
+ const shuffled = NewTabContentPing.shuffleArray([1, 3, 5]);
+ Assert.equal(shuffled.length, 3);
+ Assert.ok(shuffled.includes(3), "Shuffled item in array");
+ Assert.ok(shuffled.includes(1), "Shuffled item in array");
+ Assert.ok(shuffled.includes(5), "Shuffled item in array");
+ Assert.equal(NewTabContentPing.shuffleArray([]).length, 0);
+});
+
+add_task(async function test_secureRandIntInRange() {
+ for (let k = 0; k < 10; k++) {
+ Assert.greater(
+ 10,
+ NewTabContentPing.secureRandIntInRange(10),
+ "Random value in range"
+ );
+ Assert.less(
+ -1,
+ NewTabContentPing.secureRandIntInRange(10),
+ "Random value in range"
+ );
+ }
+});
diff --git a/toolkit/components/nimbus/FeatureManifest.yaml b/toolkit/components/nimbus/FeatureManifest.yaml
@@ -1268,7 +1268,6 @@ newtabPrivatePing:
pref: browser.newtabpage.activity-stream.telemetry.privatePing.maxSubmissionDelayMs
description: >-
The maximum range for the random delay from scheduling the newtab-content ping to actually sending it. The minimum is 1000 by default. This is in milliseconds.
-
newtabPublisherFavicons:
description: Enabled publihser favicons on cards in newtab
owner: nbarrett@mozilla.com