tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 1f301699e06a655e43be19615e628ac95912a2a7
parent 0b1b8ae99ebb40fd76e3e94ab3e6d7f3ae9b8d1d
Author: Rolf Rando <rrando@mozilla.com>
Date:   Wed,  8 Oct 2025 15:21:29 +0000

Bug 1990399 - Add randomness to content items in newtab_content telemetry r=home-newtab-reviewers,nbarrett

• Add randomization to telemetry for non-spoc (non-sponsored) items, with a probability that is a function of a target epsilon value and the number of content items in the user's feed

• Add the ability to stop sending events with the newtab_content ping once a configurable daily event count has been reached. The count resets every 24 hours.

• Shuffle newtab_content items for additional privacy

Differential Revision: https://phabricator.services.mozilla.com/D265928

Diffstat:
Mbrowser/extensions/newtab/lib/NewTabContentPing.sys.mjs | 135+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Mbrowser/extensions/newtab/lib/TelemetryFeed.sys.mjs | 130++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Mbrowser/extensions/newtab/test/xpcshell/test_NewTabContentPing.js | 143+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mtoolkit/components/nimbus/FeatureManifest.yaml | 1-
4 files changed, 388 insertions(+), 21 deletions(-)

diff --git a/browser/extensions/newtab/lib/NewTabContentPing.sys.mjs b/browser/extensions/newtab/lib/NewTabContentPing.sys.mjs @@ -8,6 +8,7 @@ const lazy = {}; ChromeUtils.defineESModuleGetters(lazy, { DeferredTask: "resource://gre/modules/DeferredTask.sys.mjs", + PersistentCache: "resource://newtab/lib/PersistentCache.sys.mjs", }); XPCOMUtils.defineLazyPreferenceGetter( @@ -17,10 +18,31 @@ XPCOMUtils.defineLazyPreferenceGetter( 5000 ); +const EVENT_STATS_KEY = "event_stats"; +const CACHE_KEY = "newtab_content_event_stats"; + +const EVENT_STATS_PERIOD_MS = 60 * 60 * 24 * 1000; +const MAX_UINT32 = 0xffffffff; + export class NewTabContentPing { #eventBuffer = []; #deferredTask = null; #lastDelaySelection = 0; + #maxDailyEvents = 0; + #curInstanceEventsSent = 0; // Used for tests + + constructor() { + this.#maxDailyEvents = 0; + this.cache = this.PersistentCache(CACHE_KEY, true); + } + + /** + * Set the maximum number of events to send in a 24 hour period + * @param {int} maxEvents + */ + setMaxEventsPerDay(maxEvents) { + this.#maxDailyEvents = maxEvents || 0; + } /** * Adds a event recording for Glean.newtabContent to the internal buffer. @@ -54,8 +76,8 @@ export class NewTabContentPing { if (!this.#deferredTask) { this.#lastDelaySelection = this.#generateRandomSubmissionDelayMs(); - this.#deferredTask = new lazy.DeferredTask(() => { - this.#flushEventsAndSubmit(); + this.#deferredTask = new lazy.DeferredTask(async () => { + await this.#flushEventsAndSubmit(); }, this.#lastDelaySelection); this.#deferredTask.arm(); } @@ -71,24 +93,74 @@ export class NewTabContentPing { } /** + * Resets the impression stats object of the Newtab_content ping and returns it. + */ + async resetStats() { + const eventStats = { + count: 0, + lastUpdated: this.Date().now(), + }; + await this.cache.set(EVENT_STATS_KEY, eventStats); + return eventStats; + } + + /** + * Randomly shuffles the elements of an array in place using the Fisher–Yates algorithm. 
+ * @param {Array} array - The array to shuffle. This array will be modified. + * @returns {Array} The same array instance, shuffled randomly. + */ + static shuffleArray(array) { + for (let i = array.length - 1; i > 0; i--) { + const j = Math.floor(Math.random() * (i + 1)); + const temp = array[i]; + array[i] = array[j]; + array[j] = temp; + } + return array; + } + /** * Called by the DeferredTask when the randomly selected delay has elapsed * after calling scheduleSubmission. */ - #flushEventsAndSubmit() { + async #flushEventsAndSubmit() { this.#deferredTask = null; + // See if we have no event stats or the stats period has cycled + let eventStats = await this.cache.get(EVENT_STATS_KEY, {}); + if ( + !eventStats?.lastUpdated || + !(this.Date().now() - eventStats.lastUpdated < EVENT_STATS_PERIOD_MS) + ) { + eventStats = await this.resetStats(); + } + let events = this.#eventBuffer; this.#eventBuffer = []; + if (this.#maxDailyEvents > 0) { + if (eventStats?.count >= this.#maxDailyEvents) { + // Drop the events. Don't send. + return; + } + } + eventStats.count += events.length; + await this.cache.set(EVENT_STATS_KEY, eventStats); - for (let [eventName, data] of events) { + for (let [eventName, data] of NewTabContentPing.shuffleArray(events)) { try { Glean.newtabContent[eventName].record(data); } catch (e) { console.error(e); } } - GleanPings.newtabContent.submit(); + this.#curInstanceEventsSent += events.length; + } + + /** + * Returns number of events sent through Glean in this instance of the class. 
+ */ + get testOnlyCurInstanceEventCount() { + return this.#curInstanceEventsSent; } /** @@ -151,24 +223,49 @@ export class NewTabContentPing { : 5000; const RANGE = MAX_SUBMISSION_DELAY - MIN_SUBMISSION_DELAY + 1; - const MAX_UINT32 = 0xffffffff; + const selection = NewTabContentPing.secureRandIntInRange(RANGE); + return MIN_SUBMISSION_DELAY + (selection % RANGE); + } + /** + * Returns a secure random number between 0 and range + * @param {int} range Integer value range + * @returns {int} Random value between 0 and range non-inclusive + */ + static secureRandIntInRange(range) { // To ensure a uniform distribution, we discard values that could introduce // modulo bias. We divide the 2^32 range into equal-sized "buckets" and only // accept random values that fall entirely within one of these buckets. // This ensures each possible output in the target range is equally likely. - const BUCKET_SIZE = Math.floor(MAX_UINT32 / RANGE); - const MAX_ACCEPTABLE = BUCKET_SIZE * RANGE; + + const BUCKET_SIZE = Math.floor(MAX_UINT32 / range); + const MAX_ACCEPTABLE = BUCKET_SIZE * range; let selection; let randomValues = new Uint32Array(1); - do { crypto.getRandomValues(randomValues); [selection] = randomValues; } while (selection >= MAX_ACCEPTABLE); + return selection % range; + } - return MIN_SUBMISSION_DELAY + (selection % RANGE); + /** + * Returns true or false with a certain proability specified + * @param {Number} prob Probability + * @returns {boolean} Random boolean result of probability prob + */ + static decideWithProbability(prob) { + if (prob <= 0) { + return false; + } + if (prob >= 1) { + return true; + } + const randomValues = new Uint32Array(1); + crypto.getRandomValues(randomValues); + const random = randomValues[0] / MAX_UINT32; + return random < prob; } /** @@ -182,9 +279,9 @@ export class NewTabContentPing { * The originally selected random delay for submitting the newtab-content * ping. 
* @throws {Error} - * Throws if this is called when no submission has been scheduled yet. + * Function throws an exception if this is called when no submission has been scheduled yet. */ - testOnlyForceFlush() { + async testOnlyForceFlush() { if (!Cu.isInAutomation) { return 0; } @@ -192,9 +289,21 @@ export class NewTabContentPing { if (this.#deferredTask) { this.#deferredTask.disarm(); this.#deferredTask = null; - this.#flushEventsAndSubmit(); + await this.#flushEventsAndSubmit(); return this.#lastDelaySelection; } throw new Error("No submission was scheduled."); } } + +/** + * Creating a thin wrapper around PersistentCache, and Date. + * This makes it easier for us to write automated tests + */ +NewTabContentPing.prototype.PersistentCache = (...args) => { + return new lazy.PersistentCache(...args); +}; + +NewTabContentPing.prototype.Date = () => { + return Date; +}; diff --git a/browser/extensions/newtab/lib/TelemetryFeed.sys.mjs b/browser/extensions/newtab/lib/TelemetryFeed.sys.mjs @@ -78,6 +78,26 @@ const PREF_SYSTEM_INFERRED_PERSONALIZATION = const PREF_SECTIONS_PERSONALIZATION_ENABLED = "discoverystream.sections.personalization.enabled"; +const TOP_STORIES_SECTION_NAME = "top_stories_section"; + +/** + Additional parameters defined in the newTabTrainHop experimenter method + + trainhopConfig.newtabPrivatePing.randomContentProbabilityEpsilonMicro + Epsilon for randomizing content impression and click telemetry using the RandomizedReponse method + in the newtab_content ping , as integer multipled by 1e6 + + trainhopConfig.newtabPrivatePing.dailyEventCap + Maximum newtab_content events that can be sent in 24 hour period. +*/ +const TRAINHOP_PREF_RANDOM_CONTENT_PROBABILITY_MICRO = + "randomContentProbabilityEpsilonMicro"; + +/** + * Maximum newtab_content events that can be sent in 24 hour period. 
+ */ +const TRAINHOP_PREF_DAILY_EVENT_CAP = "dailyEventCap"; + // This is a mapping table between the user preferences and its encoding code export const USER_PREFS_ENCODING = { showSearch: 1 << 0, @@ -134,6 +154,8 @@ export class TelemetryFeed { this._aboutHomeSeen = false; this._classifySite = classifySite; this._browserOpenNewtabStart = null; + this._privateRandomContentTelemetryProbablityValues = {}; + this.newtabContentPing = new lazy.NewTabContentPing(); XPCOMUtils.defineLazyPreferenceGetter( @@ -475,7 +497,6 @@ export class TelemetryFeed { */ async endSession(portID) { const session = this.sessions.get(portID); - if (!session) { // It's possible the tab was never visible – in which case, there was no user session. return; @@ -760,6 +781,91 @@ export class TelemetryFeed { } } + /** + * @returns Flat list of all articles for the New Tab. Does not include spocs (ads) + */ + getAllRecommendations() { + const merinoData = this.store?.getState()?.DiscoveryStream?.feeds.data; + return Object.values(merinoData ?? {}).flatMap( + feed => feed?.data?.recommendations ?? [] + ); + } + + /** + * @returns Number of articles for the New Tab. Does not include spocs (ads) + */ + getRecommendationCount() { + const merinoData = this.store?.getState()?.DiscoveryStream?.feeds.data; + return Object.values(merinoData ?? {}).reduce( + (count, feed) => count + (feed.data?.recommendations?.length || 0), + 0 + ); + } + + /** + * Occasionally replaces a content item with another that is in the feed. + * @param {*} item + * @returns Same item, but another item occasionally based on probablility setting. + * Sponsored items are unchanged + */ + randomizeOrganicContentEvent(item) { + if (item.is_sponsored) { + return item; // Don't alter spocs + } + const epsilon = + this._privateRandomContentTelemetryProbablityValues?.epsilon ?? 
0; + if (!epsilon) { + return item; + } + if (!("n" in this._privateRandomContentTelemetryProbablityValues)) { + // We cache the number of items in the feed because it's computationally expensive to compute. + // This may not be ideal, but the number of content items typically is very similar over reloads + this._privateRandomContentTelemetryProbablityValues.n = + this.getRecommendationCount(); + } + const { n } = this._privateRandomContentTelemetryProbablityValues; + if (!n || n < 10) { + // None or very view articles. We're in an intermediate or errorstate. + return item; + } + const cache_key = `probability_${epsilon}_${n}`; // Lookup of probability for a item size + if (!(cache_key in this._privateRandomContentTelemetryProbablityValues)) { + this._privateRandomContentTelemetryProbablityValues[cache_key] = { + p: Math.exp(epsilon) / (Math.exp(epsilon) + n - 1), + }; + } + + const { p } = + this._privateRandomContentTelemetryProbablityValues[cache_key]; + if (!lazy.NewTabContentPing.decideWithProbability(p)) { + return item; + } + const allRecs = this.getAllRecommendations(); // Number of recommendations has changed + if (!allRecs.length) { + return item; + } + + // Update number of recs for next round of checks for next round + this._privateRandomContentTelemetryProbablityValues.n = allRecs.length; + + const randomIndex = lazy.NewTabContentPing.secureRandIntInRange( + allRecs.length + ); + let randomItem = allRecs[randomIndex]; + const resultItem = { + ...item, + topic: randomItem.topic, + corpus_item_id: randomItem.corpus_item_id, + }; + // If we're replacing a non top stories item, then assign the appropriate + // section to the item + if (resultItem.section !== TOP_STORIES_SECTION_NAME && randomItem.section) { + resultItem.section = randomItem.section; + resultItem.section_position = randomItem.section_position; + } + return resultItem; + } + handleDiscoveryStreamUserEvent(action) { this.handleUserEvent({ ...action, @@ -856,9 +962,11 @@ export class 
TelemetryFeed { ), newtab_visit_id: session.session_id, }); - if (this.privatePingEnabled) { - this.newtabContentPing.recordEvent("click", gleanData); + this.newtabContentPing.recordEvent( + "click", + this.randomizeOrganicContentEvent(gleanData) + ); } if (shim) { if (this.canSendUnifiedAdsSpocCallbacks) { @@ -1186,6 +1294,17 @@ export class TelemetryFeed { if (inferredInterests) { privateMetrics.inferredInterests = inferredInterests; } + this._privateRandomContentTelemetryProbablityValues = { + epsilon: + (prefs?.trainhopConfig?.newtabPrivatePing?.[ + TRAINHOP_PREF_RANDOM_CONTENT_PROBABILITY_MICRO + ] || 0) / 1e6, + }; + const impressionCap = + prefs?.trainhopConfig?.newtabPrivatePing?.[ + TRAINHOP_PREF_DAILY_EVENT_CAP + ] || 0; + this.newtabContentPing.setMaxEventsPerDay(impressionCap); // When we have a coarse interest vector we want to make sure there isn't // anything additionaly identifable as a unique identifier. Therefore, // when interest vectors are used we reduce our context profile somewhat. @@ -1958,7 +2077,10 @@ export class TelemetryFeed { newtab_visit_id: session.session_id, }); if (this.privatePingEnabled) { - this.newtabContentPing.recordEvent("impression", gleanData); + this.newtabContentPing.recordEvent( + "impression", + this.randomizeOrganicContentEvent(gleanData) + ); } } if (tile.shim) { diff --git a/browser/extensions/newtab/test/xpcshell/test_NewTabContentPing.js b/browser/extensions/newtab/test/xpcshell/test_NewTabContentPing.js @@ -5,6 +5,7 @@ ChromeUtils.defineESModuleGetters(this, { NewTabContentPing: "resource://newtab/lib/NewTabContentPing.sys.mjs", + sinon: "resource://testing-common/Sinon.sys.mjs", }); const MAX_SUBMISSION_DELAY = Services.prefs.getIntPref( @@ -23,6 +24,7 @@ add_setup(() => { */ add_task(async function test_recordEvent_sanitizes_and_buffers() { let ping = new NewTabContentPing(); + ping.resetStats(); // These fields are expected to be stripped before they get recorded in the // event. 
@@ -33,6 +35,7 @@ add_task(async function test_recordEvent_sanitizes_and_buffers() { recommended_at: "1748877997039", received_rank: 0, event_source: "card", + layout_name: "card-layout", }; // These fields are expected to survive the sanitization. @@ -52,7 +55,6 @@ add_task(async function test_recordEvent_sanitizes_and_buffers() { ping.recordEvent("click", { // These should be sanitized out. ...sanitizedFields, - ...expectedFields, }); @@ -64,6 +66,7 @@ add_task(async function test_recordEvent_sanitizes_and_buffers() { await GleanPings.newtabContent.testSubmission( () => { + // Test callback let [clickEvent] = Glean.newtabContent.click.testGetValue(); Assert.ok(clickEvent, "Found click event."); for (let fieldName of Object.keys(sanitizedFields)) { @@ -89,8 +92,9 @@ add_task(async function test_recordEvent_sanitizes_and_buffers() { ); } }, - () => { - let delay = ping.testOnlyForceFlush(); + async () => { + // Submit Callback + let delay = await ping.testOnlyForceFlush(); Assert.greater(delay, 1000, "Picked a random value greater than 1000"); Assert.less( delay, @@ -100,3 +104,136 @@ add_task(async function test_recordEvent_sanitizes_and_buffers() { } ); }); + +/** + * Tests that the recordEvent caps the maximum number of events posted to a maxiumum + */ +add_task(async function test_recordEvent_caps_events() { + const MAX_EVENTS = 2; + + let ping = new NewTabContentPing(); + ping.setMaxEventsPerDay(MAX_EVENTS); + ping.resetStats(); + + // These fields are expected to survive the sanitization. 
+ let expectedFields = { + section: "business", + corpus_item_id: "7fc404a1-74ec-450b-8eef-4f52b45ec510", + topic: "business", + }; + + ping.recordEvent("click", { + ...expectedFields, + }); + + ping.recordEvent("impression", { + ...expectedFields, + }); + + let extraMetrics = { + utcOffset: "1", + experimentBranch: "some-branch", + }; + ping.scheduleSubmission(extraMetrics); + + await GleanPings.newtabContent.testSubmission( + () => { + // Test Callback + let [clickEvent] = Glean.newtabContent.click.testGetValue(); + Assert.ok(clickEvent, "Found click event."); + let [impression] = Glean.newtabContent.impression.testGetValue(); + Assert.ok(impression, "Found impression event."); + }, + async () => { + // Submit Callback + await ping.testOnlyForceFlush(); + } + ); + + Assert.equal( + ping.testOnlyCurInstanceEventCount, + 2, + "Expected number of events sent" + ); + + ping.recordEvent("section_impression", { + ...expectedFields, + }); + ping.scheduleSubmission(extraMetrics); + await ping.testOnlyForceFlush(); + + Assert.equal(ping.testOnlyCurInstanceEventCount, 2, "No new events sent"); + + ping = new NewTabContentPing(); + ping.setMaxEventsPerDay(MAX_EVENTS); + + Assert.equal(ping.testOnlyCurInstanceEventCount, 0, "Event count reset"); + + ping.recordEvent("section_impression", { + ...expectedFields, + }); + ping.scheduleSubmission(extraMetrics); + await ping.testOnlyForceFlush(); + + Assert.equal( + ping.testOnlyCurInstanceEventCount, + 0, + "No new events after re-creating NewTabContentPing class" + ); + + // Some time has passed + let sandbox = sinon.createSandbox(); + + sandbox.stub(NewTabContentPing.prototype, "Date").returns({ + now: () => Date.now() + 3600 * 25 * 1000, // 25 hours in future + }); + + ping.scheduleSubmission(extraMetrics); + + ping.recordEvent("click", { + ...expectedFields, + }); + + await GleanPings.newtabContent.testSubmission( + () => { + // Test Callback + let [click] = Glean.newtabContent.click.testGetValue(); + Assert.ok(click, "Found 
click event."); + }, + async () => { + // Submit Callback + await ping.testOnlyForceFlush(); + } + ); + Assert.equal(ping.testOnlyCurInstanceEventCount, 1, "Event sending restored"); + sandbox.restore(); +}); + +add_task(function test_decideWithProbability() { + Assert.equal(NewTabContentPing.decideWithProbability(-0.1), false); + Assert.equal(NewTabContentPing.decideWithProbability(1.1), true); +}); + +add_task(function test_shuffleArray() { + const shuffled = NewTabContentPing.shuffleArray([1, 3, 5]); + Assert.equal(shuffled.length, 3); + Assert.ok(shuffled.includes(3), "Shuffled item in array"); + Assert.ok(shuffled.includes(1), "Shuffled item in array"); + Assert.ok(shuffled.includes(5), "Shuffled item in array"); + Assert.equal(NewTabContentPing.shuffleArray([]).length, 0); +}); + +add_task(async function test_secureRandIntInRange() { + for (let k = 0; k < 10; k++) { + Assert.greater( + 10, + NewTabContentPing.secureRandIntInRange(10), + "Random value in range" + ); + Assert.less( + -1, + NewTabContentPing.secureRandIntInRange(10), + "Random value in range" + ); + } +}); diff --git a/toolkit/components/nimbus/FeatureManifest.yaml b/toolkit/components/nimbus/FeatureManifest.yaml @@ -1268,7 +1268,6 @@ newtabPrivatePing: pref: browser.newtabpage.activity-stream.telemetry.privatePing.maxSubmissionDelayMs description: >- The maximum range for the random delay from scheduling the newtab-content ping to actually sending it. The minimum is 1000 by default. This is in milliseconds. - newtabPublisherFavicons: description: Enabled publihser favicons on cards in newtab owner: nbarrett@mozilla.com