tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 46ce02b5c7f015ba91fee158c305c05a0bc17fa9
parent 2665adee74b624d68ed0fa0153396945b1f1a1e8
Author: Rolf Rando <rrando@mozilla.com>
Date:   Wed,  8 Oct 2025 21:27:33 +0000

Bug 1992913 - Add interest vector normalization support for CTR-based inferred interest model r=mlplyler,home-newtab-reviewers,nbarrett

Current inferred models sent to the server use thresholded/randomized raw click-through rates, but we don't apply any normalization to ensure that, for example, both very-high-CTR and low-CTR users end up with vectors containing useful information.

This patch allows rescaling/normalization to be applied to the interest vector before thresholding/randomness is added.

Differential Revision: https://phabricator.services.mozilla.com/D267723

Diffstat:
Mbrowser/extensions/newtab/lib/InferredModel/FeatureModel.sys.mjs | 62+++++++++++++++++++++++++++++++++++++++++++++++++-------------
Mbrowser/extensions/newtab/lib/InferredPersonalizationFeed.sys.mjs | 5+++--
Mbrowser/extensions/newtab/test/xpcshell/test_InferredFeatureModel.js | 113++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 164 insertions(+), 16 deletions(-)

diff --git a/browser/extensions/newtab/lib/InferredModel/FeatureModel.sys.mjs b/browser/extensions/newtab/lib/InferredModel/FeatureModel.sys.mjs @@ -245,8 +245,10 @@ export class FeatureModel { interestVectorModel, tileImportance, modelType, - rescale = true, + rescale = false, logScale = false, + normalize = false, + normalizeL1 = false, privateFeatures = [], }) { this.modelId = modelId; @@ -255,6 +257,8 @@ export class FeatureModel { this.interestVectorModel = interestVectorModel; this.rescale = rescale; this.logScale = logScale; + this.normalize = normalize; + this.normalizeL1 = normalizeL1; this.modelType = modelType; this.privateFeatures = privateFeatures; } @@ -273,6 +277,7 @@ export class FeatureModel { tileImportance, interestVectorModel, normalize: json.normalize, + normalizeL1: json.normalize_l1, rescale: json.rescale, logScale: json.log_scale, clickScale: json.clickScale, @@ -316,6 +321,7 @@ export class FeatureModel { computeInterestVector({ dataForIntervals, indexSchema, + applyPostProcessing = false, applyThresholding = false, applyDifferentialPrivacy = false, }) { @@ -365,16 +371,8 @@ export class FeatureModel { delete totalResults[SPECIAL_FEATURE_CLICK]; } - if (this.logScale) { - totalResults = dictApply(totalResults, x => Math.log(x + 1)); - } - - if (this.rescale) { - let divisor = Math.max(...Object.values(totalResults)); - if (divisor <= 0.001) { - divisor = 0.001; - } - totalResults = dictApply(totalResults, x => x / divisor); + if (applyPostProcessing) { + totalResults = this.applyPostProcessing(totalResults); } if (this.clickScale && numClicks > 0) { @@ -422,6 +420,40 @@ export class FeatureModel { } } + applyPostProcessing(valueDict) { + let res = valueDict; + if (this.logScale) { + res = dictApply(valueDict, x => Math.log(x + 1)); + } + + if (this.rescale) { + let divisor = Math.max(...Object.values(res)); + if (divisor <= 1e-6) { + divisor = 1e-6; + } + res = dictApply(res, x => x / divisor); + } + + if (this.normalizeL1) { + let 
magnitude = Object.values(res).reduce((sum, c) => sum + c, 0); + if (magnitude <= 1e-6) { + magnitude = 1e-6; + } + res = dictApply(res, x => x / magnitude); + } + + if (this.normalize) { + let magnitude = Math.sqrt( + Object.values(res).reduce((sum, c) => sum + c ** 2, 0) + ); + if (magnitude <= 1e-6) { + magnitude = 1e-6; + } + res = dictApply(res, x => x / magnitude); + } + return res; + } + /** * Computes interest vectors based on click-through rate (CTR) by dividing the click dictionary * by the impression dictionary. Applies differential privacy using Laplace noise, and optionally @@ -446,7 +478,8 @@ export class FeatureModel { model_id = "unknown", condensePrivateValues = true, }) { - const inferredInterests = divideDict(clicks, impressions); + let inferredInterests = divideDict(clicks, impressions); + const originalInterestValues = { ...inferredInterests }; const resultObject = { @@ -455,7 +488,9 @@ export class FeatureModel { if (this.supportsCoarseInterests()) { // always true - const coarseValues = { ...originalInterestValues }; + const coarseValues = this.applyPostProcessing({ + ...originalInterestValues, + }); this.applyThresholding(coarseValues, false); resultObject.coarseInferredInterests = { ...coarseValues, model_id }; } @@ -469,6 +504,7 @@ export class FeatureModel { this.privateFeatures.includes(key) ) ); + this.applyPostProcessing({ ...originalInterestValues }); } this.applyThresholding(coarsePrivateValues, true); diff --git a/browser/extensions/newtab/lib/InferredPersonalizationFeed.sys.mjs b/browser/extensions/newtab/lib/InferredPersonalizationFeed.sys.mjs @@ -139,14 +139,15 @@ export class InferredPersonalizationFeed { intervals, CLICK_TABLE ); - + const isClickModel = model.modelType === MODEL_TYPE.CLICKS; const interests = model.computeInterestVectors({ dataForIntervals: aggClickPerInterval, indexSchema: schema, model_id: inferredModel.model_id, + applyPostProcessing: isClickModel, }); - if (model.modelType === MODEL_TYPE.CLICKS) { + if 
(isClickModel) { return interests; } diff --git a/browser/extensions/newtab/test/xpcshell/test_InferredFeatureModel.js b/browser/extensions/newtab/test/xpcshell/test_InferredFeatureModel.js @@ -11,6 +11,20 @@ ChromeUtils.defineESModuleGetters(this, { "resource://newtab/lib/InferredModel/FeatureModel.sys.mjs", }); +/** + * Compares two dictionaries up to decimalPoints decimal points + * + * @param {Object} a + * @param {Object} b + * @param {number} decimalPoints + * @returns {boolean} True if vectors are similar + */ +function vectorLooseEquals(a, b, decimalPoints = 2) { + return Object.entries(a).every( + ([k, v]) => v.toFixed(decimalPoints) === b[k].toFixed(decimalPoints) + ); +} + add_task(function test_dictAdd() { let dict = {}; dictAdd(dict, "a", 3); @@ -330,13 +344,14 @@ add_task(function test_modelChecks() { ); }); -add_task(function test_computeInterestVector() { +add_task(function test_computeInterestVectorClickModel() { const modelData = { ...jsonModelData, rescale: true }; const model = FeatureModel.fromJSON(modelData); const result = model.computeInterestVector({ dataForIntervals: SQL_RESULT_DATA, indexSchema: SCHEMA, applyThresholding: false, + applyPostProcessing: true, }); Assert.ok("parenting" in result, "Result should contain parenting"); Assert.ok("news_reader" in result, "Result should contain news_reader"); @@ -511,6 +526,82 @@ const ctrModelDataNoDP = { }, }; +const ctrModelData = { + model_type: "ctr", + noise_scale: 0, + day_time_weighting: { + days: [3, 14, 45], + relative_weight: [1, 0.5, 0.3], + }, + interest_vector: { + news_reader: { + features: { pub_nytimes_com: 0.5, pub_cnn_com: 0.5 }, + thresholds: [0.3, 0, 8], + diff_p: 1, + diff_q: 0, + }, + parenting: { + features: { parenting: 1 }, + thresholds: [0.3, 0, 8], + diff_p: 1, + diff_q: 0, + }, + }, +}; + +add_task(function test_postProcessing() { + let model = FeatureModel.fromJSON({ + ...ctrModelDataNoDP, + normalize_l1: true, + }); + ok( + 
vectorLooseEquals(model.applyPostProcessing({ a: 0.3, b: 0.5 }), { + a: 0.3 / 0.8, + b: 0.5 / 0.8, + }), + "L1 normalization" + ); + model = FeatureModel.fromJSON({ ...ctrModelDataNoDP, normalize: true }); + ok( + vectorLooseEquals(model.applyPostProcessing({ a: 1, b: 1 }), { + a: Math.sqrt(2) / 2, + b: Math.sqrt(2) / 2, + }), + "L2 normalization" + ); + model = FeatureModel.fromJSON({ ...ctrModelDataNoDP, rescale: true }); + ok( + vectorLooseEquals(model.applyPostProcessing({ a: 1.3, b: 1.3 }), { + a: 1, + b: 1, + }), + "Rescale" + ); + ok( + vectorLooseEquals(model.applyPostProcessing({ a: 0.0, b: 0.0 }), { + a: 0.0, + b: 0, + }), + "Rescale" + ); + model = FeatureModel.fromJSON({ ...ctrModelDataNoDP, normalize: true }); + ok( + vectorLooseEquals(model.applyPostProcessing({ a: 0.0, b: 0.0 }), { + a: 0.0, + b: 0, + }), + "L1 0 vector" + ); + model = FeatureModel.fromJSON({ ...ctrModelDataNoDP, rescale: true }); + ok( + vectorLooseEquals(model.applyPostProcessing({ a: 0.0, b: 0.0 }), { + a: 0.0, + b: 0, + }), + "Rescale 0 vector" + ); +}); + add_task(function test_computeCTRInterestVectorsNoNoise() { const model = FeatureModel.fromJSON(ctrModelDataNoDP); @@ -534,3 +625,23 @@ add_task(function test_computeCTRInterestVectorsNoNoise() { Assert.equal(result.inferredInterests.news_reader, 0); Assert.ok(!result.coarseInferredInterests, "No coarse inferred interests"); }); + +add_task(function test_computeCTRInterestReprocessing() { + const model = FeatureModel.fromJSON({ + ...ctrModelData, + normalize_l1: true, + }); + // Note these are typically computed with the model.inferredInterests function and are not raw + // per feature impressions + const clickInferredInterests = { parenting: 1 }; + const impressionInferredInterests = { parenting: 2, news_reader: 4 }; + const result = model.computeCTRInterestVectors({ + clicks: clickInferredInterests, + impressions: impressionInferredInterests, + model_id: "test-ctr-model", + }); + 
Assert.equal(result.inferredInterests.parenting, 0.5); + Assert.equal(result.inferredInterests.news_reader, 0); + Assert.equal(result.coarseInferredInterests.parenting, 2); // ctr of 0.5, with vector normalized to 1 + Assert.equal(result.coarseInferredInterests.news_reader, 0); +});