commit 46ce02b5c7f015ba91fee158c305c05a0bc17fa9
parent 2665adee74b624d68ed0fa0153396945b1f1a1e8
Author: Rolf Rando <rrando@mozilla.com>
Date: Wed, 8 Oct 2025 21:27:33 +0000
Bug 1992913 - Add interest vector normalization support for CTR-based inferred interest model r=mlplyler,home-newtab-reviewers,nbarrett
Current inferred models sent to the server use thresholded/randomized raw click-through rates, but we don't have any normalization to ensure that, for example, both very-high-CTR and low-CTR users have vectors with useful information.
This patch allows rescaling/normalization to be applied to the interest vector before thresholding/randomness is added.
Differential Revision: https://phabricator.services.mozilla.com/D267723
Diffstat:
3 files changed, 164 insertions(+), 16 deletions(-)
diff --git a/browser/extensions/newtab/lib/InferredModel/FeatureModel.sys.mjs b/browser/extensions/newtab/lib/InferredModel/FeatureModel.sys.mjs
@@ -245,8 +245,10 @@ export class FeatureModel {
interestVectorModel,
tileImportance,
modelType,
- rescale = true,
+ rescale = false,
logScale = false,
+ normalize = false,
+ normalizeL1 = false,
privateFeatures = [],
}) {
this.modelId = modelId;
@@ -255,6 +257,8 @@ export class FeatureModel {
this.interestVectorModel = interestVectorModel;
this.rescale = rescale;
this.logScale = logScale;
+ this.normalize = normalize;
+ this.normalizeL1 = normalizeL1;
this.modelType = modelType;
this.privateFeatures = privateFeatures;
}
@@ -273,6 +277,7 @@ export class FeatureModel {
tileImportance,
interestVectorModel,
normalize: json.normalize,
+ normalizeL1: json.normalize_l1,
rescale: json.rescale,
logScale: json.log_scale,
clickScale: json.clickScale,
@@ -316,6 +321,7 @@ export class FeatureModel {
computeInterestVector({
dataForIntervals,
indexSchema,
+ applyPostProcessing = false,
applyThresholding = false,
applyDifferentialPrivacy = false,
}) {
@@ -365,16 +371,8 @@ export class FeatureModel {
delete totalResults[SPECIAL_FEATURE_CLICK];
}
- if (this.logScale) {
- totalResults = dictApply(totalResults, x => Math.log(x + 1));
- }
-
- if (this.rescale) {
- let divisor = Math.max(...Object.values(totalResults));
- if (divisor <= 0.001) {
- divisor = 0.001;
- }
- totalResults = dictApply(totalResults, x => x / divisor);
+ if (applyPostProcessing) {
+ totalResults = this.applyPostProcessing(totalResults);
}
if (this.clickScale && numClicks > 0) {
@@ -422,6 +420,40 @@ export class FeatureModel {
}
}
+  /**
+   * Applies the post-processing steps enabled on this model to a dictionary
+   * of interest values. Steps run in a fixed order, each only when its flag
+   * is set: log scaling (logScale), rescaling by the maximum value (rescale),
+   * L1 normalization (normalizeL1), then L2 normalization (normalize).
+   *
+   * @param {Object} valueDict Interest name to numeric value
+   * @returns {Object} Post-processed values. When no step is enabled the
+   *   input object itself is returned.
+   */
+  applyPostProcessing(valueDict) {
+    // Divisors at or below this are clamped, so an all-zero vector divides
+    // to zeros instead of NaN.
+    const MIN_DIVISOR = 1e-6;
+    let res = valueDict;
+    if (this.logScale) {
+      res = dictApply(res, x => Math.log(x + 1));
+    }
+
+    if (this.rescale) {
+      const divisor = Math.max(MIN_DIVISOR, ...Object.values(res));
+      res = dictApply(res, x => x / divisor);
+    }
+
+    if (this.normalizeL1) {
+      // The L1 norm is the sum of absolute values. Summing raw values would
+      // let entries of opposite sign cancel and collapse the divisor.
+      const magnitude = Math.max(
+        MIN_DIVISOR,
+        Object.values(res).reduce((sum, c) => sum + Math.abs(c), 0)
+      );
+      res = dictApply(res, x => x / magnitude);
+    }
+
+    if (this.normalize) {
+      const magnitude = Math.max(
+        MIN_DIVISOR,
+        Math.sqrt(Object.values(res).reduce((sum, c) => sum + c ** 2, 0))
+      );
+      res = dictApply(res, x => x / magnitude);
+    }
+    return res;
+  }
+
/**
* Computes interest vectors based on click-through rate (CTR) by dividing the click dictionary
* by the impression dictionary. Applies differential privacy using Laplace noise, and optionally
@@ -446,7 +478,8 @@ export class FeatureModel {
model_id = "unknown",
condensePrivateValues = true,
}) {
- const inferredInterests = divideDict(clicks, impressions);
+ let inferredInterests = divideDict(clicks, impressions);
+
const originalInterestValues = { ...inferredInterests };
const resultObject = {
@@ -455,7 +488,9 @@ export class FeatureModel {
if (this.supportsCoarseInterests()) {
// always true
- const coarseValues = { ...originalInterestValues };
+ const coarseValues = this.applyPostProcessing({
+ ...originalInterestValues,
+ });
this.applyThresholding(coarseValues, false);
resultObject.coarseInferredInterests = { ...coarseValues, model_id };
}
@@ -469,6 +504,7 @@ export class FeatureModel {
this.privateFeatures.includes(key)
)
);
+ this.applyPostProcessing({ ...originalInterestValues });
}
this.applyThresholding(coarsePrivateValues, true);
diff --git a/browser/extensions/newtab/lib/InferredPersonalizationFeed.sys.mjs b/browser/extensions/newtab/lib/InferredPersonalizationFeed.sys.mjs
@@ -139,14 +139,15 @@ export class InferredPersonalizationFeed {
intervals,
CLICK_TABLE
);
-
+ const isClickModel = model.modelType === MODEL_TYPE.CLICKS;
const interests = model.computeInterestVectors({
dataForIntervals: aggClickPerInterval,
indexSchema: schema,
model_id: inferredModel.model_id,
+ applyPostProcessing: isClickModel,
});
- if (model.modelType === MODEL_TYPE.CLICKS) {
+ if (isClickModel) {
return interests;
}
diff --git a/browser/extensions/newtab/test/xpcshell/test_InferredFeatureModel.js b/browser/extensions/newtab/test/xpcshell/test_InferredFeatureModel.js
@@ -11,6 +11,20 @@ ChromeUtils.defineESModuleGetters(this, {
"resource://newtab/lib/InferredModel/FeatureModel.sys.mjs",
});
+/**
+ * Compares two dictionaries of numbers up to decimalPoints decimal points.
+ * The dictionaries must have exactly the same keys; a key present in only
+ * one of them makes the comparison fail instead of being ignored or throwing.
+ *
+ * @param {Object} a
+ * @param {Object} b
+ * @param {number} decimalPoints
+ * @returns {boolean} True if vectors are similar
+ */
+function vectorLooseEquals(a, b, decimalPoints = 2) {
+  const keysOfA = Object.keys(a);
+  if (keysOfA.length !== Object.keys(b).length) {
+    return false;
+  }
+  return keysOfA.every(
+    k =>
+      Object.hasOwn(b, k) &&
+      a[k].toFixed(decimalPoints) === b[k].toFixed(decimalPoints)
+  );
+}
+
add_task(function test_dictAdd() {
let dict = {};
dictAdd(dict, "a", 3);
@@ -330,13 +344,14 @@ add_task(function test_modelChecks() {
);
});
-add_task(function test_computeInterestVector() {
+add_task(function test_computeInterestVectorClickModel() {
const modelData = { ...jsonModelData, rescale: true };
const model = FeatureModel.fromJSON(modelData);
const result = model.computeInterestVector({
dataForIntervals: SQL_RESULT_DATA,
indexSchema: SCHEMA,
applyThresholding: false,
+ applyPostProcessing: true,
});
Assert.ok("parenting" in result, "Result should contain parenting");
Assert.ok("news_reader" in result, "Result should contain news_reader");
@@ -511,6 +526,82 @@ const ctrModelDataNoDP = {
},
};
+// CTR model fixture with two interests (news_reader, parenting), noise_scale
+// of 0, and diff_p: 1 / diff_q: 0 on each interest. Used by
+// test_computeCTRInterestReprocessing.
+// NOTE(review): `thresholds: [0.3, 0, 8]` has three entries; this may be a
+// typo for `[0.3, 0.8]` — confirm against applyThresholding.
+const ctrModelData = {
+ model_type: "ctr",
+ noise_scale: 0,
+ day_time_weighting: {
+ days: [3, 14, 45],
+ relative_weight: [1, 0.5, 0.3],
+ },
+ interest_vector: {
+ news_reader: {
+ features: { pub_nytimes_com: 0.5, pub_cnn_com: 0.5 },
+ thresholds: [0.3, 0, 8],
+ diff_p: 1,
+ diff_q: 0,
+ },
+ parenting: {
+ features: { parenting: 1 },
+ thresholds: [0.3, 0, 8],
+ diff_p: 1,
+ diff_q: 0,
+ },
+ },
+};
+
+// Exercises FeatureModel.applyPostProcessing with one post-processing flag
+// added to ctrModelDataNoDP at a time. Each mode is checked on a regular
+// vector and on the all-zero vector, which must stay all-zero.
+add_task(function test_postProcessing() {
+  const zeroVector = { a: 0.0, b: 0 };
+
+  let model = FeatureModel.fromJSON({
+    ...ctrModelDataNoDP,
+    normalize_l1: true,
+  });
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ a: 0.3, b: 0.5 }), {
+      a: 0.3 / 0.8,
+      b: 0.5 / 0.8,
+    }),
+    "L1 normalization"
+  );
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ a: 0.0, b: 0.0 }), zeroVector),
+    "L1 0 vector"
+  );
+
+  model = FeatureModel.fromJSON({ ...ctrModelDataNoDP, normalize: true });
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ a: 1, b: 1 }), {
+      a: Math.sqrt(2) / 2,
+      b: Math.sqrt(2) / 2,
+    }),
+    "L2 normalization"
+  );
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ a: 0.0, b: 0.0 }), zeroVector),
+    "L2 0 vector"
+  );
+
+  model = FeatureModel.fromJSON({ ...ctrModelDataNoDP, rescale: true });
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ a: 1.3, b: 1.3 }), {
+      a: 1,
+      b: 1,
+    }),
+    "Rescale"
+  );
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ a: 0.0, b: 0.0 }), zeroVector),
+    "Rescale 0 vector"
+  );
+});
+
add_task(function test_computeCTRInterestVectorsNoNoise() {
const model = FeatureModel.fromJSON(ctrModelDataNoDP);
@@ -534,3 +625,23 @@ add_task(function test_computeCTRInterestVectorsNoNoise() {
Assert.equal(result.inferredInterests.news_reader, 0);
Assert.ok(!result.coarseInferredInterests, "No coarse inferred interests");
});
+
+// Checks that computeCTRInterestVectors leaves inferredInterests as the plain
+// click/impression ratio while the coarse interests are computed from the
+// L1-normalized vector.
+add_task(function test_computeCTRInterestReprocessing() {
+  const l1Model = FeatureModel.fromJSON({
+    ...ctrModelData,
+    normalize_l1: true,
+  });
+  // These inputs stand in for per-interest vectors the model would normally
+  // compute; they are not raw per feature impressions.
+  const { inferredInterests, coarseInferredInterests } =
+    l1Model.computeCTRInterestVectors({
+      clicks: { parenting: 1 },
+      impressions: { parenting: 2, news_reader: 4 },
+      model_id: "test-ctr-model",
+    });
+  Assert.equal(inferredInterests.parenting, 0.5);
+  Assert.equal(inferredInterests.news_reader, 0);
+  // CTR of 0.5 is L1-normalized to 1 before thresholding.
+  Assert.equal(coarseInferredInterests.parenting, 2);
+  Assert.equal(coarseInferredInterests.news_reader, 0);
+});