[ tor-browser ].git.dasho

commit 46ce02b5c7f015ba91fee158c305c05a0bc17fa9
parent 2665adee74b624d68ed0fa0153396945b1f1a1e8
Author: Rolf Rando <rrando@mozilla.com>
Date:   Wed,  8 Oct 2025 21:27:33 +0000

Bug 1992913 - Add interest vector normalization support for CTR-based inferred interest model r=mlplyler,home-newtab-reviewers,nbarrett

Current inferred models sent to the server use thresholded/randomized raw click-through rates, but there is no normalization to ensure that, for example, both very high-CTR and very low-CTR users have vectors with useful information.

This patch allows rescaling/normalization to be applied to the interest vector before thresholding/randomness is added.

Differential Revision: https://phabricator.services.mozilla.com/D267723

Diffstat:
M browser/extensions/newtab/lib/InferredModel/FeatureModel.sys.mjs  | 62 +++++++++++++++++++++++++++++++++++++++++++++++++-------------
M browser/extensions/newtab/lib/InferredPersonalizationFeed.sys.mjs  | 5 +++--
M browser/extensions/newtab/test/xpcshell/test_InferredFeatureModel.js  | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-

3 files changed, 164 insertions(+), 16 deletions(-)
diff --git a/browser/extensions/newtab/lib/InferredModel/FeatureModel.sys.mjs b/browser/extensions/newtab/lib/InferredModel/FeatureModel.sys.mjs
@@ -245,8 +245,10 @@ export class FeatureModel {
     interestVectorModel,
     tileImportance,
     modelType,
-    rescale = true,
+    rescale = false,
     logScale = false,
+    normalize = false,
+    normalizeL1 = false,
     privateFeatures = [],
   }) {
     this.modelId = modelId;
@@ -255,6 +257,8 @@ export class FeatureModel {
     this.interestVectorModel = interestVectorModel;
     this.rescale = rescale;
     this.logScale = logScale;
+    this.normalize = normalize;
+    this.normalizeL1 = normalizeL1;
     this.modelType = modelType;
     this.privateFeatures = privateFeatures;
   }
@@ -273,6 +277,7 @@ export class FeatureModel {
       tileImportance,
       interestVectorModel,
       normalize: json.normalize,
+      normalizeL1: json.normalize_l1,
       rescale: json.rescale,
       logScale: json.log_scale,
       clickScale: json.clickScale,
@@ -316,6 +321,7 @@ export class FeatureModel {
   computeInterestVector({
     dataForIntervals,
     indexSchema,
+    applyPostProcessing = false,
     applyThresholding = false,
     applyDifferentialPrivacy = false,
   }) {
@@ -365,16 +371,8 @@ export class FeatureModel {
       delete totalResults[SPECIAL_FEATURE_CLICK];
     }
 
-    if (this.logScale) {
-      totalResults = dictApply(totalResults, x => Math.log(x + 1));
-    }
-
-    if (this.rescale) {
-      let divisor = Math.max(...Object.values(totalResults));
-      if (divisor <= 0.001) {
-        divisor = 0.001;
-      }
-      totalResults = dictApply(totalResults, x => x / divisor);
+    if (applyPostProcessing) {
+      totalResults = this.applyPostProcessing(totalResults);
     }
 
     if (this.clickScale && numClicks > 0) {
@@ -422,6 +420,40 @@ export class FeatureModel {
     }
   }
 
+  /**
+   * Applies the model's configured post-processing steps to a dictionary of values. Steps run in
+   * a fixed order, each only when its flag is set on the model: log scaling (logScale), rescaling
+   * by the largest value (rescale), L1 normalization (normalizeL1), then L2 normalization
+   * (normalize). Every divisor is floored at 1e-6 so an all-zero input stays all-zero instead of
+   * turning into NaN.
+   *
+   * @param {Object} valueDict Dictionary mapping keys to numeric values
+   * @returns {Object} The post-processed dictionary; valueDict itself when no flag is set
+   */
+  applyPostProcessing(valueDict) {
+    const MIN_DIVISOR = 1e-6;
+    let res = valueDict;
+
+    if (this.logScale) {
+      res = dictApply(res, x => Math.log(x + 1));
+    }
+
+    if (this.rescale) {
+      const divisor = Math.max(MIN_DIVISOR, ...Object.values(res));
+      res = dictApply(res, x => x / divisor);
+    }
+
+    if (this.normalizeL1) {
+      // The L1 norm is the sum of absolute values; summing signed values would let negative
+      // entries cancel positive ones and produce a meaningless (or clamped) divisor.
+      const magnitude = Math.max(
+        MIN_DIVISOR,
+        Object.values(res).reduce((sum, c) => sum + Math.abs(c), 0)
+      );
+      res = dictApply(res, x => x / magnitude);
+    }
+
+    if (this.normalize) {
+      const magnitude = Math.max(
+        MIN_DIVISOR,
+        Math.sqrt(Object.values(res).reduce((sum, c) => sum + c ** 2, 0))
+      );
+      res = dictApply(res, x => x / magnitude);
+    }
+    return res;
+  }
+
+
   /**
    * Computes interest vectors based on click-through rate (CTR) by dividing the click dictionary
    * by the impression dictionary. Applies differential privacy using Laplace noise, and optionally
@@ -446,7 +478,8 @@ export class FeatureModel {
     model_id = "unknown",
     condensePrivateValues = true,
   }) {
-    const inferredInterests = divideDict(clicks, impressions);
+    let inferredInterests = divideDict(clicks, impressions);
+
     const originalInterestValues = { ...inferredInterests };
 
     const resultObject = {
@@ -455,7 +488,9 @@ export class FeatureModel {
 
     if (this.supportsCoarseInterests()) {
       // always true
-      const coarseValues = { ...originalInterestValues };
+      const coarseValues = this.applyPostProcessing({
+        ...originalInterestValues,
+      });
       this.applyThresholding(coarseValues, false);
       resultObject.coarseInferredInterests = { ...coarseValues, model_id };
     }
@@ -469,6 +504,7 @@ export class FeatureModel {
             this.privateFeatures.includes(key)
           )
         );
+        this.applyPostProcessing({ ...originalInterestValues });
       }
       this.applyThresholding(coarsePrivateValues, true);
 
diff --git a/browser/extensions/newtab/lib/InferredPersonalizationFeed.sys.mjs b/browser/extensions/newtab/lib/InferredPersonalizationFeed.sys.mjs
@@ -139,14 +139,15 @@ export class InferredPersonalizationFeed {
       intervals,
       CLICK_TABLE
     );
-
+    const isClickModel = model.modelType === MODEL_TYPE.CLICKS;
     const interests = model.computeInterestVectors({
       dataForIntervals: aggClickPerInterval,
       indexSchema: schema,
       model_id: inferredModel.model_id,
+      applyPostProcessing: isClickModel,
     });
 
-    if (model.modelType === MODEL_TYPE.CLICKS) {
+    if (isClickModel) {
       return interests;
     }
 
diff --git a/browser/extensions/newtab/test/xpcshell/test_InferredFeatureModel.js b/browser/extensions/newtab/test/xpcshell/test_InferredFeatureModel.js
@@ -11,6 +11,20 @@ ChromeUtils.defineESModuleGetters(this, {
     "resource://newtab/lib/InferredModel/FeatureModel.sys.mjs",
 });
 
+/**
+ * Compares two dictionaries of numbers, treating values as equal when they match after being
+ * formatted with toFixed(decimalPoints). Both dictionaries must have exactly the same keys;
+ * a key present in only one of them makes the comparison fail rather than throw.
+ *
+ * @param {Object} a
+ * @param {Object} b
+ * @param {number} decimalPoints
+ * @returns {boolean} True if vectors are similar
+ */
+function vectorLooseEquals(a, b, decimalPoints = 2) {
+  const keysOfA = Object.keys(a);
+  // Same key count plus "every key of a is in b" implies identical key sets.
+  if (keysOfA.length !== Object.keys(b).length) {
+    return false;
+  }
+  return keysOfA.every(
+    k =>
+      k in b && a[k].toFixed(decimalPoints) === b[k].toFixed(decimalPoints)
+  );
+}
+
 add_task(function test_dictAdd() {
   let dict = {};
   dictAdd(dict, "a", 3);
@@ -330,13 +344,14 @@ add_task(function test_modelChecks() {
   );
 });
 
-add_task(function test_computeInterestVector() {
+add_task(function test_computeInterestVectorClickModel() {
   const modelData = { ...jsonModelData, rescale: true };
   const model = FeatureModel.fromJSON(modelData);
   const result = model.computeInterestVector({
     dataForIntervals: SQL_RESULT_DATA,
     indexSchema: SCHEMA,
     applyThresholding: false,
+    applyPostProcessing: true,
   });
   Assert.ok("parenting" in result, "Result should contain parenting");
   Assert.ok("news_reader" in result, "Result should contain news_reader");
@@ -511,6 +526,82 @@ const ctrModelDataNoDP = {
   },
 };
 
+// CTR-type model configuration consumed by test_computeCTRInterestReprocessing.
+// noise_scale is 0 and both interests use diff_p: 1 / diff_q: 0 — presumably this turns off
+// randomization so the test's exact-value assertions are deterministic; TODO confirm.
+const ctrModelData = {
+  model_type: "ctr",
+  noise_scale: 0,
+  day_time_weighting: {
+    days: [3, 14, 45],
+    relative_weight: [1, 0.5, 0.3],
+  },
+  interest_vector: {
+    news_reader: {
+      features: { pub_nytimes_com: 0.5, pub_cnn_com: 0.5 },
+      // NOTE(review): three entries (0.3, 0, 8); possibly meant to be [0.3, 0.8] — confirm.
+      thresholds: [0.3, 0, 8],
+      diff_p: 1,
+      diff_q: 0,
+    },
+    parenting: {
+      features: { parenting: 1 },
+      // NOTE(review): same three-entry list as news_reader; possibly meant [0.3, 0.8] — confirm.
+      thresholds: [0.3, 0, 8],
+      diff_p: 1,
+      diff_q: 0,
+    },
+  },
+};
+
+// Exercises FeatureModel.applyPostProcessing with one flag enabled at a time (L1 normalization,
+// L2 normalization, rescale), checking both a regular vector and the all-zero vector for each.
+add_task(function test_postProcessing() {
+  const zeroVector = { a: 0, b: 0 };
+
+  // L1: values are divided by their sum.
+  let model = FeatureModel.fromJSON({
+    ...ctrModelDataNoDP,
+    normalize_l1: true,
+  });
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ a: 0.3, b: 0.5 }), {
+      a: 0.3 / 0.8,
+      b: 0.5 / 0.8,
+    }),
+    "L1 normalization"
+  );
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ ...zeroVector }), zeroVector),
+    "L1 0 vector"
+  );
+
+  // L2: values are divided by the Euclidean length of the vector.
+  model = FeatureModel.fromJSON({ ...ctrModelDataNoDP, normalize: true });
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ a: 1, b: 1 }), {
+      a: Math.sqrt(2) / 2,
+      b: Math.sqrt(2) / 2,
+    }),
+    "L2 normalization"
+  );
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ ...zeroVector }), zeroVector),
+    "L2 0 vector"
+  );
+
+  // Rescale: values are divided by the largest value.
+  model = FeatureModel.fromJSON({ ...ctrModelDataNoDP, rescale: true });
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ a: 1.3, b: 1.3 }), {
+      a: 1,
+      b: 1,
+    }),
+    "Rescale"
+  );
+  ok(
+    vectorLooseEquals(model.applyPostProcessing({ ...zeroVector }), zeroVector),
+    "Rescale 0 vector"
+  );
+});
+
 add_task(function test_computeCTRInterestVectorsNoNoise() {
   const model = FeatureModel.fromJSON(ctrModelDataNoDP);
 
@@ -534,3 +625,23 @@ add_task(function test_computeCTRInterestVectorsNoNoise() {
   Assert.equal(result.inferredInterests.news_reader, 0);
   Assert.ok(!result.coarseInferredInterests, "No coarse inferred interests");
 });
+
+// Checks that computeCTRInterestVectors leaves inferredInterests as raw CTR values while the
+// coarse interests are derived from the L1-normalized vector.
+add_task(function test_computeCTRInterestReprocessing() {
+  const l1Model = FeatureModel.fromJSON({ ...ctrModelData, normalize_l1: true });
+
+  // The click / impression dictionaries stand in for values that are typically produced by the
+  // model.inferredInterests function; they are not raw per-feature impressions.
+  const ctrVectors = l1Model.computeCTRInterestVectors({
+    clicks: { parenting: 1 },
+    impressions: { parenting: 2, news_reader: 4 },
+    model_id: "test-ctr-model",
+  });
+
+  const rawInterests = ctrVectors.inferredInterests;
+  Assert.equal(rawInterests.parenting, 0.5);
+  Assert.equal(rawInterests.news_reader, 0);
+
+  const coarseInterests = ctrVectors.coarseInferredInterests;
+  // ctr of 0.5, with vector normalized to 1
+  Assert.equal(coarseInterests.parenting, 2);
+  Assert.equal(coarseInterests.news_reader, 0);
+});

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	browser/extensions/newtab/lib/InferredModel/FeatureModel.sys.mjs	\|	62	+++++++++++++++++++++++++++++++++++++++++++++++++-------------
M	browser/extensions/newtab/lib/InferredPersonalizationFeed.sys.mjs	\|	5	+++--
M	browser/extensions/newtab/test/xpcshell/test_InferredFeatureModel.js	\|	113	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-