[ tor-browser ].git.dasho

FeatureModel.sys.mjs (20008B)
      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 /**
      6 * This file has classes to combine New Tab feature events (aggregated from a sqlLite table) into an interest model.
      7 */
      8 
      9 import {
     10  FORMAT,
     11  AggregateResultKeys,
     12  SPECIAL_FEATURE_CLICK,
     13 } from "resource://newtab/lib/InferredModel/InferredConstants.sys.mjs";
     14 
     15 export const DAYS_TO_MS = 60 * 60 * 24 * 1000;
     16 
     17 const MAX_INT_32 = 2 ** 32;
     18 
     19 /**
     20 * Divides numerator fields by the denominator. Value is set to 0 if denominator is missing or 0.
     21 * Adds 0 value for all situations where there is a denominator but no numerator value.
     22 *
     23 * @param {{[key: string]: number}} numerator
     24 * @param {{[key: string]: number}} denominator
     25 * @returns {{[key: string]: number}}
     26 */
     27 export function divideDict(numerator, denominator) {
     28  const result = {};
     29  Object.keys(numerator).forEach(k => {
     30    result[k] = denominator[k] ? numerator[k] / denominator[k] : 0;
     31  });
     32  Object.keys(denominator).forEach(k => {
     33    if (!(k in result)) {
     34      result[k] = 0.0;
     35    }
     36  });
     37  return result;
     38 }
     39 
     40 /**
     41 * Unary encoding with randomized response for differential privacy.
     42 * The output must be decoded to back to an integer when aggregating a historgram on a server
     43 *
     44 * @param {number} x - Integer input (0 <= x < N)
     45 * @param {number} N - Number of values (see ablove)
     46 * @param {number} p - Probability of keeping a 1-bit as 1 (after one-hot encoding the output)
     47 * @param {number} q - Probability of flipping a 0-bit to 1
     48 * @returns {string} - Bitstring after unary encoding and randomized response
     49 */
     50 export function unaryEncodeDiffPrivacy(x, N, p, q) {
     51  const bitstring = [];
     52  const randomValues = new Uint32Array(N);
     53  crypto.getRandomValues(randomValues);
     54  for (let i = 0; i < N; i++) {
     55    const trueBit = i === x ? 1 : 0;
     56    const rand = randomValues[i] / MAX_INT_32;
     57    if (trueBit === 1) {
     58      bitstring.push(rand <= p ? "1" : "0");
     59    } else {
     60      bitstring.push(rand <= q ? "1" : "0");
     61    }
     62  }
     63  return bitstring.join("");
     64 }
     65 
     66 /**
     67 * Adds value to all a particular key in a dictionary. If the key is missing it sets the value.
     68 *
     69 * @param {object} dict - The dictionary to modify.
     70 * @param {string} key - The key whose value should be added or set.
     71 * @param {number} value - The value to add to the key.
     72 */
     73 export function dictAdd(dict, key, value) {
     74  if (key in dict) {
     75    dict[key] += value;
     76  } else {
     77    dict[key] = value;
     78  }
     79 }
     80 
     81 /**
     82 * Apply function to all keys in dictionary, returning new dictionary.
     83 *
     84 * @param {object} obj - The object whose values should be transformed.
     85 * @param {Function} fn - The function to apply to each value.
     86 * @returns {object} A new object with the transformed values.
     87 */
     88 export function dictApply(obj, fn) {
     89  return Object.fromEntries(
     90    Object.entries(obj).map(([key, value]) => [key, fn(value)])
     91  );
     92 }
     93 
     94 /**
     95 * Class for re-scaling events based on time passed.
     96 */
     97 export class DayTimeWeighting {
     98  /**
     99   * Instantiate class based on a series of day periods in the past.
    100   *
    101   * @param {int[]} pastDays Series of number of days, indicating days ago intervals in reverse chonological order.
    102   * Intervals are added: If the first value is 1 and the second is 5, then the first inteval is 0-1 and second interval is between 1 and 6.
    103   * @param {number[]} relativeWeight Relative weight of each period. Must be same length as pastDays
    104   */
    105  constructor(pastDays, relativeWeight) {
    106    this.pastDays = pastDays;
    107    this.relativeWeight = relativeWeight;
    108  }
    109 
    110  static fromJSON(json) {
    111    return new DayTimeWeighting(json.days, json.relative_weight);
    112  }
    113 
    114  /**
    115   * Get a series of interval pairs in the past based on the pastDays.
    116   *
    117   * @param {number} curTimeMs Base time time in MS. Usually current time.
    118   * @returns
    119   */
    120  getDateIntervals(curTimeMs) {
    121    let curEndTime = curTimeMs;
    122 
    123    const res = this.pastDays.map(daysAgo => {
    124      const start = new Date(curEndTime - daysAgo * DAYS_TO_MS);
    125      const end = new Date(curEndTime);
    126 
    127      curEndTime = start;
    128      return { start, end };
    129    });
    130    return res;
    131  }
    132 
    133  /**
    134   * Get relative weight of current index.
    135   *
    136   * @param {int} weightIndex Index
    137   * @returns {number} Weight at index, or 0 if index out of range.
    138   */
    139  getRelativeWeight(weightIndex) {
    140    if (weightIndex >= this.pastDays.length) {
    141      return 0;
    142    }
    143    return this.relativeWeight[weightIndex];
    144  }
    145 }
    146 
    147 /**
    148 * Describes the mapping from a set of aggregated events to a single interest feature
    149 */
    150 export class InterestFeatures {
    151  constructor(
    152    name,
    153    featureWeights,
    154    thresholds = null,
    155    diff_p = 0.5,
    156    diff_q = 0.5
    157  ) {
    158    this.name = name;
    159    this.featureWeights = featureWeights;
    160    // Thresholds must be in ascending order
    161    this.thresholds = thresholds;
    162    this.diff_p = diff_p;
    163    this.diff_q = diff_q;
    164  }
    165 
    166  static fromJSON(name, json) {
    167    return new InterestFeatures(
    168      name,
    169      json.features,
    170      json.thresholds || null,
    171      json.diff_p,
    172      json.diff_q
    173    );
    174  }
    175 
    176  /**
    177   * Quantize a feature value based on the thresholds specified in the class.
    178   *
    179   * @param {number} inValue Value computed by model for the feature.
    180   * @returns Quantized value. A value between 0 and number of thresholds specified (inclusive)
    181   */
    182  applyThresholds(inValue) {
    183    if (!this.thresholds) {
    184      return inValue;
    185    }
    186    for (let k = 0; k < this.thresholds.length; k++) {
    187      if (inValue < this.thresholds[k]) {
    188        return k;
    189      }
    190    }
    191    return this.thresholds.length;
    192  }
    193 
    194  /**
    195   * Applies Differential Privacy Unary Encoding method, outputting a one-hot encoded vector with randomizaiton.
    196   * Accurate historgrams of values can be computed with reasonable accuracy.
    197   * If the class has no or 0 p/q values set for differential privacy, then response is original number non-encoded.
    198   *
    199   * @param {number} inValue Value to randomize
    200   * @returns Bitfield as a string, that is the same as the thresholds length + 1
    201   */
    202  applyDifferentialPrivacy(inValue) {
    203    if (!this.thresholds || !this.diff_p) {
    204      return inValue;
    205    }
    206    return unaryEncodeDiffPrivacy(
    207      inValue,
    208      this.thresholds.length + 1,
    209      this.diff_p,
    210      this.diff_q
    211    );
    212  }
    213 }
    214 
    215 /**
    216 * Manages relative tile importance
    217 */
    218 export class TileImportance {
    219  constructor(tileImportanceMappings) {
    220    this.mappings = {};
    221    for (const [formatKey, formatId] of Object.entries(FORMAT)) {
    222      if (formatKey in tileImportanceMappings) {
    223        this.mappings[formatId] = tileImportanceMappings[formatKey];
    224      }
    225    }
    226  }
    227 
    228  getRelativeCTRForTile(tileType) {
    229    return this.mappings[tileType] || 1;
    230  }
    231 
    232  static fromJSON(json) {
    233    return new TileImportance(json);
    234  }
    235 }
    236 
    237 /***
    238 * A simple model for aggregating features
    239 */
    240 
    241 export class FeatureModel {
    242  /**
    243   *
    244   * @param {string} modelId
    245   * @param {object} dayTimeWeighting Data for day time weighting class
    246   * @param {object} interestVectorModel Data for interest model
    247   * @param {object} tileImportance Data for tile importance
    248   * @param {boolean} rescale Whether to rescale to max value
    249   * @param {boolean} logScale Whether to apply natural log (ln(x+ 1)) before rescaling
    250   */
    251  constructor({
    252    modelId,
    253    dayTimeWeighting,
    254    interestVectorModel,
    255    tileImportance,
    256    modelType,
    257    rescale = false,
    258    logScale = false,
    259    normalize = false,
    260    normalizeL1 = false,
    261    privateFeatures = [],
    262  }) {
    263    this.modelId = modelId;
    264    this.tileImportance = tileImportance;
    265    this.dayTimeWeighting = dayTimeWeighting;
    266    this.interestVectorModel = interestVectorModel;
    267    this.rescale = rescale;
    268    this.logScale = logScale;
    269    this.normalize = normalize;
    270    this.normalizeL1 = normalizeL1;
    271    this.modelType = modelType;
    272    this.privateFeatures = privateFeatures;
    273  }
    274 
    275  static fromJSON(json) {
    276    const dayTimeWeighting = DayTimeWeighting.fromJSON(json.day_time_weighting);
    277    const interestVectorModel = {};
    278    const tileImportance = TileImportance.fromJSON(json.tile_importance || {});
    279 
    280    for (const [name, featureJson] of Object.entries(json.interest_vector)) {
    281      interestVectorModel[name] = InterestFeatures.fromJSON(name, featureJson);
    282    }
    283 
    284    return new FeatureModel({
    285      dayTimeWeighting,
    286      tileImportance,
    287      interestVectorModel,
    288      normalize: json.normalize,
    289      normalizeL1: json.normalize_l1,
    290      rescale: json.rescale,
    291      logScale: json.log_scale,
    292      clickScale: json.clickScale,
    293      modelType: json.model_type,
    294      privateFeatures: json.private_features ?? null,
    295    });
    296  }
    297 
    298  supportsCoarseInterests() {
    299    return Object.values(this.interestVectorModel).every(
    300      fm => fm.thresholds && fm.thresholds.length
    301    );
    302  }
    303 
    304  supportsCoarsePrivateInterests() {
    305    return Object.values(this.interestVectorModel).every(
    306      fm =>
    307        fm.thresholds &&
    308        fm.thresholds.length &&
    309        "diff_p" in fm &&
    310        "diff_q" in fm
    311    );
    312  }
    313 
    314  /**
    315   * Return date intervals for the query
    316   */
    317  getDateIntervals(curTimeMs) {
    318    return this.dayTimeWeighting.getDateIntervals(curTimeMs);
    319  }
    320 
    321  /**
    322   * Computes an interest vector or aggregate based on the model and raw sql inout.
    323   *
    324   * @param {object} config
    325   * @param {Array.<Array.<string|number>>} config.dataForIntervals Raw aggregate output from SQL query. Could be clicks or impressions
    326   * @param {{[key: string]: number}} config.indexSchema Map of keys to indices in each sub-array in dataForIntervals
    327   * @param {boolean} [config.applyThresholding=false] Whether to apply thresholds
    328   * @param {boolean} [config.applyDifferntialPrivacy=false] Whether to apply differential privacy. This will be used for sending to telemetry.
    329   * @returns
    330   */
    331  computeInterestVector({
    332    dataForIntervals,
    333    indexSchema,
    334    applyPostProcessing = false,
    335    applyThresholding = false,
    336    applyDifferentialPrivacy = false,
    337  }) {
    338    const processedPerTimeInterval = dataForIntervals.map(
    339      (intervalData, idx) => {
    340        const intervalRawTotal = {};
    341        const perPeriodTotals = {};
    342        intervalData.forEach(aggElement => {
    343          const feature = aggElement[indexSchema[AggregateResultKeys.FEATURE]];
    344          let value = aggElement[indexSchema[AggregateResultKeys.VALUE]]; // In the future we could support format here
    345          dictAdd(intervalRawTotal, feature, value);
    346        });
    347 
    348        const weight = this.dayTimeWeighting.getRelativeWeight(idx); // Weight for this time interval
    349        Object.values(this.interestVectorModel).forEach(interestFeature => {
    350          for (const featureUsed of Object.keys(
    351            interestFeature.featureWeights
    352          )) {
    353            if (featureUsed in intervalRawTotal) {
    354              dictAdd(
    355                perPeriodTotals,
    356                interestFeature.name,
    357                intervalRawTotal[featureUsed] *
    358                  weight *
    359                  interestFeature.featureWeights[featureUsed]
    360              );
    361            }
    362          }
    363        });
    364        return perPeriodTotals;
    365      }
    366    );
    367 
    368    // Since we are doing linear combinations, it is fine to do the day-time weighting at this step
    369    let totalResults = {};
    370    processedPerTimeInterval.forEach(intervalTotals => {
    371      for (const key of Object.keys(intervalTotals)) {
    372        dictAdd(totalResults, key, intervalTotals[key]);
    373      }
    374    });
    375 
    376    let numClicks = -1;
    377 
    378    // If clicks is a feature, it's handled as special case
    379    if (SPECIAL_FEATURE_CLICK in totalResults) {
    380      numClicks = totalResults[SPECIAL_FEATURE_CLICK];
    381      delete totalResults[SPECIAL_FEATURE_CLICK];
    382    }
    383 
    384    if (applyPostProcessing) {
    385      totalResults = this.applyPostProcessing(totalResults);
    386    }
    387 
    388    if (this.clickScale && numClicks > 0) {
    389      totalResults = dictApply(totalResults, x => x / numClicks);
    390    }
    391 
    392    const zeroFilledResult = {};
    393    // Set non-click or impression values in a way that preserves original key order
    394    Object.values(this.interestVectorModel).forEach(interestFeature => {
    395      zeroFilledResult[interestFeature.name] =
    396        totalResults[interestFeature.name] || 0;
    397    });
    398    totalResults = zeroFilledResult;
    399 
    400    if (numClicks >= 0) {
    401      // Optional
    402      totalResults[SPECIAL_FEATURE_CLICK] = numClicks;
    403    }
    404    if (applyThresholding) {
    405      this.applyThresholding(totalResults, applyDifferentialPrivacy);
    406    }
    407    return totalResults;
    408  }
    409 
    410  /**
    411   * Convert float to discrete values, based on threshold parmaters for each feature in the model.
    412   * Values are modifified in place on provided dictionary.
    413   *
    414   * @param {object} valueDict of all values in model
    415   * @param {boolean} applyDifferentialPrivacy whether to apply differential privacy as well as thresholding.
    416   */
    417  applyThresholding(valueDict, applyDifferentialPrivacy = false) {
    418    for (const key of Object.keys(valueDict)) {
    419      if (key in this.interestVectorModel) {
    420        valueDict[key] = this.interestVectorModel[key].applyThresholds(
    421          valueDict[key],
    422          applyDifferentialPrivacy
    423        );
    424        if (applyDifferentialPrivacy) {
    425          valueDict[key] = this.interestVectorModel[
    426            key
    427          ].applyDifferentialPrivacy(valueDict[key], applyDifferentialPrivacy);
    428        }
    429      }
    430    }
    431  }
    432 
    433  applyPostProcessing(valueDict) {
    434    let res = valueDict;
    435    if (this.logScale) {
    436      res = dictApply(valueDict, x => Math.log(x + 1));
    437    }
    438 
    439    if (this.rescale) {
    440      let divisor = Math.max(...Object.values(res));
    441      if (divisor <= 1e-6) {
    442        divisor = 1e-6;
    443      }
    444      res = dictApply(res, x => x / divisor);
    445    }
    446 
    447    if (this.normalizeL1) {
    448      let magnitude = Object.values(res).reduce((sum, c) => sum + c, 0);
    449      if (magnitude <= 1e-6) {
    450        magnitude = 1e-6;
    451      }
    452      res = dictApply(res, x => x / magnitude);
    453    }
    454 
    455    if (this.normalize) {
    456      let magnitude = Math.sqrt(
    457        Object.values(res).reduce((sum, c) => sum + c ** 2, 0)
    458      );
    459      if (magnitude <= 1e-6) {
    460        magnitude = 1e-6;
    461      }
    462      res = dictApply(res, x => x / magnitude);
    463    }
    464    return res;
    465  }
    466 
    467  /**
    468   * Computes interest vectors based on click-through rate (CTR) by dividing the click dictionary
    469   * by the impression dictionary. Applies differential privacy using Laplace noise, and optionally
    470   * computes coarse (without noise) and coarse-private interest vectors if supported by the model.
    471   *
    472   * In all cases model_id is returned.
    473   *
    474   * @param {object} params - Function parameters.
    475   * @param {{[key: string]: number}} params.clickDict - A dictionary of interest keys to click counts.
    476   * @param {{[key: string]: number}} params.impressionDict - A dictionary of interest keys to impression counts.
    477   * @param {string} [params.model_id="unknown"] - Identifier for the model used in generating the vectors.
    478   * @param {boolean} [params.condensePrivateValues=true] - If true, condenses coarse private interest values into an array format.
    479   *
    480   * @returns {object} result - An object containing one or more of the following:
    481   * @returns {object} result.inferredInterest - A dictionary of private inferred interest scores
    482   * @returns {object} [result.coarseInferredInterests] - A dictionary of thresholded interest scores (non-private), if supported.
    483   * @returns {object} [result.coarsePrivateInferredInterests] - A dictionary of thresholded interest scores with differential privacy, if supported.
    484   */
    485  computeCTRInterestVectors({
    486    clicks,
    487    impressions,
    488    model_id = "unknown",
    489    condensePrivateValues = true,
    490  }) {
    491    let inferredInterests = divideDict(clicks, impressions);
    492 
    493    const originalInterestValues = { ...inferredInterests };
    494 
    495    const resultObject = {
    496      inferredInterests: { ...inferredInterests, model_id },
    497    };
    498 
    499    if (this.supportsCoarseInterests()) {
    500      // always true
    501      const coarseValues = this.applyPostProcessing({
    502        ...originalInterestValues,
    503      });
    504      this.applyThresholding(coarseValues, false);
    505      resultObject.coarseInferredInterests = { ...coarseValues, model_id };
    506    }
    507 
    508    if (this.supportsCoarsePrivateInterests()) {
    509      let coarsePrivateValues = { ...originalInterestValues };
    510      if (this.privateFeatures) {
    511        // filter here for private features
    512        coarsePrivateValues = Object.fromEntries(
    513          Object.entries(coarsePrivateValues).filter(([key]) =>
    514            this.privateFeatures.includes(key)
    515          )
    516        );
    517      }
    518      coarsePrivateValues = this.applyPostProcessing(coarsePrivateValues);
    519      this.applyThresholding(coarsePrivateValues, true);
    520 
    521      if (condensePrivateValues) {
    522        resultObject.coarsePrivateInferredInterests = {
    523          // Key order preserved in Gecko
    524          values: Object.values(coarsePrivateValues),
    525          model_id,
    526        };
    527      } else {
    528        resultObject.coarsePrivateInferredInterests = {
    529          ...coarsePrivateValues,
    530          model_id,
    531        };
    532      }
    533    }
    534    return resultObject;
    535  }
    536 
    537  /**
    538   * Computes various types of interest vectors from user interaction data across intervals.
    539   * Returns standard inferred interests (with Laplace noise), and optionally returns
    540   * coarse-grained and private-coarse versions depending on model support.
    541   *
    542   * @param {object} params - The function parameters.
    543   * @param {Array<object>} params.dataForIntervals - An array of data points grouped by time intervals (e.g., clicks, impressions).
    544   * @param {object} params.indexSchema - Schema that defines how interest indices should be computed.
    545   * @param {string} [params.model_id="unknown"] - Identifier for the model used to produce these vectors.
    546   * @param {boolean} [params.condensePrivateValues=true] - If true, condenses coarse private interest values into an array format.
    547   *
    548   * @returns {object} result - An object containing the computed interest vectors.
    549   * @returns {object} result.inferredInterests - A dictionary of private inferred interest values, with `model_id`.
    550   * @returns {object} [result.coarseInferredInterests] - Coarse thresholded (non-private) interest vector, if supported.
    551   * @returns {object | {values: Array<number>, model_id: string}} [result.coarsePrivateInferredInterests] - Coarse and differentially private interests.
    552   *           If `condensePrivateValues` is true, returned as an object with a `values` array; otherwise, as a dictionary.
    553   */
    554  computeInterestVectors({
    555    dataForIntervals,
    556    indexSchema,
    557    model_id = "unknown",
    558    condensePrivateValues = true,
    559  }) {
    560    const result = {};
    561    let inferredInterests;
    562    let coarseInferredInterests;
    563    let coarsePrivateInferredInterests;
    564 
    565    inferredInterests = this.computeInterestVector({
    566      dataForIntervals,
    567      indexSchema,
    568    });
    569    result.inferredInterests = { ...inferredInterests };
    570 
    571    if (this.supportsCoarseInterests()) {
    572      coarseInferredInterests = this.computeInterestVector({
    573        dataForIntervals,
    574        indexSchema,
    575        applyThresholding: true,
    576      });
    577      if (coarseInferredInterests) {
    578        result.coarseInferredInterests = {
    579          ...coarseInferredInterests,
    580          model_id,
    581        };
    582      }
    583    }
    584 
    585    if (this.supportsCoarsePrivateInterests()) {
    586      coarsePrivateInferredInterests = this.computeInterestVector({
    587        dataForIntervals,
    588        indexSchema,
    589        applyThresholding: true,
    590        applyDifferentialPrivacy: true,
    591      });
    592      if (coarsePrivateInferredInterests) {
    593        if (condensePrivateValues) {
    594          result.coarsePrivateInferredInterests = {
    595            // Key order preserved in Gecko
    596            values: Object.values(coarsePrivateInferredInterests),
    597            model_id,
    598          };
    599        } else {
    600          result.coarsePrivateInferredInterests = {
    601            ...coarsePrivateInferredInterests,
    602            model_id,
    603          };
    604        }
    605      }
    606    }
    607    return result;
    608  }
    609 }
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE