FeatureModel.sys.mjs (20008B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 /** 6 * This file has classes to combine New Tab feature events (aggregated from a sqlLite table) into an interest model. 7 */ 8 9 import { 10 FORMAT, 11 AggregateResultKeys, 12 SPECIAL_FEATURE_CLICK, 13 } from "resource://newtab/lib/InferredModel/InferredConstants.sys.mjs"; 14 15 export const DAYS_TO_MS = 60 * 60 * 24 * 1000; 16 17 const MAX_INT_32 = 2 ** 32; 18 19 /** 20 * Divides numerator fields by the denominator. Value is set to 0 if denominator is missing or 0. 21 * Adds 0 value for all situations where there is a denominator but no numerator value. 22 * 23 * @param {{[key: string]: number}} numerator 24 * @param {{[key: string]: number}} denominator 25 * @returns {{[key: string]: number}} 26 */ 27 export function divideDict(numerator, denominator) { 28 const result = {}; 29 Object.keys(numerator).forEach(k => { 30 result[k] = denominator[k] ? numerator[k] / denominator[k] : 0; 31 }); 32 Object.keys(denominator).forEach(k => { 33 if (!(k in result)) { 34 result[k] = 0.0; 35 } 36 }); 37 return result; 38 } 39 40 /** 41 * Unary encoding with randomized response for differential privacy. 42 * The output must be decoded to back to an integer when aggregating a historgram on a server 43 * 44 * @param {number} x - Integer input (0 <= x < N) 45 * @param {number} N - Number of values (see ablove) 46 * @param {number} p - Probability of keeping a 1-bit as 1 (after one-hot encoding the output) 47 * @param {number} q - Probability of flipping a 0-bit to 1 48 * @returns {string} - Bitstring after unary encoding and randomized response 49 */ 50 export function unaryEncodeDiffPrivacy(x, N, p, q) { 51 const bitstring = []; 52 const randomValues = new Uint32Array(N); 53 crypto.getRandomValues(randomValues); 54 for (let i = 0; i < N; i++) { 55 const trueBit = i === x ? 1 : 0; 56 const rand = randomValues[i] / MAX_INT_32; 57 if (trueBit === 1) { 58 bitstring.push(rand <= p ? "1" : "0"); 59 } else { 60 bitstring.push(rand <= q ? "1" : "0"); 61 } 62 } 63 return bitstring.join(""); 64 } 65 66 /** 67 * Adds value to all a particular key in a dictionary. If the key is missing it sets the value. 68 * 69 * @param {object} dict - The dictionary to modify. 70 * @param {string} key - The key whose value should be added or set. 71 * @param {number} value - The value to add to the key. 72 */ 73 export function dictAdd(dict, key, value) { 74 if (key in dict) { 75 dict[key] += value; 76 } else { 77 dict[key] = value; 78 } 79 } 80 81 /** 82 * Apply function to all keys in dictionary, returning new dictionary. 83 * 84 * @param {object} obj - The object whose values should be transformed. 85 * @param {Function} fn - The function to apply to each value. 86 * @returns {object} A new object with the transformed values. 87 */ 88 export function dictApply(obj, fn) { 89 return Object.fromEntries( 90 Object.entries(obj).map(([key, value]) => [key, fn(value)]) 91 ); 92 } 93 94 /** 95 * Class for re-scaling events based on time passed. 96 */ 97 export class DayTimeWeighting { 98 /** 99 * Instantiate class based on a series of day periods in the past. 100 * 101 * @param {int[]} pastDays Series of number of days, indicating days ago intervals in reverse chonological order. 102 * Intervals are added: If the first value is 1 and the second is 5, then the first inteval is 0-1 and second interval is between 1 and 6. 103 * @param {number[]} relativeWeight Relative weight of each period. Must be same length as pastDays 104 */ 105 constructor(pastDays, relativeWeight) { 106 this.pastDays = pastDays; 107 this.relativeWeight = relativeWeight; 108 } 109 110 static fromJSON(json) { 111 return new DayTimeWeighting(json.days, json.relative_weight); 112 } 113 114 /** 115 * Get a series of interval pairs in the past based on the pastDays. 116 * 117 * @param {number} curTimeMs Base time time in MS. Usually current time. 118 * @returns 119 */ 120 getDateIntervals(curTimeMs) { 121 let curEndTime = curTimeMs; 122 123 const res = this.pastDays.map(daysAgo => { 124 const start = new Date(curEndTime - daysAgo * DAYS_TO_MS); 125 const end = new Date(curEndTime); 126 127 curEndTime = start; 128 return { start, end }; 129 }); 130 return res; 131 } 132 133 /** 134 * Get relative weight of current index. 135 * 136 * @param {int} weightIndex Index 137 * @returns {number} Weight at index, or 0 if index out of range. 138 */ 139 getRelativeWeight(weightIndex) { 140 if (weightIndex >= this.pastDays.length) { 141 return 0; 142 } 143 return this.relativeWeight[weightIndex]; 144 } 145 } 146 147 /** 148 * Describes the mapping from a set of aggregated events to a single interest feature 149 */ 150 export class InterestFeatures { 151 constructor( 152 name, 153 featureWeights, 154 thresholds = null, 155 diff_p = 0.5, 156 diff_q = 0.5 157 ) { 158 this.name = name; 159 this.featureWeights = featureWeights; 160 // Thresholds must be in ascending order 161 this.thresholds = thresholds; 162 this.diff_p = diff_p; 163 this.diff_q = diff_q; 164 } 165 166 static fromJSON(name, json) { 167 return new InterestFeatures( 168 name, 169 json.features, 170 json.thresholds || null, 171 json.diff_p, 172 json.diff_q 173 ); 174 } 175 176 /** 177 * Quantize a feature value based on the thresholds specified in the class. 178 * 179 * @param {number} inValue Value computed by model for the feature. 180 * @returns Quantized value. A value between 0 and number of thresholds specified (inclusive) 181 */ 182 applyThresholds(inValue) { 183 if (!this.thresholds) { 184 return inValue; 185 } 186 for (let k = 0; k < this.thresholds.length; k++) { 187 if (inValue < this.thresholds[k]) { 188 return k; 189 } 190 } 191 return this.thresholds.length; 192 } 193 194 /** 195 * Applies Differential Privacy Unary Encoding method, outputting a one-hot encoded vector with randomizaiton. 196 * Accurate historgrams of values can be computed with reasonable accuracy. 197 * If the class has no or 0 p/q values set for differential privacy, then response is original number non-encoded. 198 * 199 * @param {number} inValue Value to randomize 200 * @returns Bitfield as a string, that is the same as the thresholds length + 1 201 */ 202 applyDifferentialPrivacy(inValue) { 203 if (!this.thresholds || !this.diff_p) { 204 return inValue; 205 } 206 return unaryEncodeDiffPrivacy( 207 inValue, 208 this.thresholds.length + 1, 209 this.diff_p, 210 this.diff_q 211 ); 212 } 213 } 214 215 /** 216 * Manages relative tile importance 217 */ 218 export class TileImportance { 219 constructor(tileImportanceMappings) { 220 this.mappings = {}; 221 for (const [formatKey, formatId] of Object.entries(FORMAT)) { 222 if (formatKey in tileImportanceMappings) { 223 this.mappings[formatId] = tileImportanceMappings[formatKey]; 224 } 225 } 226 } 227 228 getRelativeCTRForTile(tileType) { 229 return this.mappings[tileType] || 1; 230 } 231 232 static fromJSON(json) { 233 return new TileImportance(json); 234 } 235 } 236 237 /*** 238 * A simple model for aggregating features 239 */ 240 241 export class FeatureModel { 242 /** 243 * 244 * @param {string} modelId 245 * @param {object} dayTimeWeighting Data for day time weighting class 246 * @param {object} interestVectorModel Data for interest model 247 * @param {object} tileImportance Data for tile importance 248 * @param {boolean} rescale Whether to rescale to max value 249 * @param {boolean} logScale Whether to apply natural log (ln(x+ 1)) before rescaling 250 */ 251 constructor({ 252 modelId, 253 dayTimeWeighting, 254 interestVectorModel, 255 tileImportance, 256 modelType, 257 rescale = false, 258 logScale = false, 259 normalize = false, 260 normalizeL1 = false, 261 privateFeatures = [], 262 }) { 263 this.modelId = modelId; 264 this.tileImportance = tileImportance; 265 this.dayTimeWeighting = dayTimeWeighting; 266 this.interestVectorModel = interestVectorModel; 267 this.rescale = rescale; 268 this.logScale = logScale; 269 this.normalize = normalize; 270 this.normalizeL1 = normalizeL1; 271 this.modelType = modelType; 272 this.privateFeatures = privateFeatures; 273 } 274 275 static fromJSON(json) { 276 const dayTimeWeighting = DayTimeWeighting.fromJSON(json.day_time_weighting); 277 const interestVectorModel = {}; 278 const tileImportance = TileImportance.fromJSON(json.tile_importance || {}); 279 280 for (const [name, featureJson] of Object.entries(json.interest_vector)) { 281 interestVectorModel[name] = InterestFeatures.fromJSON(name, featureJson); 282 } 283 284 return new FeatureModel({ 285 dayTimeWeighting, 286 tileImportance, 287 interestVectorModel, 288 normalize: json.normalize, 289 normalizeL1: json.normalize_l1, 290 rescale: json.rescale, 291 logScale: json.log_scale, 292 clickScale: json.clickScale, 293 modelType: json.model_type, 294 privateFeatures: json.private_features ?? null, 295 }); 296 } 297 298 supportsCoarseInterests() { 299 return Object.values(this.interestVectorModel).every( 300 fm => fm.thresholds && fm.thresholds.length 301 ); 302 } 303 304 supportsCoarsePrivateInterests() { 305 return Object.values(this.interestVectorModel).every( 306 fm => 307 fm.thresholds && 308 fm.thresholds.length && 309 "diff_p" in fm && 310 "diff_q" in fm 311 ); 312 } 313 314 /** 315 * Return date intervals for the query 316 */ 317 getDateIntervals(curTimeMs) { 318 return this.dayTimeWeighting.getDateIntervals(curTimeMs); 319 } 320 321 /** 322 * Computes an interest vector or aggregate based on the model and raw sql inout. 323 * 324 * @param {object} config 325 * @param {Array.<Array.<string|number>>} config.dataForIntervals Raw aggregate output from SQL query. Could be clicks or impressions 326 * @param {{[key: string]: number}} config.indexSchema Map of keys to indices in each sub-array in dataForIntervals 327 * @param {boolean} [config.applyThresholding=false] Whether to apply thresholds 328 * @param {boolean} [config.applyDifferntialPrivacy=false] Whether to apply differential privacy. This will be used for sending to telemetry. 329 * @returns 330 */ 331 computeInterestVector({ 332 dataForIntervals, 333 indexSchema, 334 applyPostProcessing = false, 335 applyThresholding = false, 336 applyDifferentialPrivacy = false, 337 }) { 338 const processedPerTimeInterval = dataForIntervals.map( 339 (intervalData, idx) => { 340 const intervalRawTotal = {}; 341 const perPeriodTotals = {}; 342 intervalData.forEach(aggElement => { 343 const feature = aggElement[indexSchema[AggregateResultKeys.FEATURE]]; 344 let value = aggElement[indexSchema[AggregateResultKeys.VALUE]]; // In the future we could support format here 345 dictAdd(intervalRawTotal, feature, value); 346 }); 347 348 const weight = this.dayTimeWeighting.getRelativeWeight(idx); // Weight for this time interval 349 Object.values(this.interestVectorModel).forEach(interestFeature => { 350 for (const featureUsed of Object.keys( 351 interestFeature.featureWeights 352 )) { 353 if (featureUsed in intervalRawTotal) { 354 dictAdd( 355 perPeriodTotals, 356 interestFeature.name, 357 intervalRawTotal[featureUsed] * 358 weight * 359 interestFeature.featureWeights[featureUsed] 360 ); 361 } 362 } 363 }); 364 return perPeriodTotals; 365 } 366 ); 367 368 // Since we are doing linear combinations, it is fine to do the day-time weighting at this step 369 let totalResults = {}; 370 processedPerTimeInterval.forEach(intervalTotals => { 371 for (const key of Object.keys(intervalTotals)) { 372 dictAdd(totalResults, key, intervalTotals[key]); 373 } 374 }); 375 376 let numClicks = -1; 377 378 // If clicks is a feature, it's handled as special case 379 if (SPECIAL_FEATURE_CLICK in totalResults) { 380 numClicks = totalResults[SPECIAL_FEATURE_CLICK]; 381 delete totalResults[SPECIAL_FEATURE_CLICK]; 382 } 383 384 if (applyPostProcessing) { 385 totalResults = this.applyPostProcessing(totalResults); 386 } 387 388 if (this.clickScale && numClicks > 0) { 389 totalResults = dictApply(totalResults, x => x / numClicks); 390 } 391 392 const zeroFilledResult = {}; 393 // Set non-click or impression values in a way that preserves original key order 394 Object.values(this.interestVectorModel).forEach(interestFeature => { 395 zeroFilledResult[interestFeature.name] = 396 totalResults[interestFeature.name] || 0; 397 }); 398 totalResults = zeroFilledResult; 399 400 if (numClicks >= 0) { 401 // Optional 402 totalResults[SPECIAL_FEATURE_CLICK] = numClicks; 403 } 404 if (applyThresholding) { 405 this.applyThresholding(totalResults, applyDifferentialPrivacy); 406 } 407 return totalResults; 408 } 409 410 /** 411 * Convert float to discrete values, based on threshold parmaters for each feature in the model. 412 * Values are modifified in place on provided dictionary. 413 * 414 * @param {object} valueDict of all values in model 415 * @param {boolean} applyDifferentialPrivacy whether to apply differential privacy as well as thresholding. 416 */ 417 applyThresholding(valueDict, applyDifferentialPrivacy = false) { 418 for (const key of Object.keys(valueDict)) { 419 if (key in this.interestVectorModel) { 420 valueDict[key] = this.interestVectorModel[key].applyThresholds( 421 valueDict[key], 422 applyDifferentialPrivacy 423 ); 424 if (applyDifferentialPrivacy) { 425 valueDict[key] = this.interestVectorModel[ 426 key 427 ].applyDifferentialPrivacy(valueDict[key], applyDifferentialPrivacy); 428 } 429 } 430 } 431 } 432 433 applyPostProcessing(valueDict) { 434 let res = valueDict; 435 if (this.logScale) { 436 res = dictApply(valueDict, x => Math.log(x + 1)); 437 } 438 439 if (this.rescale) { 440 let divisor = Math.max(...Object.values(res)); 441 if (divisor <= 1e-6) { 442 divisor = 1e-6; 443 } 444 res = dictApply(res, x => x / divisor); 445 } 446 447 if (this.normalizeL1) { 448 let magnitude = Object.values(res).reduce((sum, c) => sum + c, 0); 449 if (magnitude <= 1e-6) { 450 magnitude = 1e-6; 451 } 452 res = dictApply(res, x => x / magnitude); 453 } 454 455 if (this.normalize) { 456 let magnitude = Math.sqrt( 457 Object.values(res).reduce((sum, c) => sum + c ** 2, 0) 458 ); 459 if (magnitude <= 1e-6) { 460 magnitude = 1e-6; 461 } 462 res = dictApply(res, x => x / magnitude); 463 } 464 return res; 465 } 466 467 /** 468 * Computes interest vectors based on click-through rate (CTR) by dividing the click dictionary 469 * by the impression dictionary. Applies differential privacy using Laplace noise, and optionally 470 * computes coarse (without noise) and coarse-private interest vectors if supported by the model. 471 * 472 * In all cases model_id is returned. 473 * 474 * @param {object} params - Function parameters. 475 * @param {{[key: string]: number}} params.clickDict - A dictionary of interest keys to click counts. 476 * @param {{[key: string]: number}} params.impressionDict - A dictionary of interest keys to impression counts. 477 * @param {string} [params.model_id="unknown"] - Identifier for the model used in generating the vectors. 478 * @param {boolean} [params.condensePrivateValues=true] - If true, condenses coarse private interest values into an array format. 479 * 480 * @returns {object} result - An object containing one or more of the following: 481 * @returns {object} result.inferredInterest - A dictionary of private inferred interest scores 482 * @returns {object} [result.coarseInferredInterests] - A dictionary of thresholded interest scores (non-private), if supported. 483 * @returns {object} [result.coarsePrivateInferredInterests] - A dictionary of thresholded interest scores with differential privacy, if supported. 484 */ 485 computeCTRInterestVectors({ 486 clicks, 487 impressions, 488 model_id = "unknown", 489 condensePrivateValues = true, 490 }) { 491 let inferredInterests = divideDict(clicks, impressions); 492 493 const originalInterestValues = { ...inferredInterests }; 494 495 const resultObject = { 496 inferredInterests: { ...inferredInterests, model_id }, 497 }; 498 499 if (this.supportsCoarseInterests()) { 500 // always true 501 const coarseValues = this.applyPostProcessing({ 502 ...originalInterestValues, 503 }); 504 this.applyThresholding(coarseValues, false); 505 resultObject.coarseInferredInterests = { ...coarseValues, model_id }; 506 } 507 508 if (this.supportsCoarsePrivateInterests()) { 509 let coarsePrivateValues = { ...originalInterestValues }; 510 if (this.privateFeatures) { 511 // filter here for private features 512 coarsePrivateValues = Object.fromEntries( 513 Object.entries(coarsePrivateValues).filter(([key]) => 514 this.privateFeatures.includes(key) 515 ) 516 ); 517 } 518 coarsePrivateValues = this.applyPostProcessing(coarsePrivateValues); 519 this.applyThresholding(coarsePrivateValues, true); 520 521 if (condensePrivateValues) { 522 resultObject.coarsePrivateInferredInterests = { 523 // Key order preserved in Gecko 524 values: Object.values(coarsePrivateValues), 525 model_id, 526 }; 527 } else { 528 resultObject.coarsePrivateInferredInterests = { 529 ...coarsePrivateValues, 530 model_id, 531 }; 532 } 533 } 534 return resultObject; 535 } 536 537 /** 538 * Computes various types of interest vectors from user interaction data across intervals. 539 * Returns standard inferred interests (with Laplace noise), and optionally returns 540 * coarse-grained and private-coarse versions depending on model support. 541 * 542 * @param {object} params - The function parameters. 543 * @param {Array<object>} params.dataForIntervals - An array of data points grouped by time intervals (e.g., clicks, impressions). 544 * @param {object} params.indexSchema - Schema that defines how interest indices should be computed. 545 * @param {string} [params.model_id="unknown"] - Identifier for the model used to produce these vectors. 546 * @param {boolean} [params.condensePrivateValues=true] - If true, condenses coarse private interest values into an array format. 547 * 548 * @returns {object} result - An object containing the computed interest vectors. 549 * @returns {object} result.inferredInterests - A dictionary of private inferred interest values, with `model_id`. 550 * @returns {object} [result.coarseInferredInterests] - Coarse thresholded (non-private) interest vector, if supported. 551 * @returns {object | {values: Array<number>, model_id: string}} [result.coarsePrivateInferredInterests] - Coarse and differentially private interests. 552 * If `condensePrivateValues` is true, returned as an object with a `values` array; otherwise, as a dictionary. 553 */ 554 computeInterestVectors({ 555 dataForIntervals, 556 indexSchema, 557 model_id = "unknown", 558 condensePrivateValues = true, 559 }) { 560 const result = {}; 561 let inferredInterests; 562 let coarseInferredInterests; 563 let coarsePrivateInferredInterests; 564 565 inferredInterests = this.computeInterestVector({ 566 dataForIntervals, 567 indexSchema, 568 }); 569 result.inferredInterests = { ...inferredInterests }; 570 571 if (this.supportsCoarseInterests()) { 572 coarseInferredInterests = this.computeInterestVector({ 573 dataForIntervals, 574 indexSchema, 575 applyThresholding: true, 576 }); 577 if (coarseInferredInterests) { 578 result.coarseInferredInterests = { 579 ...coarseInferredInterests, 580 model_id, 581 }; 582 } 583 } 584 585 if (this.supportsCoarsePrivateInterests()) { 586 coarsePrivateInferredInterests = this.computeInterestVector({ 587 dataForIntervals, 588 indexSchema, 589 applyThresholding: true, 590 applyDifferentialPrivacy: true, 591 }); 592 if (coarsePrivateInferredInterests) { 593 if (condensePrivateValues) { 594 result.coarsePrivateInferredInterests = { 595 // Key order preserved in Gecko 596 values: Object.values(coarsePrivateInferredInterests), 597 model_id, 598 }; 599 } else { 600 result.coarsePrivateInferredInterests = { 601 ...coarsePrivateInferredInterests, 602 model_id, 603 }; 604 } 605 } 606 } 607 return result; 608 } 609 }