RankShortcutsWorkerClass.mjs (18812B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

import { thompsonSampleSort } from "resource://newtab/lib/SmartShortcutsRanker/ThomSample.mjs";

/**
 * Linear interpolation of values in a histogram; wraps from the end back to
 * the beginning (e.g. hour 23.5 blends hist[23] and hist[0]).
 *
 * @param {number[]} hist Histogram of counts.
 * @param {number} t Fractional time/index to interpolate at.
 * @returns {number|number[]} Interpolated value; an empty hist is returned
 *   unchanged so callers can detect "no data".
 */
export function interpolateWrappedHistogram(hist, t) {
  if (!hist.length) {
    return hist;
  }
  const n = hist.length;
  const lo = Math.floor(t) % n;
  const hi = (lo + 1) % n;
  const frac = t - Math.floor(t);

  // `%` yields negative remainders for negative t; wrap into [0, n).
  const loWrapped = (lo + n) % n;
  const hiWrapped = (hi + n) % n;

  return (1 - frac) * hist[loWrapped] + frac * hist[hiWrapped];
}

/**
 * Bayesian update of vec towards the prior pvec, weighted by tau.
 *
 * @param {number[]} vec Array of values to update.
 * @param {number[]} pvec Normalized array of prior values.
 * @param {number} tau Strength of the update.
 * @returns {number[]} Normalized array; vec is returned unchanged when the
 *   shapes mismatch or the denominator would be zero.
 */
export function bayesHist(vec, pvec, tau) {
  if (!pvec || !vec.length || vec.length !== pvec.length) {
    return vec;
  }
  const vsum = vec.reduce((a, b) => a + b, 0);
  if (vsum + tau === 0) {
    return vec;
  }
  return vec.map((v, i) => (v + tau * pvec[i]) / (vsum + tau));
}

// Current local hour as a float, e.g. 13.5 for 13:30:00.
function getCurrentHourOfDay() {
  const now = new Date();
  return now.getHours() + now.getMinutes() / 60 + now.getSeconds() / 3600;
}

// Current local day of week as an integer (0 = Sunday), not a float.
function getCurrentDayOfWeek() {
  const now = new Date();
  return now.getDay();
}

/**
 * Normalize values in an array by the sum of the array.
 *
 * @param {number[]} vec Array of values to normalize.
 * @returns {number[]} Normalized array; returned unchanged when empty, when
 *   the sum is zero, or when the sum is not finite.
 */
export function sumNorm(vec) {
  if (!vec.length) {
    return vec;
  }
  const vsum = vec.reduce((a, b) => a + b, 0);

  // BUGFIX: the guard used to be `!Number.isFinite(vsum) || vsum !== 0`,
  // which divided by non-finite sums and produced NaN/0 entries. Only
  // normalize when the sum is a finite, nonzero number.
  if (Number.isFinite(vsum) && vsum !== 0) {
    return vec.map(v => v / vsum);
  }
  return vec;
}

/**
 * Normalizes all values in vals and updates the running mean and variance
 * in the norm object. The norm object stores the info needed to maintain an
 * exponentially-weighted running mean/variance over a feature; it is kept in
 * the shortcut cache between calls.
 *
 * @param {number[]} vals Scores to normalize.
 * @param {object} input_normobj {beta, mean, var} running-stats state, or a
 *   falsy/invalid value to (re)initialize from vals[0].
 * @returns {[number[], object]} Normalized values and the updated state.
 */
export function normUpdate(vals, input_normobj) {
  if (!vals.length) {
    return [vals, input_normobj];
  }
  let normobj = {};
  if (
    !input_normobj ||
    !Number.isFinite(input_normobj.mean) ||
    !Number.isFinite(input_normobj.var)
  ) {
    // Fresh state: seed the mean with the first value, unit variance.
    normobj = { beta: 1e-3, mean: vals[0], var: 1.0 };
  } else {
    // Copy so the caller's cached object is not mutated.
    normobj = { ...input_normobj };
  }
  let delta = 0;
  for (const v of vals) {
    delta = v - normobj.mean;
    normobj.mean += normobj.beta * delta;
    normobj.var =
      (1 - normobj.beta) * normobj.var + normobj.beta * delta * delta;
  }
  // Floor the variance so the division below cannot blow up.
  if (normobj.var <= 1e-8) {
    normobj.var = 1e-8;
  }

  const std = Math.sqrt(normobj.var);
  const new_vals = vals.map(v => (v - normobj.mean) / std);
  return [new_vals, normobj];
}

/**
 * Normalize a dictionary of {key: hist[]} using squared values and
 * column-wise normalization. Returns {key: normedHist[]} where each hist[j]
 * is divided by sum_k hist_k[j]^2.
 *
 * @param {{[key: string]: number[]}} dict Maps keys to arrays of P(t|s)
 *   values; all arrays are assumed to have the same length.
 * @returns {{[key: string]: number[]}} New dictionary with normalized
 *   histograms (P(s|t)).
 */
export function normHistDict(dict) {
  const keys = Object.keys(dict);
  if (keys.length === 0) {
    return {};
  }

  const t = dict[keys[0]].length;

  // Square all hist values to emphasize differences.
  const squared = {};
  for (const [key, hist] of Object.entries(dict)) {
    squared[key] = hist.map(v => v * v);
  }

  // Compute column-wise sums across all keys.
  const colSums = Array(t).fill(0);
  for (let j = 0; j < t; j++) {
    for (const key of keys) {
      colSums[j] += squared[key][j];
    }
  }

  // Normalize each column; an all-zero column maps to 0, not NaN.
  const normalized = {};
  for (const [key, row] of Object.entries(squared)) {
    normalized[key] = row.map((val, j) => (colSums[j] ? val / colSums[j] : 0));
  }

  return normalized;
}

/**
 * Compute a weighted linear combination of scores.
 *
 * @param {object} scores Dictionary of feature -> score.
 * @param {object} weights Dictionary of feature -> weight.
 * @returns {number} Sum over features of score * weight (missing scores
 *   count as 0).
 */
export function computeLinearScore(scores, weights) {
  let final = 0;
  for (const [feature, weight] of Object.entries(weights)) {
    final += (scores[feature] ?? 0) * weight;
  }
  return final;
}

/**
 * Turn per-guid seasonality histograms into per-guid weights at the current
 * time, via a Bayes-smoothed P(site | time).
 *
 * @param {string[]} guids Guids to emit weights for, in output order.
 * @param {{hists: object, pvec: number[]}} input Per-guid histograms and the
 *   global prior histogram.
 * @param {number} tau Strength of the Bayesian smoothing.
 * @param {number} curtime Current (possibly fractional) time index.
 * @returns {number[]} Normalized weights aligned with guids.
 */
export function processSeasonality(guids, input, tau, curtime) {
  const { hists, pvec } = input;
  // Normalize new/rare site visits using general seasonality to control
  // against "fliers".
  const b_hists = Object.fromEntries(
    Object.entries(hists).map(([guid, hist]) => [
      guid,
      bayesHist(hist, pvec, tau),
    ])
  );
  // Convert to P(time | site), prepare for Bayes.
  const timegivensite = Object.fromEntries(
    Object.entries(b_hists).map(([guid, hist]) => [guid, sumNorm(hist)])
  );
  // Use Bayes to convert to P(site | time).
  const sitegiventime = normHistDict(timegivensite);
  // Interpolate P(site | time) to weight each site.
  const weights = Object.fromEntries(
    Object.entries(sitegiventime).map(([guid, hist]) => [
      guid,
      interpolateWrappedHistogram(hist, curtime),
    ])
  );
  // Make it an array aligned with guids.
  const weightsarr = guids.map(key => weights[key]);
  // Normalize to account for interpolation oddities.
  return sumNorm(weightsarr);
}

// Visit type codes in Places.
const TYPE = {
  LINK: 1,
  TYPED: 2,
  BOOKMARK: 3,
  EMBED: 4,
  REDIRECT_PERM: 5,
  REDIRECT_TEMP: 6,
  DOWNLOAD: 7,
  FRAMED_LINK: 8,
  RELOAD: 9,
};

// Default bonus map; copied from frecency.
const TYPE_SCORE = {
  [TYPE.TYPED]: 200,
  [TYPE.LINK]: 100,
  [TYPE.BOOKMARK]: 75,
  [TYPE.RELOAD]: 0,
  [TYPE.REDIRECT_PERM]: 0,
  [TYPE.REDIRECT_TEMP]: 0,
  [TYPE.EMBED]: 0,
  [TYPE.FRAMED_LINK]: 0,
  [TYPE.DOWNLOAD]: 0,
};

/**
 * Build features that break apart frecency into:
 * frequency, recency, re-frecency.
 * frequency is total_visits*F(visit_types);
 * recency is exponential decay of visit ages;
 * re-frecency is dot_product(frequency, recency).
 *
 * The difference between frecency and re-frecency is how the recency calc is
 * done: recency is exponential decay instead of the frecency buckets, for
 * interpretability.
 *
 * @param {object} visitsByGuid guid -> [{visit_type, visit_date_us}, ...].
 * @param {object} visitCounts guid -> total visit count.
 * @param {object} opts Options for controlling the calc.
 * @param {number} [opts.halfLifeDays=28] Recency half-life; 28 reproduces
 *   frecency.
 * @returns {Promise<{refre: object, rece: object, freq: object, unid: object}>}
 *   Per-feature dictionaries keyed by guid.
 */
export async function buildFrecencyFeatures(
  visitsByGuid,
  visitCounts,
  opts = {}
) {
  const { halfLifeDays = 28 } = opts;

  const nowMs = Date.now();
  const dayMs = 864e5;
  const tauDays = halfLifeDays / Math.log(2);

  // Transposed output: { refre: {guid:...}, rece: {guid:...}, ... }.
  const byFeature = { refre: {}, rece: {}, freq: {}, unid: {} };

  for (const [guid, visits] of Object.entries(visitsByGuid)) {
    // Take the log here; original frecency grows linearly with visits,
    // let's test out log growth.
    const total = Math.log((visitCounts?.[guid] ?? 0) + 1);

    const time_scores = [];
    const type_scores = [];

    const days_visited = new Set([]);

    for (let i = 0; i < visits.length; i++) {
      const { visit_date_us, visit_type } = visits[i];
      // visit_date_us is in microseconds; convert to an age in days.
      const ageDays = (nowMs - visit_date_us / 1000) / dayMs;
      days_visited.add(Math.floor(ageDays));
      const t = Math.exp(-ageDays / tauDays); // exponential decay
      const b = TYPE_SCORE[visit_type] ?? 0;
      time_scores.push(t);
      type_scores.push(b);
    }
    // dot captures the interaction between time and type, basically frecency.
    const dot = time_scores.reduce(
      (s, x, i) => s + x * (type_scores[i] ?? 0),
      0
    );
    // time_score is a pure recency feature.
    const time_score = time_scores.reduce((s, x) => s + x, 0);
    // type_score is a frequency feature weighted by how the user got to
    // the site.
    const type_score = type_scores.reduce((s, x) => s + x, 0);

    byFeature.refre[guid] = total * dot; // interaction (≈ frecency-like)
    byFeature.rece[guid] = time_score; // recency-only
    byFeature.freq[guid] = total * type_score; // frequency-only (lifetime-weighted)
    byFeature.unid[guid] = days_visited.size; // distinct days visited
  }

  return byFeature;
}

// Small helpers used only here.

// Project a per-guid dictionary into an array aligned with guids.
const _projectByGuid = (guids, dict) => guids.map(g => dict[g]);

// Normalize a raw feature vector, stash the updated running stats, and
// write the per-guid normalized values into score_map.
const _applyVectorFeature = (
  namei,
  rawVec,
  norms,
  score_map,
  guids,
  updated_norms
) => {
  const [vals, n] = normUpdate(rawVec, norms[namei]);
  updated_norms[namei] = n;
  guids.forEach((g, i) => {
    score_map[g][namei] = vals[i];
  });
};

/**
 * Score every top site on the enabled features and combine them into a
 * final linear score per guid.
 *
 * @param {object} input Guids, per-feature raw scores, running norms,
 *   weights, and feature/prior configuration.
 * @returns {Promise<{score_map: object, norms: object}>} Per-guid feature
 *   and final scores, plus the updated running-normalization state.
 */
export async function weightedSampleTopSites(input) {
  const updated_norms = {};
  const score_map = Object.fromEntries(
    input.guid.map(guid => [
      guid,
      Object.fromEntries(input.features.map(f => [f, 0])),
    ])
  );

  // Table-driven vector features that already exist as per-guid dictionaries.
  const dictFeatures = {
    bmark: () => _projectByGuid(input.guid, input.bmark_scores),
    open: () => _projectByGuid(input.guid, input.open_scores),
    rece: () => _projectByGuid(input.guid, input.rece_scores),
    freq: () => _projectByGuid(input.guid, input.freq_scores),
    refre: () => _projectByGuid(input.guid, input.refre_scores),
    unid: () => _projectByGuid(input.guid, input.unid_scores),
  };

  // 1) Simple vector features.
  for (const namei of Object.keys(dictFeatures)) {
    if (input.features.includes(namei)) {
      _applyVectorFeature(
        namei,
        dictFeatures[namei](),
        input.norms,
        score_map,
        input.guid,
        updated_norms
      );
    }
  }

  // 2) CTR feature (derived vector, add-one smoothed).
  if (input.features.includes("ctr")) {
    const raw_ctr = input.impressions.map(
      (imp, i) => (input.clicks[i] + 1) / (imp + 1)
    );
    _applyVectorFeature(
      "ctr",
      raw_ctr,
      input.norms,
      score_map,
      input.guid,
      updated_norms
    );
  }

  // 3) Thompson feature (special case: sampled thetas, unsorted).
  if (input.features.includes("thom")) {
    const ranked_thetas = await thompsonSampleSort({
      key_array: input.guid,
      obs_positive: input.clicks,
      obs_negative: input.impressions.map((imp, i) =>
        Math.max(0, imp - input.clicks[i])
      ),
      prior_positive: input.clicks.map(() => input.alpha),
      prior_negative: input.impressions.map(() => input.beta),
      do_sort: false,
    });
    const [vals, n] = normUpdate(ranked_thetas[1], input.norms.thom);
    updated_norms.thom = n;
    input.guid.forEach((g, i) => {
      score_map[g].thom = vals[i];
    });
  }

  // 4) Frecency vector (already an array aligned with guids).
  if (input.features.includes("frec")) {
    const [vals, n] = normUpdate(input.frecency, input.norms.frec);
    updated_norms.frec = n;
    input.guid.forEach((g, i) => {
      score_map[g].frec = vals[i];
    });
  }

  // 5) Seasonality features.
  if (input.features.includes("hour")) {
    const raw = processSeasonality(
      input.guid,
      input.hourly_seasonality,
      input.tau,
      getCurrentHourOfDay()
    );
    _applyVectorFeature(
      "hour",
      raw,
      input.norms,
      score_map,
      input.guid,
      updated_norms
    );
  }
  if (input.features.includes("daily")) {
    const raw = processSeasonality(
      input.guid,
      input.daily_seasonality,
      input.tau,
      getCurrentDayOfWeek()
    );
    _applyVectorFeature(
      "daily",
      raw,
      input.norms,
      score_map,
      input.guid,
      updated_norms
    );
  }

  // 6) Bias (constant intercept feature).
  if (input.features.includes("bias")) {
    input.guid.forEach(g => {
      score_map[g].bias = 1;
    });
  }

  // 7) Final score: linear combination of the per-guid features.
  for (const g of input.guid) {
    score_map[g].final = computeLinearScore(score_map[g], input.weights);
  }

  return { score_map, norms: updated_norms };
}

/**
 * Rescale the weight vector so its L2 norm does not exceed maxNorm.
 *
 * @param {object} weights Feature -> weight.
 * @param {number} [maxNorm=100] Maximum allowed L2 norm.
 * @returns {object} A new scaled object when clamping occurs; otherwise the
 *   original weights object.
 */
export function clampWeights(weights, maxNorm = 100) {
  const norm = Math.hypot(...Object.values(weights));
  if (norm > maxNorm) {
    const scale = maxNorm / norm;
    return Object.fromEntries(
      Object.entries(weights).map(([k, v]) => [k, v * scale])
    );
  }
  return weights;
}

/**
 * Update the logistic regression weights for shortcuts ranking using
 * gradient descent. NOTE: unless clamping kicks in, the update is applied
 * to input.weights in place and the same object is returned.
 *
 * @param {object} input
 * @param {string[]} input.features
 *   The list of feature names (keys in weights and score_map[guid]).
 * @param {object} input.data
 *   Mapping guid -> { clicks: number, impressions: number }.
 * @param {object} input.scores
 *   Mapping guid -> { final: number, [feature]: number } (final = w·x).
 * @param {object} input.weights
 *   Current weight vector, keyed by feature.
 * @param {number} input.eta
 *   Learning rate.
 * @param {number} [input.click_bonus=1]
 *   Multiplicative weight for click events.
 * @param {boolean} [do_clamp=true]
 *   If true, clamp weights after update.
 * @returns {object}
 *   Updated weights object.
 */
export function updateWeights(input, do_clamp = true) {
  const { features } = input;
  const { data } = input;
  const score_map = input.scores;
  let { weights } = input;
  const { eta } = input;
  const click_bonus = input.click_bonus ?? 1;

  // Null-prototype dictionary: feature names can't collide with Object
  // prototype members.
  const grads = Object.create(null);
  let total = 0;

  // Init gradient accumulator.
  for (let j = 0; j < features.length; j += 1) {
    grads[features[j]] = 0;
  }

  for (const guid of Object.keys(data)) {
    if (
      score_map &&
      score_map[guid] &&
      typeof score_map[guid].final === "number"
    ) {
      /* Impressions without a click can happen for many reasons
         that are unrelated to the item. Clicks are almost
         always a deliberate action. Therefore, we should
         learn more from clicks. The click_bonus can be viewed
         as a weight on click events and biases the model
         to learning more from clicks relative to impressions.
      */
      const clicks = (data[guid].clicks | 0) * click_bonus;
      const impressions = data[guid].impressions | 0;
      if (clicks === 0 && impressions === 0) {
        continue;
      }

      const z = score_map[guid].final;
      const p = 1 / (1 + Math.exp(-z)); // sigmoid
      // Gradient of the log-loss: clicks pull towards 1, impressions
      // towards 0.
      const factor = clicks * (p - 1) + impressions * p;

      for (const feature of features) {
        const fval = score_map[guid][feature] || 0;
        grads[feature] += factor * fval;
      }

      total += clicks + impressions;
    }
  }

  if (total > 0) {
    // Average the gradient over the observed events.
    const scale = eta / total;
    for (const feature of features) {
      weights[feature] -= scale * grads[feature];
    }
  }
  if (do_clamp) {
    weights = clampWeights(weights);
  }
  return weights;
}

/**
 * Reorder GUIDs into their desired positions.
 * - First claimant for a slot gets it.
 * - Collisions are resolved by filling remaining slots left→right in
 *   original order.
 * - If a requested position >= guids.length, that item goes to the last
 *   slot.
 *
 * @param {string[]} guids
 * @param {Map<string, number>} posMap Map of guid → desired 0-based index.
 * @returns {string[]} Reordered guids.
 */
export function placeGuidsByPositions(guids, posMap) {
  const size = guids.length;
  const out = Array(size).fill(null);
  const placed = new Set();

  // Pass 1: try to place at desired index (clamp > size-1 to size-1).
  for (let i = 0; i < guids.length; i++) {
    const g = guids[i];
    let idx = posMap.get(g);
    if (!Number.isInteger(idx) || idx < 0) {
      continue;
    }

    if (idx >= size) {
      idx = size - 1; // clamp oversize → last slot
    }

    if (out[idx] === null) {
      out[idx] = g;
      placed.add(i);
    }
    // Collisions are handled in pass 2.
  }

  // Pass 2: fill holes with unplaced guids, in input order.
  let cursor = 0;
  const putNext = item => {
    while (cursor < size && out[cursor] !== null) {
      cursor++;
    }
    if (cursor < size) {
      out[cursor++] = item;
    }
  };

  for (let i = 0; i < guids.length; i++) {
    if (!placed.has(i)) {
      putNext(guids[i]);
    }
  }

  return out;
}

/**
 * Given last-click positions aligned to guids, shift by numSponsored (clamp
 * negatives to 0, preserve null/undefined), then place GUIDs accordingly.
 *
 * Positions and guids are each arrays in the same order; here we map
 * guid to positions. We do this here to put as much as possible on the
 * promise. We shift from "positions", which is absolute shortcut position,
 * to array index, which ignores the sponsored shortcuts.
 *
 * This function has a known shortcoming where the positions will be
 * incorrect if the number of sponsored shortcuts changes. We accept this
 * because 1) changes should be very rare, 2) it fails safely, 3) the
 * differences should be off by the change in the number of sponsored
 * shortcuts, which is at most 3.
 *
 * @param {(number|null|undefined)[]} positions Aligned with guids.
 * @param {string[]} guids
 * @param {number} numSponsored
 * @returns {string[]} Reordered guids.
 */
export function applyStickyClicks(positions, guids, numSponsored) {
  const guidToPos = new Map(
    guids.map((g, i) => {
      const pos = positions[i];
      // `== null` preserves both null and undefined (jsdoc contract); the
      // old `=== null` let undefined fall through to NaN, which pass 1 of
      // placeGuidsByPositions skipped anyway — same final ordering.
      if (pos == null) {
        return [g, null];
      }
      const shifted = pos - numSponsored;
      return [g, shifted < 0 ? 0 : shifted]; // clamp negatives
    })
  );

  return placeGuidsByPositions(guids, guidToPos);
}

/**
 * Worker-side facade: each method simply forwards to the module-level
 * function so the ranking work runs off the main thread.
 */
export class RankShortcutsWorker {
  async weightedSampleTopSites(input) {
    return weightedSampleTopSites(input);
  }
  async sumNorm(vec) {
    return sumNorm(vec);
  }
  async updateWeights(input) {
    return updateWeights(input);
  }
  async buildFrecencyFeatures(raw_frec, visit_totals) {
    return buildFrecencyFeatures(raw_frec, visit_totals);
  }
  async applyStickyClicks(positions, topsites, numSponsored) {
    return applyStickyClicks(positions, topsites, numSponsored);
  }
}