/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

/*
  Smart Shortcuts uses experimental prefs on newtabTrainhopConfig.
  These prefs can be accessed at prefValues.trainhopConfig.smartShortcuts

  * enabled: do smart shortcuts (TopSitesFeed)
  * over_sample_multiplier: number of rows of shortcuts to consider for smart shortcuts
  *   a user has n rows, we then query for n*over_sample_multiplier items to rank
  *   (TopSitesFeed)
  * force_log: log shortcuts interactions regardless of enabled (SmartShortcutsFeed)
  * features: array of feature name strings
  * eta: learning rate for feature weights. divided by 10000
  * click_bonus: multiplier applied to clicks. divided by 10
  * positive_prior: thompson sampling alpha
  * negative_prior: thompson sampling beta
  * sticky_numimps: number of impressions for sticky clicks. 0 turns off
  * telem: attach rounded scores and weights to the ranked sites for telemetry
  *
  * thom_weight: weight of thompson sampling. divided by 100
  * frec_weight: weight of frecency. divided by 100
  * hour_weight: weight of hourly seasonality. divided by 100
  * daily_weight: weight of daily seasonality. divided by 100
  * bmark_weight: weight of is_bookmark. divided by 100
  * rece_weight: weight of recency. divided by 100
  * freq_weight: weight of frequency. divided by 100
  * refre_weight: weight of re-done frecency. divided by 100
  * open_weight: weight of is_open. divided by 100
  * unid_weight: weight of unique days visited. divided by 100
  * ctr_weight: weight of ctr. divided by 100
  * bias_weight: weight of bias. divided by 100

*/

const SHORTCUT_TABLE = "moz_newtab_shortcuts_interaction";
const PLACES_TABLE = "moz_places";
const VISITS_TABLE = "moz_historyvisits";
const BOOKMARK_TABLE = "moz_bookmarks";
const BASE_SEASONALITY_CACHE_EXPIRATION = 1e3 * 60 * 60 * 24 * 7; // 7 days in milliseconds
const ETA = 0;
const CLICK_BONUS = 10;

// Maps each feature name to the pref holding its weight and the default
// weight (in percent) used when the pref is missing or not a finite number.
const FEATURE_META = {
  thom: { pref: "thom_weight", def: 5 },
  frec: { pref: "frec_weight", def: 95 },
  hour: { pref: "hour_weight", def: 0 },
  daily: { pref: "daily_weight", def: 0 },
  bmark: { pref: "bmark_weight", def: 0 },
  rece: { pref: "rece_weight", def: 0 },
  freq: { pref: "freq_weight", def: 0 },
  refre: { pref: "refre_weight", def: 0 },
  open: { pref: "open_weight", def: 0 },
  unid: { pref: "unid_weight", def: 0 },
  ctr: { pref: "ctr_weight", def: 0 },
  bias: { pref: "bias_weight", def: 1 },
};

const FEATURES = ["frec", "thom", "bias"];
const SHORTCUT_POSITIVE_PRIOR = 1;
const SHORTCUT_NEGATIVE_PRIOR = 1000;
const STICKY_NUMIMPS = 0;
const SMART_TELEM = false;

const lazy = {};

ChromeUtils.defineESModuleGetters(lazy, {
  BasePromiseWorker: "resource://gre/modules/PromiseWorker.sys.mjs",
  NewTabUtils: "resource://gre/modules/NewTabUtils.sys.mjs",
  PersistentCache: "resource://newtab/lib/PersistentCache.sys.mjs",
  PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs",
  SessionStore: "resource:///modules/sessionstore/SessionStore.sys.mjs",
});

import { sortKeysValues } from "resource://newtab/lib/SmartShortcutsRanker/ThomSample.mjs";

// helper for lowering precision of numbers, save space in telemetry
// longest string i can come up with out of this function:
// -0.000009999 which is 12 characters
export const roundNum = (x, sig = 4, eps = 1e-6) => {
  // Non-numbers, NaN and +/-Infinity are passed through untouched.
  if (!Number.isFinite(x)) {
    return x;
  }

  // clip very small absolute values to zero
  if (Math.abs(x) < eps) {
    return 0;
  }

  const n = Number(x.toPrecision(sig));

  // normalize -0 to 0
  return Object.is(n, -0) ? 0 : n;
};

/**
 * Build the row list of a SQL `VALUES` clause from places GUIDs.
 *
 * Every guid is coerced to a string and single quotes are doubled so the
 * value is a valid SQL string literal. Duplicates are dropped so a repeated
 * guid cannot multiply the rows joined (and aggregated) against it.
 *
 * @param {Array<*>} guids Non-empty list of guids
 * @returns {string} e.g. `('abc'), ('def')`
 */
function buildGuidValuesClause(guids) {
  const unique = new Set(guids.map(guid => String(guid)));
  return Array.from(unique, guid => `('${guid.replace(/'/g, "''")}')`).join(
    ", "
  );
}

/**
 * For each guid, look at its last `numImps` shortcut interactions and, if a click
 * occurred, return the position of the (most recent) click within that window.
 *
 * @param {string[]} guidList Array of places guids
 * @param {string} table Shortcuts interactions table name (columns: place_id, event_type, tile_position, timestamp_s, …)
 * @param {string} placeTable moz_places table name (columns: id, guid, …)
 * @param {number} [numImps=10] Size of the interaction window. Truncated to an
 *   integer; a non-positive or non-numeric window yields `null` for every guid.
 * @returns {Promise<(number|null)[]>} Array aligned with guidList: position or null
 */
export async function fetchShortcutLastClickPositions(
  guidList,
  table,
  placeTable,
  numImps = 10
) {
  if (!guidList?.length) {
    return [];
  }

  // The window size is interpolated into `LIMIT`, so it must be a plain
  // positive integer: NaN or a fractional value is not a valid LIMIT operand
  // and a negative LIMIT means "no limit" in SQLite.
  const windowSize = Math.trunc(Number(numImps));
  if (!(windowSize > 0)) {
    return guidList.map(() => null);
  }

  const valuesClause = buildGuidValuesClause(guidList);

  // We:
  // 1) map input GUIDs -> place_id
  // 2) rank each guid's interactions by timestamp desc (and rowid as tie-breaker)
  // 3) keep only the last numImps interactions
  // 4) within those numImps, pick the most recent row that is a click (event_type=1), and grab its position
  //
  // Note: the final LEFT JOIN keeps a row for every GUID found in the places
  // table, even when it has no click in the window.
  const sql = `
    -- input array of strings becomes column vector
    WITH input_keys(guid) AS (
      VALUES ${valuesClause}
    ),
    -- build map guid->place.id
    place_ids AS (
      SELECT i.guid, p.id AS place_id
      FROM input_keys i
      JOIN ${placeTable} p ON p.guid = i.guid
    ),
    -- grab the last N iteractions for each place_id
    recent AS (
      SELECT
        pi.guid,
        t.tile_position AS position,
        t.event_type AS event_type,
        t.timestamp_s AS ts
      FROM place_ids pi
      JOIN ${table} t
        ON t.place_id = pi.place_id
        AND t.rowid IN (
          SELECT tt.rowid
          FROM ${table} tt
          WHERE tt.place_id = pi.place_id
          ORDER BY tt.timestamp_s DESC, tt.rowid DESC
          LIMIT ${windowSize}
        )
    ),
    -- amongst the last numImps, get most recent click
    -- build a column for rank of each event in a guid sublist
    -- sort by putting clicks at top then time
    -- get only the first position (r=1)
    -- only get clicks (event_type=1)
    -- returns null if no click events
    best AS (
      SELECT guid, position
      FROM (
        SELECT
          guid, position, event_type, ts,
          ROW_NUMBER() OVER (
            PARTITION BY guid
            ORDER BY (event_type = 1) DESC, ts DESC
          ) AS r
        FROM recent
      )
      WHERE r = 1 AND event_type = 1
    )
    -- map back to guid
    SELECT pi.guid AS key, b.position
    FROM place_ids pi
    LEFT JOIN best b ON b.guid = pi.guid;
  `;

  const { activityStreamProvider } = lazy.NewTabUtils;
  const rows = await activityStreamProvider.executePlacesQuery(sql);

  // rows: [guid, position|null][]
  const posByGuid = new Map(rows.map(([key, position]) => [key, position]));
  // Return array aligned to input order (null if no click in the window)
  return guidList.map(g => (posByGuid.has(g) ? posByGuid.get(g) : null));
}

/**
 * Collect the URL currently shown in every tab of the session state reported
 * by SessionStore.
 *
 * @returns {Promise<string[]>} One URL per tab that has a current entry with a url
 */
export async function getOpenTabURLsFromSessionLive() {
  // Ensure SessionStore is ready (important at early startup)
  if (lazy.SessionStore.promiseInitialized) {
    await lazy.SessionStore.promiseInitialized;
  }
  const stateJSON = lazy.SessionStore.getBrowserState(); // sync string
  const state = JSON.parse(stateJSON); // { windows: [...] }

  const urls = [];
  for (const win of state?.windows ?? []) {
    for (const tab of win?.tabs ?? []) {
      const i = Math.max(0, (tab.index ?? 1) - 1); // current entry is 1-based
      // Fall back to the last history entry when the index is out of range.
      const entry = tab.entries?.[i] ?? tab.entries?.[tab.entries.length - 1];
      const url = entry?.url;
      if (url) {
        urls.push(url);
      }
    }
  }
  return urls;
}

/**
 * Pair every open tab URL with its places guid.
 *
 * @returns {Promise<Array<{url: string, guid: string|null}>>} guid is null for
 *   URLs not starting with "http" and for URLs the history lookup cannot resolve
 */
export async function getOpenTabsWithPlacesFromSessionLive() {
  const urls = await getOpenTabURLsFromSessionLive();
  const out = [];
  for (const url of urls) {
    let guid = null;
    if (url.startsWith("http")) {
      try {
        guid = (await lazy.PlacesUtils.history.fetch(url))?.guid ?? null;
      } catch {
        // Best effort: a failed history lookup just means "no guid" for this tab.
      }
    }
    out.push({ url, guid });
  }
  return out;
}

/**
 * For each input places GUID, report if it is currently open
 * note there is a < 30 second delay between a guid opening and this function
 * registering that change
 *
 * @param {string[]} guids Array of guid strings
 * @param {object} isStartup stores the boolean isStartup; during startup the
 *   session is not inspected and every guid is reported as closed
 * @returns {Promise<object>} Map of guid -> 1 (open) or 0 (not open)
 */
export async function getIsOpen(guids, isStartup) {
  // Set of GUIDs for fast lookup; stays empty during startup.
  const openGuids = new Set();
  if (!isStartup?.isStartup) {
    for (const { guid } of await getOpenTabsWithPlacesFromSessionLive()) {
      // skip tabs without a guid
      if (guid) {
        openGuids.add(guid);
      }
    }
  }

  // Map each input guid to 1/0
  const result = {};
  for (const g of guids) {
    result[g] = openGuids.has(g) ? 1 : 0;
  }
  return result;
}

/**
 * For each input places GUID, report the total visits
 *
 * @param {object[]} topsites Array of objects with a `guid` field (moz_places.guid)
 * @param {string} [placeTable='moz_places'] Table name for places
 * @returns {Promise<object>} Map of guid -> visit total (0 when the guid is unknown)
 */
export async function fetchVisitCountsByGuid(
  topsites,
  placeTable = PLACES_TABLE
) {
  if (!topsites?.length) {
    return {};
  }

  const values = buildGuidValuesClause(topsites.map(site => site.guid));

  const sql = `
    WITH input(guid) AS (VALUES ${values})
    SELECT i.guid, COALESCE(p.visit_count, 0) AS visit_count
    FROM input i
    LEFT JOIN ${placeTable} p ON p.guid = i.guid
    ORDER BY i.guid;
  `;

  const { activityStreamProvider } = lazy.NewTabUtils;
  const rows = await activityStreamProvider.executePlacesQuery(sql);
  const out = Object.create(null);
  for (const [guid, visit_count] of rows) {
    out[guid] = visit_count;
  }
  return out; // { guid: visit_count }
}

/**
 * For each input places GUID, return the time and type of the last 10 visits
 * this is a replication of what is done during frecency calculation
 *
 * @param {object[]} topsites Array of objects with a `guid` field (moz_places.guid)
 * @param {string} [table='moz_historyvisits'] Table name for history
 * @param {string} [placeTable='moz_places'] Table name for places
 * @returns {Promise<object>} Map of guid -> array of {visit_date_us, visit_type},
 *   most recent first; guids without visits map to an empty array
 */
export async function fetchLast10VisitsByGuid(
  topsites,
  table = VISITS_TABLE,
  placeTable = PLACES_TABLE
) {
  if (!topsites?.length) {
    return {};
  }

  const guids = topsites.map(s => String(s.guid));
  const valuesClause = buildGuidValuesClause(guids);

  // Mirrors Firefox's pattern:
  // SELECT ... FROM moz_historyvisits WHERE place_id = h.id ORDER BY visit_date DESC LIMIT 10
  const sql = `
    WITH input(guid) AS (VALUES ${valuesClause})
    SELECT
      i.guid,
      v.visit_date AS visit_date_us,
      v.visit_type
    FROM input i
    JOIN ${placeTable} h ON h.guid = i.guid
    JOIN ${table} v ON v.place_id = h.id
    WHERE v.id IN (
      SELECT vv.id
      FROM ${table} vv
      WHERE vv.place_id = h.id
      ORDER BY vv.visit_date DESC
      LIMIT 10 /* limit to the last 10 visits */
    )
    ORDER BY i.guid, v.visit_date DESC;
  `;

  const { activityStreamProvider } = lazy.NewTabUtils;
  // rows: Array<[guid, visit_date_us, visit_type]>
  const rows = await activityStreamProvider.executePlacesQuery(sql);

  // Seed every requested guid so guids without visits still get an entry.
  const out = Object.fromEntries(guids.map(g => [g, []]));

  for (const [guid, visit_date_us, visit_type] of rows) {
    // rows are already ordered by guid, visit_date DESC by the query
    out[guid].push({ visit_date_us, visit_type });
  }

  // `out` is: { [guid]: [ { visit_date_us, visit_type }, ... ] }
  return out;
}

/**
 * For each input places GUID, report whether it is bookmarked.
 * Walks guid -> moz_places.id -> moz_bookmarks.fk (type=1).
 *
 * @param {object[]} topsites Array of objects with a `guid` field (moz_places.guid)
 * @param {string} [bookmarksTable='moz_bookmarks'] Table name for bookmarks
 * @param {string} [placesTable='moz_places'] Table name for places
 * @returns {Promise<object>} Map of guid -> boolean (true if bookmarked)
 */
export async function fetchBookmarkedFlags(
  topsites,
  bookmarksTable = "moz_bookmarks",
  placesTable = "moz_places"
) {
  if (!topsites?.length) {
    return {};
  }

  const valuesClause = buildGuidValuesClause(topsites.map(site => site.guid));

  // We LEFT JOIN so every input guid appears once, even if not found/bookmarked.
  const sql = `
    WITH input_keys(guid) AS (
      VALUES ${valuesClause}
    )
    SELECT
      ik.guid AS key,
      COALESCE(COUNT(b.id), 0) AS bookmark_count
    FROM input_keys AS ik
    LEFT JOIN ${placesTable} AS p
      ON p.guid = ik.guid
    LEFT JOIN ${bookmarksTable} AS b
      ON b.fk = p.id
      AND b.type = 1 -- only actual bookmark items
    GROUP BY ik.guid
    ORDER BY ik.guid;
  `;

  const { activityStreamProvider } = lazy.NewTabUtils;
  const rows = await activityStreamProvider.executePlacesQuery(sql);

  // rows: [key, bookmark_count]
  const result = {};
  for (const [key, count] of rows) {
    if (key) {
      result[key] = count > 0;
    }
  }

  // Ensure every requested guid is present (defensive)
  for (const site of topsites) {
    if (!(site.guid in result)) {
      result[site.guid] = false;
    }
  }

  return result;
}

/**
 * Get per-site histograms of visits over day-of-week, restricted to the
 * last 2 months
 *
 * @param {object[]} topsites Array of topsites objects
 * @param {string} table Table to query
 * @param {string} placeTable Table to map guid->place_id
 * @returns {Promise<object>} Map of guid -> 7-bin histogram of day-of-week site opens
 */
export async function fetchDailyVisitsSpecific(topsites, table, placeTable) {
  if (!topsites?.length) {
    return {};
  }

  const valuesClause = buildGuidValuesClause(topsites.map(site => site.guid));

  const sql = `
    WITH input_keys(guid) AS (
      VALUES ${valuesClause}
    ),
    place_ids AS (
      SELECT input_keys.guid, pTable.id AS place_id
      FROM input_keys
      LEFT JOIN ${placeTable} as pTable ON pTable.guid = input_keys.guid
    )
    SELECT
      place_ids.guid AS key,
      CAST(strftime('%w', dTable.visit_date / 1e6, 'unixepoch') AS INTEGER) AS day_of_week,
      COUNT(dTable.visit_date) AS visit_count
    FROM place_ids
    LEFT JOIN ${table} as dTable
      ON dTable.place_id = place_ids.place_id
      AND dTable.visit_date >= 1e6 * strftime('%s', 'now', '-2 months')
    GROUP BY place_ids.guid, day_of_week
    ORDER BY place_ids.guid, day_of_week;
  `;
  const { activityStreamProvider } = lazy.NewTabUtils;
  const rows = await activityStreamProvider.executePlacesQuery(sql);
  const histograms = {};
  for (const [key, day_of_week, visit_count] of rows) {
    if (!histograms[key]) {
      histograms[key] = Array(7).fill(0);
    }
    // day_of_week is null for sites with no visits in the window
    if (day_of_week !== null) {
      histograms[key][day_of_week] = visit_count;
    }
  }
  // Make sure every requested site has a histogram, even an all-zero one.
  for (const site of topsites) {
    if (!histograms[site.guid]) {
      histograms[site.guid] = Array(7).fill(0);
    }
  }
  return histograms;
}

/**
 * Get histogram of all site visits over day-of-week, restricted to the
 * last 6 months
 *
 * @param {string} table Table to query
 * @returns {Promise<number[]>} 7-bin histogram of day-of-week site opens
 */
export async function fetchDailyVisitsAll(table) {
  const sql = `
    SELECT
      CAST(strftime('%w', ${table}.visit_date / 1e6, 'unixepoch') AS INTEGER) AS day_of_week,
      COUNT(*) AS visit_count
    FROM ${table}
    WHERE ${table}.visit_date >= 1e6 * strftime('%s', 'now', '-6 months')
    GROUP BY day_of_week
    ORDER BY day_of_week;
  `;

  const { activityStreamProvider } = lazy.NewTabUtils;
  const rows = await activityStreamProvider.executePlacesQuery(sql);
  const histogram = Array(7).fill(0);
  for (const [day_of_week, visit_count] of rows) {
    if (day_of_week !== null) {
      histogram[day_of_week] = visit_count;
    }
  }
  return histogram;
}

/**
 * Get per-site histograms of visits over hour-of-day, restricted to the
 * last 2 months
 *
 * @param {object[]} topsites Array of topsites objects
 * @param {string} table Table to query
 * @param {string} placeTable Table to map guid->place_id
 * @returns {Promise<object>} Map of guid -> 24-bin histogram of hour-of-day site opens
 */
export async function fetchHourlyVisitsSpecific(topsites, table, placeTable) {
  if (!topsites?.length) {
    return {};
  }

  const valuesClause = buildGuidValuesClause(topsites.map(site => site.guid));

  const sql = `
    WITH input_keys(guid) AS (
      VALUES ${valuesClause}
    ),
    place_ids AS (
      SELECT input_keys.guid, pTable.id AS place_id
      FROM input_keys
      LEFT JOIN ${placeTable} as pTable ON pTable.guid = input_keys.guid
    )
    SELECT
      place_ids.guid AS key,
      CAST(strftime('%H', hTable.visit_date / 1e6, 'unixepoch') AS INTEGER) AS hour_of_day,
      COUNT(hTable.visit_date) AS visit_count
    FROM place_ids
    LEFT JOIN ${table} as hTable
      ON hTable.place_id = place_ids.place_id
      AND hTable.visit_date >= 1e6 * strftime('%s', 'now', '-2 months')
    GROUP BY place_ids.guid, hour_of_day
    ORDER BY place_ids.guid, hour_of_day;
  `;
  const { activityStreamProvider } = lazy.NewTabUtils;
  const rows = await activityStreamProvider.executePlacesQuery(sql);
  const histograms = {};
  for (const [key, hour_of_day, visit_count] of rows) {
    if (!histograms[key]) {
      histograms[key] = Array(24).fill(0);
    }
    // hour_of_day is null for sites with no visits in the window
    if (hour_of_day !== null) {
      histograms[key][hour_of_day] = visit_count;
    }
  }
  // Make sure every requested site has a histogram, even an all-zero one.
  for (const site of topsites) {
    if (!histograms[site.guid]) {
      histograms[site.guid] = Array(24).fill(0);
    }
  }
  return histograms;
}

/**
 * Get histogram of all site visits over hour-of-day, restricted to the
 * last 6 months
 *
 * @param {string} table Table to query
 * @returns {Promise<number[]>} 24-bin histogram of hour-of-day site opens
 */
export async function fetchHourlyVisitsAll(table) {
  const sql = `
    SELECT
      CAST(strftime('%H', ${table}.visit_date / 1e6, 'unixepoch') AS INTEGER) AS hour_of_day,
      COUNT(*) AS visit_count
    FROM ${table}
    WHERE ${table}.visit_date >= 1e6 * strftime('%s', 'now', '-6 months')
    GROUP BY hour_of_day
    ORDER BY hour_of_day;
  `;

  const { activityStreamProvider } = lazy.NewTabUtils;
  const rows = await activityStreamProvider.executePlacesQuery(sql);
  const histogram = Array(24).fill(0);
  for (const [hour_of_day, visit_count] of rows) {
    if (hour_of_day !== null) {
      histogram[hour_of_day] = visit_count;
    }
  }
  return histogram;
}

/**
 * Build weights object only for the requested features.
 *
 * @param {object} prefValues - contains trainhopConfig.smartShortcuts
 * @param {string[]} features - e.g. ["thom","frec"] (bias optional)
 * @returns {object} Map of feature -> weight (pref or default percent / 100)
 */
function initShortcutWeights(prefValues, features) {
  const cfg = prefValues?.trainhopConfig?.smartShortcuts ?? {};
  const out = {};

  for (const f of features) {
    const meta = FEATURE_META[f];
    if (!meta) {
      continue;
    } // unknown feature: skip

    const raw = cfg[meta.pref];
    const percent = Number.isFinite(raw) ? raw : meta.def;
    out[f] = percent / 100;
  }

  return out;
}

/**
 * Check for bad numerical weights or changes in init config
 *
 * @param {object} all_weights Dictionary with `current` and `old_init` weights
 *   from the cache and the freshly built `new_init` weights
 * @param {string[]} features List of features to have weights
 * @returns {object[]} `[current weights, init weights]`; both are `new_init`
 *   when the cached weights are missing, empty, non-finite for a feature, or
 *   were built from a different init config
 */
function checkWeights(all_weights, features) {
  if (
    !all_weights.current ||
    !all_weights.old_init ||
    Object.keys(all_weights.current).length === 0
  ) {
    return [all_weights.new_init, all_weights.new_init];
  }
  for (const fkey of features) {
    if (
      !Number.isFinite(all_weights.current[fkey]) ||
      all_weights.old_init[fkey] !== all_weights.new_init[fkey]
    ) {
      return [all_weights.new_init, all_weights.new_init];
    }
  }
  return [all_weights.current, all_weights.old_init];
}

/**
 * Get clicks and impressions for sites in topsites array
 *
 * @param {object[]} topsites Array of topsites objects
 * @param {string} table Table for shortcuts interactions
 * @param {string} placeTable moz_places table
 * @returns {Promise<number[][]>} `[clicks, impressions]`, each aligned with topsites
 */
async function fetchShortcutInteractions(topsites, table, placeTable) {
  if (!topsites?.length) {
    // Return empty clicks and impressions arrays
    return [[], []];
  }

  const guidList = topsites.map(site => site.guid);
  const valuesClause = buildGuidValuesClause(guidList);

  // Only get records in the last 2 months!
  // Join on places table to map guid to place_id
  const sql = `
    WITH input_keys(guid) AS (
      VALUES ${valuesClause}
    ),
    place_ids AS (
      SELECT input_keys.guid, ${placeTable}.id AS place_id
      FROM input_keys
      JOIN ${placeTable} ON ${placeTable}.guid = input_keys.guid
    )
    SELECT
      place_ids.guid AS key,
      COALESCE(SUM(${table}.event_type), 0) AS total_clicks,
      COALESCE(SUM(1 - ${table}.event_type), 0) AS total_impressions
    FROM place_ids
    LEFT JOIN ${table} ON ${table}.place_id = place_ids.place_id
      AND ${table}.timestamp_s >= strftime('%s', 'now', '-2 months')
    GROUP BY place_ids.guid;
  `;

  const { activityStreamProvider } = lazy.NewTabUtils;
  const interactions = await activityStreamProvider.executePlacesQuery(sql);
  const interactionMap = new Map(
    interactions.map(([key, total_clicks, total_impressions]) => [
      key,
      { clicks: total_clicks, impressions: total_impressions },
    ])
  );

  // Rebuild aligned arrays in same order as input; guids the query did not
  // return (not in the places table) count as 0 clicks / 0 impressions.
  const clicks = guidList.map(guid => interactionMap.get(guid)?.clicks ?? 0);
  const impressions = guidList.map(
    guid => interactionMap.get(guid)?.impressions ?? 0
  );
  return [clicks, impressions];
}

export class RankShortcutsProvider {
  constructor() {
    this.sc_obj = new lazy.PersistentCache("shortcut_cache", true);
  }

  // Lazily create the promise worker on first use.
  get rankShortcutsWorker() {
    if (!this._rankShortcutsWorker) {
      this._rankShortcutsWorker = new lazy.BasePromiseWorker(
        "resource://newtab/lib/SmartShortcutsRanker/RankShortcuts.worker.mjs",
        { type: "module" }
      );
    }
    return this._rankShortcutsWorker;
  }

  /**
   * Get hourly seasonality priors and per-site histograms.
   *
   * The prior is recomputed (and persisted) when it is missing or older than
   * BASE_SEASONALITY_CACHE_EXPIRATION, except during startup where the cached
   * value (or null) is used as-is.
   *
   * @param {Array<object>} topsites
   * @param {object} shortcut_cache
   * @param {object} isStartup stores the boolean isStartup
   * @returns {Promise<{pvec: number[]|null, hists: any}>}
   */
  async getHourlySeasonalityData(topsites, shortcut_cache, isStartup) {
    const cache = shortcut_cache?.hourly_seasonality || null;
    const startup = isStartup?.isStartup;

    let hourly_prob = null;

    const expired =
      cache &&
      Date.now() - (cache.timestamp || 0) > BASE_SEASONALITY_CACHE_EXPIRATION;
    const missing = !cache || !cache.pvec;

    if (!startup && (missing || expired)) {
      const all_hourly_hist = await fetchHourlyVisitsAll(VISITS_TABLE);
      hourly_prob = await this.rankShortcutsWorker.post("sumNorm", [
        all_hourly_hist,
      ]);
      // persist fresh prior
      await this.sc_obj.set("hourly_seasonality", {
        pvec: hourly_prob,
        timestamp: Date.now(),
      });
    } else {
      // safe read with optional chaining + null fallback
      hourly_prob = cache?.pvec ?? null;
    }

    // Per-topsite histograms are needed regardless
    const hourly_hists = await fetchHourlyVisitsSpecific(
      topsites,
      VISITS_TABLE,
      PLACES_TABLE
    );

    return { pvec: hourly_prob, hists: hourly_hists };
  }

  /**
   * Get daily seasonality priors and per-site histograms.
   *
   * The prior is recomputed (and persisted) when it is missing or older than
   * BASE_SEASONALITY_CACHE_EXPIRATION, except during startup where the cached
   * value (or null) is used as-is.
   *
   * @param {Array<object>} topsites
   * @param {object} shortcut_cache
   * @param {object} isStartup stores the boolean isStartup
   * @returns {Promise<{pvec: number[]|null, hists: any}>}
   */
  async getDailySeasonalityData(topsites, shortcut_cache, isStartup) {
    const cache = shortcut_cache?.daily_seasonality ?? null;
    const startup = isStartup?.isStartup;

    let daily_prob = null;

    const expired =
      cache &&
      Date.now() - (cache.timestamp || 0) > BASE_SEASONALITY_CACHE_EXPIRATION;
    const missing = !cache || !cache.pvec;

    if (!startup && (missing || expired)) {
      const all_daily_hist = await fetchDailyVisitsAll(VISITS_TABLE);
      daily_prob = await this.rankShortcutsWorker.post("sumNorm", [
        all_daily_hist,
      ]);
      // persist fresh prior
      await this.sc_obj.set("daily_seasonality", {
        pvec: daily_prob,
        timestamp: Date.now(),
      });
    } else {
      daily_prob = cache?.pvec ?? null;
    }

    // Per-topsite histograms are needed regardless
    const daily_hists = await fetchDailyVisitsSpecific(
      topsites,
      VISITS_TABLE,
      PLACES_TABLE
    );

    return { pvec: daily_prob, hists: daily_hists };
  }

  /**
   * Check the shortcut interaction table for new events since
   * the last time we updated the model weights (looking back at most 24 hours),
   * then record the current time as `time_last_update` in the cache.
   *
   * @param {object} cache_data shortcut cache
   * @param {string} table Shortcuts interaction table
   * @param {string} placeTable moz_places table
   * @returns {Promise<object>} Map of guid -> clicks and impression counts
   */
  async getLatestInteractions(cache_data, table, placeTable = "moz_places") {
    const now_s = Math.floor(Date.now() / 1000);
    let tlu = Number(cache_data?.time_last_update ?? 0);
    if (tlu > 1e11) {
      tlu = Math.floor(tlu / 1000);
    } // ms -> s
    const since = Math.max(tlu, now_s - 24 * 60 * 60);

    const { activityStreamProvider } = lazy.NewTabUtils;

    const rows = await activityStreamProvider.executePlacesQuery(
      `
      SELECT
        p.guid AS guid,
        SUM(CASE WHEN e.event_type = 1 THEN 1 ELSE 0 END) AS clicks,
        SUM(CASE WHEN e.event_type = 0 THEN 1 ELSE 0 END) AS impressions
      FROM ${table} e
      JOIN ${placeTable} p ON p.id = e.place_id
      WHERE e.timestamp_s >= ${since}
      GROUP BY p.guid
      `
    );

    const dict = Object.create(null);

    // Rows are read both by column name and by position, so either an
    // object-shaped or an array-shaped row is accepted.
    for (const r of rows ?? []) {
      const positional = Array.isArray(r);
      const guid = r.guid ?? (positional ? r[0] : undefined);
      if (!guid) {
        continue;
      }
      const clicks = Number(r.clicks ?? (positional ? r[1] : 0)) || 0;
      const impressions =
        Number(r.impressions ?? (positional ? r[2] : 0)) || 0;
      dict[guid] = { clicks, impressions };
    }

    await this.sc_obj.set("time_last_update", now_s);

    return dict;
  }

  /**
   * Get "frecency" features: frequency, recency, re-frecency, unique days visited
   *
   * @param {Array<object>} withGuid topsites we are building features for
   * @returns {Promise<{}>} output of the worker's "buildFrecencyFeatures" call
   *   (read by rankTopSites as { rece, freq, refre, unid })
   */
  async fetchRefreFeatures(withGuid) {
    const raw_frec = await fetchLast10VisitsByGuid(
      withGuid,
      VISITS_TABLE,
      PLACES_TABLE
    );
    const visit_totals = await fetchVisitCountsByGuid(withGuid, PLACES_TABLE);
    return this.rankShortcutsWorker.post("buildFrecencyFeatures", [
      raw_frec,
      visit_totals,
    ]);
  }

  /**
   * Smart Shortcuts ranking main call
   *
   * @param {Array<object>} topsites
   * @param {object} prefValues
   * @param {object} isStartup stores the boolean isStartup
   * @param {number} [numSponsored=0] forwarded to the worker's "applyStickyClicks"
   * @returns {Promise<Array<object>>} topsites reordered: ranked sites with a
   *   guid first, followed by the sites without a guid in their original order.
   *   Returned untouched when smart shortcuts has no config.
   */
  async rankTopSites(topsites, prefValues, isStartup, numSponsored = 0) {
    const cfg = prefValues?.trainhopConfig?.smartShortcuts;
    if (!cfg) {
      return topsites;
    }
    // get our feature set; anything that is not an array falls back to the
    // defaults because the feature list is iterated, searched and mapped below
    const features = Array.isArray(cfg.features) ? cfg.features : FEATURES;

    // split topsites into two arrays, we only rank those with guid
    const withGuid = [];
    const withoutGuid = [];
    for (const site of topsites) {
      if (site.guid && typeof site.guid === "string") {
        withGuid.push(site);
      } else {
        withoutGuid.push(site);
      }
    }

    // query for interactions, sql cant be on promise
    // always do this but only used for thompson and ctr
    const [clicks, impressions] = await fetchShortcutInteractions(
      withGuid,
      SHORTCUT_TABLE,
      PLACES_TABLE
    );

    // cache stores weights and the last feature values used to produce ranking
    // PersistentCache r/w cant be on promise
    const sc_cache = (await this.sc_obj.get()) ?? {};

    // check for bad weights (numerical) or change in init configs
    let [weights, init_weights] = checkWeights(
      {
        current: sc_cache.weights,
        new_init: initShortcutWeights(prefValues, features),
        old_init: sc_cache.init_weights,
      },
      features
    );

    // update our weights
    const latest_interaction_data = await this.getLatestInteractions(
      sc_cache,
      SHORTCUT_TABLE
    );
    weights = await this.rankShortcutsWorker.post("updateWeights", [
      {
        data: latest_interaction_data,
        scores: sc_cache.score_map,
        features,
        weights,
        eta: (cfg.eta ?? ETA) / 10000,
        click_bonus: (cfg.click_bonus ?? CLICK_BONUS) / 10,
      },
    ]);

    // write the weights and init... sometimes redundant
    await this.sc_obj.set("weights", weights);
    await this.sc_obj.set("init_weights", init_weights);

    // feature data, only fetched for the features that are switched on
    const hourly_seasonality = features.includes("hour")
      ? await this.getHourlySeasonalityData(withGuid, sc_cache, isStartup)
      : null;
    const daily_seasonality = features.includes("daily")
      ? await this.getDailySeasonalityData(withGuid, sc_cache, isStartup)
      : null;
    const bmark_scores = features.includes("bmark")
      ? await fetchBookmarkedFlags(withGuid, BOOKMARK_TABLE, PLACES_TABLE)
      : null;
    const refrec_scores = ["rece", "freq", "refre", "unid"].some(f =>
      features.includes(f)
    )
      ? await this.fetchRefreFeatures(withGuid)
      : { rece: null, freq: null, refre: null, unid: null };
    const open_scores = features.includes("open")
      ? await getIsOpen(
          withGuid.map(t => t.guid),
          isStartup
        )
      : null;
    // call to the promise worker to do the ranking
    const frecency_scores = withGuid.map(t => t.frecency);
    const output = await this.rankShortcutsWorker.post(
      "weightedSampleTopSites",
      [
        {
          features,
          alpha: cfg.positive_prior ?? SHORTCUT_POSITIVE_PRIOR,
          beta: cfg.negative_prior ?? SHORTCUT_NEGATIVE_PRIOR,
          tau: 100,
          guid: withGuid.map(t => t.guid),
          clicks,
          impressions,
          norms:
            sc_cache.norms ??
            Object.fromEntries(features.map(key => [key, null])),
          weights,
          frecency: frecency_scores,
          hourly_seasonality,
          daily_seasonality,
          bmark_scores,
          open_scores,
          rece_scores: refrec_scores?.rece,
          freq_scores: refrec_scores?.freq,
          refre_scores: refrec_scores?.refre,
          unid_scores: refrec_scores?.unid,
        },
      ]
    );
    // update the cache
    await this.sc_obj.set("norms", output.norms);
    await this.sc_obj.set("score_map", output.score_map);

    // final score for ranking as an array
    let final_scores = withGuid.map(
      site => output.score_map?.[site.guid]?.final
    );
    // catch nan errors, and missing or otherwise non-finite scores: any of
    // them would make the ordering meaningless, so rank by frecency instead
    if (final_scores.some(score => !Number.isFinite(score))) {
      final_scores = frecency_scores;
    }

    // sort by scores
    let [sortedSites] = sortKeysValues(final_scores, withGuid);

    // sticky clicks. keep an item at a certain position for at least
    // numImps impressions after a click occurs
    const numImps = cfg.sticky_numimps ?? STICKY_NUMIMPS;
    if (numImps > 0) {
      const sguid = sortedSites.map(s => s.guid);
      const positions = await fetchShortcutLastClickPositions(
        sguid,
        SHORTCUT_TABLE,
        PLACES_TABLE,
        numImps
      );
      const stickyGuids = await this.rankShortcutsWorker.post(
        "applyStickyClicks",
        [positions, sguid, numSponsored]
      );
      // Build a lookup table guid -> site object
      const byGuid = new Map(sortedSites.map(site => [site.guid, site]));

      // Map over ordered guids, pulling objects from the lookup
      sortedSites = stickyGuids.map(g => byGuid.get(g)).filter(Boolean);
    }
    // grab topsites without guid
    const combined = sortedSites.concat(withoutGuid);

    // tack weights and scores so they can pass through to telemetry
    if (cfg.telem || SMART_TELEM) {
      // finite numbers are rounded, everything else passes through (nullish -> null)
      const roundForTelemetry = v =>
        Number.isFinite(v) ? roundNum(v) : (v ?? null);
      const roundEntries = obj =>
        Object.fromEntries(
          Object.entries(obj).map(([key, v]) => [key, roundForTelemetry(v)])
        );

      // store a version of weights that is rounded
      const roundWeights = roundEntries(weights ?? {});
      // do the tacking
      combined.forEach(s => {
        const raw = output?.score_map?.[s.guid];
        s.scores = raw && typeof raw === "object" ? roundEntries(raw) : null;
        s.weights = roundWeights;
      });
    }
    return combined;
  }
}