tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

RankShortcuts.mjs (34439B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
      4 
      5 /*
      6 Smart Shortcuts uses experimental prefs on newtabTrainhopConfig.
      7 These prefs can be accessed at prefValues.trainhopConfig.smartShortcuts
      8 
      9 * enabled: do smart shortcuts (TopSitesFeed)
     10 * over_sample_multiplier: number of rows of shortcuts to consider for smart shortcuts
     11 *          a user has n rows, we then query for n*over_sample_multiplier items to rank
     12 *          (TopSitesFeed)
     13 * force_log: log shortcuts interactions regardless of enabled (SmartShortcutsFeed)
     14 * features: array of feature name strings
     15 * eta: learning rate for feature weights
     16 * click_bonus: multiplier applied to clicks
     17 * positive_prior: thompson sampling alpha
     18 * negative_prior: thompson sampling beta
     19 * sticky_numimps: number of impressions for sticky clicks. 0 turns off
     20 *
     21 * thom_weight: weight of thompson sampling. divided by 100
     22 * frec_weight: weight of frecency. divided by 100
     23 * hour_weight: weight of hourly seasonality. divided by 100
     24 * daily_weight: weight of daily seasonality. divided by 100
     25 * bmark_weight: weight of is_bookmark. divided by 100
     26 * rece_weight: weight of recency. divided by 100
     27 * freq_weight: weight of frequency. divided by 100
     28 * refre_weight: weight of re-done frecency. divided by 100
     29 * open_weight: weight of is_open. divided by 100
     30 * unid_weight: weight of unique days visited. divided by 100
     31 * ctr_weight: weight of ctr. divided by 100
     32 * bias_weight: weight of bias. divided by 100
     33 
     34 */
     35 
     36 const SHORTCUT_TABLE = "moz_newtab_shortcuts_interaction";
     37 const PLACES_TABLE = "moz_places";
     38 const VISITS_TABLE = "moz_historyvisits";
     39 const BOOKMARK_TABLE = "moz_bookmarks";
     40 const BASE_SEASONALITY_CACHE_EXPIRATION = 1e3 * 60 * 60 * 24 * 7; // 7 day in miliseconds
     41 const ETA = 0;
     42 const CLICK_BONUS = 10;
     43 
     44 const FEATURE_META = {
     45  thom: { pref: "thom_weight", def: 5 },
     46  frec: { pref: "frec_weight", def: 95 },
     47  hour: { pref: "hour_weight", def: 0 },
     48  daily: { pref: "daily_weight", def: 0 },
     49  bmark: { pref: "bmark_weight", def: 0 },
     50  rece: { pref: "rece_weight", def: 0 },
     51  freq: { pref: "freq_weight", def: 0 },
     52  refre: { pref: "refre_weight", def: 0 },
     53  open: { pref: "open_weight", def: 0 },
     54  unid: { pref: "unid_weight", def: 0 },
     55  ctr: { pref: "ctr_weight", def: 0 },
     56  bias: { pref: "bias_weight", def: 1 },
     57 };
     58 
     59 const FEATURES = ["frec", "thom", "bias"];
     60 const SHORTCUT_POSITIVE_PRIOR = 1;
     61 const SHORTCUT_NEGATIVE_PRIOR = 1000;
     62 const STICKY_NUMIMPS = 0;
     63 const SMART_TELEM = false;
     64 
     65 const lazy = {};
     66 
     67 ChromeUtils.defineESModuleGetters(lazy, {
     68  BasePromiseWorker: "resource://gre/modules/PromiseWorker.sys.mjs",
     69  NewTabUtils: "resource://gre/modules/NewTabUtils.sys.mjs",
     70  PersistentCache: "resource://newtab/lib/PersistentCache.sys.mjs",
     71  PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs",
     72  SessionStore: "resource:///modules/sessionstore/SessionStore.sys.mjs",
     73 });
     74 
     75 import { sortKeysValues } from "resource://newtab/lib/SmartShortcutsRanker/ThomSample.mjs";
     76 
     77 // helper for lowering precision of numbers, save space in telemetry
     78 // longest string i can come up with out of this function:
     79 //               -0.000009999 which is 12 characters
     80 export const roundNum = (x, sig = 4, eps = 1e-6) => {
     81  if (typeof x !== "number" || !isFinite(x)) {
     82    return x;
     83  }
     84 
     85  // clip very small absolute values to zero
     86  if (Math.abs(x) < eps) {
     87    return 0;
     88  }
     89 
     90  const n = Number(x.toPrecision(sig));
     91 
     92  // normalize -0 to 0
     93  return Object.is(n, -0) ? 0 : n;
     94 };
     95 
     96 /**
     97 * For each guid, look at its last 10 shortcut interactions and, if a click occurred,
     98 * return the position of the (most recent) click within those 10.
     99 *
    100 * @param {Array<{guid:string}>} topsites  Array of top site objects (must include guid)
    101 * @param {string} table                   Shortcuts interactions table name (columns: place_id, event_type, position, timestamp_s, …)
    102 * @param {string} placeTable              moz_places table name (columns: id, guid, …)
    103 * @returns {Promise<number[]| (number|null)[]>} Array aligned with input topsites: position or null
    104 */
    105 export async function fetchShortcutLastClickPositions(
    106  guidList,
    107  table,
    108  placeTable,
    109  numImps = 10
    110 ) {
    111  if (!guidList.length) {
    112    return [];
    113  }
    114 
    115  // Build VALUES(...) for the GUIDs, escaping any single quotes just in case
    116  const valuesClause = guidList
    117    .map(guid => `('${String(guid).replace(/'/g, "''")}')`)
    118    .join(", ");
    119 
    120  // We:
    121  //  1) map input GUIDs -> place_id
    122  //  2) rank each guid's interactions by timestamp desc (and rowid as tie-breaker)
    123  //  3) keep only the last numImps (rn <= numImps)
    124  //  4) within those numImps, pick the most recent row that is a click (event_type=1), and grab its position
    125  //
    126  // Note: LEFT JOINs ensure we still return rows for GUIDs that have no interactions.
    127  const sql = `
    128    -- input array of strings becomes column vector
    129    WITH input_keys(guid) AS (
    130      VALUES ${valuesClause}
    131    ),
    132    -- build map guid->place.id
    133    place_ids AS (
    134      SELECT i.guid, p.id AS place_id
    135      FROM input_keys i
    136      JOIN ${placeTable} p ON p.guid = i.guid
    137    ),
    138    -- grab the last N iteractions for each place_id
    139    recent AS (
    140      SELECT
    141        pi.guid,
    142        t.tile_position AS position,
    143        t.event_type    AS event_type,
    144        t.timestamp_s   AS ts
    145      FROM place_ids pi
    146      JOIN ${table} t
    147        ON t.place_id = pi.place_id
    148      AND t.rowid IN (
    149            SELECT tt.rowid
    150            FROM ${table} tt
    151            WHERE tt.place_id = pi.place_id
    152            ORDER BY tt.timestamp_s DESC, tt.rowid DESC
    153            LIMIT ${Number(numImps)}
    154          )
    155    ),
    156    -- amongst the last numImps, get most recent click
    157    -- build a column for rank of each event in a guid sublist
    158    -- sort by putting clicks at top then time
    159    -- get only the first position (r=1)
    160    -- only get clicks (event_type=1)
    161    -- returns null if no click events
    162    best AS (
    163      SELECT guid, position
    164      FROM (
    165        SELECT
    166          guid, position, event_type, ts,
    167          ROW_NUMBER() OVER (
    168            PARTITION BY guid
    169            ORDER BY (event_type = 1) DESC, ts DESC
    170          ) AS r
    171        FROM recent
    172      )
    173      WHERE r = 1 AND event_type = 1
    174    )
    175      -- map back to guid
    176    SELECT pi.guid AS key, b.position
    177    FROM place_ids pi
    178    LEFT JOIN best b ON b.guid = pi.guid;
    179  `;
    180 
    181  const { activityStreamProvider } = lazy.NewTabUtils;
    182  const rows = await activityStreamProvider.executePlacesQuery(sql);
    183 
    184  // rows: [guid, position|null][]
    185  const posByGuid = new Map(rows.map(([key, position]) => [key, position]));
    186  // Return array aligned to input order (null if no click in last 10)
    187  return guidList.map(g => (posByGuid.has(g) ? posByGuid.get(g) : null));
    188 }
    189 
    190 export async function getOpenTabURLsFromSessionLive() {
    191  // Ensure SessionStore is ready (important at early startup)
    192  if (lazy.SessionStore.promiseInitialized) {
    193    await lazy.SessionStore.promiseInitialized;
    194  }
    195  const stateJSON = lazy.SessionStore.getBrowserState(); // sync string
    196  const state = JSON.parse(stateJSON); // { windows: [...] }
    197 
    198  const urls = [];
    199  for (const win of state?.windows ?? []) {
    200    for (const tab of win?.tabs ?? []) {
    201      const i = Math.max(0, (tab.index ?? 1) - 1); // current entry is 1-based
    202      const entry = tab.entries?.[i] ?? tab.entries?.[tab.entries.length - 1];
    203      const url = entry?.url;
    204      if (url) {
    205        urls.push(url);
    206      }
    207    }
    208  }
    209  return urls;
    210 }
    211 
    212 export async function getOpenTabsWithPlacesFromSessionLive() {
    213  const urls = await getOpenTabURLsFromSessionLive();
    214  const out = [];
    215  for (const url of urls) {
    216    let guid = null;
    217    if (url.startsWith("http")) {
    218      try {
    219        guid = (await lazy.PlacesUtils.history.fetch(url))?.guid ?? null;
    220      } catch {}
    221    }
    222    out.push({ url, guid });
    223  }
    224  return out;
    225 }
    226 
    227 /**
    228 * For each input places GUID, report if it is currently open
    229 * note there is a < 30 second delay between a guid opening and this function
    230 * registering that change
    231 *
    232 * @param {string[]} guids Array of guid stirngs
    233 * @returns {Promise<object>} Map of guid -> is open
    234 */
    235 export async function getIsOpen(guids, isStartup) {
    236  if (!isStartup?.isStartup) {
    237    // Grab all currently open tabs with GUIDs
    238    const openTabs = await getOpenTabsWithPlacesFromSessionLive();
    239 
    240    // Build a Set of GUIDs for fast lookup
    241    const openGuids = new Set(
    242      openTabs.map(t => t.guid).filter(Boolean) // skip nulls
    243    );
    244 
    245    // Map each input guid to 1/0
    246    const result = {};
    247    for (const g of guids) {
    248      result[g] = openGuids.has(g) ? 1 : 0;
    249    }
    250    return result;
    251  }
    252 
    253  // During startup: just return all 0s
    254  const result = {};
    255  for (const g of guids) {
    256    result[g] = 0;
    257  }
    258  return result;
    259 }
    260 
    261 /**
    262 * For each input places GUID, report the total visits
    263 *
    264 * @param {object[]} topsites Array of objects with a `guid` field (moz_places.guid)
    265 * @param {string} [placesTable='moz_places'] Table name for places
    266 * @returns {Promise<object>} Map of guid -> visit total
    267 */
    268 export async function fetchVisitCountsByGuid(topsites, placeTable) {
    269  if (!topsites?.length) {
    270    return {};
    271  }
    272 
    273  const guidList = topsites.map(site => site.guid);
    274 
    275  // Safely quote each guid for VALUES(), escaping single quotes
    276  const values = guidList
    277    .map(guid => `('${String(guid).replace(/'/g, "''")}')`)
    278    .join(", ");
    279 
    280  const sql = `
    281    WITH input(guid) AS (VALUES ${values})
    282    SELECT i.guid, COALESCE(p.visit_count, 0) AS visit_count
    283    FROM input i
    284    LEFT JOIN ${placeTable} p ON p.guid = i.guid
    285    ORDER BY i.guid;
    286  `;
    287 
    288  const { activityStreamProvider } = lazy.NewTabUtils;
    289  const rows = await activityStreamProvider.executePlacesQuery(sql);
    290  const out = Object.create(null);
    291  for (const [guid, visit_count] of rows) {
    292    out[guid] = visit_count;
    293  }
    294  return out; // { guid: visit_count }
    295 }
    296 
    297 /**
    298 * For each input places GUID, return the time and type of the last 10 visits
    299 * this is a replication of what is done during frecency calculation
    300 *
    301 * @param {object[]} topsites Array of objects with a `guid` field (moz_places.guid)
    302 * @param {string} [table='moz_historyvisits'] Table name for history
    303 * @param {string} [placeTable='moz_places'] Table name for places
    304 * @returns {Promise<object>} Map of guid -> {visit_time, visit_type}
    305 */
    306 export async function fetchLast10VisitsByGuid(topsites, table, placeTable) {
    307  if (!topsites?.length) {
    308    return {};
    309  }
    310 
    311  const guids = topsites.map(s => String(s.guid));
    312  const valuesClause = guids
    313    .map(g => `('${g.replace(/'/g, "''")}')`)
    314    .join(", ");
    315 
    316  // Mirrors Firefox's pattern:
    317  // SELECT ... FROM moz_historyvisits WHERE place_id = h.id ORDER BY visit_date DESC LIMIT 10
    318  const sql = `
    319    WITH input(guid) AS (VALUES ${valuesClause})
    320    SELECT
    321      i.guid,
    322      v.visit_date AS visit_date_us,
    323      v.visit_type
    324    FROM input i
    325    JOIN ${placeTable} h ON h.guid = i.guid
    326    JOIN ${table} v ON v.place_id = h.id
    327    WHERE v.id IN (
    328      SELECT vv.id
    329      FROM ${table} vv
    330      WHERE vv.place_id = h.id
    331      ORDER BY vv.visit_date DESC
    332      LIMIT 10  /* limit to the last 10 visits */
    333    )
    334    ORDER BY i.guid, v.visit_date DESC;
    335  `;
    336 
    337  const { activityStreamProvider } = lazy.NewTabUtils;
    338  const rows = await activityStreamProvider.executePlacesQuery(sql);
    339  // `guids` is the array you queried with
    340  // `rows` is the result from runQuery(sql) -> Array<[guid, visit_date_us, visit_type]>
    341 
    342  const out = Object.fromEntries(guids.map(g => [g, []]));
    343 
    344  for (const [guid, visit_date_us, visit_type] of rows) {
    345    // rows are already ordered by guid, visit_date DESC per your SQL
    346    out[guid].push({ visit_date_us, visit_type });
    347  }
    348 
    349  // `out` is: { [guid]: [ { visit_date_us, visit_type }, ... ] }
    350  return out;
    351 }
    352 
    353 /**
    354 * For each input places GUID, report whether it is bookmarked.
    355 * Walks guid -> moz_places.id -> moz_bookmarks.fk (type=1).
    356 *
    357 * @param {object[]} topsites Array of objects with a `guid` field (moz_places.guid)
    358 * @param {string} [placesTable='moz_places'] Table name for places
    359 * @param {string} [bookmarksTable='moz_bookmarks'] Table name for bookmarks
    360 * @returns {Promise<object>} Map of guid -> boolean (true if bookmarked)
    361 */
    362 export async function fetchBookmarkedFlags(
    363  topsites,
    364  bookmarksTable = "moz_bookmarks",
    365  placesTable = "moz_places"
    366 ) {
    367  if (!topsites.length) {
    368    return {};
    369  }
    370 
    371  const guidList = topsites.map(site => site.guid);
    372 
    373  // Safely quote each guid for VALUES(), escaping single quotes
    374  const valuesClause = guidList
    375    .map(guid => `('${String(guid).replace(/'/g, "''")}')`)
    376    .join(", ");
    377 
    378  // We LEFT JOIN so every input guid appears once, even if not found/bookmarked.
    379  const sql = `
    380    WITH input_keys(guid) AS (
    381      VALUES ${valuesClause}
    382    )
    383    SELECT
    384      ik.guid AS key,
    385      COALESCE(COUNT(b.id), 0) AS bookmark_count
    386    FROM input_keys AS ik
    387    LEFT JOIN ${placesTable} AS p
    388      ON p.guid = ik.guid
    389    LEFT JOIN ${bookmarksTable} AS b
    390      ON b.fk = p.id
    391      AND b.type = 1            -- only actual bookmark items
    392    GROUP BY ik.guid
    393    ORDER BY ik.guid;
    394  `;
    395 
    396  const { activityStreamProvider } = lazy.NewTabUtils;
    397  const rows = await activityStreamProvider.executePlacesQuery(sql);
    398 
    399  // rows: [key, bookmark_count]
    400  const result = {};
    401  for (const [key, count] of rows) {
    402    if (key) {
    403      result[key] = count > 0;
    404    }
    405  }
    406 
    407  // Ensure every requested guid is present (defensive)
    408  for (const site of topsites) {
    409    if (!(site.guid in result)) {
    410      result[site.guid] = false;
    411    }
    412  }
    413 
    414  return result;
    415 }
    416 
    417 /**
    418 * Get histogram of all site visits over day-of-week
    419 *
    420 * @param {object[]} topsites Array of topsites objects
    421 * @param {string} table Table to query
    422 * @param {string} placeTable Table to map guid->place_id
    423 * @returns {result: object} Dictionary of histograms of day-of-week site opens
    424 */
    425 export async function fetchDailyVisitsSpecific(topsites, table, placeTable) {
    426  if (!topsites.length) {
    427    return {};
    428  }
    429  const guidList = topsites.map(site => site.guid);
    430 
    431  const valuesClause = guidList
    432    .map(guid => `('${guid.replace(/'/g, "''")}')`)
    433    .join(", ");
    434 
    435  const sql = `
    436      WITH input_keys(guid) AS (
    437        VALUES ${valuesClause}
    438      ),
    439      place_ids AS (
    440        SELECT input_keys.guid, pTable.id AS place_id
    441        FROM input_keys
    442        LEFT JOIN ${placeTable} as pTable ON pTable.guid = input_keys.guid
    443      )
    444      SELECT
    445        place_ids.guid AS key,
    446        CAST(strftime('%w', dTable.visit_date / 1e6, 'unixepoch') AS INTEGER) AS day_of_week,
    447        COUNT(dTable.visit_date) AS visit_count
    448      FROM place_ids
    449      LEFT JOIN ${table} as dTable
    450        ON dTable.place_id = place_ids.place_id
    451        AND dTable.visit_date >= 1e6 * strftime('%s', 'now', '-2 months')
    452      GROUP BY place_ids.guid, day_of_week
    453      ORDER BY place_ids.guid, day_of_week;
    454      `;
    455  const { activityStreamProvider } = lazy.NewTabUtils;
    456  const rows = await activityStreamProvider.executePlacesQuery(sql);
    457  const histograms = {};
    458  for (const [key, day_of_week, visit_count] of rows) {
    459    if (!histograms[key]) {
    460      histograms[key] = Array(7).fill(0);
    461    }
    462    if (day_of_week !== null) {
    463      histograms[key][day_of_week] = visit_count;
    464    }
    465  }
    466  for (const site of topsites) {
    467    if (!histograms[site.guid]) {
    468      histograms[site.guid] = Array(7).fill(0);
    469    }
    470  }
    471  return histograms;
    472 }
    473 
    474 /**
    475 * Get histogram of all site visits over day-of-week
    476 *
    477 * @param {string} table Table to query
    478 * @returns {number[]} Histogram of day-of-week site opens
    479 */
    480 export async function fetchDailyVisitsAll(table) {
    481  const sql = `
    482    SELECT
    483      CAST(strftime('%w', ${table}.visit_date / 1e6, 'unixepoch') AS INTEGER) AS day_of_week,
    484      COUNT(*) AS visit_count
    485    FROM ${table}
    486    WHERE ${table}.visit_date >= 1e6 * strftime('%s', 'now', '-6 months')
    487    GROUP BY day_of_week
    488    ORDER BY day_of_week;
    489  `;
    490 
    491  const { activityStreamProvider } = lazy.NewTabUtils;
    492  const rows = await activityStreamProvider.executePlacesQuery(sql);
    493  const histogram = Array(7).fill(0);
    494  for (const [day_of_week, visit_count] of rows) {
    495    if (day_of_week !== null) {
    496      histogram[day_of_week] = visit_count;
    497    }
    498  }
    499  return histogram;
    500 }
    501 /**
    502 * Get histogram of all site visits over hour-of-day
    503 *
    504 * @param {object[]} topsites Array of topsites objects
    505 * @param {string} table Table to query
    506 * @param {string} placeTable Table to map guid->place_id
    507 * @returns {object} Dictionary of histograms of hour-of-day site opens
    508 */
    509 export async function fetchHourlyVisitsSpecific(topsites, table, placeTable) {
    510  if (!topsites.length) {
    511    return {};
    512  }
    513  const guidList = topsites.map(site => site.guid);
    514 
    515  const valuesClause = guidList
    516    .map(guid => `('${guid.replace(/'/g, "''")}')`)
    517    .join(", ");
    518 
    519  const sql = `
    520      WITH input_keys(guid) AS (
    521        VALUES ${valuesClause}
    522      ),
    523      place_ids AS (
    524        SELECT input_keys.guid, pTable.id AS place_id
    525        FROM input_keys
    526        LEFT JOIN ${placeTable} as pTable ON pTable.guid = input_keys.guid
    527      )
    528      SELECT
    529        place_ids.guid AS key,
    530        CAST(strftime('%H', hTable.visit_date / 1e6, 'unixepoch') AS INTEGER) AS hour_of_day,
    531        COUNT(hTable.visit_date) AS visit_count
    532      FROM place_ids
    533      LEFT JOIN ${table} as hTable
    534        ON hTable.place_id = place_ids.place_id
    535        AND hTable.visit_date >= 1e6 * strftime('%s', 'now', '-2 months')
    536      GROUP BY place_ids.guid, hour_of_day
    537      ORDER BY place_ids.guid, hour_of_day;
    538      `;
    539  const { activityStreamProvider } = lazy.NewTabUtils;
    540  const rows = await activityStreamProvider.executePlacesQuery(sql);
    541  const histograms = {};
    542  for (const [key, hour_of_day, visit_count] of rows) {
    543    if (!histograms[key]) {
    544      histograms[key] = Array(24).fill(0);
    545    }
    546    if (hour_of_day !== null) {
    547      histograms[key][hour_of_day] = visit_count;
    548    }
    549  }
    550  for (const site of topsites) {
    551    if (!histograms[site.guid]) {
    552      histograms[site.guid] = Array(24).fill(0);
    553    }
    554  }
    555  return histograms;
    556 }
    557 
    558 /**
    559 * Get histogram of all site visits over hour-of-day
    560 *
    561 * @param {string} table Table to query
    562 * @returns {number[]} Histogram of hour-of-day site opens
    563 */
    564 export async function fetchHourlyVisitsAll(table) {
    565  const sql = `
    566    SELECT
    567      CAST(strftime('%H', ${table}.visit_date / 1e6, 'unixepoch') AS INTEGER) AS hour_of_day,
    568      COUNT(*) AS visit_count
    569    FROM ${table}
    570    WHERE ${table}.visit_date >= 1e6 * strftime('%s', 'now', '-6 months')
    571    GROUP BY hour_of_day
    572    ORDER BY hour_of_day;
    573  `;
    574 
    575  const { activityStreamProvider } = lazy.NewTabUtils;
    576  const rows = await activityStreamProvider.executePlacesQuery(sql);
    577  const histogram = Array(24).fill(0);
    578  for (const [hour_of_day, visit_count] of rows) {
    579    if (hour_of_day !== null) {
    580      histogram[hour_of_day] = visit_count;
    581    }
    582  }
    583  return histogram;
    584 }
    585 
    586 /**
    587 * Build weights object only for the requested features.
    588 *
    589 * @param {object} prefValues - contains trainhopConfig.smartShortcuts
    590 * @param {string[]} features - e.g. ["thom","frec"] (bias optional)
    591 */
    592 function initShortcutWeights(prefValues, features) {
    593  const cfg = prefValues?.trainhopConfig?.smartShortcuts ?? {}; // remove second config
    594  const out = {};
    595 
    596  for (const f of features) {
    597    const meta = FEATURE_META[f];
    598    if (!meta) {
    599      continue;
    600    } // unknown feature: skip
    601 
    602    const raw = cfg[meta.pref];
    603    const percent = Number.isFinite(raw) ? raw : meta.def;
    604    out[f] = percent / 100;
    605  }
    606 
    607  return out;
    608 }
    609 /**
    610 * Check for bad numerical weights or changes in init config
    611 *
    612 * @param {object} all_weights Dictionary of weights from cache
    613 * @param {string[]} features List of features to have weights
    614 * @returns {object[]} current weights and the init weights
    615 */
    616 function checkWeights(all_weights, features) {
    617  if (
    618    !all_weights.current ||
    619    !all_weights.old_init ||
    620    Object.keys(all_weights.current).length === 0
    621  ) {
    622    return [all_weights.new_init, all_weights.new_init];
    623  }
    624  for (const fkey of features) {
    625    if (
    626      !Number.isFinite(all_weights.current[fkey]) ||
    627      all_weights.old_init[fkey] !== all_weights.new_init[fkey]
    628    ) {
    629      return [all_weights.new_init, all_weights.new_init];
    630    }
    631  }
    632  return [all_weights.current, all_weights.old_init];
    633 }
    634 
    635 /**
    636 * Get clicks and impressions for sites in topsites array
    637 *
    638 * @param {object[]} topsites Array of topsites objects
    639 * @param {string} table Table for shortcuts interactions
    640 * @param {string} placeTable moz_places table
    641 * @returns {clicks: [number[], impressions: number[]]} Clicks and impressions for each site in topsites
    642 */
    643 async function fetchShortcutInteractions(topsites, table, placeTable) {
    644  if (!topsites.length) {
    645    // Return empty clicks and impressions arrays
    646    return [[], []];
    647  }
    648 
    649  const guidList = topsites.map(site => site.guid);
    650 
    651  const valuesClause = guidList
    652    .map(guid => `('${guid.replace(/'/g, "''")}')`)
    653    .join(", ");
    654 
    655  // Only get records in the last 2 months!
    656  // Join no places table to map guid to place_id
    657  const sql = `
    658    WITH input_keys(guid) AS (
    659      VALUES ${valuesClause}
    660    ),
    661    place_ids AS (
    662      SELECT input_keys.guid, ${placeTable}.id AS place_id
    663      FROM input_keys
    664      JOIN ${placeTable} ON ${placeTable}.guid = input_keys.guid
    665    )
    666    SELECT
    667      place_ids.guid AS key,
    668      COALESCE(SUM(${table}.event_type), 0) AS total_clicks,
    669      COALESCE(SUM(1 - ${table}.event_type), 0) AS total_impressions
    670    FROM place_ids
    671    LEFT JOIN ${table} ON ${table}.place_id = place_ids.place_id
    672      AND ${table}.timestamp_s >= strftime('%s', 'now', '-2 months')
    673    GROUP BY place_ids.guid;
    674  `;
    675 
    676  const { activityStreamProvider } = lazy.NewTabUtils;
    677  const interactions = await activityStreamProvider.executePlacesQuery(sql);
    678  const interactionMap = new Map(
    679    interactions.map(row => {
    680      // Destructure the array into variables
    681      const [key, total_clicks, total_impressions] = row;
    682      return [key, { clicks: total_clicks, impressions: total_impressions }];
    683    })
    684  );
    685 
    686  // Rebuild aligned arrays in same order as input
    687  const clicks = guidList.map(guid =>
    688    interactionMap.has(guid) ? interactionMap.get(guid).clicks : 0
    689  );
    690 
    691  const impressions = guidList.map(guid =>
    692    interactionMap.has(guid) ? interactionMap.get(guid).impressions : 0
    693  );
    694  return [clicks, impressions];
    695 }
    696 
    697 export class RankShortcutsProvider {
    698  constructor() {
    699    this.sc_obj = new lazy.PersistentCache("shortcut_cache", true);
    700  }
    701  get rankShortcutsWorker() {
    702    if (!this._rankShortcutsWorker) {
    703      this._rankShortcutsWorker = new lazy.BasePromiseWorker(
    704        "resource://newtab/lib/SmartShortcutsRanker/RankShortcuts.worker.mjs",
    705        { type: "module" }
    706      );
    707    }
    708    return this._rankShortcutsWorker;
    709  }
    710 
    711  /**
    712   * Get hourly seasonality priors and per-site histograms.
    713   *
    714   * @param {Array<object>} topsites
    715   * @param {object} shortcut_cache
    716   * @param {object} isStartup stores the boolean isStartup
    717   * @returns {Promise<{pvec: number[]|null, hists: any}>}
    718   */
    719  async getHourlySeasonalityData(topsites, shortcut_cache, isStartup) {
    720    const cache = (shortcut_cache && shortcut_cache.hourly_seasonality) || null;
    721    const startup = isStartup.isStartup;
    722 
    723    let hourly_prob = null;
    724 
    725    const expired =
    726      cache &&
    727      Date.now() - (cache.timestamp || 0) > BASE_SEASONALITY_CACHE_EXPIRATION;
    728    const missing = !cache || !cache.pvec;
    729 
    730    if (!startup && (missing || expired)) {
    731      const all_hourly_hist = await fetchHourlyVisitsAll(VISITS_TABLE);
    732      hourly_prob = await this.rankShortcutsWorker.post("sumNorm", [
    733        all_hourly_hist,
    734      ]);
    735      // persist fresh prior
    736      await this.sc_obj.set("hourly_seasonality", {
    737        pvec: hourly_prob,
    738        timestamp: Date.now(),
    739      });
    740    } else {
    741      // safe read with optional chaining + null fallback
    742      hourly_prob = cache?.pvec ?? null;
    743    }
    744 
    745    // Per-topsite histograms are needed regardless
    746    const hourly_hists = await fetchHourlyVisitsSpecific(
    747      topsites,
    748      VISITS_TABLE,
    749      PLACES_TABLE
    750    );
    751 
    752    return { pvec: hourly_prob, hists: hourly_hists };
    753  }
    754 
    755  /**
    756   * Get daily seasonality priors and per-site histograms.
    757   *
    758   * @param {Array<object>} topsites
    759   * @param {object} shortcut_cache
    760   * @param {object} isStartup stores the boolean isStartup
    761   * @returns {Promise<{pvec: number[]|null, hists: any}>}
    762   */
    763  async getDailySeasonalityData(topsites, shortcut_cache, isStartup) {
    764    const cache = shortcut_cache?.daily_seasonality ?? null;
    765    const startup = isStartup.isStartup;
    766 
    767    let daily_prob = null;
    768 
    769    const expired =
    770      cache &&
    771      Date.now() - (cache.timestamp || 0) > BASE_SEASONALITY_CACHE_EXPIRATION;
    772    const missing = !cache || !cache.pvec;
    773 
    774    if (!startup && (missing || expired)) {
    775      const all_daily_hist = await fetchDailyVisitsAll(VISITS_TABLE);
    776      daily_prob = await this.rankShortcutsWorker.post("sumNorm", [
    777        all_daily_hist,
    778      ]);
    779      // persist fresh prior
    780      await this.sc_obj.set("daily_seasonality", {
    781        pvec: daily_prob,
    782        timestamp: Date.now(),
    783      });
    784    } else {
    785      daily_prob = cache?.pvec ?? null;
    786    }
    787 
    788    // Per-topsite histograms are needed regardless
    789    const daily_hists = await fetchDailyVisitsSpecific(
    790      topsites,
    791      VISITS_TABLE,
    792      PLACES_TABLE
    793    );
    794 
    795    return { pvec: daily_prob, hists: daily_hists };
    796  }
    797 
    798  /**
    799   * Check the shortcut interaction table for new events since
    800   * the last time we updated the model weights
    801   *
    802   * @param {object} cahce_data shortcut cache
    803   * @param {string} table Shortcuts interaction table
    804   * @param {string} placeTable moz_places table
    805   * @returns {Promise<object>} Map of guid -> clicks and impression counts
    806   */
    807  async getLatestInteractions(cache_data, table, placeTable = "moz_places") {
    808    const now_s = Math.floor(Date.now() / 1000);
    809    let tlu = Number(cache_data.time_last_update ?? 0);
    810    if (tlu > 1e11) {
    811      tlu = Math.floor(tlu / 1000);
    812    } // ms -> s
    813    const since = Math.max(tlu, now_s - 24 * 60 * 60);
    814 
    815    const { activityStreamProvider } = lazy.NewTabUtils;
    816 
    817    const rows = await activityStreamProvider.executePlacesQuery(
    818      `
    819      SELECT
    820          p.guid AS guid,
    821          SUM(CASE WHEN e.event_type = 1 THEN 1 ELSE 0 END) AS clicks,
    822          SUM(CASE WHEN e.event_type = 0 THEN 1 ELSE 0 END) AS impressions
    823        FROM ${table} e
    824        JOIN ${placeTable} p ON p.id = e.place_id
    825        WHERE e.timestamp_s >= ${since}
    826        GROUP BY p.guid  
    827      `
    828    );
    829 
    830    const dict = Object.create(null);
    831 
    832    for (const r of Array.isArray(rows) ? rows : (rows ?? [])) {
    833      const guid = r.guid ?? (Array.isArray(r) ? r[0] : undefined);
    834      if (!guid) {
    835        continue;
    836      }
    837      const clicks = Number(r.clicks ?? (Array.isArray(r) ? r[1] : 0)) || 0;
    838      const impressions =
    839        Number(r.impressions ?? (Array.isArray(r) ? r[2] : 0)) || 0;
    840      dict[guid] = { clicks, impressions };
    841    }
    842 
    843    await this.sc_obj.set("time_last_update", now_s);
    844 
    845    return dict;
    846  }
    847 
    848  /**
    849   * Get "frecency" features: frequency, recency, re-frecency, unique days visited
    850   *
    851   * @param {Array<object>} withGuid topsites we are building features for
    852   * @returns {Promise<{}>} guid -> rece, freq, and refre features
    853   */
    854  async fetchRefreFeatures(withGuid) {
    855    const raw_frec = await fetchLast10VisitsByGuid(
    856      withGuid,
    857      VISITS_TABLE,
    858      PLACES_TABLE
    859    );
    860    const visit_totals = await fetchVisitCountsByGuid(withGuid, PLACES_TABLE);
    861    const output = await this.rankShortcutsWorker.post(
    862      "buildFrecencyFeatures",
    863      [raw_frec, visit_totals]
    864    );
    865 
    866    return output;
    867  }
    868 
    869  /**
    870  /**
    871   * Smart Shortcuts ranking main call
    872   *
    873   * @param {Array<object>} topsites
    874   * @param {object} prefValues
    875   * @param {object} isStartup stores the boolean isStartup
    876   * @returns {Promise<{}>} topsites reordered
    877   */
    878  async rankTopSites(topsites, prefValues, isStartup, numSponsored = 0) {
    879    if (!prefValues?.trainhopConfig?.smartShortcuts) {
    880      return topsites;
    881    }
    882    // get our feature set
    883    const features =
    884      prefValues.trainhopConfig?.smartShortcuts?.features ?? FEATURES;
    885 
    886    // split topsites into two arrays, we only rank those with guid
    887    const [withGuid, withoutGuid] = topsites.reduce(
    888      ([withG, withoutG], site) => {
    889        if (site.guid && typeof site.guid === "string") {
    890          withG.push(site);
    891        } else {
    892          withoutG.push(site);
    893        }
    894        return [withG, withoutG];
    895      },
    896      [[], []]
    897    );
    898 
    899    // query for interactions, sql cant be on promise
    900    // always do this but only used for thompson and ctr
    901    const [clicks, impressions] = await fetchShortcutInteractions(
    902      withGuid,
    903      SHORTCUT_TABLE,
    904      PLACES_TABLE
    905    );
    906 
    907    // cache stores weights and the last feature values used to produce ranking
    908    // PersistentCache r/w cant be on promise
    909    const sc_cache = await this.sc_obj.get();
    910 
    911    // check for bad weights (numerical) or change in init configs
    912    let [weights, init_weights] = checkWeights(
    913      {
    914        current: sc_cache.weights,
    915        new_init: initShortcutWeights(prefValues, features),
    916        old_init: sc_cache.init_weights,
    917      },
    918      features
    919    );
    920 
    921    // update our weights
    922    const latest_interaction_data = await this.getLatestInteractions(
    923      sc_cache,
    924      SHORTCUT_TABLE
    925    );
    926    weights = await this.rankShortcutsWorker.post("updateWeights", [
    927      {
    928        data: latest_interaction_data,
    929        scores: sc_cache.score_map,
    930        features,
    931        weights,
    932        eta: (prefValues.trainhopConfig?.smartShortcuts?.eta ?? ETA) / 10000,
    933        click_bonus:
    934          (prefValues.trainhopConfig?.smartShortcuts?.click_bonus ??
    935            CLICK_BONUS) / 10,
    936      },
    937    ]);
    938 
    939    // write the weights and init... sometimes redundant
    940    await this.sc_obj.set("weights", weights);
    941    await this.sc_obj.set("init_weights", init_weights);
    942 
    943    // feature data
    944    const hourly_seasonality = features?.includes?.("hour")
    945      ? await this.getHourlySeasonalityData(withGuid, sc_cache, isStartup)
    946      : null;
    947    const daily_seasonality = features?.includes?.("daily")
    948      ? await this.getDailySeasonalityData(withGuid, sc_cache, isStartup)
    949      : null;
    950    const bmark_scores = features?.includes?.("bmark")
    951      ? await fetchBookmarkedFlags(withGuid, BOOKMARK_TABLE, PLACES_TABLE)
    952      : null;
    953    const refrec_scores = ["rece", "freq", "refre", "unid"].some(f =>
    954      features.includes(f)
    955    )
    956      ? await this.fetchRefreFeatures(withGuid, features)
    957      : { rece: null, freq: null, refre: null, unid: null };
    958    const open_scores = features?.includes?.("open")
    959      ? await getIsOpen(
    960          withGuid.map(t => t.guid),
    961          isStartup
    962        )
    963      : null;
    964    // call to the promise worker to do the ranking
    965    const frecency_scores = withGuid.map(t => t.frecency);
    966    const output = await this.rankShortcutsWorker.post(
    967      "weightedSampleTopSites",
    968      [
    969        {
    970          features,
    971          alpha:
    972            prefValues.trainhopConfig?.smartShortcuts?.positive_prior ??
    973            SHORTCUT_POSITIVE_PRIOR,
    974          beta:
    975            prefValues.trainhopConfig?.smartShortcuts?.negative_prior ??
    976            SHORTCUT_NEGATIVE_PRIOR,
    977          tau: 100,
    978          guid: withGuid.map(t => t.guid),
    979          clicks,
    980          impressions,
    981          norms:
    982            sc_cache.norms ??
    983            Object.fromEntries(features.map(key => [key, null])),
    984          weights,
    985          frecency: frecency_scores,
    986          hourly_seasonality,
    987          daily_seasonality,
    988          bmark_scores,
    989          open_scores,
    990          rece_scores: refrec_scores?.rece,
    991          freq_scores: refrec_scores?.freq,
    992          refre_scores: refrec_scores?.refre,
    993          unid_scores: refrec_scores?.unid,
    994        },
    995      ]
    996    );
    997    // update the cache
    998    await this.sc_obj.set("norms", output.norms);
    999    await this.sc_obj.set("score_map", output.score_map);
   1000 
   1001    // final score for ranking as an array
   1002    let final_scores = withGuid.map(g => output.score_map[g.guid].final);
   1003    //catch nan errors
   1004    if (final_scores.some(x => Number.isNaN(x))) {
   1005      final_scores = frecency_scores;
   1006    }
   1007 
   1008    // sort by scores
   1009    const sortedSitesVals = sortKeysValues(final_scores, withGuid);
   1010    let [sortedSites] = sortedSitesVals;
   1011 
   1012    // sticky clicks. keep an item at a certain position for at least
   1013    // numImps impressions after a click occurs
   1014    const numImps =
   1015      prefValues?.trainhopConfig?.smartShortcuts?.sticky_numimps ??
   1016      STICKY_NUMIMPS;
   1017    if (numImps > 0) {
   1018      const sguid = sortedSites.map(s => s.guid);
   1019      const positions = await fetchShortcutLastClickPositions(
   1020        sguid,
   1021        SHORTCUT_TABLE,
   1022        PLACES_TABLE,
   1023        numImps
   1024      );
   1025      const stickyGuids = await this.rankShortcutsWorker.post(
   1026        "applyStickyClicks",
   1027        [positions, sguid, numSponsored]
   1028      );
   1029      // Build a lookup table guid -> site object
   1030      const byGuid = new Map(sortedSites.map(site => [site.guid, site]));
   1031 
   1032      // Map over ordered guids, pulling objects from the lookup
   1033      sortedSites = stickyGuids.map(g => byGuid.get(g)).filter(Boolean);
   1034    }
   1035    // grab topsites without guid
   1036    const combined = sortedSites.concat(withoutGuid);
   1037 
   1038    // tack weights and scores so they can pass through to telemetry
   1039    if (prefValues?.trainhopConfig?.smartShortcuts?.telem || SMART_TELEM) {
   1040      // store a version of weights that is rounded
   1041      const roundWeights = Object.fromEntries(
   1042        Object.entries(weights ?? {}).map(([key, v]) => [
   1043          key,
   1044          typeof v === "number" && isFinite(v) ? roundNum(v) : (v ?? null),
   1045        ])
   1046      );
   1047      // do the tacking
   1048      combined.forEach(s => {
   1049        const raw = output?.score_map?.[s.guid];
   1050        s.scores =
   1051          raw && typeof raw === "object"
   1052            ? Object.fromEntries(
   1053                Object.entries(raw).map(([k, v]) => [
   1054                  k,
   1055                  typeof v === "number" && isFinite(v)
   1056                    ? roundNum(v)
   1057                    : (v ?? null),
   1058                ])
   1059              )
   1060            : null;
   1061        s.weights = roundWeights;
   1062      });
   1063    }
   1064    return combined;
   1065  }
   1066 }