tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

InferredPersonalizationFeed.sys.mjs (14295B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 const lazy = {};
      6 ChromeUtils.defineESModuleGetters(lazy, {
      7  NewTabUtils: "resource://gre/modules/NewTabUtils.sys.mjs",
      8  PersistentCache: "resource://newtab/lib/PersistentCache.sys.mjs",
      9  PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs",
     10 });
     11 
     12 import { FeatureModel } from "resource://newtab/lib/InferredModel/FeatureModel.sys.mjs";
     13 
     14 import {
     15  FORMAT,
     16  AggregateResultKeys,
     17  DEFAULT_INFERRED_MODEL_DATA,
     18 } from "resource://newtab/lib/InferredModel/InferredConstants.sys.mjs";
     19 
     20 import {
     21  actionTypes as at,
     22  actionCreators as ac,
     23 } from "resource://newtab/common/Actions.mjs";
     24 
     25 import { MODEL_TYPE } from "./InferredModel/InferredConstants.sys.mjs";
     26 
     // Key of this feed's own persistent cache (holds the "interest_vector" entry).
     const CACHE_KEY = "inferred_personalization_feed";
     // Key of the cache that getInferredModelData() reads `inferredModel` from.
     const DISCOVERY_STREAM_CACHE_KEY = "discovery_stream";

     // Default number of hours a cached interest vector is reused before it is
     // recomputed; prefs may override it via `iv_refresh_frequency_hours`.
     const INTEREST_VECTOR_UPDATE_HOURS = 24;
     // Milliseconds in one hour.
     const HOURS_TO_MS = 1000 * 60 * 60;

     // Pref names, looked up in `Prefs.values` of the store state.
     const PREF_USER_INFERRED_PERSONALIZATION =
       "discoverystream.sections.personalization.inferred.user.enabled";
     const PREF_SYSTEM_INFERRED_PERSONALIZATION =
       "discoverystream.sections.personalization.inferred.enabled";
     const PREF_SYSTEM_INFERRED_MODEL_OVERRIDE =
       "discoverystream.sections.personalization.inferred.model.override";
     38 
     /**
      * Converts a millisecond timestamp or duration to whole seconds.
      *
      * @param {number} timeMS Value in milliseconds
      * @returns {number} The value in seconds, rounded with Math.round
      */
     function timeMSToSeconds(timeMS) {
       const MS_PER_SECOND = 1000;
       const seconds = timeMS / MS_PER_SECOND;
       return Math.round(seconds);
     }
     42 
     // Tables that story clicks and story impressions are written to and
     // aggregated from.
     const CLICK_TABLE = "moz_newtab_story_click";
     const IMPRESSION_TABLE = "moz_newtab_story_impression";

     // Model override value that selects DEFAULT_INFERRED_MODEL_DATA.
     const TEST_MODEL_ID = "TEST";

     // Default age, in days, beyond which stored interactions are deleted.
     const OLD_DATA_PRESERVE_DAYS_DEFAULT = 6 * 30;
     // Minimum time between two clean-up passes: 5 days, in milliseconds.
     const OLD_DATA_CLEAR_CHECK_FREQUENCY_MS = 5 * 24 * 60 * 60 * 1000;
     49 
     /**
      * A feature that periodically generates an interest vector for inferred
      * personalization, and records the story clicks and impressions that the
      * vector is computed from.
      */
     export class InferredPersonalizationFeed {
       constructor() {
         // Set to true by loadInterestVector() and back to false by reset().
         this.loaded = false;
         this.cache = this.PersistentCache(CACHE_KEY, true);
       }

       /**
        * Replaces the cached interest vector with an empty object, marks the feed
        * as not loaded and dispatches INFERRED_PERSONALIZATION_RESET.
        */
       async reset() {
         if (this.cache) {
           await this.cache.set("interest_vector", {});
         }
         this.loaded = false;
         this.store.dispatch(
           ac.OnlyToMain({
             type: at.INFERRED_PERSONALIZATION_RESET,
           })
         );
       }

       /**
        * @returns {*} Truthy only when both the user pref and the system pref for
        *   inferred personalization are truthy.
        */
       isEnabled() {
         const { values } = this.store.getState().Prefs;
         return (
           values[PREF_USER_INFERRED_PERSONALIZATION] &&
           values[PREF_SYSTEM_INFERRED_PERSONALIZATION]
         );
       }

       /**
        * @returns {boolean} Whether the trainhop config asks for interactions to be
        *   stored (checked by onAction even when the feature itself is disabled).
        */
       isStoreData() {
         const { values } = this.store.getState().Prefs;
         return !!values?.trainhopConfig?.newTabSectionsExperiment
           ?.personalizationStoreFeaturesEnabled;
       }

       /**
        * Loads (or regenerates) the interest vector, flagged as a startup load.
        */
       async init() {
         await this.loadInterestVector(true /* isStartup */);
       }

       /**
        * Runs the aggregate query once per interval, one after the other.
        *
        * @param {Array<{start: number, end: number}>} intervals Interval bounds,
        *   passed to fetchInferredPersonalizationSummary as millisecond times
        * @param {string} table Table to aggregate
        * @returns {Array} One aggregate result per interval, in interval order
        */
       async queryDatabaseForTimeIntervals(intervals, table) {
         const results = [];
         for (const { start, end } of intervals) {
           results.push(
             await this.fetchInferredPersonalizationSummary(start, end, table)
           );
         }
         return results;
       }

       /**
        * Get inferred model raw data.
        *
        * The model override pref wins when it is set: the TEST id selects the
        * default model data, anything else is parsed as JSON. When the override
        * is unset or not valid JSON, the model stored in the discovery stream
        * cache is returned instead.
        *
        * @returns JSON of inferred model (undefined when none is available)
        */
       async getInferredModelData() {
         const modelOverrideRaw =
           this.store.getState().Prefs.values[PREF_SYSTEM_INFERRED_MODEL_OVERRIDE];
         if (modelOverrideRaw) {
           if (modelOverrideRaw === TEST_MODEL_ID) {
             return {
               model_id: TEST_MODEL_ID,
               model_data: DEFAULT_INFERRED_MODEL_DATA,
             };
           }
           try {
             return JSON.parse(modelOverrideRaw);
           } catch (error) {
             // A malformed override must not break personalization: report it and
             // fall through to the cached model.
             console.error(
               `Ignoring invalid inferred model override: ${error}`
             );
           }
         }
         const dsCache = this.PersistentCache(DISCOVERY_STREAM_CACHE_KEY, true);
         const cachedData = (await dsCache.get()) || {};
         return cachedData.inferredModel;
       }

       /**
        * Computes the interest vector(s) from the stored clicks and, for model
        * types that need them, the stored impressions.
        *
        * @returns {object} Result of the model computation, or an empty object
        *   when no model is available or the model type is not supported
        */
       async generateInterestVector() {
         const inferredModel = await this.getInferredModelData();
         if (!inferredModel || !inferredModel.model_data) {
           return {};
         }
         const model = FeatureModel.fromJSON(inferredModel.model_data);

         const intervals = model.getDateIntervals(this.Date().now());
         // Column positions of the rows returned by the summary query.
         const schema = {
           [AggregateResultKeys.FEATURE]: 0,
           [AggregateResultKeys.FORMAT_ENUM]: 1,
           [AggregateResultKeys.VALUE]: 2,
         };

         const aggClickPerInterval = await this.queryDatabaseForTimeIntervals(
           intervals,
           CLICK_TABLE
         );
         const isClickModel = model.modelType === MODEL_TYPE.CLICKS;
         const interests = model.computeInterestVectors({
           dataForIntervals: aggClickPerInterval,
           indexSchema: schema,
           model_id: inferredModel.model_id,
           applyPostProcessing: isClickModel,
         });

         if (isClickModel) {
           return interests;
         }

         if (
           model.modelType === MODEL_TYPE.CLICK_IMP_PAIR ||
           model.modelType === MODEL_TYPE.CTR
         ) {
           // This model type does not support differential privacy or thresholding
           const aggImpressionsPerInterval =
             await this.queryDatabaseForTimeIntervals(intervals, IMPRESSION_TABLE);
           const ivImpressions = model.computeInterestVector({
             dataForIntervals: aggImpressionsPerInterval,
             indexSchema: schema,
           });

           if (model.modelType === MODEL_TYPE.CTR) {
             // Drop model_id so only the per-feature click totals are passed on.
             // eslint-disable-next-line no-unused-vars
             const { model_id, ...clickTotals } = interests.inferredInterests;
             return model.computeCTRInterestVectors({
               clicks: clickTotals,
               impressions: ivImpressions,
               model_id: inferredModel.model_id,
             });
           }
           return {
             inferredInterests: {
               c: interests.inferredInterests,
               i: ivImpressions,
               model_id: inferredModel.model_id,
             },
           };
         }

         // unsupported modelType
         return {};
       }

       /**
        * Reuses the cached interest vector while it is fresh, otherwise
        * regenerates it (clearing old interaction data first when a clean-up is
        * due), writes it back to the cache and dispatches
        * INFERRED_PERSONALIZATION_UPDATE.
        *
        * @param {boolean} isStartup Forwarded as `meta.isStartup` of the action
        */
       async loadInterestVector(isStartup = false) {
         const cachedData = (await this.cache.get()) || {};
         let { interest_vector } = cachedData;

         const { values } = this.store.getState().Prefs;
         const interestVectorRefreshHours =
           values?.inferredPersonalizationConfig?.iv_refresh_frequency_hours ||
           INTEREST_VECTOR_UPDATE_HOURS;

         // If we have nothing in cache, or cache has expired, we can make a fresh fetch.
         if (
           !interest_vector?.lastUpdated ||
           !(
             this.Date().now() - interest_vector.lastUpdated <
             interestVectorRefreshHours * HOURS_TO_MS
           )
         ) {
           // Without a recorded clean-up time the clock starts now, so the first
           // clean-up happens one check period later.
           let lastClearedDB = interest_vector?.lastClearedDB ?? this.Date().now();
           const needsCleanup =
             this.Date().now() - lastClearedDB >= OLD_DATA_CLEAR_CHECK_FREQUENCY_MS;
           if (needsCleanup) {
             await this.clearOldData(
               values?.inferredPersonalizationConfig?.history_cull_days ||
                 OLD_DATA_PRESERVE_DAYS_DEFAULT
             );
             lastClearedDB = this.Date().now();
           }
           interest_vector = {
             data: await this.generateInterestVector(),
             lastUpdated: this.Date().now(),
             lastClearedDB,
           };
         }
         await this.cache.set("interest_vector", interest_vector);
         this.loaded = true;

         this.store.dispatch(
           ac.OnlyToMain({
             type: at.INFERRED_PERSONALIZATION_UPDATE,
             data: {
               lastUpdated: interest_vector.lastUpdated,
               inferredInterests: interest_vector.data.inferredInterests,
               coarseInferredInterests: interest_vector.data.coarseInferredInterests,
               coarsePrivateInferredInterests:
                 interest_vector.data.coarsePrivateInferredInterests,
             },
             meta: {
               isStartup,
             },
           })
         );
       }

       /**
        * Records an impression for every tile of type "organic" in the action.
        *
        * @param {object} action Action whose `data.tiles` lists the shown tiles;
        *   a missing list is treated as empty
        */
       async handleDiscoveryStreamImpressionStats(action) {
         const tiles = action.data?.tiles ?? [];

         for (const tile of tiles) {
           const { type, format, pos, topic, section_position, features } = tile;
           if (type === "organic") {
             await this.recordInferredPersonalizationImpression({
               format,
               pos,
               topic,
               section_position,
               features,
             });
           }
         }
       }

       /**
        * Records a click when the user event is a CLICK or OPEN_NEW_WINDOW on a
        * card whose `card_type` is "organic"; other events are ignored.
        *
        * @param {object} action Action carrying `data.event`, `data.value` and
        *   `data.action_position`
        */
       async handleDiscoveryStreamUserEvent(action) {
         switch (action.data?.event) {
           case "OPEN_NEW_WINDOW":
           case "CLICK": {
             const { card_type, format, topic, section_position, features } =
               action.data.value ?? {};
             const pos = action.data.action_position;
             if (card_type === "organic") {
               await this.recordInferredPersonalizationClick({
                 format,
                 pos,
                 topic,
                 section_position,
                 features,
               });
             }
             break;
           }
         }
       }

       /**
        * Stores the features of an impressed tile in the impression table.
        *
        * @param {object} tile Tile description (format, pos, section_position, features)
        */
       async recordInferredPersonalizationImpression(tile) {
         await this.recordInferredPersonalizationInteraction(IMPRESSION_TABLE, tile);
       }

       /**
        * Stores the features of a clicked tile, plus a "click" feature, in the
        * click table.
        *
        * @param {object} tile Tile description (format, pos, section_position, features)
        */
       async recordInferredPersonalizationClick(tile) {
         await this.recordInferredPersonalizationInteraction(
           CLICK_TABLE,
           tile,
           true
         );
       }

       /**
        * @returns {Array} Every row of the impression table
        */
       async fetchInferredPersonalizationImpression() {
         return await this.fetchInferredPersonalizationInteraction(
           IMPRESSION_TABLE
         );
       }

       /**
        * Sums feature values per (feature, card format) for rows whose timestamp
        * lies strictly between the two bounds.
        *
        * @param {number} startTime Lower bound in milliseconds (exclusive)
        * @param {number} endTime Upper bound in milliseconds (exclusive)
        * @param {string} table Table to aggregate
        * @returns {Array} Result of the Places query
        */
       async fetchInferredPersonalizationSummary(startTime, endTime, table) {
         const sql = `SELECT feature, card_format_enum, SUM(feature_value) FROM ${table}
           WHERE timestamp_s > ${timeMSToSeconds(startTime)}
           AND timestamp_s < ${timeMSToSeconds(endTime)}
            GROUP BY feature, card_format_enum`;
         const { activityStreamProvider } = lazy.NewTabUtils;
         return await activityStreamProvider.executePlacesQuery(sql);
       }

       /**
        * Deletes older data from a table. Failures are logged, not thrown.
        *
        * @param {int} preserveAgeDays Number of days to preserve
        * @param {*} table Table to clear
        */
       async clearOldDataOfTable(preserveAgeDays, table) {
         // timestamp_s is stored in seconds, so the retention window has to be
         // converted from days to seconds (not minutes).
         const SECONDS_PER_DAY = 24 * 60 * 60;
         const cutoffSeconds =
           timeMSToSeconds(this.Date().now()) - preserveAgeDays * SECONDS_PER_DAY;
         const sql = `DELETE FROM ${table}
           WHERE timestamp_s < ${cutoffSeconds}`;
         try {
           await lazy.PlacesUtils.withConnectionWrapper(
             "newtab/lib/InferredPersonalizationFeed.sys.mjs: clearOldDataOfTable",
             async db => {
               await db.execute(sql);
             }
           );
         } catch (ex) {
           console.error(`Error clearning places data ${ex}`);
         }
       }

       /**
        * Deletes older data from impression and click tables
        *
        * @param {int} preserveAgeDays Number of days to preserve (callers pass
        *   6 months by default, or 0 to drop everything older than now)
        */
       async clearOldData(preserveAgeDays) {
         await this.clearOldDataOfTable(preserveAgeDays, IMPRESSION_TABLE);
         await this.clearOldDataOfTable(preserveAgeDays, CLICK_TABLE);
       }

       /**
        * Inserts one row per feature of the tile into the given table.
        *
        * @param {string} table CLICK_TABLE or IMPRESSION_TABLE; any other value is
        *   ignored
        * @param {object} tile Tile description (format, pos, section_position, features)
        * @param {boolean} extraClickEvent When true, an extra ["click", 1] feature
        *   row is recorded ahead of the tile's own features
        */
       async recordInferredPersonalizationInteraction(
         table,
         tile,
         extraClickEvent = false
       ) {
         // The table name is interpolated into the SQL, so only the two known
         // tables are accepted.
         if (table !== CLICK_TABLE && table !== IMPRESSION_TABLE) {
           return;
         }

         const featureValuePairs = extraClickEvent ? [["click", 1]] : [];
         if (tile.features) {
           featureValuePairs.push(...Object.entries(tile.features));
         }
         // Nothing to store: executing the statement with an empty parameter list
         // would run it with unbound values.
         if (!featureValuePairs.length) {
           return;
         }

         const primaryValues = {
           timestamp_s: timeMSToSeconds(this.Date().now()),
           card_format_enum: FORMAT[tile.format],
           position: tile.pos,
           section_position: tile.section_position || 0,
         };
         const insertValues = featureValuePairs.map(([feature, feature_value]) => ({
           ...primaryValues,
           feature,
           feature_value,
         }));

         const sql = `
         INSERT INTO ${table}(feature, timestamp_s, card_format_enum, position, section_position, feature_value)
         VALUES (:feature, :timestamp_s, :card_format_enum, :position, :section_position, :feature_value)
         `;
         await lazy.PlacesUtils.withConnectionWrapper(
           "newtab/lib/InferredPersonalizationFeed.sys.mjs: recordInferredPersonalizationImpression",
           async db => {
             await db.execute(sql, insertValues);
           }
         );
       }

       /**
        * Reads every row of the click or impression table.
        *
        * @param {string} table CLICK_TABLE or IMPRESSION_TABLE
        * @returns {Array} The stored rows, or an empty array for any other table
        */
       async fetchInferredPersonalizationInteraction(table) {
         if (table !== IMPRESSION_TABLE && table !== CLICK_TABLE) {
           return [];
         }

         const sql = `SELECT feature, timestamp_s, card_format_enum, position, section_position, feature_value
         FROM ${table}`;

         const { activityStreamProvider } = lazy.NewTabUtils;
         return await activityStreamProvider.executePlacesQuery(sql);
       }

       /**
        * Reacts to a change of either enabling pref: loads the interest vector
        * when the feature ends up enabled, resets otherwise.
        *
        * @param {object} action PREF_CHANGED action (`data.name`, `data.value`)
        */
       async onPrefChangedAction(action) {
         switch (action.data.name) {
           case PREF_USER_INFERRED_PERSONALIZATION:
           case PREF_SYSTEM_INFERRED_PERSONALIZATION:
             if (this.isEnabled() && action.data.value) {
               await this.loadInterestVector();
             } else {
               await this.reset();
             }
             break;
         }
       }

       /**
        * Entry point for store actions handled by this feed.
        *
        * @param {object} action Dispatched action
        */
       async onAction(action) {
         switch (action.type) {
           case at.INIT:
             if (this.isEnabled()) {
               await this.init();
             }
             break;
           case at.UNINIT:
             await this.reset();
             break;
           case at.DISCOVERY_STREAM_DEV_SYSTEM_TICK:
           case at.SYSTEM_TICK:
             if (this.loaded && this.isEnabled()) {
               await this.loadInterestVector();
             }
             break;
           case at.INFERRED_PERSONALIZATION_REFRESH:
             if (this.loaded && this.isEnabled()) {
               await this.reset();
               await this.loadInterestVector();
             }
             break;
           case at.PLACES_HISTORY_CLEARED:
             await this.clearOldData(0);
             break;
           case at.DISCOVERY_STREAM_IMPRESSION_STATS:
             // We have the ability to collect feature impressions when the feature is off
             if (this.isEnabled() || this.isStoreData()) {
               await this.handleDiscoveryStreamImpressionStats(action);
             }
             break;
           case at.DISCOVERY_STREAM_USER_EVENT:
             if (this.isEnabled() || this.isStoreData()) {
               await this.handleDiscoveryStreamUserEvent(action);
             }
             break;
           case at.PREF_CHANGED:
             await this.onPrefChangedAction(action);
             break;
         }
       }
     }
    459 
    /**
     * Thin indirection around PersistentCache and Date. The class reaches both
     * through these prototype hooks, so automated tests can replace them to
     * simulate cache contents and the current time.
     */
    InferredPersonalizationFeed.prototype.PersistentCache = (...args) =>
      new lazy.PersistentCache(...args);
    InferredPersonalizationFeed.prototype.Date = () => Date;