InferredPersonalizationFeed.sys.mjs (14295B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import { FeatureModel } from "resource://newtab/lib/InferredModel/FeatureModel.sys.mjs";

import {
  FORMAT,
  AggregateResultKeys,
  DEFAULT_INFERRED_MODEL_DATA,
} from "resource://newtab/lib/InferredModel/InferredConstants.sys.mjs";

import {
  actionTypes as at,
  actionCreators as ac,
} from "resource://newtab/common/Actions.mjs";

import { MODEL_TYPE } from "./InferredModel/InferredConstants.sys.mjs";

// Modules registered as getters on `lazy` via ChromeUtils.defineESModuleGetters.
const lazy = {};
ChromeUtils.defineESModuleGetters(lazy, {
  NewTabUtils: "resource://gre/modules/NewTabUtils.sys.mjs",
  PersistentCache: "resource://newtab/lib/PersistentCache.sys.mjs",
  PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs",
});

// PersistentCache keys: this feed's own cache and the discovery stream cache
// (the latter is read for the inferred model).
const CACHE_KEY = "inferred_personalization_feed";
const DISCOVERY_STREAM_CACHE_KEY = "discovery_stream";

// Default interest vector refresh period, and the hours -> milliseconds factor.
const INTEREST_VECTOR_UPDATE_HOURS = 24;
const HOURS_TO_MS = 60 * 60 * 1000;

// Pref names read from the store's Prefs.values.
const PREF_USER_INFERRED_PERSONALIZATION =
  "discoverystream.sections.personalization.inferred.user.enabled";
const PREF_SYSTEM_INFERRED_PERSONALIZATION =
  "discoverystream.sections.personalization.inferred.enabled";
const PREF_SYSTEM_INFERRED_MODEL_OVERRIDE =
  "discoverystream.sections.personalization.inferred.model.override";

/**
 * Converts a millisecond value into seconds, rounded to the nearest integer.
 *
 * @param {number} timeMS Time in milliseconds
 * @returns {number} Time in whole seconds
 */
function timeMSToSeconds(timeMS) {
  const seconds = timeMS / 1000;
  return Math.round(seconds);
}

// Tables holding the recorded story interactions.
const CLICK_TABLE = "moz_newtab_story_click";
const IMPRESSION_TABLE = "moz_newtab_story_impression";

// Value of the model override pref that selects DEFAULT_INFERRED_MODEL_DATA.
const TEST_MODEL_ID = "TEST";

// Interaction rows are preserved for 6 months (in days) unless configured otherwise.
const OLD_DATA_PRESERVE_DAYS_DEFAULT = 6 * 30;
// Minimum time between two cleanups of old interaction rows.
const OLD_DATA_CLEAR_CHECK_FREQUENCY_MS = 5 * 24 * HOURS_TO_MS; // 5 days
/**
 * A feature that periodically generates an interest vector for inferred personalization.
 *
 * The feed records story clicks and impressions into the click and impression
 * tables, aggregates them through a FeatureModel into an interest vector,
 * caches the result and dispatches it to the store.
 *
 * NOTE(review): `this.store` is never assigned in this file; it is presumably
 * attached by the owner of the feed before any action is handled.
 */
export class InferredPersonalizationFeed {
  constructor() {
    this.loaded = false;
    this.cache = this.PersistentCache(CACHE_KEY, true);
  }

  /**
   * Empties the cached interest vector, marks the feed as not loaded and
   * dispatches INFERRED_PERSONALIZATION_RESET.
   */
  async reset() {
    if (this.cache) {
      await this.cache.set("interest_vector", {});
    }
    this.loaded = false;
    this.store.dispatch(
      ac.OnlyToMain({
        type: at.INFERRED_PERSONALIZATION_RESET,
      })
    );
  }

  /**
   * @returns A truthy value only when both the user pref and the system pref
   *   for inferred personalization are set.
   */
  isEnabled() {
    return (
      this.store.getState().Prefs.values[PREF_USER_INFERRED_PERSONALIZATION] &&
      this.store.getState().Prefs.values[PREF_SYSTEM_INFERRED_PERSONALIZATION]
    );
  }

  /**
   * @returns {boolean} Whether the trainhop config asks for interaction
   *   features to be stored (used even when the feature itself is off).
   */
  isStoreData() {
    return !!this.store.getState().Prefs.values?.trainhopConfig
      ?.newTabSectionsExperiment?.personalizationStoreFeaturesEnabled;
  }

  async init() {
    await this.loadInterestVector(true /* isStartup */);
  }

  /**
   * Runs the aggregate query once per interval, one after the other.
   *
   * @param {Array} intervals Objects with `start` and `end` times in milliseconds
   * @param {string} table Table to aggregate
   * @returns {Array} One aggregate result per interval, in interval order
   */
  async queryDatabaseForTimeIntervals(intervals, table) {
    let results = [];
    for (const interval of intervals) {
      const agg = await this.fetchInferredPersonalizationSummary(
        interval.start,
        interval.end,
        table
      );
      results.push(agg);
    }
    return results;
  }

  /**
   * Get inferred model raw data. The model override pref wins when it is set
   * to the test model id or to parseable JSON; otherwise the model stored in
   * the discovery stream cache is used.
   *
   * @returns JSON of inferred model (undefined when none is cached)
   */
  async getInferredModelData() {
    const modelOverrideRaw =
      this.store.getState().Prefs.values[PREF_SYSTEM_INFERRED_MODEL_OVERRIDE];
    if (modelOverrideRaw) {
      if (modelOverrideRaw === TEST_MODEL_ID) {
        return {
          model_id: TEST_MODEL_ID,
          model_data: DEFAULT_INFERRED_MODEL_DATA,
        };
      }
      try {
        return JSON.parse(modelOverrideRaw);
      } catch (_error) {
        // Deliberately ignored: an override that is not valid JSON falls
        // through to the cached model below.
      }
    }
    const dsCache = this.PersistentCache(DISCOVERY_STREAM_CACHE_KEY, true);
    const cachedData = (await dsCache.get()) || {};
    let { inferredModel } = cachedData;
    return inferredModel;
  }

  /**
   * Builds the interest vector from the recorded interactions.
   *
   * @returns {object} `{}` when there is no usable model or the model type is
   *   unsupported; otherwise the vectors computed by the model for its type.
   */
  async generateInterestVector() {
    const inferredModel = await this.getInferredModelData();
    if (!inferredModel || !inferredModel.model_data) {
      return {};
    }
    const model = FeatureModel.fromJSON(inferredModel.model_data);

    const intervals = model.getDateIntervals(this.Date().now());
    // Column positions of the rows returned by fetchInferredPersonalizationSummary.
    const schema = {
      [AggregateResultKeys.FEATURE]: 0,
      [AggregateResultKeys.FORMAT_ENUM]: 1,
      [AggregateResultKeys.VALUE]: 2,
    };

    const aggClickPerInterval = await this.queryDatabaseForTimeIntervals(
      intervals,
      CLICK_TABLE
    );
    const isClickModel = model.modelType === MODEL_TYPE.CLICKS;
    const interests = model.computeInterestVectors({
      dataForIntervals: aggClickPerInterval,
      indexSchema: schema,
      model_id: inferredModel.model_id,
      applyPostProcessing: isClickModel,
    });

    if (isClickModel) {
      return interests;
    }

    if (
      model.modelType === MODEL_TYPE.CLICK_IMP_PAIR ||
      model.modelType === MODEL_TYPE.CTR
    ) {
      // This model type does not support differential privacy or thresholding
      const aggImpressionsPerInterval =
        await this.queryDatabaseForTimeIntervals(intervals, IMPRESSION_TABLE);
      const ivImpressions = model.computeInterestVector({
        dataForIntervals: aggImpressionsPerInterval,
        indexSchema: schema,
      });

      if (model.modelType === MODEL_TYPE.CTR) {
        // eslint-disable-next-line no-unused-vars
        const { model_id, ...clickTotals } = interests.inferredInterests;
        const inferredInterests = model.computeCTRInterestVectors({
          clicks: clickTotals,
          impressions: ivImpressions,
          model_id: inferredModel.model_id,
        });
        return inferredInterests;
      }
      const res = {
        c: interests.inferredInterests,
        i: ivImpressions,
        model_id: inferredModel.model_id,
      };
      return { inferredInterests: res };
    }

    // unsupported modelType
    return {};
  }

  /**
   * Loads the interest vector from cache, regenerating it (and, when due,
   * clearing old interaction rows) if it is missing or older than the refresh
   * period. The vector is then written back to the cache and dispatched with
   * INFERRED_PERSONALIZATION_UPDATE.
   *
   * @param {boolean} isStartup Forwarded in the action's `meta`
   */
  async loadInterestVector(isStartup = false) {
    const cachedData = (await this.cache.get()) || {};
    let { interest_vector } = cachedData;

    const { values } = this.store.getState().Prefs;
    const interestVectorRefreshHours =
      values?.inferredPersonalizationConfig?.iv_refresh_frequency_hours ||
      INTEREST_VECTOR_UPDATE_HOURS;

    // If we have nothing in cache, or cache has expired, we can make a fresh fetch.
    if (
      !interest_vector?.lastUpdated ||
      !(
        this.Date().now() - interest_vector.lastUpdated <
        interestVectorRefreshHours * HOURS_TO_MS
      )
    ) {
      // With no recorded cleanup time the clock starts now, so the first
      // cleanup happens one check period after the first generation.
      let lastClearedDB = interest_vector?.lastClearedDB ?? this.Date().now();
      const needsCleanup =
        this.Date().now() - lastClearedDB >= OLD_DATA_CLEAR_CHECK_FREQUENCY_MS;
      if (needsCleanup) {
        await this.clearOldData(
          values?.inferredPersonalizationConfig?.history_cull_days ||
            OLD_DATA_PRESERVE_DAYS_DEFAULT
        );
        lastClearedDB = this.Date().now();
      }
      interest_vector = {
        data: await this.generateInterestVector(),
        lastUpdated: this.Date().now(),
        lastClearedDB,
      };
    }
    await this.cache.set("interest_vector", interest_vector);
    this.loaded = true;

    this.store.dispatch(
      ac.OnlyToMain({
        type: at.INFERRED_PERSONALIZATION_UPDATE,
        data: {
          lastUpdated: interest_vector.lastUpdated,
          inferredInterests: interest_vector.data.inferredInterests,
          coarseInferredInterests: interest_vector.data.coarseInferredInterests,
          coarsePrivateInferredInterests:
            interest_vector.data.coarsePrivateInferredInterests,
        },
        meta: {
          isStartup,
        },
      })
    );
  }

  /**
   * Records an impression for every tile of type "organic" in the action.
   */
  async handleDiscoveryStreamImpressionStats(action) {
    const { tiles } = action.data;

    for (const tile of tiles) {
      const { type, format, pos, topic, section_position, features } = tile;
      if (["organic"].includes(type)) {
        await this.recordInferredPersonalizationImpression({
          format,
          pos,
          topic,
          section_position,
          features,
        });
      }
    }
  }

  /**
   * Records a click for CLICK and OPEN_NEW_WINDOW events on "organic" cards.
   * Other events are ignored.
   */
  async handleDiscoveryStreamUserEvent(action) {
    switch (action.data?.event) {
      case "OPEN_NEW_WINDOW":
      case "CLICK": {
        const { card_type, format, topic, section_position, features } =
          action.data.value ?? {};
        const pos = action.data.action_position;
        if (["organic"].includes(card_type)) {
          await this.recordInferredPersonalizationClick({
            format,
            pos,
            topic,
            section_position,
            features,
          });
        }
        break;
      }
    }
  }

  async recordInferredPersonalizationImpression(tile) {
    await this.recordInferredPersonalizationInteraction(IMPRESSION_TABLE, tile);
  }

  async recordInferredPersonalizationClick(tile) {
    // Clicks additionally store a synthetic ["click", 1] feature row.
    await this.recordInferredPersonalizationInteraction(
      CLICK_TABLE,
      tile,
      true
    );
  }

  /**
   * @returns All rows of the impression table
   */
  async fetchInferredPersonalizationImpression() {
    return this.fetchInferredPersonalizationInteraction(IMPRESSION_TABLE);
  }

  /**
   * Sums feature values per (feature, card format) for rows strictly between
   * the two times.
   *
   * @param {number} startTime Interval start in milliseconds (exclusive)
   * @param {number} endTime Interval end in milliseconds (exclusive)
   * @param {string} table Table to aggregate
   * @returns Rows of [feature, card_format_enum, SUM(feature_value)]
   */
  async fetchInferredPersonalizationSummary(startTime, endTime, table) {
    let sql = `SELECT feature, card_format_enum, SUM(feature_value) FROM ${table}
      WHERE timestamp_s > ${timeMSToSeconds(startTime)}
      AND timestamp_s < ${timeMSToSeconds(endTime)}
      GROUP BY feature, card_format_enum`;
    const { activityStreamProvider } = lazy.NewTabUtils;
    const interactions = await activityStreamProvider.executePlacesQuery(sql);
    return interactions;
  }

  /**
   * Deletes older data from a table. Errors are logged, not thrown.
   *
   * @param {int} preserveAgeDays Number of days to preserve
   * @param {*} table Table to clear
   */
  async clearOldDataOfTable(preserveAgeDays, table) {
    // timestamp_s is stored in seconds, so the retention window has to be
    // converted from days to seconds before being subtracted.
    const SECONDS_PER_DAY = 24 * 60 * 60;
    const cutoff_s =
      timeMSToSeconds(this.Date().now()) - preserveAgeDays * SECONDS_PER_DAY;
    let sql = `DELETE FROM ${table}
      WHERE timestamp_s < ${cutoff_s}`;
    try {
      await lazy.PlacesUtils.withConnectionWrapper(
        "newtab/lib/InferredPersonalizationFeed.sys.mjs: clearOldDataOfTable",
        async db => {
          await db.execute(sql);
        }
      );
    } catch (ex) {
      console.error(`Error clearning places data ${ex}`);
    }
  }

  /**
   * Deletes older data from impression and click tables
   *
   * @param {int} preserveAgeDays Number of days to preserve (callers pass 6
   *   months by default, or 0 to delete everything older than now)
   */
  async clearOldData(preserveAgeDays) {
    await this.clearOldDataOfTable(preserveAgeDays, IMPRESSION_TABLE);
    await this.clearOldDataOfTable(preserveAgeDays, CLICK_TABLE);
  }

  /**
   * Inserts one row per feature of the tile into the given table.
   *
   * @param {string} table CLICK_TABLE or IMPRESSION_TABLE; anything else is a no-op
   * @param {object} tile Reads `format`, `pos`, `section_position` and `features`
   * @param {boolean} extraClickEvent When true, a ["click", 1] row is added
   */
  async recordInferredPersonalizationInteraction(
    table,
    tile,
    extraClickEvent = false
  ) {
    // The table name is interpolated into the SQL text, so only the two known
    // tables are accepted.
    if (table !== CLICK_TABLE && table !== IMPRESSION_TABLE) {
      return;
    }

    const featureValuePairs = [];
    if (extraClickEvent) {
      featureValuePairs.push(["click", 1]);
    }
    if (tile.features) {
      featureValuePairs.push(...Object.entries(tile.features));
    }
    // Nothing to store. Without this guard the INSERT would be executed with
    // an empty binding array, i.e. with none of its named parameters bound.
    if (!featureValuePairs.length) {
      return;
    }

    const primaryValues = {
      timestamp_s: timeMSToSeconds(this.Date().now()),
      card_format_enum: FORMAT[tile.format],
      position: tile.pos,
      section_position: tile.section_position || 0,
    };

    const insertValues = featureValuePairs.map(([feature, feature_value]) => ({
      ...primaryValues,
      feature,
      feature_value,
    }));

    let sql = `
      INSERT INTO ${table}(feature, timestamp_s, card_format_enum, position, section_position, feature_value)
      VALUES (:feature, :timestamp_s, :card_format_enum, :position, :section_position, :feature_value)
    `;
    await lazy.PlacesUtils.withConnectionWrapper(
      "newtab/lib/InferredPersonalizationFeed.sys.mjs: recordInferredPersonalizationImpression",
      async db => {
        await db.execute(sql, insertValues);
      }
    );
  }

  /**
   * Fetches every row of an interaction table; no time filter is applied.
   *
   * @param {string} table CLICK_TABLE or IMPRESSION_TABLE
   * @returns Rows of the table, or `[]` for any other table name
   */
  async fetchInferredPersonalizationInteraction(table) {
    if (table !== IMPRESSION_TABLE && table !== CLICK_TABLE) {
      return [];
    }

    let sql = `SELECT feature, timestamp_s, card_format_enum, position, section_position, feature_value
      FROM ${table}`;

    const { activityStreamProvider } = lazy.NewTabUtils;
    const interactions = await activityStreamProvider.executePlacesQuery(sql);

    return interactions;
  }

  /**
   * Reloads the interest vector when one of the two enabling prefs is switched
   * on and the feature is enabled overall; resets otherwise.
   */
  async onPrefChangedAction(action) {
    switch (action.data.name) {
      case PREF_USER_INFERRED_PERSONALIZATION:
      case PREF_SYSTEM_INFERRED_PERSONALIZATION:
        if (this.isEnabled() && action.data.value) {
          await this.loadInterestVector();
        } else {
          await this.reset();
        }
        break;
    }
  }

  async onAction(action) {
    switch (action.type) {
      case at.INIT:
        if (this.isEnabled()) {
          await this.init();
        }
        break;
      case at.UNINIT:
        await this.reset();
        break;
      case at.DISCOVERY_STREAM_DEV_SYSTEM_TICK:
      case at.SYSTEM_TICK:
        if (this.loaded && this.isEnabled()) {
          await this.loadInterestVector();
        }
        break;
      case at.INFERRED_PERSONALIZATION_REFRESH:
        if (this.loaded && this.isEnabled()) {
          await this.reset();
          await this.loadInterestVector();
        }
        break;
      case at.PLACES_HISTORY_CLEARED:
        // Preserving 0 days removes every row older than the current time.
        await this.clearOldData(0);
        break;
      case at.DISCOVERY_STREAM_IMPRESSION_STATS:
        // We have the ability to collect feature impressions when the feature is off
        if (this.isEnabled() || this.isStoreData()) {
          await this.handleDiscoveryStreamImpressionStats(action);
        }
        break;
      case at.DISCOVERY_STREAM_USER_EVENT:
        if (this.isEnabled() || this.isStoreData()) {
          await this.handleDiscoveryStreamUserEvent(action);
        }
        break;
      case at.PREF_CHANGED:
        await this.onPrefChangedAction(action);
        break;
    }
  }
}

/**
 * Creating a thin wrapper around PersistentCache, and Date.
 * This makes it easier for us to write automated tests that simulate responses.
 */
InferredPersonalizationFeed.prototype.PersistentCache = (...args) => {
  return new lazy.PersistentCache(...args);
};
InferredPersonalizationFeed.prototype.Date = () => {
  return Date;
};