PersonalityProviderWorkerClass.mjs (9009B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import {
  tokenize,
  toksToTfIdfVector,
} from "resource://newtab/lib/PersonalityProvider/Tokenize.mjs";
import { NaiveBayesTextTagger } from "resource://newtab/lib/PersonalityProvider/NaiveBayesTextTagger.mjs";
import { NmfTextTagger } from "resource://newtab/lib/PersonalityProvider/NmfTextTagger.mjs";
import { RecipeExecutor } from "resource://newtab/lib/PersonalityProvider/RecipeExecutor.mjs";

/**
 * Reads a file and returns its SHA-256 digest as a lowercase hex string.
 *
 * @param {string} filepath Path of the file to hash.
 * @returns {Promise<string>} Two hex digits per digest byte, concatenated.
 */
async function _getFileHash(filepath) {
  // IOUtils.read resolves with the file contents as a Uint8Array.
  const data = await IOUtils.read(filepath);
  const digest = await crypto.subtle.digest("SHA-256", data);
  const uint8 = new Uint8Array(digest);
  // Return the two-digit hexadecimal code for a byte.
  const toHex = b => b.toString(16).padStart(2, "0");
  return Array.from(uint8, toHex).join("");
}

/**
 * V2 provider builds and ranks an interest profile (also called an “interest vector”) off the browse history.
 * This allows Firefox to classify pages into topics, by examining the text found on the page.
 * It does this by looking at the history text content, title, and description.
 */
export class PersonalityProviderWorker {
  /**
   * Resolves the "personality-provider" directory inside the local profile
   * directory. After the first call the method replaces itself on this
   * instance with a version that resolves to the cached path.
   *
   * @returns {Promise<string>} Path of the attachments directory.
   */
  async getPersonalityProviderDir() {
    const personalityProviderDir = PathUtils.join(
      await PathUtils.getLocalProfileDir(),
      "personality-provider"
    );

    // Cache this so we don't need to await again.
    this.getPersonalityProviderDir = () =>
      Promise.resolve(personalityProviderDir);
    return personalityProviderDir;
  }

  /**
   * Stores the URL prefix that is prepended to an attachment's `location`
   * when downloading it.
   */
  setBaseAttachmentsURL(url) {
    this.baseAttachmentsURL = url;
  }

  /**
   * Stores the interest config whose recipes (`history_item_builder`,
   * `interest_combiner`, `interest_finalizer`, `item_to_rank_builder`,
   * `item_ranker`) are read by the methods below.
   */
  setInterestConfig(interestConfig) {
    this.interestConfig = interestConfig;
  }

  /**
   * Stores the interest vector used by calculateItemRelevanceScore.
   */
  setInterestVector(interestVector) {
    this.interestVector = interestVector;
  }

  /**
   * Applies a sync event: removes attachments of deleted records and of the
   * old versions of updated records, then downloads attachments of created
   * records and of the new versions of updated records.
   *
   * The work is started but not awaited; this method returns immediately.
   *
   * @param {object} event Object with `data: { created, updated, deleted }`,
   *   where `updated` entries carry `old` and `new` records.
   */
  onSync(event) {
    const {
      data: { created, updated, deleted },
    } = event;
    const toRemove = deleted.concat(updated.map(u => u.old));
    const toDownload = created.concat(updated.map(u => u.new));

    // Runs `action` on every record, logging (not propagating) failures so
    // that nothing is left as an unhandled rejection.
    const runAll = (records, action, verb) =>
      Promise.all(
        records.map(record =>
          action(record).catch(error => {
            console.error(`Failed to ${verb} attachment:`, error);
          })
        )
      );

    // Remove every removed attachment first. deleteAttachment removes the
    // file by name and then tries to remove the directory, so running it
    // concurrently with a download could delete a freshly written file or
    // the directory the download is about to write into.
    // The downloads are async but we don't wait for them inside onSync.
    runAll(toRemove, record => this.deleteAttachment(record), "delete").then(
      () =>
        // Download every new/updated attachment.
        runAll(
          toDownload,
          record => this.maybeDownloadAttachment(record),
          "download"
        )
    );
  }

  /**
   * Attempts to download the attachment, but only if it doesn't already exist
   * locally with the expected size and hash.
   *
   * @param {object} record Record with `attachment: { filename, hash, size }`.
   * @param {number} [retries=3] Maximum number of download attempts.
   */
  async maybeDownloadAttachment(record, retries = 3) {
    const {
      attachment: { filename, hash, size },
    } = record;
    const personalityProviderDir = await this.getPersonalityProviderDir();
    await IOUtils.makeDirectory(personalityProviderDir);
    const localFilePath = PathUtils.join(personalityProviderDir, filename);

    let retry = 0;
    while (
      retry++ < retries &&
      // exists is an issue for perf because I might not need to call it.
      (!(await IOUtils.exists(localFilePath)) ||
        (await IOUtils.stat(localFilePath)).size !== size ||
        (await _getFileHash(localFilePath)) !== hash)
    ) {
      await this._downloadAttachment(record);
    }
  }

  /**
   * Downloads the attachment to disk assuming the dir already exists
   * and any existing files matching the filename are clobbered.
   * A non-200 response is logged and nothing is written.
   *
   * @param {object} record Record with `attachment: { location, filename }`.
   */
  async _downloadAttachment(record) {
    const {
      attachment: { location: loc, filename },
    } = record;
    const remoteFilePath = this.baseAttachmentsURL + loc;
    const localFilePath = PathUtils.join(
      await this.getPersonalityProviderDir(),
      filename
    );

    const xhr = new XMLHttpRequest();
    // Set false here for a synchronous request, because we're in a worker.
    xhr.open("GET", remoteFilePath, false);
    xhr.setRequestHeader("Accept-Encoding", "gzip");
    xhr.responseType = "arraybuffer";
    xhr.withCredentials = false;
    xhr.send(null);

    if (xhr.status !== 200) {
      console.error(`Failed to fetch ${remoteFilePath}: ${xhr.statusText}`);
      return;
    }

    const buffer = xhr.response;
    const bytes = new Uint8Array(buffer);

    await IOUtils.write(localFilePath, bytes, {
      tmpPath: `${localFilePath}.tmp`,
    });
  }

  /**
   * Removes the attachment's local file, then tries to remove the
   * attachments directory as well.
   *
   * @param {object} record Record with `attachment: { filename }`.
   */
  async deleteAttachment(record) {
    const {
      attachment: { filename },
    } = record;
    const personalityProviderDir = await this.getPersonalityProviderDir();
    await IOUtils.makeDirectory(personalityProviderDir);
    const path = PathUtils.join(personalityProviderDir, filename);

    await IOUtils.remove(path, { ignoreAbsent: true });
    // Cleanup the directory if it is empty, do nothing if it is not empty.
    try {
      await IOUtils.remove(personalityProviderDir, {
        ignoreAbsent: true,
      });
    } catch (e) {
      // This is likely because the directory is not empty, so we don't care.
    }
  }

  /**
   * Gets contents of the attachment if it already exists on file,
   * and if not attempts to download it.
   *
   * @param {object} record Record with `attachment: { filename, ... }`.
   * @returns {Promise<object>} The parsed JSON file, or `{}` if downloading
   *   or reading failed (the failure is logged).
   */
  async getAttachment(record) {
    const {
      attachment: { filename },
    } = record;
    const filepath = PathUtils.join(
      await this.getPersonalityProviderDir(),
      filename
    );

    try {
      await this.maybeDownloadAttachment(record);
      return await IOUtils.readJSON(filepath);
    } catch (error) {
      console.error(`Failed to load ${filepath}: ${error.message}`);
    }
    return {};
  }

  /**
   * Loads the attachment of every model record into `this.models`, tagging
   * each loaded object with `recordKey` taken from the record's `key`.
   *
   * @param {object[]} models Model records.
   * @returns {Promise<{ok: boolean}>} `ok` is false only when no models were
   *   given; a record whose attachment failed to load still yields an entry.
   */
  async fetchModels(models) {
    this.models = await Promise.all(
      models.map(async record => ({
        ...(await this.getAttachment(record)),
        recordKey: record.key,
      }))
    );
    return { ok: this.models.length > 0 };
  }

  /**
   * Builds `this.taggers` from the loaded models whose `recordKey` is listed
   * in `modelKeys`. Does nothing if taggers were already generated.
   * Models with `model_type` "nb" are collected in a list; models with
   * `model_type` "nmf" are keyed by their `parent_tag`. Other models are
   * ignored.
   *
   * @param {string[]} modelKeys Record keys of the models to use.
   */
  generateTaggers(modelKeys) {
    if (this.taggers) {
      return;
    }
    const nbTaggers = [];
    const nmfTaggers = {};

    for (const model of this.models) {
      if (!modelKeys.includes(model.recordKey)) {
        continue;
      }
      if (model.model_type === "nb") {
        nbTaggers.push(new NaiveBayesTextTagger(model, toksToTfIdfVector));
      } else if (model.model_type === "nmf") {
        nmfTaggers[model.parent_tag] = new NmfTextTagger(
          model,
          toksToTfIdfVector
        );
      }
    }
    this.taggers = { nbTaggers, nmfTaggers };
  }

  /**
   * Sets and generates a Recipe Executor.
   * A Recipe Executor is a set of actions that can be consumed by a Recipe.
   * The Recipe determines the order and specifics of which the actions are called.
   * Requires `this.taggers` to have been set by generateTaggers.
   */
  generateRecipeExecutor() {
    this.recipeExecutor = new RecipeExecutor(
      this.taggers.nbTaggers,
      this.taggers.nmfTaggers,
      tokenize
    );
  }

  /**
   * Examines the user's browse history and returns an interest vector that
   * describes the topics the user frequently browses.
   *
   * @param {Iterable<object>} historyObj History records to process.
   * @returns {{ok: boolean, interestVector: object}|null} The finalized
   *   vector, or null if combining an item into the vector failed.
   */
  createInterestVector(historyObj) {
    let interestVector = {};

    for (const historyRec of historyObj) {
      const ivItem = this.recipeExecutor.executeRecipe(
        historyRec,
        this.interestConfig.history_item_builder
      );
      // Items for which the builder recipe yields null are skipped.
      if (ivItem === null) {
        continue;
      }
      interestVector = this.recipeExecutor.executeCombinerRecipe(
        interestVector,
        ivItem,
        this.interestConfig.interest_combiner
      );
      if (interestVector === null) {
        return null;
      }
    }

    const finalResult = this.recipeExecutor.executeRecipe(
      interestVector,
      this.interestConfig.interest_finalizer
    );

    return {
      ok: true,
      interestVector: finalResult,
    };
  }

  /**
   * Calculates a score of a Pocket item when compared to the user's interest
   * vector. Higher scores are better. Assumes this.interestVector
   * is populated.
   *
   * @param {object} pocketItem Item to score; may carry server-provided
   *   `personalization_models`.
   * @returns {{scorableItem: object, rankingVector: object}|null} The
   *   scorable item and the ranked vector, or null if a recipe returned null.
   */
  calculateItemRelevanceScore(pocketItem) {
    const { personalization_models } = pocketItem;
    let scorableItem;

    // If the server provides some models, we can just use them,
    // and skip generating them.
    if (personalization_models && Object.keys(personalization_models).length) {
      scorableItem = {
        id: pocketItem.id,
        item_tags: personalization_models,
        item_score: pocketItem.item_score,
        item_sort_id: 1,
      };
    } else {
      scorableItem = this.recipeExecutor.executeRecipe(
        pocketItem,
        this.interestConfig.item_to_rank_builder
      );
      if (scorableItem === null) {
        return null;
      }
    }

    // We're doing a deep copy on an object so the stored interest vector is
    // not modified when the item's fields are merged in below.
    let rankingVector = JSON.parse(JSON.stringify(this.interestVector));

    for (const key of Object.keys(scorableItem)) {
      rankingVector[key] = scorableItem[key];
    }

    rankingVector = this.recipeExecutor.executeRecipe(
      rankingVector,
      this.interestConfig.item_ranker
    );

    if (rankingVector === null) {
      return null;
    }

    return { scorableItem, rankingVector };
  }
}