tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

PersonalityProviderWorkerClass.mjs (9009B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 import {
      6  tokenize,
      7  toksToTfIdfVector,
      8 } from "resource://newtab/lib/PersonalityProvider/Tokenize.mjs";
      9 import { NaiveBayesTextTagger } from "resource://newtab/lib/PersonalityProvider/NaiveBayesTextTagger.mjs";
     10 import { NmfTextTagger } from "resource://newtab/lib/PersonalityProvider/NmfTextTagger.mjs";
     11 import { RecipeExecutor } from "resource://newtab/lib/PersonalityProvider/RecipeExecutor.mjs";
     12 
     13 // A helper function to create a hash out of a file.
     14 async function _getFileHash(filepath) {
     15  const data = await IOUtils.read(filepath);
     16  // File is an instance of Uint8Array
     17  const digest = await crypto.subtle.digest("SHA-256", data);
     18  const uint8 = new Uint8Array(digest);
     19  // return the two-digit hexadecimal code for a byte
     20  const toHex = b => b.toString(16).padStart(2, "0");
     21  return Array.from(uint8, toHex).join("");
     22 }
     23 
     24 /**
     25 * V2 provider builds and ranks an interest profile (also called an “interest vector”) off the browse history.
     26 * This allows Firefox to classify pages into topics, by examining the text found on the page.
     27 * It does this by looking at the history text content, title, and description.
     28 */
     29 export class PersonalityProviderWorker {
     30  async getPersonalityProviderDir() {
     31    const personalityProviderDir = PathUtils.join(
     32      await PathUtils.getLocalProfileDir(),
     33      "personality-provider"
     34    );
     35 
     36    // Cache this so we don't need to await again.
     37    this.getPersonalityProviderDir = () =>
     38      Promise.resolve(personalityProviderDir);
     39    return personalityProviderDir;
     40  }
     41 
     42  setBaseAttachmentsURL(url) {
     43    this.baseAttachmentsURL = url;
     44  }
     45 
     46  setInterestConfig(interestConfig) {
     47    this.interestConfig = interestConfig;
     48  }
     49 
     50  setInterestVector(interestVector) {
     51    this.interestVector = interestVector;
     52  }
     53 
     54  onSync(event) {
     55    const {
     56      data: { created, updated, deleted },
     57    } = event;
     58    // Remove every removed attachment.
     59    const toRemove = deleted.concat(updated.map(u => u.old));
     60    toRemove.forEach(record => this.deleteAttachment(record));
     61 
     62    // Download every new/updated attachment.
     63    const toDownload = created.concat(updated.map(u => u.new));
     64    // maybeDownloadAttachment is async but we don't care inside onSync.
     65    toDownload.forEach(record => this.maybeDownloadAttachment(record));
     66  }
     67 
     68  /**
     69   * Attempts to download the attachment, but only if it doesn't already exist.
     70   */
     71  async maybeDownloadAttachment(record, retries = 3) {
     72    const {
     73      attachment: { filename, hash, size },
     74    } = record;
     75    await IOUtils.makeDirectory(await this.getPersonalityProviderDir());
     76    const localFilePath = PathUtils.join(
     77      await this.getPersonalityProviderDir(),
     78      filename
     79    );
     80 
     81    let retry = 0;
     82    while (
     83      retry++ < retries &&
     84      // exists is an issue for perf because I might not need to call it.
     85      (!(await IOUtils.exists(localFilePath)) ||
     86        (await IOUtils.stat(localFilePath)).size !== size ||
     87        (await _getFileHash(localFilePath)) !== hash)
     88    ) {
     89      await this._downloadAttachment(record);
     90    }
     91  }
     92 
     93  /**
     94   * Downloads the attachment to disk assuming the dir already exists
     95   * and any existing files matching the filename are clobbered.
     96   */
     97  async _downloadAttachment(record) {
     98    const {
     99      attachment: { location: loc, filename },
    100    } = record;
    101    const remoteFilePath = this.baseAttachmentsURL + loc;
    102    const localFilePath = PathUtils.join(
    103      await this.getPersonalityProviderDir(),
    104      filename
    105    );
    106 
    107    const xhr = new XMLHttpRequest();
    108    // Set false here for a synchronous request, because we're in a worker.
    109    xhr.open("GET", remoteFilePath, false);
    110    xhr.setRequestHeader("Accept-Encoding", "gzip");
    111    xhr.responseType = "arraybuffer";
    112    xhr.withCredentials = false;
    113    xhr.send(null);
    114 
    115    if (xhr.status !== 200) {
    116      console.error(`Failed to fetch ${remoteFilePath}: ${xhr.statusText}`);
    117      return;
    118    }
    119 
    120    const buffer = xhr.response;
    121    const bytes = new Uint8Array(buffer);
    122 
    123    await IOUtils.write(localFilePath, bytes, {
    124      tmpPath: `${localFilePath}.tmp`,
    125    });
    126  }
    127 
    128  async deleteAttachment(record) {
    129    const {
    130      attachment: { filename },
    131    } = record;
    132    await IOUtils.makeDirectory(await this.getPersonalityProviderDir());
    133    const path = PathUtils.join(
    134      await this.getPersonalityProviderDir(),
    135      filename
    136    );
    137 
    138    await IOUtils.remove(path, { ignoreAbsent: true });
    139    // Cleanup the directory if it is empty, do nothing if it is not empty.
    140    try {
    141      await IOUtils.remove(await this.getPersonalityProviderDir(), {
    142        ignoreAbsent: true,
    143      });
    144    } catch (e) {
    145      // This is likely because the directory is not empty, so we don't care.
    146    }
    147  }
    148 
    149  /**
    150   * Gets contents of the attachment if it already exists on file,
    151   * and if not attempts to download it.
    152   */
    153  async getAttachment(record) {
    154    const {
    155      attachment: { filename },
    156    } = record;
    157    const filepath = PathUtils.join(
    158      await this.getPersonalityProviderDir(),
    159      filename
    160    );
    161 
    162    try {
    163      await this.maybeDownloadAttachment(record);
    164      return await IOUtils.readJSON(filepath);
    165    } catch (error) {
    166      console.error(`Failed to load ${filepath}: ${error.message}`);
    167    }
    168    return {};
    169  }
    170 
    171  async fetchModels(models) {
    172    this.models = await Promise.all(
    173      models.map(async record => ({
    174        ...(await this.getAttachment(record)),
    175        recordKey: record.key,
    176      }))
    177    );
    178    if (!this.models.length) {
    179      return {
    180        ok: false,
    181      };
    182    }
    183    return {
    184      ok: true,
    185    };
    186  }
    187 
    188  generateTaggers(modelKeys) {
    189    if (!this.taggers) {
    190      let nbTaggers = [];
    191      let nmfTaggers = {};
    192 
    193      for (let model of this.models) {
    194        if (!modelKeys.includes(model.recordKey)) {
    195          continue;
    196        }
    197        if (model.model_type === "nb") {
    198          nbTaggers.push(new NaiveBayesTextTagger(model, toksToTfIdfVector));
    199        } else if (model.model_type === "nmf") {
    200          nmfTaggers[model.parent_tag] = new NmfTextTagger(
    201            model,
    202            toksToTfIdfVector
    203          );
    204        }
    205      }
    206      this.taggers = { nbTaggers, nmfTaggers };
    207    }
    208  }
    209 
    210  /**
    211   * Sets and generates a Recipe Executor.
    212   * A Recipe Executor is a set of actions that can be consumed by a Recipe.
    213   * The Recipe determines the order and specifics of which the actions are called.
    214   */
    215  generateRecipeExecutor() {
    216    const recipeExecutor = new RecipeExecutor(
    217      this.taggers.nbTaggers,
    218      this.taggers.nmfTaggers,
    219      tokenize
    220    );
    221    this.recipeExecutor = recipeExecutor;
    222  }
    223 
    224  /**
    225   * Examines the user's browse history and returns an interest vector that
    226   * describes the topics the user frequently browses.
    227   */
    228  createInterestVector(historyObj) {
    229    let interestVector = {};
    230 
    231    for (let historyRec of historyObj) {
    232      let ivItem = this.recipeExecutor.executeRecipe(
    233        historyRec,
    234        this.interestConfig.history_item_builder
    235      );
    236      if (ivItem === null) {
    237        continue;
    238      }
    239      interestVector = this.recipeExecutor.executeCombinerRecipe(
    240        interestVector,
    241        ivItem,
    242        this.interestConfig.interest_combiner
    243      );
    244      if (interestVector === null) {
    245        return null;
    246      }
    247    }
    248 
    249    const finalResult = this.recipeExecutor.executeRecipe(
    250      interestVector,
    251      this.interestConfig.interest_finalizer
    252    );
    253 
    254    return {
    255      ok: true,
    256      interestVector: finalResult,
    257    };
    258  }
    259 
    260  /**
    261   * Calculates a score of a Pocket item when compared to the user's interest
    262   * vector. Returns the score. Higher scores are better. Assumes this.interestVector
    263   * is populated.
    264   */
    265  calculateItemRelevanceScore(pocketItem) {
    266    const { personalization_models } = pocketItem;
    267    let scorableItem;
    268 
    269    // If the server provides some models, we can just use them,
    270    // and skip generating them.
    271    if (personalization_models && Object.keys(personalization_models).length) {
    272      scorableItem = {
    273        id: pocketItem.id,
    274        item_tags: personalization_models,
    275        item_score: pocketItem.item_score,
    276        item_sort_id: 1,
    277      };
    278    } else {
    279      scorableItem = this.recipeExecutor.executeRecipe(
    280        pocketItem,
    281        this.interestConfig.item_to_rank_builder
    282      );
    283      if (scorableItem === null) {
    284        return null;
    285      }
    286    }
    287 
    288    // We're doing a deep copy on an object.
    289    let rankingVector = JSON.parse(JSON.stringify(this.interestVector));
    290 
    291    Object.keys(scorableItem).forEach(key => {
    292      rankingVector[key] = scorableItem[key];
    293    });
    294 
    295    rankingVector = this.recipeExecutor.executeRecipe(
    296      rankingVector,
    297      this.interestConfig.item_ranker
    298    );
    299 
    300    if (rankingVector === null) {
    301      return null;
    302    }
    303 
    304    return { scorableItem, rankingVector };
    305  }
    306 }