tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

PageDataSchema.sys.mjs (6769B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 const lazy = {};
      6 
      7 ChromeUtils.defineESModuleGetters(lazy, {
      8  JsonSchemaValidator:
      9    "resource://gre/modules/components-utils/JsonSchemaValidator.sys.mjs",
     10  OpenGraphPageData:
     11    "moz-src:///browser/components/pagedata/OpenGraphPageData.sys.mjs",
     12  SchemaOrgPageData:
     13    "moz-src:///browser/components/pagedata/SchemaOrgPageData.sys.mjs",
     14  TwitterPageData:
     15    "moz-src:///browser/components/pagedata/TwitterPageData.sys.mjs",
     16 });
     17 
     18 ChromeUtils.defineLazyGetter(lazy, "logConsole", function () {
     19  return console.createInstance({
     20    prefix: "PageData",
     21    maxLogLevel: Services.prefs.getBoolPref("browser.pagedata.log", false)
     22      ? "Debug"
     23      : "Warn",
     24  });
     25 });
     26 
     27 /**
     28 * The list of page data collectors. These should be sorted in order of
     29 * specificity, if the same piece of data is provided by two collectors then the
     30 * earlier wins.
     31 *
     32 * Collectors must provide a `collect` function which will be passed the
     33 * document object and should return the PageData structure. The function may be
     34 * asynchronous if needed.
     35 *
     36 * The data returned need not be valid, collectors should return whatever they
     37 * can and then we drop anything that is invalid once all data is joined.
     38 */
     39 ChromeUtils.defineLazyGetter(lazy, "DATA_COLLECTORS", function () {
     40  return [lazy.SchemaOrgPageData, lazy.OpenGraphPageData, lazy.TwitterPageData];
     41 });
     42 
     43 let SCHEMAS = new Map();
     44 
     45 /**
     46 * Loads the schema for the given name.
     47 *
     48 * @param {string} schemaName
     49 *   The name of the schema to load.
     50 * @returns {object}
     51 *   The loaded schema.
     52 */
     53 async function loadSchema(schemaName) {
     54  if (SCHEMAS.has(schemaName)) {
     55    return SCHEMAS.get(schemaName);
     56  }
     57 
     58  let url = `chrome://browser/content/pagedata/schemas/${schemaName.toLocaleLowerCase()}.schema.json`;
     59  let response = await fetch(url);
     60  if (!response.ok) {
     61    throw new Error(`Failed to load schema: ${response.statusText}`);
     62  }
     63 
     64  let schema = await response.json();
     65  SCHEMAS.set(schemaName, schema);
     66  return schema;
     67 }
     68 
     69 /**
     70 * Validates the data using the schema with the given name.
     71 *
     72 * @param {string} schemaName
     73 *   The name of the schema to validate against.
     74 * @param {object} data
     75 *   The data to validate.
     76 */
     77 async function validateData(schemaName, data) {
     78  let schema = await loadSchema(schemaName.toLocaleLowerCase());
     79 
     80  let result = lazy.JsonSchemaValidator.validate(data, schema, {
     81    allowExplicitUndefinedProperties: true,
     82    // Allowed for future expansion of the schema.
     83    allowAdditionalProperties: true,
     84  });
     85 
     86  if (!result.valid) {
     87    throw result.error;
     88  }
     89 }
     90 
     91 /**
     92 * A shared API that can be used in parent or child processes
     93 */
     94 export const PageDataSchema = {
     95  // Enumeration of data types. The keys must match the schema name.
     96  DATA_TYPE: Object.freeze({
     97    // Note that 1 and 2 were used as types in earlier versions and should not be used here.
     98    PRODUCT: 3,
     99    DOCUMENT: 4,
    100    ARTICLE: 5,
    101    AUDIO: 6,
    102    VIDEO: 7,
    103  }),
    104 
    105  /**
    106   * Gets the data type name.
    107   *
    108   * @param {DATA_TYPE} type
    109   *   The data type from the DATA_TYPE enumeration
    110   *
    111   * @returns {string | null} The name for the type or null if not found.
    112   */
    113  nameForType(type) {
    114    for (let [name, value] of Object.entries(this.DATA_TYPE)) {
    115      if (value == type) {
    116        return name;
    117      }
    118    }
    119 
    120    return null;
    121  },
    122 
    123  /**
    124   * Asynchronously validates some page data against the expected schema. Throws
    125   * an exception if validation fails.
    126   *
    127   * @param {DATA_TYPE} type
    128   *   The data type from the DATA_TYPE enumeration
    129   * @param {object} data
    130   *   The page data
    131   */
    132  async validateData(type, data) {
    133    let name = this.nameForType(type);
    134 
    135    if (!name) {
    136      throw new Error(`Unknown data type ${type}`);
    137    }
    138 
    139    await validateData(name, data);
    140  },
    141 
    142  /**
    143   * Asynchronously validates an entire PageData structure. Any invalid or
    144   * unknown data types are dropped.
    145   *
    146   * @param {PageData} pageData
    147   *   The page data
    148   *
    149   * @returns {PageData} The validated page data structure
    150   */
    151  async validatePageData(pageData) {
    152    let { data: dataMap = {}, ...general } = pageData;
    153 
    154    await validateData("general", general);
    155 
    156    let validData = {};
    157 
    158    for (let [type, data] of Object.entries(dataMap)) {
    159      let name = this.nameForType(type);
    160      // Ignore unknown types here.
    161      if (!name) {
    162        continue;
    163      }
    164 
    165      try {
    166        await validateData(name, data);
    167 
    168        validData[type] = data;
    169      } catch (e) {
    170        // Invalid data is dropped.
    171      }
    172    }
    173 
    174    return {
    175      ...general,
    176      data: validData,
    177    };
    178  },
    179 
    180  /**
    181   * Adds new page data into an existing data set. Any existing data is not
    182   * overwritten.
    183   *
    184   * @param {PageData} existingPageData
    185   *   The existing page data
    186   * @param {PageData} newPageData
    187   *   The new page data
    188   *
    189   * @returns {PageData} The joined data.
    190   */
    191  coalescePageData(existingPageData, newPageData) {
    192    // Split out the general data from the map of specific data.
    193    let { data: existingMap = {}, ...existingGeneral } = existingPageData;
    194    let { data: newMap = {}, ...newGeneral } = newPageData;
    195 
    196    Object.assign(newGeneral, existingGeneral);
    197 
    198    let dataMap = {};
    199    for (let [type, data] of Object.entries(existingMap)) {
    200      if (type in newMap) {
    201        dataMap[type] = Object.assign({}, newMap[type], data);
    202      } else {
    203        dataMap[type] = data;
    204      }
    205    }
    206 
    207    for (let [type, data] of Object.entries(newMap)) {
    208      if (!(type in dataMap)) {
    209        dataMap[type] = data;
    210      }
    211    }
    212 
    213    return {
    214      ...newGeneral,
    215      data: dataMap,
    216    };
    217  },
    218 
    219  /**
    220   * Collects page data from a DOM document.
    221   *
    222   * @param {Document} document
    223   *   The DOM document to collect data from
    224   *
    225   * @returns {Promise<PageData | null>} The data collected or null in case of
    226   *   error.
    227   */
    228  async collectPageData(document) {
    229    lazy.logConsole.debug("Starting collection", document.documentURI);
    230 
    231    let pending = lazy.DATA_COLLECTORS.map(async collector => {
    232      try {
    233        return await collector.collect(document);
    234      } catch (e) {
    235        lazy.logConsole.error("Error collecting page data", e);
    236        return null;
    237      }
    238    });
    239 
    240    let pageDataList = await Promise.all(pending);
    241 
    242    let pageData = pageDataList.reduce(PageDataSchema.coalescePageData, {
    243      date: Date.now(),
    244      url: document.documentURI,
    245    });
    246 
    247    try {
    248      return this.validatePageData(pageData);
    249    } catch (e) {
    250      lazy.logConsole.error("Failed to collect valid page data", e);
    251      return null;
    252    }
    253  },
    254 };