tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

URLPattern.sys.mjs (13430B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 const lazy = {};
      6 
      7 ChromeUtils.defineESModuleGetters(lazy, {
      8  assert: "chrome://remote/content/shared/webdriver/Assert.sys.mjs",
      9  error: "chrome://remote/content/shared/webdriver/Errors.sys.mjs",
     10  pprint: "chrome://remote/content/shared/Format.sys.mjs",
     11 });
     12 
     13 /**
     14 * Parsed pattern to use for URL matching.
     15 *
     16 * @typedef {object} ParsedURLPattern
     17 * @property {string|null} protocol
     18 *     The protocol, for instance "https".
     19 * @property {string|null} hostname
     20 *     The hostname, for instance "example.com".
     21 * @property {string|null} port
     22 *     The serialized port. Empty string for default ports of special schemes.
     23 * @property {string|null} pathname
     24 *     The pathname, starting with "/".
     25 * @property {string|null} search
     26 *     The search query string, without the leading "?"
     27 */
     28 
     29 /**
     30 * Subset of properties extracted from a parsed URL.
     31 *
     32 * @typedef {object} ParsedURL
     33 * @property {string=} host
     34 * @property {string|Array<string>} path
     35 *     Either a string if the path is an opaque path, or an array of strings
     36 *     (path segments).
     37 * @property {number=} port
     38 * @property {string=} query
     39 * @property {string=} scheme
     40 */
     41 
     42 /**
     43 * Enum of URLPattern types.
     44 *
     45 * @readonly
     46 * @enum {URLPatternType}
     47 */
     48 const URLPatternType = {
     49  Pattern: "pattern",
     50  String: "string",
     51 };
     52 
     53 const supportedURLPatternTypes = Object.values(URLPatternType);
     54 
     55 const SPECIAL_SCHEMES = ["file", "http", "https", "ws", "wss"];
     56 const DEFAULT_PORTS = {
     57  file: null,
     58  http: 80,
     59  https: 443,
     60  ws: 80,
     61  wss: 443,
     62 };
     63 
     64 /**
     65 * Check if a given URL pattern is compatible with the provided URL.
     66 *
     67 * Implements https://w3c.github.io/webdriver-bidi/#match-url-pattern
     68 *
     69 * @param {ParsedURLPattern} urlPattern
     70 *     The URL pattern to match.
     71 * @param {string} url
     72 *     The string representation of a URL to test against the pattern.
     73 *
     74 * @returns {boolean}
     75 *     True if the pattern is compatible with the provided URL, false otherwise.
     76 */
     77 export function matchURLPattern(urlPattern, url) {
     78  const parsedURL = parseURL(url);
     79 
     80  if (urlPattern.protocol !== null && urlPattern.protocol != parsedURL.scheme) {
     81    return false;
     82  }
     83 
     84  if (urlPattern.hostname !== null && urlPattern.hostname != parsedURL.host) {
     85    return false;
     86  }
     87 
     88  if (urlPattern.port !== null && urlPattern.port != serializePort(parsedURL)) {
     89    return false;
     90  }
     91 
     92  if (
     93    urlPattern.pathname !== null &&
     94    urlPattern.pathname != serializePath(parsedURL)
     95  ) {
     96    return false;
     97  }
     98 
     99  if (urlPattern.search !== null) {
    100    const urlQuery = parsedURL.query === null ? "" : parsedURL.query;
    101    if (urlPattern.search != urlQuery) {
    102      return false;
    103    }
    104  }
    105 
    106  return true;
    107 }
    108 
    109 /**
    110 * Parse a URLPattern into a parsed pattern object which can be used to match
    111 * URLs using `matchURLPattern`.
    112 *
    113 * Implements https://w3c.github.io/webdriver-bidi/#parse-url-pattern
    114 *
    115 * @param {URLPattern} pattern
    116 *     The pattern to parse.
    117 *
    118 * @returns {ParsedURLPattern}
    119 *     The parsed URL pattern.
    120 *
    121 * @throws {InvalidArgumentError}
    122 *     Raised if an argument is of an invalid type or value.
    123 * @throws {UnsupportedOperationError}
    124 *     Raised if the pattern uses a protocol not supported by Firefox.
    125 */
    126 export function parseURLPattern(pattern) {
    127  lazy.assert.object(
    128    pattern,
    129    lazy.pprint`Expected URL pattern to be an object, got ${pattern}`
    130  );
    131 
    132  let hasProtocol = true;
    133  let hasHostname = true;
    134  let hasPort = true;
    135  let hasPathname = true;
    136  let hasSearch = true;
    137 
    138  let patternUrl;
    139  switch (pattern.type) {
    140    case URLPatternType.Pattern: {
    141      patternUrl = "";
    142      if ("protocol" in pattern) {
    143        patternUrl += parseProtocol(pattern.protocol);
    144      } else {
    145        hasProtocol = false;
    146        patternUrl += "http";
    147      }
    148 
    149      const scheme = patternUrl.toLowerCase();
    150      patternUrl += ":";
    151      if (SPECIAL_SCHEMES.includes(scheme)) {
    152        patternUrl += "//";
    153      }
    154 
    155      if ("hostname" in pattern) {
    156        patternUrl += parseHostname(pattern.hostname, scheme);
    157      } else {
    158        if (scheme != "file") {
    159          patternUrl += "placeholder";
    160        }
    161        hasHostname = false;
    162      }
    163 
    164      if ("port" in pattern) {
    165        patternUrl += parsePort(pattern.port);
    166      } else {
    167        hasPort = false;
    168      }
    169 
    170      if ("pathname" in pattern) {
    171        patternUrl += parsePathname(pattern.pathname);
    172      } else {
    173        hasPathname = false;
    174      }
    175 
    176      if ("search" in pattern) {
    177        patternUrl += parseSearch(pattern.search);
    178      } else {
    179        hasSearch = false;
    180      }
    181      break;
    182    }
    183    case URLPatternType.String:
    184      lazy.assert.string(
    185        pattern.pattern,
    186        lazy.pprint`Expected URL pattern "pattern" to be a string, got ${pattern.pattern}`
    187      );
    188      patternUrl = unescapeUrlPattern(pattern.pattern);
    189      break;
    190    default:
    191      throw new lazy.error.InvalidArgumentError(
    192        `Expected "urlPattern" type to be one of ${supportedURLPatternTypes}, got ${pattern.type}`
    193      );
    194  }
    195 
    196  if (!URL.canParse(patternUrl)) {
    197    throw new lazy.error.InvalidArgumentError(
    198      `Unable to parse URL "${patternUrl}"`
    199    );
    200  }
    201 
    202  let parsedURL;
    203  try {
    204    parsedURL = parseURL(patternUrl);
    205  } catch (e) {
    206    throw new lazy.error.InvalidArgumentError(
    207      `Failed to parse URL "${patternUrl}"`
    208    );
    209  }
    210 
    211  if (hasProtocol && !SPECIAL_SCHEMES.includes(parsedURL.scheme)) {
    212    throw new lazy.error.UnsupportedOperationError(
    213      `URL pattern did not specify a supported protocol (one of ${SPECIAL_SCHEMES}), got ${parsedURL.scheme}`
    214    );
    215  }
    216 
    217  return {
    218    protocol: hasProtocol ? parsedURL.scheme : null,
    219    hostname: hasHostname ? parsedURL.host : null,
    220    port: hasPort ? serializePort(parsedURL) : null,
    221    pathname:
    222      hasPathname && parsedURL.path.length ? serializePath(parsedURL) : null,
    223    search: hasSearch ? parsedURL.query || "" : null,
    224  };
    225 }
    226 
    227 /**
    228 * Parse the hostname property of a URLPatternPattern.
    229 *
    230 * @param {string} hostname
    231 *     A hostname property.
    232 * @param {string} scheme
    233 *     The scheme for the URLPatternPattern.
    234 *
    235 * @returns {string}
    236 *     The parsed property.
    237 *
    238 * @throws {InvalidArgumentError}
    239 *     Raised if an argument is of an invalid type or value.
    240 */
    241 function parseHostname(hostname, scheme) {
    242  if (typeof hostname != "string" || hostname == "") {
    243    throw new lazy.error.InvalidArgumentError(
    244      `Expected URLPattern "hostname" to be a non-empty string, got ${hostname}`
    245    );
    246  }
    247 
    248  if (scheme == "file") {
    249    throw new lazy.error.InvalidArgumentError(
    250      `URLPattern with "file" scheme cannot specify a hostname, got ${hostname}`
    251    );
    252  }
    253 
    254  hostname = unescapeUrlPattern(hostname);
    255 
    256  const forbiddenHostnameCharacters = ["/", "?", "#"];
    257  let insideBrackets = false;
    258  for (const codepoint of hostname) {
    259    if (
    260      forbiddenHostnameCharacters.includes(codepoint) ||
    261      (!insideBrackets && codepoint == ":")
    262    ) {
    263      throw new lazy.error.InvalidArgumentError(
    264        `URL pattern "hostname" contained a forbidden character, got "${hostname}"`
    265      );
    266    }
    267 
    268    if (codepoint == "[") {
    269      insideBrackets = true;
    270    } else if (codepoint == "]") {
    271      insideBrackets = false;
    272    }
    273  }
    274 
    275  return hostname;
    276 }
    277 
    278 /**
    279 * Parse the pathname property of a URLPatternPattern.
    280 *
    281 * @param {string} pathname
    282 *     A pathname property.
    283 *
    284 * @returns {string}
    285 *     The parsed property.
    286 *
    287 * @throws {InvalidArgumentError}
    288 *     Raised if an argument is of an invalid type or value.
    289 */
    290 function parsePathname(pathname) {
    291  lazy.assert.string(
    292    pathname,
    293    lazy.pprint`Expected URL pattern "pathname" to be a string, got ${pathname}`
    294  );
    295 
    296  pathname = unescapeUrlPattern(pathname);
    297  if (!pathname.startsWith("/")) {
    298    pathname = `/${pathname}`;
    299  }
    300 
    301  if (pathname.includes("?") || pathname.includes("#")) {
    302    throw new lazy.error.InvalidArgumentError(
    303      `URL pattern "pathname" contained a forbidden character, got "${pathname}"`
    304    );
    305  }
    306 
    307  return pathname;
    308 }
    309 
    310 /**
    311 * Parse the port property of a URLPatternPattern.
    312 *
    313 * @param {string} port
    314 *     A port property.
    315 *
    316 * @returns {string}
    317 *     The parsed property.
    318 *
    319 * @throws {InvalidArgumentError}
    320 *     Raised if an argument is of an invalid type or value.
    321 */
    322 function parsePort(port) {
    323  if (typeof port != "string" || port == "") {
    324    throw new lazy.error.InvalidArgumentError(
    325      `Expected URLPattern "port" to be a non-empty string, got ${port}`
    326    );
    327  }
    328 
    329  port = unescapeUrlPattern(port);
    330 
    331  const isNumber = /^\d*$/.test(port);
    332  if (!isNumber) {
    333    throw new lazy.error.InvalidArgumentError(
    334      `URL pattern "port" is not a valid number, got "${port}"`
    335    );
    336  }
    337 
    338  return `:${port}`;
    339 }
    340 
    341 /**
    342 * Parse the protocol property of a URLPatternPattern.
    343 *
    344 * @param {string} protocol
    345 *     A protocol property.
    346 *
    347 * @returns {string}
    348 *     The parsed property.
    349 *
    350 * @throws {InvalidArgumentError}
    351 *     Raised if an argument is of an invalid type or value.
    352 */
    353 function parseProtocol(protocol) {
    354  if (typeof protocol != "string" || protocol == "") {
    355    throw new lazy.error.InvalidArgumentError(
    356      `Expected URLPattern "protocol" to be a non-empty string, got ${protocol}`
    357    );
    358  }
    359 
    360  protocol = unescapeUrlPattern(protocol);
    361  if (!/^[a-zA-Z0-9+-.]*$/.test(protocol)) {
    362    throw new lazy.error.InvalidArgumentError(
    363      `URL pattern "protocol" contained a forbidden character, got "${protocol}"`
    364    );
    365  }
    366 
    367  return protocol;
    368 }
    369 
    370 /**
    371 * Parse the search property of a URLPatternPattern.
    372 *
    373 * @param {string} search
    374 *     A search property.
    375 *
    376 * @returns {string}
    377 *     The parsed property.
    378 *
    379 * @throws {InvalidArgumentError}
    380 *     Raised if an argument is of an invalid type or value.
    381 */
    382 function parseSearch(search) {
    383  lazy.assert.string(
    384    search,
    385    lazy.pprint`Expected URL pattern "search" to be a string, got ${search}`
    386  );
    387 
    388  search = unescapeUrlPattern(search);
    389  if (!search.startsWith("?")) {
    390    search = `?${search}`;
    391  }
    392 
    393  if (search.includes("#")) {
    394    throw new lazy.error.InvalidArgumentError(
    395      `Expected URLPattern "search" to never contain "#", got ${search}`
    396    );
    397  }
    398 
    399  return search;
    400 }
    401 
    402 /**
    403 * Parse a string URL. This tries to be close to Basic URL Parser, however since
    404 * this is not currently implemented in Firefox and URL parsing has many edge
    405 * cases, it does not try to be a faithful implementation.
    406 *
    407 * Edge cases which are not supported are mostly about non-special URLs, which
    408 * in practice should not be observable in automation.
    409 *
    410 * @param {string} url
    411 *     The string based URL to parse.
    412 * @returns {ParsedURL}
    413 *     The parsed URL.
    414 */
    415 function parseURL(url) {
    416  const urlObj = new URL(url);
    417  const uri = urlObj.URI;
    418 
    419  return {
    420    scheme: uri.scheme,
    421    // Note: Use urlObj instead of uri for hostname:
    422    // nsIURI removes brackets from ipv6 hostnames (eg [::1] becomes ::1).
    423    host: urlObj.hostname,
    424    path: uri.filePath,
    425    // Note: Use urlObj instead of uri for port:
    426    // nsIURI throws on the port getter for non-special schemes.
    427    port: urlObj.port != "" ? Number(uri.port) : null,
    428    query: uri.hasQuery ? uri.query : null,
    429  };
    430 }
    431 
    432 /**
    433 * Serialize the path of a parsed URL.
    434 *
    435 * @see https://pr-preview.s3.amazonaws.com/w3c/webdriver-bidi/pull/429.html#parse-url-pattern
    436 *
    437 * @param {ParsedURL} url
    438 *     A parsed url.
    439 *
    440 * @returns {string}
    441 *     The serialized path
    442 */
    443 function serializePath(url) {
    444  // Check for opaque path
    445  if (typeof url.path == "string") {
    446    return url.path;
    447  }
    448 
    449  let serialized = "";
    450  for (const segment of url.path) {
    451    serialized += `/${segment}`;
    452  }
    453 
    454  return serialized;
    455 }
    456 
    457 /**
    458 * Serialize the port of a parsed URL.
    459 *
    460 * @see https://pr-preview.s3.amazonaws.com/w3c/webdriver-bidi/pull/429.html#parse-url-pattern
    461 *
    462 * @param {ParsedURL} url
    463 *     A parsed url.
    464 *
    465 * @returns {string}
    466 *     The serialized port
    467 */
    468 function serializePort(url) {
    469  let port = null;
    470  if (
    471    SPECIAL_SCHEMES.includes(url.scheme) &&
    472    DEFAULT_PORTS[url.scheme] !== null &&
    473    (url.port === null || url.port == DEFAULT_PORTS[url.scheme])
    474  ) {
    475    port = "";
    476  } else if (url.port !== null) {
    477    port = `${url.port}`;
    478  }
    479 
    480  return port;
    481 }
    482 
    483 /**
    484 * Unescape and check a pattern string against common forbidden characters.
    485 *
    486 * @see https://pr-preview.s3.amazonaws.com/w3c/webdriver-bidi/pull/429.html#unescape-url-pattern
    487 *
    488 * @param {string} pattern
    489 *     Either a full URLPatternString pattern or a property of a URLPatternPattern.
    490 *
    491 * @returns {string}
    492 *     The unescaped pattern
    493 *
    494 * @throws {InvalidArgumentError}
    495 *     Raised if an argument is of an invalid type or value.
    496 */
    497 function unescapeUrlPattern(pattern) {
    498  const forbiddenCharacters = ["(", ")", "*", "{", "}"];
    499  const escapeCharacter = "\\";
    500 
    501  let isEscaped = false;
    502  let result = "";
    503 
    504  for (const codepoint of Array.from(pattern)) {
    505    if (!isEscaped) {
    506      if (forbiddenCharacters.includes(codepoint)) {
    507        throw new lazy.error.InvalidArgumentError(
    508          `URL pattern contained an unescaped forbidden character ${codepoint}`
    509        );
    510      }
    511 
    512      if (codepoint == escapeCharacter) {
    513        isEscaped = true;
    514        continue;
    515      }
    516    }
    517 
    518    result += codepoint;
    519    isEscaped = false;
    520  }
    521 
    522  return result;
    523 }