tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

CanonicalURL.sys.mjs (2490B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 /**
      6 * Given a web page content document, finds candidates for an explicitly
      7 * declared canonical URL. Includes a fallback URL to use in case the content
      8 * did not declare a canonical URL.
      9 *
     10 * @param {Document} document
     11 * @returns {CanonicalURLSourceResults}
     12 */
     13 export function findCandidates(document) {
     14  return {
     15    link: getLinkRelCanonical(document),
     16    opengraph: getOpenGraphUrl(document),
     17    jsonLd: getJSONLDUrl(document),
     18    fallback: getFallbackCanonicalUrl(document),
     19  };
     20 }
     21 
     22 /**
     23 * Given a set of canonical URL candidates from `CanonicalURL.findCandidates`,
     24 * returns the best value to use as the canonical URL.
     25 *
     26 * @param {CanonicalURLSourceResults} sources
     27 * @returns {string}
     28 */
     29 export function pickCanonicalUrl(sources) {
     30  return (
     31    sources.link ?? sources.opengraph ?? sources.jsonLd ?? sources.fallback
     32  );
     33 }
     34 
     35 /**
     36 * TODO: resolve relative URLs
     37 * TODO: can be a different hostname or domain; does that need special handling?
     38 *
     39 * @see https://www.rfc-editor.org/rfc/rfc6596
     40 *
     41 * @param {Document} document
     42 * @returns {string|null}
     43 */
     44 function getLinkRelCanonical(document) {
     45  return document.querySelector('link[rel="canonical"]')?.getAttribute("href");
     46 }
     47 
     48 /**
     49 * @see https://ogp.me/#url
     50 *
     51 * @param {Document} document
     52 * @returns {string|null}
     53 */
     54 function getOpenGraphUrl(document) {
     55  return document
     56    .querySelector('meta[property="og:url"]')
     57    ?.getAttribute("content");
     58 }
     59 
     60 /**
     61 * Naïvely returns the first JSON-LD entity's URL, if found.
     62 * TODO: make sure it's a web page-like/content schema?
     63 *
     64 * @see https://schema.org/url
     65 *
     66 * @param {Document} document
     67 * @returns {string|null}
     68 */
     69 function getJSONLDUrl(document) {
     70  const firstMatch = Array.from(
     71    document.querySelectorAll('script[type="application/ld+json"]')
     72  )
     73    .map(script => {
     74      try {
     75        return JSON.parse(script.textContent);
     76      } catch {
     77        return null;
     78      }
     79    })
     80    .find(obj => obj && obj.url && typeof obj.url === "string");
     81  return firstMatch?.url;
     82 }
     83 
     84 /**
     85 * @param {Document} document
     86 * @returns {string|null}
     87 */
     88 function getFallbackCanonicalUrl(document) {
     89  const fallbackUrl = URL.parse(document.documentURI);
     90  if (fallbackUrl) {
     91    fallbackUrl.hash = "";
     92    return fallbackUrl.toString();
     93  }
     94  return null;
     95 }