CanonicalURL.sys.mjs (2490B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * Given a web page content document, finds candidates for an explicitly
 * declared canonical URL. Includes a fallback URL to use in case the content
 * did not declare a canonical URL.
 *
 * Every candidate is either a non-empty string or `null`.
 *
 * @param {Document} document
 * @returns {CanonicalURLSourceResults}
 */
export function findCandidates(document) {
  return {
    link: getLinkRelCanonical(document),
    opengraph: getOpenGraphUrl(document),
    jsonLd: getJSONLDUrl(document),
    fallback: getFallbackCanonicalUrl(document),
  };
}

/**
 * Given a set of canonical URL candidates from `CanonicalURL.findCandidates`,
 * returns the best value to use as the canonical URL.
 *
 * Candidates are tried in priority order: `link`, `opengraph`, `jsonLd`, then
 * `fallback`. The first one that is neither `null` nor `undefined` wins.
 *
 * @param {CanonicalURLSourceResults} sources
 * @returns {string|null}
 *   The chosen URL, or the value of `sources.fallback` (which may be `null`)
 *   when no other candidate is set.
 */
export function pickCanonicalUrl(sources) {
  return (
    sources.link ?? sources.opengraph ?? sources.jsonLd ?? sources.fallback
  );
}

/**
 * Normalizes a raw candidate value read from page content.
 *
 * An empty string is not nullish, so without this step an empty `href` or
 * `content` attribute would win the `??` chain in `pickCanonicalUrl` and mask
 * lower-priority candidates.
 *
 * @param {*} value
 * @returns {string|null}
 *   The value with surrounding whitespace removed, or `null` when the value
 *   is not a string or is empty after trimming.
 */
function normalizeCandidate(value) {
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  return trimmed === "" ? null : trimmed;
}

/**
 * Reads the `href` attribute of the first `<link rel="canonical">` element.
 *
 * TODO: resolve relative URLs
 * TODO: can be a different hostname or domain; does that need special handling?
 *
 * @see https://www.rfc-editor.org/rfc/rfc6596
 *
 * @param {Document} document
 * @returns {string|null}
 */
function getLinkRelCanonical(document) {
  const link = document.querySelector('link[rel="canonical"]');
  return normalizeCandidate(link?.getAttribute("href"));
}

/**
 * Reads the `content` attribute of the first `<meta property="og:url">`
 * element.
 *
 * @see https://ogp.me/#url
 *
 * @param {Document} document
 * @returns {string|null}
 */
function getOpenGraphUrl(document) {
  const meta = document.querySelector('meta[property="og:url"]');
  return normalizeCandidate(meta?.getAttribute("content"));
}

/**
 * Naïvely returns the first JSON-LD entity's URL, if found. Only a top-level
 * `url` property of each parsed script is considered.
 * TODO: make sure it's a web page-like/content schema?
 *
 * @see https://schema.org/url
 *
 * @param {Document} document
 * @returns {string|null}
 */
function getJSONLDUrl(document) {
  const scripts = document.querySelectorAll(
    'script[type="application/ld+json"]'
  );
  for (const script of scripts) {
    let entity;
    try {
      entity = JSON.parse(script.textContent);
    } catch {
      // Page-authored JSON is frequently malformed; skip it and keep looking.
      continue;
    }
    const url = normalizeCandidate(entity?.url);
    if (url !== null) {
      return url;
    }
  }
  return null;
}

/**
 * Builds the fallback canonical URL from the document's own URI with the
 * fragment removed.
 *
 * @param {Document} document
 * @returns {string|null}
 *   The serialized URL, or `null` when `document.documentURI` cannot be
 *   parsed as a URL.
 */
function getFallbackCanonicalUrl(document) {
  const fallbackUrl = URL.parse(document.documentURI);
  if (fallbackUrl) {
    fallbackUrl.hash = "";
    return fallbackUrl.toString();
  }
  return null;
}