tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit a1b1d695856317c4bb08cb8695c45a88a9669316
parent d90acdc399be472b62455d74cfe369b20963d700
Author: Stephen Thompson <sthompson@mozilla.com>
Date:   Mon,  5 Jan 2026 17:37:37 +0000

Bug 2003714 - automated tests for CanonicalURL detection r=dwalker,tabbrowser-reviewers

Unit tests for the CanonicalURL module's logic to find and choose a canonical URL for a given HTML page.

Differential Revision: https://phabricator.services.mozilla.com/D277629

Diffstat:
M browser/components/tabnotes/CanonicalURL.sys.mjs | 5 +++--
A browser/components/tabnotes/test/unit/test_json_ld.js | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A browser/components/tabnotes/test/unit/test_link_rel_canonical.js | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
A browser/components/tabnotes/test/unit/test_meta_og_url.js | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
A browser/components/tabnotes/test/unit/test_pick_canonical_url.js | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M browser/components/tabnotes/test/unit/xpcshell.toml | 8 ++++++++
6 files changed, 323 insertions(+), 2 deletions(-)

diff --git a/browser/components/tabnotes/CanonicalURL.sys.mjs b/browser/components/tabnotes/CanonicalURL.sys.mjs @@ -67,7 +67,7 @@ function getOpenGraphUrl(document) { * @returns {string|null} */ function getJSONLDUrl(document) { - return Array.from( + const firstMatch = Array.from( document.querySelectorAll('script[type="application/ld+json"]') ) .map(script => { @@ -77,7 +77,8 @@ function getJSONLDUrl(document) { return null; } }) - .find(obj => obj?.url)?.url; + .find(obj => obj && obj.url && typeof obj.url === "string"); + return firstMatch?.url; } /** diff --git a/browser/components/tabnotes/test/unit/test_json_ld.js b/browser/components/tabnotes/test/unit/test_json_ld.js @@ -0,0 +1,111 @@ +/* Any copyright is dedicated to the Public Domain. +https://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const { findCandidates } = ChromeUtils.importESModule( + "moz-src:///browser/components/tabnotes/CanonicalURL.sys.mjs" +); + +/** + * @param {string[]} scripts + * @returns {Document} + */ +function getDocument(scripts) { + const scriptTags = scripts + .map(content => `<script type="application/ld+json">${content}</script>`) + .join("\n"); + + const html = ` +<!DOCTYPE html> +<html> +<head> + <meta charset="utf-8"> +</head> +<body> + ${scriptTags} +</body> +</html> +`; + return Document.parseHTMLUnsafe(html); +} + +add_task(async function test_json_ld_missing() { + const doc = getDocument([]); + + const candidates = findCandidates(doc); + + Assert.equal( + candidates.jsonLd, + undefined, + `JSON-LD data should not be found` + ); +}); + +add_task(async function test_json_ld_basic() { + const doc = getDocument([ + JSON.stringify({ + "@context": "https://schema.org/", + "@type": "Thing", + url: "https://www.example.com", + }), + ]); + + const candidates = findCandidates(doc); + + Assert.equal( + candidates.jsonLd, + "https://www.example.com", + `JSON-LD data should be found` + ); +}); + +add_task(async function test_json_ld_selects_first() { + const 
doc = getDocument([ + JSON.stringify({ + "@context": "https://schema.org/", + "@type": "Thing", + url: "https://www.example.com/1", + }), + JSON.stringify({ + "@context": "https://schema.org/", + "@type": "CreativeWork", + url: "https://www.example.com/2", + }), + JSON.stringify({ + "@context": "https://schema.org/", + "@type": "WebPage", + url: "https://www.example.com/3", + }), + ]); + + const candidates = findCandidates(doc); + + Assert.equal( + candidates.jsonLd, + "https://www.example.com/1", + `the first JSON-LD data should be preferred` + ); +}); + +add_task(async function test_json_ld_robust_to_url_array() { + const doc = getDocument([ + JSON.stringify({ + "@context": "https://schema.org/", + "@type": "SiteMap", + url: [ + "https://www.example.com/1", + "https://www.example.com/2", + "https://www.example.com/3", + ], + }), + ]); + + const candidates = findCandidates(doc); + + Assert.equal( + candidates.jsonLd, + undefined, + `when url is an array, the JSON-LD data should not be used` + ); +}); diff --git a/browser/components/tabnotes/test/unit/test_link_rel_canonical.js b/browser/components/tabnotes/test/unit/test_link_rel_canonical.js @@ -0,0 +1,51 @@ +/* Any copyright is dedicated to the Public Domain. +https://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const { findCandidates } = ChromeUtils.importESModule( + "moz-src:///browser/components/tabnotes/CanonicalURL.sys.mjs" +); + +/** + * @param {string|undefined} [url] + * @returns {Document} + */ +function getDocument(url) { + const html = ` +<!DOCTYPE html> +<html> +<head> + <meta charset="utf-8"> + ${url ? 
`<link rel="canonical" href="${url}">` : ""} +</head> +<body> +</body> +</html> +`; + return Document.parseHTMLUnsafe(html); +} + +add_task(async function test_link_rel_canonical_missing() { + const doc = getDocument(); + + const candidates = findCandidates(doc); + + Assert.equal( + candidates.link, + undefined, + `link[rel="canonical"] should not be found` + ); +}); + +add_task(async function test_link_rel_canonical_present() { + const doc = getDocument("https://www.example.com"); + + const candidates = findCandidates(doc); + + Assert.equal( + candidates.link, + "https://www.example.com", + `link[rel="canonical"] should be found` + ); +}); diff --git a/browser/components/tabnotes/test/unit/test_meta_og_url.js b/browser/components/tabnotes/test/unit/test_meta_og_url.js @@ -0,0 +1,51 @@ +/* Any copyright is dedicated to the Public Domain. +https://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const { findCandidates } = ChromeUtils.importESModule( + "moz-src:///browser/components/tabnotes/CanonicalURL.sys.mjs" +); + +/** + * @param {string|undefined} [url] + * @returns {Document} + */ +function getDocument(url) { + const html = ` +<!DOCTYPE html> +<html> +<head> + <meta charset="utf-8"> + ${url ? 
`<meta property="og:url" content="${url}">` : ""} +</head> +<body> +</body> +</html> +`; + return Document.parseHTMLUnsafe(html); +} + +add_task(async function test_meta_og_url_missing() { + const doc = getDocument(); + + const candidates = findCandidates(doc); + + Assert.equal( + candidates.opengraph, + undefined, + `meta[property="og:url"] should not be found` + ); +}); + +add_task(async function test_meta_og_url_present() { + const doc = getDocument("https://www.example.com"); + + const candidates = findCandidates(doc); + + Assert.equal( + candidates.opengraph, + "https://www.example.com", + `meta[property="og:url"] should be found` + ); +}); diff --git a/browser/components/tabnotes/test/unit/test_pick_canonical_url.js b/browser/components/tabnotes/test/unit/test_pick_canonical_url.js @@ -0,0 +1,99 @@ +/* Any copyright is dedicated to the Public Domain. +https://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const { pickCanonicalUrl } = ChromeUtils.importESModule( + "moz-src:///browser/components/tabnotes/CanonicalURL.sys.mjs" +); + +const LINK_REL_CANONICAL = "https://www.example.com/link_rel_canonical"; +const OPENGRAPH = "https://www.example.com/opengraph"; +const JSON_LD = "https://www.example.com/json-ld"; +const FALLBACK = "https://www.example.com/fallback"; + +add_task(async function test_canonical_link_only() { + Assert.equal( + pickCanonicalUrl({ link: LINK_REL_CANONICAL, fallback: FALLBACK }), + LINK_REL_CANONICAL, + `should always pick link[rel="canonical"] if it was found` + ); +}); + +add_task(async function test_canonical_link_and_opengraph() { + Assert.equal( + pickCanonicalUrl({ + link: LINK_REL_CANONICAL, + opengraph: OPENGRAPH, + fallback: FALLBACK, + }), + LINK_REL_CANONICAL, + `should always pick link[rel="canonical"] if it was found` + ); +}); + +add_task(async function test_canonical_link_and_json_ld() { + Assert.equal( + pickCanonicalUrl({ + link: LINK_REL_CANONICAL, + jsonLd: JSON_LD, + fallback: FALLBACK, + }), + 
LINK_REL_CANONICAL, + `should always pick link[rel="canonical"] if it was found` + ); +}); + +add_task(async function test_canonical_link_and_opengraph_and_json_ld() { + Assert.equal( + pickCanonicalUrl({ + link: LINK_REL_CANONICAL, + opengraph: OPENGRAPH, + jsonLd: JSON_LD, + fallback: FALLBACK, + }), + LINK_REL_CANONICAL, + `should always pick link[rel="canonical"] if it was found` + ); +}); + +add_task(async function test_opengraph_only() { + Assert.equal( + pickCanonicalUrl({ opengraph: OPENGRAPH, fallback: FALLBACK }), + OPENGRAPH, + `should pick meta[property="og:url"] if canonical link not found` + ); +}); + +add_task(async function test_opengraph_and_json_ld() { + Assert.equal( + pickCanonicalUrl({ + opengraph: OPENGRAPH, + jsonLd: JSON_LD, + fallback: FALLBACK, + }), + OPENGRAPH, + `should pick meta[property="og:url"] if canonical link not found` + ); +}); + +add_task(async function test_json_ld_only() { + Assert.equal( + pickCanonicalUrl({ + jsonLd: JSON_LD, + fallback: FALLBACK, + }), + JSON_LD, + "should pick JSON-LD data if neither canonical link nor og:url were found" + ); +}); + +add_task(async function test_fallback() { + Assert.equal( + pickCanonicalUrl({ + fallback: FALLBACK, + }), + FALLBACK, + "should only use the fallback if nothing else was found" + ); +}); diff --git a/browser/components/tabnotes/test/unit/xpcshell.toml b/browser/components/tabnotes/test/unit/xpcshell.toml @@ -4,4 +4,12 @@ prefs = [ ] head = "head.js" +["test_json_ld.js"] + +["test_link_rel_canonical.js"] + +["test_meta_og_url.js"] + +["test_pick_canonical_url.js"] + ["test_tab_notes.js"]