commit a1b1d695856317c4bb08cb8695c45a88a9669316
parent d90acdc399be472b62455d74cfe369b20963d700
Author: Stephen Thompson <sthompson@mozilla.com>
Date: Mon, 5 Jan 2026 17:37:37 +0000
Bug 2003714 - automated tests for CanonicalURL detection r=dwalker,tabbrowser-reviewers
Unit tests for the CanonicalURL module's logic to find and choose a canonical URL for a given HTML page.
Differential Revision: https://phabricator.services.mozilla.com/D277629
Diffstat:
6 files changed, 323 insertions(+), 2 deletions(-)
diff --git a/browser/components/tabnotes/CanonicalURL.sys.mjs b/browser/components/tabnotes/CanonicalURL.sys.mjs
@@ -67,7 +67,7 @@ function getOpenGraphUrl(document) {
* @returns {string|null}
*/
function getJSONLDUrl(document) {
- return Array.from(
+ const firstMatch = Array.from(
document.querySelectorAll('script[type="application/ld+json"]')
)
.map(script => {
@@ -77,7 +77,8 @@ function getJSONLDUrl(document) {
return null;
}
})
- .find(obj => obj?.url)?.url;
+ .find(obj => obj && obj.url && typeof obj.url === "string");
+ return firstMatch?.url;
}
/**
diff --git a/browser/components/tabnotes/test/unit/test_json_ld.js b/browser/components/tabnotes/test/unit/test_json_ld.js
@@ -0,0 +1,111 @@
+/* Any copyright is dedicated to the Public Domain.
+https://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+const { findCandidates } = ChromeUtils.importESModule(
+ "moz-src:///browser/components/tabnotes/CanonicalURL.sys.mjs"
+);
+
+/**
+ * @param {string[]} scripts Contents for the JSON-LD script elements; each entry is interpolated verbatim (no escaping) into its own `<script type="application/ld+json">`.
+ * @returns {Document} Result of `Document.parseHTMLUnsafe` on a minimal HTML page whose body holds those script elements (none when `scripts` is empty).
+ */
+function getDocument(scripts) {
+ const scriptTags = scripts
+ .map(content => `<script type="application/ld+json">${content}</script>`)
+ .join("\n");
+
+ const html = `
+<!DOCTYPE html>
+<html>
+<head>
+ <meta charset="utf-8">
+</head>
+<body>
+ ${scriptTags}
+</body>
+</html>
+`;
+ return Document.parseHTMLUnsafe(html);
+}
+
+add_task(async function test_json_ld_missing() { // A document built with no JSON-LD script elements should produce no `jsonLd` candidate.
+ const doc = getDocument([]);
+
+ const candidates = findCandidates(doc);
+
+ Assert.equal(
+ candidates.jsonLd,
+ undefined, // NOTE(review): Assert.equal is presumably a loose (==) comparison, so `null` would pass as well; confirm that is acceptable here.
+ `JSON-LD data should not be found`
+ );
+});
+
+add_task(async function test_json_ld_basic() { // A single JSON-LD object with a string `url` should be reported as the `jsonLd` candidate.
+ const doc = getDocument([
+ JSON.stringify({
+ "@context": "https://schema.org/",
+ "@type": "Thing",
+ url: "https://www.example.com",
+ }),
+ ]);
+
+ const candidates = findCandidates(doc);
+
+ Assert.equal(
+ candidates.jsonLd,
+ "https://www.example.com", // same string as the `url` property serialized above
+ `JSON-LD data should be found`
+ );
+});
+
+add_task(async function test_json_ld_selects_first() { // With three JSON-LD scripts that all carry a string `url`, the first script's value is expected.
+ const doc = getDocument([
+ JSON.stringify({
+ "@context": "https://schema.org/",
+ "@type": "Thing",
+ url: "https://www.example.com/1",
+ }),
+ JSON.stringify({
+ "@context": "https://schema.org/",
+ "@type": "CreativeWork",
+ url: "https://www.example.com/2",
+ }),
+ JSON.stringify({
+ "@context": "https://schema.org/",
+ "@type": "WebPage",
+ url: "https://www.example.com/3",
+ }),
+ ]);
+
+ const candidates = findCandidates(doc);
+
+ Assert.equal(
+ candidates.jsonLd,
+ "https://www.example.com/1", // the `url` of the first script passed to getDocument
+ `the first JSON-LD data should be preferred`
+ );
+});
+
+add_task(async function test_json_ld_robust_to_url_array() { // A JSON-LD object whose `url` is an array of strings (not a string) should not produce a `jsonLd` candidate.
+ const doc = getDocument([
+ JSON.stringify({
+ "@context": "https://schema.org/",
+ "@type": "SiteMap",
+ url: [
+ "https://www.example.com/1",
+ "https://www.example.com/2",
+ "https://www.example.com/3",
+ ],
+ }),
+ ]);
+
+ const candidates = findCandidates(doc); // presumably reaches the string-type check on `url` in CanonicalURL.sys.mjs; findCandidates itself is not visible here
+
+ Assert.equal(
+ candidates.jsonLd,
+ undefined, // NOTE(review): Assert.equal is presumably a loose (==) comparison, so `null` would pass as well; confirm that is acceptable here.
+ `when url is an array, the JSON-LD data should not be used`
+ );
+});
diff --git a/browser/components/tabnotes/test/unit/test_link_rel_canonical.js b/browser/components/tabnotes/test/unit/test_link_rel_canonical.js
@@ -0,0 +1,51 @@
+/* Any copyright is dedicated to the Public Domain.
+https://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+const { findCandidates } = ChromeUtils.importESModule(
+ "moz-src:///browser/components/tabnotes/CanonicalURL.sys.mjs"
+);
+
+/**
+ * @param {string|undefined} [url] Value interpolated verbatim into the `href` of a `<link rel="canonical">` in the head; any falsy value (omitted, `""`) leaves the link out.
+ * @returns {Document} Result of `Document.parseHTMLUnsafe` on a minimal HTML page with an empty body.
+ */
+function getDocument(url) {
+ const html = `
+<!DOCTYPE html>
+<html>
+<head>
+ <meta charset="utf-8">
+ ${url ? `<link rel="canonical" href="${url}">` : ""}
+</head>
+<body>
+</body>
+</html>
+`;
+ return Document.parseHTMLUnsafe(html);
+}
+
+add_task(async function test_link_rel_canonical_missing() { // A document built without a canonical link should produce no `link` candidate.
+ const doc = getDocument();
+
+ const candidates = findCandidates(doc);
+
+ Assert.equal(
+ candidates.link,
+ undefined, // NOTE(review): Assert.equal is presumably a loose (==) comparison, so `null` would pass as well; confirm that is acceptable here.
+ `link[rel="canonical"] should not be found`
+ );
+});
+
+add_task(async function test_link_rel_canonical_present() { // The `href` given to getDocument should come back unchanged as the `link` candidate.
+ const doc = getDocument("https://www.example.com");
+
+ const candidates = findCandidates(doc);
+
+ Assert.equal(
+ candidates.link,
+ "https://www.example.com", // same string that was passed to getDocument above
+ `link[rel="canonical"] should be found`
+ );
+});
diff --git a/browser/components/tabnotes/test/unit/test_meta_og_url.js b/browser/components/tabnotes/test/unit/test_meta_og_url.js
@@ -0,0 +1,51 @@
+/* Any copyright is dedicated to the Public Domain.
+https://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+const { findCandidates } = ChromeUtils.importESModule(
+ "moz-src:///browser/components/tabnotes/CanonicalURL.sys.mjs"
+);
+
+/**
+ * @param {string|undefined} [url] Value interpolated verbatim into the `content` of a `<meta property="og:url">` in the head; any falsy value (omitted, `""`) leaves the meta tag out.
+ * @returns {Document} Result of `Document.parseHTMLUnsafe` on a minimal HTML page with an empty body.
+ */
+function getDocument(url) {
+ const html = `
+<!DOCTYPE html>
+<html>
+<head>
+ <meta charset="utf-8">
+ ${url ? `<meta property="og:url" content="${url}">` : ""}
+</head>
+<body>
+</body>
+</html>
+`;
+ return Document.parseHTMLUnsafe(html);
+}
+
+add_task(async function test_meta_og_url_missing() { // A document built without an og:url meta tag should produce no `opengraph` candidate.
+ const doc = getDocument();
+
+ const candidates = findCandidates(doc);
+
+ Assert.equal(
+ candidates.opengraph,
+ undefined, // NOTE(review): Assert.equal is presumably a loose (==) comparison, so `null` would pass as well; confirm that is acceptable here.
+ `meta[property="og:url"] should not be found`
+ );
+});
+
+add_task(async function test_meta_og_url_present() { // The `content` given to getDocument should come back unchanged as the `opengraph` candidate.
+ const doc = getDocument("https://www.example.com");
+
+ const candidates = findCandidates(doc);
+
+ Assert.equal(
+ candidates.opengraph,
+ "https://www.example.com", // same string that was passed to getDocument above
+ `meta[property="og:url"] should be found`
+ );
+});
diff --git a/browser/components/tabnotes/test/unit/test_pick_canonical_url.js b/browser/components/tabnotes/test/unit/test_pick_canonical_url.js
@@ -0,0 +1,99 @@
+/* Any copyright is dedicated to the Public Domain.
+https://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+const { pickCanonicalUrl } = ChromeUtils.importESModule(
+ "moz-src:///browser/components/tabnotes/CanonicalURL.sys.mjs"
+);
+
+const LINK_REL_CANONICAL = "https://www.example.com/link_rel_canonical"; // passed as the `link` candidate in the tests below
+const OPENGRAPH = "https://www.example.com/opengraph"; // passed as the `opengraph` candidate
+const JSON_LD = "https://www.example.com/json-ld"; // passed as the `jsonLd` candidate
+const FALLBACK = "https://www.example.com/fallback"; // passed as `fallback` in every test; distinct URLs make the picked source identifiable
+
+add_task(async function test_canonical_link_only() { // Candidates: link + fallback. Expected pick: link.
+ Assert.equal(
+ pickCanonicalUrl({ link: LINK_REL_CANONICAL, fallback: FALLBACK }),
+ LINK_REL_CANONICAL,
+ `should always pick link[rel="canonical"] if it was found`
+ );
+});
+
+add_task(async function test_canonical_link_and_opengraph() { // Candidates: link + opengraph + fallback. Expected pick: link.
+ Assert.equal(
+ pickCanonicalUrl({
+ link: LINK_REL_CANONICAL,
+ opengraph: OPENGRAPH,
+ fallback: FALLBACK,
+ }),
+ LINK_REL_CANONICAL,
+ `should always pick link[rel="canonical"] if it was found`
+ );
+});
+
+add_task(async function test_canonical_link_and_json_ld() { // Candidates: link + jsonLd + fallback. Expected pick: link.
+ Assert.equal(
+ pickCanonicalUrl({
+ link: LINK_REL_CANONICAL,
+ jsonLd: JSON_LD,
+ fallback: FALLBACK,
+ }),
+ LINK_REL_CANONICAL,
+ `should always pick link[rel="canonical"] if it was found`
+ );
+});
+
+add_task(async function test_canonical_link_and_opengraph_and_json_ld() { // Candidates: link + opengraph + jsonLd + fallback. Expected pick: link.
+ Assert.equal(
+ pickCanonicalUrl({
+ link: LINK_REL_CANONICAL,
+ opengraph: OPENGRAPH,
+ jsonLd: JSON_LD,
+ fallback: FALLBACK,
+ }),
+ LINK_REL_CANONICAL,
+ `should always pick link[rel="canonical"] if it was found`
+ );
+});
+
+add_task(async function test_opengraph_only() { // Candidates: opengraph + fallback (no link). Expected pick: opengraph.
+ Assert.equal(
+ pickCanonicalUrl({ opengraph: OPENGRAPH, fallback: FALLBACK }),
+ OPENGRAPH,
+ `should pick meta[property="og:url"] if canonical link not found`
+ );
+});
+
+add_task(async function test_opengraph_and_json_ld() { // Candidates: opengraph + jsonLd + fallback (no link). Expected pick: opengraph.
+ Assert.equal(
+ pickCanonicalUrl({
+ opengraph: OPENGRAPH,
+ jsonLd: JSON_LD,
+ fallback: FALLBACK,
+ }),
+ OPENGRAPH,
+ `should pick meta[property="og:url"] if canonical link not found`
+ );
+});
+
+add_task(async function test_json_ld_only() { // Candidates: jsonLd + fallback (no link, no opengraph). Expected pick: jsonLd.
+ Assert.equal(
+ pickCanonicalUrl({
+ jsonLd: JSON_LD,
+ fallback: FALLBACK,
+ }),
+ JSON_LD,
+ "should pick JSON-LD data if neither canonical link nor og:url were found"
+ );
+});
+
+add_task(async function test_fallback() { // Candidates: fallback only. Expected pick: fallback.
+ Assert.equal(
+ pickCanonicalUrl({
+ fallback: FALLBACK,
+ }),
+ FALLBACK,
+ "should only use the fallback if nothing else was found"
+ );
+});
diff --git a/browser/components/tabnotes/test/unit/xpcshell.toml b/browser/components/tabnotes/test/unit/xpcshell.toml
@@ -4,4 +4,12 @@ prefs = [
]
head = "head.js"
+["test_json_ld.js"]
+
+["test_link_rel_canonical.js"]
+
+["test_meta_og_url.js"]
+
+["test_pick_canonical_url.js"]
+
["test_tab_notes.js"]