tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 2960b00b8ff9c65df18fa61ba23020d696894cdd
parent 38f389d8292b0481625dae12543e725f389b8aa7
Author: Greg Tatum <tatum.creative@gmail.com>
Date:   Sat, 18 Oct 2025 02:16:52 +0000

Bug 1994183 - Support about:reader in the PageExtractor; r=nordzilla,Gijs,translations-reviewers

Differential Revision: https://phabricator.services.mozilla.com/D268902

Diffstat:
M browser/components/translations/tests/browser/head.js | 12 ++++++++++--
M toolkit/components/pageextractor/PageExtractorChild.sys.mjs | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M toolkit/components/pageextractor/tests/browser/browser.toml | 2 ++
A toolkit/components/pageextractor/tests/browser/browser_dom_extractor_reader_mode.js | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M toolkit/components/pageextractor/tests/browser/head.js | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M toolkit/modules/ActorManagerParent.sys.mjs | 1 +
6 files changed, 183 insertions(+), 4 deletions(-)

diff --git a/browser/components/translations/tests/browser/head.js b/browser/components/translations/tests/browser/head.js @@ -329,14 +329,22 @@ async function switchTab(tab, name) { async function toggleReaderMode() { logAction(); const readerButton = document.getElementById("reader-mode-button"); - await waitForCondition(() => readerButton.hidden === false); + await BrowserTestUtils.waitForMutationCondition( + readerButton, + { attributes: true, attributeFilter: ["hidden"] }, + () => readerButton.hidden === false + ); readerButton.getAttribute("readeractive") ? info("Exiting reader mode") : info("Entering reader mode"); const readyPromise = readerButton.getAttribute("readeractive") - ? waitForCondition(() => !readerButton.getAttribute("readeractive")) + ? BrowserTestUtils.waitForMutationCondition( + readerButton, + { attributes: true, attributeFilter: ["readeractive"] }, + () => !readerButton.getAttribute("readeractive") + ) : BrowserTestUtils.waitForContentEvent( gBrowser.selectedBrowser, "AboutReaderContentReady" diff --git a/toolkit/components/pageextractor/PageExtractorChild.sys.mjs b/toolkit/components/pageextractor/PageExtractorChild.sys.mjs @@ -52,8 +52,14 @@ export class PageExtractorChild extends JSWindowActorChild { async receiveMessage({ name, data }) { switch (name) { case "PageExtractorParent:GetReaderModeContent": + if (this.isAboutReader()) { + return this.getAboutReaderContent(); + } return this.getReaderModeContent(data); case "PageExtractorParent:GetText": + if (this.isAboutReader()) { + return this.getAboutReaderContent(); + } return this.getText(data); } return Promise.reject(new Error("Unknown message: " + name)); @@ -82,11 +88,14 @@ export class PageExtractorChild extends JSWindowActorChild { return ""; } - const text = (article?.textContent || "") + let text = (article?.textContent || "") .trim() // Replace duplicate whitespace with either a single newline or space .replace(/(\s*\n\s*)|\s{2,}/g, (_, newline) => (newline ? 
"\n" : " ")); + if (article.title) { + text = article.title + "\n\n" + text; + } lazy.console.log("GetReaderModeContent", { force }); lazy.console.debug(text); @@ -122,4 +131,48 @@ export class PageExtractorChild extends JSWindowActorChild { return text.trim(); } + + /** + * Special case extracting text from Reader Mode. The original article content is not + * retained once reader mode is activated. It is rendered out to the page. Rather + * than cache an additional copy of the article, just extract the text from the + * actual reader mode DOM. + * + * @returns {string | null} + */ + getAboutReaderContent() { + lazy.console.log("Using special text extraction strategy for about:reader"); + const document = this.manager.contentWindow.document; + + if (!document) { + return null; + } + /** @type {HTMLElement?} */ + const titleEl = document.querySelector(".reader-title"); + /** @type {HTMLElement?} */ + const contentEl = document.querySelector(".moz-reader-content"); + + const title = titleEl?.innerText; + const content = contentEl?.innerText; + if (!title && !content) { + return null; + } + + if (title) { + return `${title}\n\n${content}`.trim(); + } + return content.trim(); + } + + /** + * Checks if about:reader is loaded, which requires special handling. + * + * @returns {boolean} + */ + isAboutReader() { + // Accessing the documentURIObject in this way does not materialize the + // `window.location.href` and should be a cheaper check here. 
+ let url = this.manager.contentWindow.document.documentURIObject; + return url.schemeIs("about") && url.pathQueryRef.startsWith("reader?"); + } } diff --git a/toolkit/components/pageextractor/tests/browser/browser.toml b/toolkit/components/pageextractor/tests/browser/browser.toml @@ -7,3 +7,5 @@ support-files = [ ] ["browser_dom_extractor.js"] + +["browser_dom_extractor_reader_mode.js"] diff --git a/toolkit/components/pageextractor/tests/browser/browser_dom_extractor_reader_mode.js b/toolkit/components/pageextractor/tests/browser/browser_dom_extractor_reader_mode.js @@ -0,0 +1,62 @@ +/* Any copyright is dedicated to the Public Domain. + https://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +/** + * There is some inconsistency in newline handling between the modes. Make all newlines + * collapse to just spaces. + * + * @param {string} text + */ +function normalizeWhitespace(text) { + return text.replaceAll("\n\n", "\n").replaceAll("\n", " "); +} + +add_task(async function test_dom_extractor_reader_mode() { + const title = "Etymology of Mochitests"; + const article = + `It's interesting that inside of Mozilla most people call mochitests "moh` + + `kee tests". I believe this is because it is adjacent to the term` + + `"mocha tests", which is pronounced with the hard k sound. However, the` + + `testing infrastructure is named after the delicious Japanese treat known` + + `as mochi. 
Mochi, pronounced like "moh chee" is a food that is made from` + + `pounding steamed rice into a soft elastic mass.`; + + const { cleanup, getPageExtractor } = await html` + <article> + <h1>${title}</h1> + <p>${article}</p> + </article> + `; + + const text = `${title} ${article}`; + + is( + normalizeWhitespace(await getPageExtractor().getText()), + text, + "Normal page content supports getText" + ); + + is( + normalizeWhitespace(await getPageExtractor().getReaderModeContent()), + text, + "Normal page content supports getReaderModeContent" + ); + + await toggleReaderMode(); + + is( + normalizeWhitespace(await getPageExtractor().getText()), + text, + "about:reader is supported with getText" + ); + + is( + normalizeWhitespace(await getPageExtractor().getReaderModeContent()), + text, + "about:reader is supported with getReaderModeContent" + ); + + await cleanup(); +}); diff --git a/toolkit/components/pageextractor/tests/browser/head.js b/toolkit/components/pageextractor/tests/browser/head.js @@ -29,14 +29,28 @@ async function html(strings, ...values) { true // waitForLoad ); - /** @type {PageExtractorParent} */ const actor = tab.linkedBrowser.browsingContext.currentWindowGlobal.getActor( "PageExtractor" ); return { + /** + * @type {PageExtractorParent} + */ actor, + + /** + * Get a new page extractor, which can change when navigating pages. + * + * @returns {PageExtractorParent} + */ + getPageExtractor() { + return tab.linkedBrowser.browsingContext.currentWindowGlobal.getActor( + "PageExtractor" + ); + }, + async cleanup() { info("Cleaning up"); await serverClosed; @@ -76,3 +90,42 @@ function serveOnce(html) { return { url, serverClosed: promise }; } + +/** + * Click the reader-mode button if the reader-mode button is available. + * Fails if the reader-mode button is hidden. 
+ */ +async function toggleReaderMode() { + const readerButton = document.getElementById("reader-mode-button"); + await BrowserTestUtils.waitForMutationCondition( + readerButton, + { attributes: true, attributeFilter: ["hidden"] }, + () => readerButton.hidden === false + ); + + readerButton.getAttribute("readeractive") + ? info("Exiting reader mode") + : info("Entering reader mode"); + + const readyPromise = readerButton.getAttribute("readeractive") + ? BrowserTestUtils.waitForMutationCondition( + readerButton, + { attributes: true, attributeFilter: ["readeractive"] }, + () => !readerButton.getAttribute("readeractive") + ) + : BrowserTestUtils.waitForContentEvent( + gBrowser.selectedBrowser, + "AboutReaderContentReady" + ); + + click(readerButton, "Clicking the reader-mode button"); + await readyPromise; +} + +function click(button, message) { + info(message); + if (button.hidden) { + throw new Error("The button was hidden when trying to click it."); + } + button.click(); +} diff --git a/toolkit/modules/ActorManagerParent.sys.mjs b/toolkit/modules/ActorManagerParent.sys.mjs @@ -482,6 +482,7 @@ let JSWINDOWACTORS = { "file:///*", "moz-extension://*", "data:text/html,*", + "about:reader?*", ], messageManagerGroups: ["browsers"], },