tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

test_Tools_GetPageContent.js (12240B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 const { GetPageContent } = ChromeUtils.importESModule(
      6  "moz-src:///browser/components/aiwindow/models/Tools.sys.mjs"
      7 );
      8 
      9 const { sinon } = ChromeUtils.importESModule(
     10  "resource://testing-common/Sinon.sys.mjs"
     11 );
     12 
     13 function createFakeBrowser(url, hasBrowsingContext = true) {
     14  const parsedUrl = new URL(url);
     15  const browser = {
     16    currentURI: {
     17      spec: url,
     18      hostPort: parsedUrl.host,
     19    },
     20  };
     21 
     22  if (hasBrowsingContext) {
     23    browser.browsingContext = {
     24      currentWindowContext: {
     25        getActor: sinon.stub().resolves({
     26          getText: sinon.stub().resolves("Sample page content"),
     27          getReaderModeContent: sinon.stub().resolves(""),
     28        }),
     29      },
     30    };
     31  } else {
     32    browser.browsingContext = null;
     33  }
     34 
     35  return browser;
     36 }
     37 
     38 function createFakeTab(url, title, hasBrowsingContext = true) {
     39  return {
     40    linkedBrowser: createFakeBrowser(url, hasBrowsingContext),
     41    label: title,
     42  };
     43 }
     44 
     45 function createFakeWindow(tabs, closed = false, isAIWindow = true) {
     46  return {
     47    closed,
     48    gBrowser: {
     49      tabs,
     50    },
     51    document: {
     52      documentElement: {
     53        hasAttribute: attr => attr === "ai-window" && isAIWindow,
     54      },
     55    },
     56  };
     57 }
     58 
     59 function setupBrowserWindowTracker(sandbox, windows) {
     60  const BrowserWindowTracker = ChromeUtils.importESModule(
     61    "resource:///modules/BrowserWindowTracker.sys.mjs"
     62  ).BrowserWindowTracker;
     63 
     64  let windowArray;
     65  if (windows === null) {
     66    windowArray = [];
     67  } else if (Array.isArray(windows)) {
     68    windowArray = windows;
     69  } else {
     70    windowArray = [windows];
     71  }
     72  sandbox.stub(BrowserWindowTracker, "orderedWindows").get(() => windowArray);
     73 }
     74 
     75 add_task(async function test_getPageContent_exact_url_match() {
     76  const sb = sinon.createSandbox();
     77 
     78  try {
     79    const targetUrl = "https://example.com/page";
     80    const tabs = [
     81      createFakeTab("https://other.com", "Other"),
     82      createFakeTab(targetUrl, "Example Page"),
     83    ];
     84 
     85    setupBrowserWindowTracker(sb, createFakeWindow(tabs));
     86 
     87    const result = await GetPageContent.getPageContent(
     88      { url: targetUrl },
     89      new Set([targetUrl])
     90    );
     91 
     92    Assert.ok(result.includes("Example Page"), "Should include page title");
     93    Assert.ok(
     94      result.includes("Sample page content"),
     95      "Should include page content"
     96    );
     97    Assert.ok(
     98      result.includes(targetUrl),
     99      "Should include URL in result message"
    100    );
    101  } finally {
    102    sb.restore();
    103  }
    104 });
    105 
    106 add_task(async function test_getPageContent_hostname_match() {
    107  const sb = sinon.createSandbox();
    108 
    109  try {
    110    const tabs = [
    111      createFakeTab("https://example.com/page", "Example Page"),
    112      createFakeTab("https://other.com", "Other"),
    113    ];
    114 
    115    setupBrowserWindowTracker(sb, createFakeWindow(tabs));
    116 
    117    const result = await GetPageContent.getPageContent(
    118      { url: "http://example.com/different" },
    119      new Set(["http://example.com/different"])
    120    );
    121 
    122    Assert.ok(
    123      result.includes("Example Page"),
    124      "Should match by hostname when exact match fails"
    125    );
    126    Assert.ok(
    127      result.includes("Sample page content"),
    128      "Should include page content"
    129    );
    130  } finally {
    131    sb.restore();
    132  }
    133 });
    134 
    135 add_task(async function test_getPageContent_tab_not_found_with_allowed_url() {
    136  const sb = sinon.createSandbox();
    137 
    138  try {
    139    const targetUrl = "https://external.com/article";
    140    const tabs = [
    141      createFakeTab("https://example.com", "Example"),
    142      createFakeTab("https://other.com", "Other"),
    143    ];
    144 
    145    setupBrowserWindowTracker(sb, createFakeWindow(tabs));
    146 
    147    const allowedUrls = new Set([targetUrl]);
    148    const result = await GetPageContent.getPageContent(
    149      { url: targetUrl },
    150      allowedUrls
    151    );
    152 
    153    // Headless extraction doesn't work in xpcshell environment
    154    // In real usage, this would attempt headless extraction for allowed URLs
    155    Assert.ok(
    156      result.includes("Cannot find URL"),
    157      "Should return error when tab not found (headless doesn't work in xpcshell)"
    158    );
    159    Assert.ok(result.includes(targetUrl), "Should include target URL in error");
    160  } finally {
    161    sb.restore();
    162  }
    163 });
    164 
    165 add_task(
    166  async function test_getPageContent_tab_not_found_without_allowed_url() {
    167    const sb = sinon.createSandbox();
    168 
    169    try {
    170      const targetUrl = "https://notfound.com/page";
    171      const tabs = [
    172        createFakeTab("https://example.com", "Example"),
    173        createFakeTab("https://other.com", "Other"),
    174        createFakeTab("https://third.com", "Third"),
    175        createFakeTab("https://fourth.com", "Fourth"),
    176      ];
    177 
    178      setupBrowserWindowTracker(sb, createFakeWindow(tabs));
    179 
    180      const allowedUrls = new Set(["https://different.com"]);
    181 
    182      // When URL is not in allowedUrls, it attempts headless extraction
    183      // This doesn't work in xpcshell, so we expect an error
    184      let errorThrown = false;
    185      try {
    186        await GetPageContent.getPageContent({ url: targetUrl }, allowedUrls);
    187      } catch (error) {
    188        errorThrown = true;
    189        Assert.ok(
    190          error.message.includes("addProgressListener"),
    191          "Should fail with headless browser error in xpcshell"
    192        );
    193      }
    194 
    195      Assert.ok(
    196        errorThrown,
    197        "Should throw error when attempting headless extraction in xpcshell"
    198      );
    199    } finally {
    200      sb.restore();
    201    }
    202  }
    203 );
    204 
    205 add_task(async function test_getPageContent_no_browsing_context() {
    206  const sb = sinon.createSandbox();
    207 
    208  try {
    209    const targetUrl = "https://example.com/loading";
    210    const tabs = [createFakeTab(targetUrl, "Loading Page", false)];
    211 
    212    setupBrowserWindowTracker(sb, createFakeWindow(tabs));
    213 
    214    const result = await GetPageContent.getPageContent(
    215      { url: targetUrl },
    216      new Set([targetUrl])
    217    );
    218 
    219    Assert.ok(
    220      result.includes("Cannot access content"),
    221      "Should return error for unavailable browsing context"
    222    );
    223    Assert.ok(
    224      result.includes("Loading Page"),
    225      "Should include tab label in error"
    226    );
    227    Assert.ok(
    228      result.includes(targetUrl),
    229      "Should include URL in error message"
    230    );
    231  } finally {
    232    sb.restore();
    233  }
    234 });
    235 
    236 add_task(async function test_getPageContent_successful_extraction() {
    237  const sb = sinon.createSandbox();
    238 
    239  try {
    240    const targetUrl = "https://example.com/article";
    241    const pageContent = "This is a well-written article with lots of content.";
    242 
    243    const mockExtractor = {
    244      getText: sinon.stub().resolves(pageContent),
    245      getReaderModeContent: sinon.stub().resolves(""),
    246    };
    247 
    248    const tab = createFakeTab(targetUrl, "Article");
    249    tab.linkedBrowser.browsingContext.currentWindowContext.getActor = sinon
    250      .stub()
    251      .resolves(mockExtractor);
    252 
    253    setupBrowserWindowTracker(sb, createFakeWindow([tab]));
    254 
    255    const result = await GetPageContent.getPageContent(
    256      { url: targetUrl },
    257      new Set([targetUrl])
    258    );
    259 
    260    Assert.ok(result.includes("Content (full page)"), "Should indicate mode");
    261    Assert.ok(result.includes("Article"), "Should include tab title");
    262    Assert.ok(result.includes(targetUrl), "Should include URL");
    263    Assert.ok(result.includes(pageContent), "Should include extracted content");
    264  } finally {
    265    sb.restore();
    266  }
    267 });
    268 
    269 add_task(async function test_getPageContent_content_truncation() {
    270  const sb = sinon.createSandbox();
    271 
    272  try {
    273    const targetUrl = "https://example.com/long";
    274    const longContent = "A".repeat(15000);
    275 
    276    const mockExtractor = {
    277      getText: sinon.stub().resolves(longContent),
    278      getReaderModeContent: sinon.stub().resolves(""),
    279    };
    280 
    281    const tab = createFakeTab(targetUrl, "Long Page");
    282    tab.linkedBrowser.browsingContext.currentWindowContext.getActor = sinon
    283      .stub()
    284      .resolves(mockExtractor);
    285 
    286    setupBrowserWindowTracker(sb, createFakeWindow([tab]));
    287 
    288    const result = await GetPageContent.getPageContent(
    289      { url: targetUrl },
    290      new Set([targetUrl])
    291    );
    292 
    293    const contentMatch = result.match(/Content \(full page\) from.*:\s*(.*)/s);
    294    Assert.ok(contentMatch, "Should match content pattern");
    295 
    296    const extractedContent = contentMatch[1].trim();
    297    Assert.lessOrEqual(
    298      extractedContent.length,
    299      10003,
    300      "Content should be truncated to ~10000 chars (with ...)"
    301    );
    302    Assert.ok(
    303      extractedContent.endsWith("..."),
    304      "Truncated content should end with ..."
    305    );
    306  } finally {
    307    sb.restore();
    308  }
    309 });
    310 
    311 add_task(async function test_getPageContent_empty_content() {
    312  const sb = sinon.createSandbox();
    313 
    314  try {
    315    const targetUrl = "https://example.com/empty";
    316 
    317    const mockExtractor = {
    318      getText: sinon.stub().resolves("   \n  \n   "),
    319      getReaderModeContent: sinon.stub().resolves(""),
    320    };
    321 
    322    const tab = createFakeTab(targetUrl, "Empty Page");
    323    tab.linkedBrowser.browsingContext.currentWindowContext.getActor = sinon
    324      .stub()
    325      .resolves(mockExtractor);
    326 
    327    setupBrowserWindowTracker(sb, createFakeWindow([tab]));
    328 
    329    const result = await GetPageContent.getPageContent(
    330      { url: targetUrl },
    331      new Set([targetUrl])
    332    );
    333 
    334    // Whitespace content is normalized but still returns success
    335    Assert.ok(
    336      result.includes("Content (full page)"),
    337      "Should use full page mode after reader fallback"
    338    );
    339    Assert.ok(result.includes("Empty Page"), "Should include tab label");
    340    // The content is essentially empty after normalization, but still returned
    341    Assert.ok(
    342      result.match(/:\s*$/),
    343      "Content should be mostly empty after normalization"
    344    );
    345  } finally {
    346    sb.restore();
    347  }
    348 });
    349 
    350 add_task(async function test_getPageContent_extraction_error() {
    351  const sb = sinon.createSandbox();
    352 
    353  try {
    354    const targetUrl = "https://example.com/error";
    355 
    356    const mockExtractor = {
    357      getText: sinon.stub().rejects(new Error("Extraction failed")),
    358      getReaderModeContent: sinon.stub().resolves(""),
    359    };
    360 
    361    const tab = createFakeTab(targetUrl, "Error Page");
    362    tab.linkedBrowser.browsingContext.currentWindowContext.getActor = sinon
    363      .stub()
    364      .resolves(mockExtractor);
    365 
    366    setupBrowserWindowTracker(sb, createFakeWindow([tab]));
    367 
    368    const result = await GetPageContent.getPageContent(
    369      { url: targetUrl },
    370      new Set([targetUrl])
    371    );
    372 
    373    Assert.ok(
    374      result.includes("returned no content"),
    375      "Should handle extraction error gracefully"
    376    );
    377    Assert.ok(result.includes("Error Page"), "Should include tab label");
    378  } finally {
    379    sb.restore();
    380  }
    381 });
    382 
    383 add_task(async function test_getPageContent_reader_mode_string() {
    384  const sb = sinon.createSandbox();
    385 
    386  try {
    387    const targetUrl = "https://example.com/reader";
    388    const readerContent = "Clean reader mode text";
    389 
    390    const mockExtractor = {
    391      getText: sinon.stub().resolves("Full content"),
    392      getReaderModeContent: sinon.stub().resolves(readerContent),
    393    };
    394 
    395    const tab = createFakeTab(targetUrl, "Reader Test");
    396    tab.linkedBrowser.browsingContext.currentWindowContext.getActor = sinon
    397      .stub()
    398      .resolves(mockExtractor);
    399 
    400    setupBrowserWindowTracker(sb, createFakeWindow([tab]));
    401 
    402    const result = await GetPageContent.getPageContent(
    403      { url: targetUrl },
    404      new Set([targetUrl])
    405    );
    406 
    407    Assert.ok(
    408      result.includes("Content (reader mode)"),
    409      "Should use reader mode by default"
    410    );
    411    Assert.ok(
    412      result.includes(readerContent),
    413      "Should include reader mode content"
    414    );
    415  } finally {
    416    sb.restore();
    417  }
    418 });
    419 
    420 add_task(async function test_getPageContent_invalid_url_format() {
    421  const sb = sinon.createSandbox();
    422 
    423  try {
    424    const targetUrl = "not-a-valid-url";
    425    const tabs = [createFakeTab("https://example.com", "Example")];
    426 
    427    setupBrowserWindowTracker(sb, createFakeWindow(tabs));
    428 
    429    // Add URL to allowed list so it searches tabs instead of trying headless
    430    const result = await GetPageContent.getPageContent(
    431      { url: targetUrl },
    432      new Set([targetUrl])
    433    );
    434 
    435    Assert.ok(
    436      result.includes("Cannot find URL"),
    437      "Should handle invalid URL format"
    438    );
    439  } finally {
    440    sb.restore();
    441  }
    442 });