test_Tools_GetPageContent.js (12240B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const { GetPageContent } = ChromeUtils.importESModule(
  "moz-src:///browser/components/aiwindow/models/Tools.sys.mjs"
);

const { sinon } = ChromeUtils.importESModule(
  "resource://testing-common/Sinon.sys.mjs"
);

/**
 * Builds a minimal fake browser object for a tab.
 *
 * The fake exposes `currentURI.spec` / `currentURI.hostPort` derived from
 * `url`, and either a `browsingContext` whose `currentWindowContext.getActor`
 * stub resolves to a default extractor ("Sample page content" from
 * `getText`, empty string from `getReaderModeContent`), or a `null`
 * `browsingContext`.
 *
 * @param {string} url - Absolute URL; must be parseable by `new URL()`.
 * @param {boolean} [hasBrowsingContext=true] - When false, `browsingContext`
 *   is set to `null`.
 * @returns {object} The fake browser.
 */
function createFakeBrowser(url, hasBrowsingContext = true) {
  const parsedUrl = new URL(url);
  const browser = {
    currentURI: {
      spec: url,
      hostPort: parsedUrl.host,
    },
  };

  if (hasBrowsingContext) {
    browser.browsingContext = {
      currentWindowContext: {
        getActor: sinon.stub().resolves({
          getText: sinon.stub().resolves("Sample page content"),
          getReaderModeContent: sinon.stub().resolves(""),
        }),
      },
    };
  } else {
    browser.browsingContext = null;
  }

  return browser;
}

/**
 * Builds a fake tab wrapping a fake browser.
 *
 * @param {string} url - URL for the tab's `linkedBrowser`.
 * @param {string} title - Value exposed as the tab's `label`.
 * @param {boolean} [hasBrowsingContext=true] - Forwarded to
 *   `createFakeBrowser`.
 * @returns {{linkedBrowser: object, label: string}} The fake tab.
 */
function createFakeTab(url, title, hasBrowsingContext = true) {
  return {
    linkedBrowser: createFakeBrowser(url, hasBrowsingContext),
    label: title,
  };
}

/**
 * Builds a fake tab whose `getActor` stub resolves to a caller-supplied
 * extractor instead of the default one from `createFakeBrowser`.
 *
 * @param {string} url - URL for the tab's `linkedBrowser`.
 * @param {string} title - Value exposed as the tab's `label`.
 * @param {object} extractor - Object providing `getText` and
 *   `getReaderModeContent` stubs.
 * @returns {{linkedBrowser: object, label: string}} The fake tab.
 */
function createFakeTabWithExtractor(url, title, extractor) {
  const tab = createFakeTab(url, title);
  tab.linkedBrowser.browsingContext.currentWindowContext.getActor = sinon
    .stub()
    .resolves(extractor);
  return tab;
}

/**
 * Builds a fake browser window holding the given tabs.
 *
 * @param {Array<object>} tabs - Fake tabs exposed as `gBrowser.tabs`.
 * @param {boolean} [closed=false] - Value of the window's `closed` flag.
 * @param {boolean} [isAIWindow=true] - Whether
 *   `documentElement.hasAttribute("ai-window")` reports true.
 * @returns {object} The fake window.
 */
function createFakeWindow(tabs, closed = false, isAIWindow = true) {
  return {
    closed,
    gBrowser: {
      tabs,
    },
    document: {
      documentElement: {
        hasAttribute: attr => attr === "ai-window" && isAIWindow,
      },
    },
  };
}

/**
 * Stubs the `BrowserWindowTracker.orderedWindows` getter so it returns the
 * supplied fake windows.
 *
 * @param {object} sandbox - Sinon sandbox that owns the stub; restoring the
 *   sandbox removes it.
 * @param {object|Array<object>|null} [windows] - A single fake window, an
 *   array of them, or `null`/`undefined` for "no windows".
 */
function setupBrowserWindowTracker(sandbox, windows) {
  const { BrowserWindowTracker } = ChromeUtils.importESModule(
    "resource:///modules/BrowserWindowTracker.sys.mjs"
  );

  let windowArray;
  // Treat an omitted argument like null so the getter never yields
  // [undefined].
  if (windows === null || windows === undefined) {
    windowArray = [];
  } else if (Array.isArray(windows)) {
    windowArray = windows;
  } else {
    windowArray = [windows];
  }
  sandbox.stub(BrowserWindowTracker, "orderedWindows").get(() => windowArray);
}

// A tab whose URL equals the requested URL: the result should carry the tab
// title, the extracted text and the URL.
add_task(async function test_getPageContent_exact_url_match() {
  const sb = sinon.createSandbox();

  try {
    const targetUrl = "https://example.com/page";
    const tabs = [
      createFakeTab("https://other.com", "Other"),
      createFakeTab(targetUrl, "Example Page"),
    ];

    setupBrowserWindowTracker(sb, createFakeWindow(tabs));

    const result = await GetPageContent.getPageContent(
      { url: targetUrl },
      new Set([targetUrl])
    );

    Assert.ok(result.includes("Example Page"), "Should include page title");
    Assert.ok(
      result.includes("Sample page content"),
      "Should include page content"
    );
    Assert.ok(
      result.includes(targetUrl),
      "Should include URL in result message"
    );
  } finally {
    sb.restore();
  }
});

// No tab has the exact URL, but one shares the requested host (different
// scheme and path): that tab's content is expected in the result.
add_task(async function test_getPageContent_hostname_match() {
  const sb = sinon.createSandbox();

  try {
    const tabs = [
      createFakeTab("https://example.com/page", "Example Page"),
      createFakeTab("https://other.com", "Other"),
    ];

    setupBrowserWindowTracker(sb, createFakeWindow(tabs));

    const result = await GetPageContent.getPageContent(
      { url: "http://example.com/different" },
      new Set(["http://example.com/different"])
    );

    Assert.ok(
      result.includes("Example Page"),
      "Should match by hostname when exact match fails"
    );
    Assert.ok(
      result.includes("Sample page content"),
      "Should include page content"
    );
  } finally {
    sb.restore();
  }
});

// The requested URL is in the allowed set but no open tab matches it.
add_task(async function test_getPageContent_tab_not_found_with_allowed_url() {
  const sb = sinon.createSandbox();

  try {
    const targetUrl = "https://external.com/article";
    const tabs = [
      createFakeTab("https://example.com", "Example"),
      createFakeTab("https://other.com", "Other"),
    ];

    setupBrowserWindowTracker(sb, createFakeWindow(tabs));

    const allowedUrls = new Set([targetUrl]);
    const result = await GetPageContent.getPageContent(
      { url: targetUrl },
      allowedUrls
    );

    // Headless extraction doesn't work in xpcshell environment
    // In real usage, this would attempt headless extraction for allowed URLs
    Assert.ok(
      result.includes("Cannot find URL"),
      "Should return error when tab not found (headless doesn't work in xpcshell)"
    );
    Assert.ok(result.includes(targetUrl), "Should include target URL in error");
  } finally {
    sb.restore();
  }
});

// The requested URL is neither open in a tab nor in the allowed set; the
// call is expected to reject.
add_task(
  async function test_getPageContent_tab_not_found_without_allowed_url() {
    const sb = sinon.createSandbox();

    try {
      const targetUrl = "https://notfound.com/page";
      const tabs = [
        createFakeTab("https://example.com", "Example"),
        createFakeTab("https://other.com", "Other"),
        createFakeTab("https://third.com", "Third"),
        createFakeTab("https://fourth.com", "Fourth"),
      ];

      setupBrowserWindowTracker(sb, createFakeWindow(tabs));

      const allowedUrls = new Set(["https://different.com"]);

      // When URL is not in allowedUrls, it attempts headless extraction
      // This doesn't work in xpcshell, so we expect an error
      // NOTE(review): the comment in
      // test_getPageContent_tab_not_found_with_allowed_url describes the
      // allow-list condition for headless extraction differently; confirm
      // against Tools.sys.mjs.
      //
      // Assert.rejects fails the test both when the promise resolves and
      // when the rejection does not match the pattern.
      await Assert.rejects(
        GetPageContent.getPageContent({ url: targetUrl }, allowedUrls),
        /addProgressListener/,
        "Should fail with headless browser error in xpcshell"
      );
    } finally {
      sb.restore();
    }
  }
);

// The matching tab has a null browsingContext: an error string naming the
// tab and URL is expected rather than content.
add_task(async function test_getPageContent_no_browsing_context() {
  const sb = sinon.createSandbox();

  try {
    const targetUrl = "https://example.com/loading";
    const tabs = [createFakeTab(targetUrl, "Loading Page", false)];

    setupBrowserWindowTracker(sb, createFakeWindow(tabs));

    const result = await GetPageContent.getPageContent(
      { url: targetUrl },
      new Set([targetUrl])
    );

    Assert.ok(
      result.includes("Cannot access content"),
      "Should return error for unavailable browsing context"
    );
    Assert.ok(
      result.includes("Loading Page"),
      "Should include tab label in error"
    );
    Assert.ok(
      result.includes(targetUrl),
      "Should include URL in error message"
    );
  } finally {
    sb.restore();
  }
});

// The extractor yields full-page text and no reader-mode content.
add_task(async function test_getPageContent_successful_extraction() {
  const sb = sinon.createSandbox();

  try {
    const targetUrl = "https://example.com/article";
    const pageContent = "This is a well-written article with lots of content.";

    const mockExtractor = {
      getText: sinon.stub().resolves(pageContent),
      getReaderModeContent: sinon.stub().resolves(""),
    };

    const tab = createFakeTabWithExtractor(targetUrl, "Article", mockExtractor);

    setupBrowserWindowTracker(sb, createFakeWindow([tab]));

    const result = await GetPageContent.getPageContent(
      { url: targetUrl },
      new Set([targetUrl])
    );

    Assert.ok(result.includes("Content (full page)"), "Should indicate mode");
    Assert.ok(result.includes("Article"), "Should include tab title");
    Assert.ok(result.includes(targetUrl), "Should include URL");
    Assert.ok(result.includes(pageContent), "Should include extracted content");
  } finally {
    sb.restore();
  }
});

// The extractor yields 15000 characters; the returned content is expected
// to be cut to at most 10003 characters and end with "...".
add_task(async function test_getPageContent_content_truncation() {
  const sb = sinon.createSandbox();

  try {
    const targetUrl = "https://example.com/long";
    const longContent = "A".repeat(15000);

    const mockExtractor = {
      getText: sinon.stub().resolves(longContent),
      getReaderModeContent: sinon.stub().resolves(""),
    };

    const tab = createFakeTabWithExtractor(
      targetUrl,
      "Long Page",
      mockExtractor
    );

    setupBrowserWindowTracker(sb, createFakeWindow([tab]));

    const result = await GetPageContent.getPageContent(
      { url: targetUrl },
      new Set([targetUrl])
    );

    // The `s` flag lets `.` span newlines so the capture takes everything
    // after the header.
    const contentMatch = result.match(/Content \(full page\) from.*:\s*(.*)/s);
    Assert.ok(contentMatch, "Should match content pattern");

    const extractedContent = contentMatch[1].trim();
    Assert.lessOrEqual(
      extractedContent.length,
      10003,
      "Content should be truncated to ~10000 chars (with ...)"
    );
    Assert.ok(
      extractedContent.endsWith("..."),
      "Truncated content should end with ..."
    );
  } finally {
    sb.restore();
  }
});

// The extractor yields only whitespace.
add_task(async function test_getPageContent_empty_content() {
  const sb = sinon.createSandbox();

  try {
    const targetUrl = "https://example.com/empty";

    const mockExtractor = {
      getText: sinon.stub().resolves(" \n \n "),
      getReaderModeContent: sinon.stub().resolves(""),
    };

    const tab = createFakeTabWithExtractor(
      targetUrl,
      "Empty Page",
      mockExtractor
    );

    setupBrowserWindowTracker(sb, createFakeWindow([tab]));

    const result = await GetPageContent.getPageContent(
      { url: targetUrl },
      new Set([targetUrl])
    );

    // Whitespace content is normalized but still returns success
    Assert.ok(
      result.includes("Content (full page)"),
      "Should use full page mode after reader fallback"
    );
    Assert.ok(result.includes("Empty Page"), "Should include tab label");
    // The content is essentially empty after normalization, but still returned
    Assert.ok(
      result.match(/:\s*$/),
      "Content should be mostly empty after normalization"
    );
  } finally {
    sb.restore();
  }
});

// The extractor's getText rejects: a "returned no content" message is
// expected instead of a thrown error.
add_task(async function test_getPageContent_extraction_error() {
  const sb = sinon.createSandbox();

  try {
    const targetUrl = "https://example.com/error";

    const mockExtractor = {
      getText: sinon.stub().rejects(new Error("Extraction failed")),
      getReaderModeContent: sinon.stub().resolves(""),
    };

    const tab = createFakeTabWithExtractor(
      targetUrl,
      "Error Page",
      mockExtractor
    );

    setupBrowserWindowTracker(sb, createFakeWindow([tab]));

    const result = await GetPageContent.getPageContent(
      { url: targetUrl },
      new Set([targetUrl])
    );

    Assert.ok(
      result.includes("returned no content"),
      "Should handle extraction error gracefully"
    );
    Assert.ok(result.includes("Error Page"), "Should include tab label");
  } finally {
    sb.restore();
  }
});

// The extractor yields non-empty reader-mode text: the result is expected to
// be labelled as reader mode and contain that text.
add_task(async function test_getPageContent_reader_mode_string() {
  const sb = sinon.createSandbox();

  try {
    const targetUrl = "https://example.com/reader";
    const readerContent = "Clean reader mode text";

    const mockExtractor = {
      getText: sinon.stub().resolves("Full content"),
      getReaderModeContent: sinon.stub().resolves(readerContent),
    };

    const tab = createFakeTabWithExtractor(
      targetUrl,
      "Reader Test",
      mockExtractor
    );

    setupBrowserWindowTracker(sb, createFakeWindow([tab]));

    const result = await GetPageContent.getPageContent(
      { url: targetUrl },
      new Set([targetUrl])
    );

    Assert.ok(
      result.includes("Content (reader mode)"),
      "Should use reader mode by default"
    );
    Assert.ok(
      result.includes(readerContent),
      "Should include reader mode content"
    );
  } finally {
    sb.restore();
  }
});

// The requested URL is not a parseable URL: a "Cannot find URL" message is
// expected.
add_task(async function test_getPageContent_invalid_url_format() {
  const sb = sinon.createSandbox();

  try {
    const targetUrl = "not-a-valid-url";
    const tabs = [createFakeTab("https://example.com", "Example")];

    setupBrowserWindowTracker(sb, createFakeWindow(tabs));

    // Add URL to allowed list so it searches tabs instead of trying headless
    const result = await GetPageContent.getPageContent(
      { url: targetUrl },
      new Set([targetUrl])
    );

    Assert.ok(
      result.includes("Cannot find URL"),
      "Should handle invalid URL format"
    );
  } finally {
    sb.restore();
  }
});