test_MemoriesHistorySource.js (20458B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const {
  getRecentHistory,
  sessionizeVisits,
  generateProfileInputs,
  aggregateSessions,
  topkAggregates,
} = ChromeUtils.importESModule(
  "moz-src:///browser/components/aiwindow/models/memories/MemoriesHistorySource.sys.mjs"
);

/**
 * Create a single visit object for PlacesUtils.history.insertMany.
 *
 * @param {string} url
 * @param {string} title
 * @param {number} baseMs base timestamp in ms
 * @param {number} offsetMs offset from base in ms (negative = earlier)
 * @returns {{url: string, title: string, visits: Array<{date: Date}>}}
 *   A page record carrying exactly one visit dated `baseMs + offsetMs`.
 */
function makeVisit(url, title, baseMs, offsetMs = 0) {
  return {
    url,
    title,
    visits: [{ date: new Date(baseMs + offsetMs) }],
  };
}

/**
 * Build a small, fixed set of synthetic sessionized rows for testing
 * generateProfileInputs and aggregateSessions.
 *
 * Shape matches what generateProfileInputs expects: sessionized rows.
 *
 * @param {number} [baseMicros]
 *   Timestamp (in microseconds) of the first row; later rows are offset
 *   from it. Defaults to the current time.
 * @returns {Array<object>}
 *   Four rows: three in session 1 (two "history", one "search") and one
 *   "history" row in session 2.
 */
function makeSyntheticSessionRows(baseMicros = Date.now() * 1000) {
  return [
    // Session 1: two history visits + one search
    {
      session_id: 1,
      url: "https://example.com/a1",
      title: "Example A1",
      domain: "example.com",
      visitDateMicros: baseMicros,
      frequencyPct: 10,
      domainFrequencyPct: 20,
      source: "history",
    },
    {
      session_id: 1,
      url: "https://example.com/a2",
      title: "Example A2",
      domain: "example.com",
      visitDateMicros: baseMicros + 10_000,
      frequencyPct: 30,
      domainFrequencyPct: 40,
      source: "history",
    },
    {
      session_id: 1,
      url: "https://www.google.com/search?q=test",
      title: "Google search: test",
      domain: "www.google.com",
      visitDateMicros: baseMicros + 20_000,
      frequencyPct: 50,
      domainFrequencyPct: 60,
      source: "search",
    },

    // Session 2: one visit, no search
    {
      session_id: 2,
      url: "https://mozilla.org/",
      title: "Mozilla",
      domain: "mozilla.org",
      visitDateMicros: baseMicros + 1_000_000,
      frequencyPct: 70,
      domainFrequencyPct: 80,
      source: "history",
    },
  ];
}

/**
 * Assert that a row returned by getRecentHistory has the expected fields,
 * types and value ranges.
 *
 * @param {object} row
 *   A single history row to validate.
 * @param {string} [msgPrefix]
 *   Optional label prepended to every assertion message so failures can be
 *   traced back to a specific row.
 */
function assertHistoryRowShape(row, msgPrefix = "") {
  const prefix = msgPrefix ? `${msgPrefix}: ` : "";

  Assert.strictEqual(typeof row.url, "string", `${prefix}url is a string`);
  Assert.ok(row.url.length, `${prefix}url present`);

  Assert.strictEqual(
    typeof row.domain,
    "string",
    `${prefix}domain is a string`
  );
  Assert.ok(row.domain.length, `${prefix}domain present`);

  Assert.strictEqual(typeof row.title, "string", `${prefix}title is a string`);
  Assert.ok(row.title.length, `${prefix}title present`);

  Assert.strictEqual(
    typeof row.frequencyPct,
    "number",
    `${prefix}frequencyPct is a number`
  );
  Assert.strictEqual(
    typeof row.domainFrequencyPct,
    "number",
    `${prefix}domainFrequencyPct is a number`
  );

  Assert.ok(
    row.source === "search" || row.source === "history",
    `${prefix}source labeled`
  );
  Assert.ok(
    row.frequencyPct >= 0 && row.frequencyPct <= 100,
    `${prefix}frequencyPct within 0–100`
  );
  Assert.ok(
    row.domainFrequencyPct >= 0 && row.domainFrequencyPct <= 100,
    `${prefix}domainFrequencyPct within 0–100`
  );

  Assert.strictEqual(
    typeof row.visitDateMicros,
    "number",
    `${prefix}visitDateMicros is a number`
  );
  Assert.ok(
    Number.isFinite(row.visitDateMicros),
    `${prefix}visitDateMicros is finite`
  );
  Assert.greaterOrEqual(
    row.visitDateMicros,
    0,
    `${prefix}visitDateMicros non-negative`
  );
}

add_task(async function test_basic_history_fetch_and_shape() {
  await PlacesUtils.history.clear();
  const now = Date.now();

  const seeded = [
    makeVisit(
      "https://www.google.com/search?q=firefox+history",
      "Google Search: firefox history",
      now,
      -5 * 60 * 1000
    ),
    makeVisit(
      "https://developer.mozilla.org/en-US/docs/Web/JavaScript",
      "JavaScript | MDN",
      now,
      -10 * 60 * 1000
    ),
    makeVisit(
      "https://news.ycombinator.com/",
      "Hacker News",
      now,
      -15 * 60 * 1000
    ),
    makeVisit(
      "https://search.brave.com/search?q=mozsqlite",
      "Brave Search: mozsqlite",
      now,
      -20 * 60 * 1000
    ),
    makeVisit(
      "https://mozilla.org/en-US/",
      "Internet for people, not profit — Mozilla",
      now,
      -25 * 60 * 1000
    ),
  ];

  // Insert via high-level API; Places will populate moz_origins/visits.
  await PlacesUtils.history.insertMany(seeded);

  const rows = await getRecentHistory({ days: 1, maxResults: 100 });
  Assert.ok(Array.isArray(rows), "Should return an array");
  Assert.greaterOrEqual(
    rows.length,
    seeded.length,
    "Should return at least seeded rows"
  );

  // Verify required fields & types on a sample.
  for (const [idx, row] of rows.slice(0, 5).entries()) {
    assertHistoryRowShape(row, `row[${idx}]`);
  }

  // Check ordering: newest first by visit_date.
  const copy = rows.map(r => r.visitDateMicros);
  const sorted = [...copy].sort((a, b) => b - a);
  Assert.deepEqual(
    copy.slice(0, 10),
    sorted.slice(0, 10),
    "Rows are ordered by visit date desc"
  );

  // Search-source tagging should catch major engines with query paths.
  const byUrl = new Map(rows.map(r => [r.url, r]));
  const expectedSources = [
    [seeded[0], "search", "Google search tagged as 'search'"],
    [seeded[3], "search", "Brave search tagged as 'search'"],
    [seeded[1], "history", "MDN should be 'history'"],
    [seeded[2], "history", "Hacker News should be 'history'"],
    [seeded[4], "history", "Mozilla should be 'history'"],
  ];
  for (const [visit, expectedSource, message] of expectedSources) {
    const row = byUrl.get(visit.url);
    // Fail with a readable assertion instead of a TypeError when a seeded
    // visit is missing from the result set.
    Assert.ok(row, `Seeded visit ${visit.url} is present in results`);
    Assert.equal(row.source, expectedSource, message);
  }
});

add_task(async function test_maxResults_is_respected() {
  // Create a burst of visits so we can test LIMIT behavior.
  await PlacesUtils.history.clear();

  const base = Date.now();
  const toInsert = [];
  for (let i = 0; i < 50; i++) {
    toInsert.push(
      makeVisit(
        `https://example.com/page-${i}`,
        `Example Page ${i}`,
        base,
        -i * 1000
      )
    );
  }
  await PlacesUtils.history.insertMany(toInsert);

  const rows10 = await getRecentHistory({ days: 1, maxResults: 10 });
  Assert.equal(rows10.length, 10, "maxResults=10 respected");

  const rows5 = await getRecentHistory({ days: 1, maxResults: 5 });
  Assert.equal(rows5.length, 5, "maxResults=5 respected");
});

add_task(async function test_days_cutoff_is_respected() {
  await PlacesUtils.history.clear();

  // One old (2 days), one recent (within 1 hour)
  const now = Date.now();
  await PlacesUtils.history.insertMany([
    makeVisit(
      "https://old.example.com/",
      "Old Visit",
      now,
      -2 * 24 * 60 * 60 * 1000
    ),
    makeVisit(
      "https://recent.example.com/",
      "Recent Visit",
      now,
      -30 * 60 * 1000
    ),
  ]);

  const rows = await getRecentHistory({ days: 1, maxResults: 50 });
  const urls = rows.map(r => r.url);
  Assert.ok(
    urls.includes("https://recent.example.com/"),
    "Recent visit present"
  );
  Assert.ok(
    !urls.includes("https://old.example.com/"),
    "Old visit filtered by days cutoff"
  );
});

add_task(function test_sessionizeVisits_basic() {
  const baseMs = Date.now();

  // 3 visits (deliberately supplied out of chronological order):
  //  - v1 at t
  //  - v2 at t + 1 min (same session)
  //  - v3 at t + 30 min (new session with default 15 min gap)
  const rows = [
    {
      url: "https://example.com/1",
      title: "First",
      domain: "example.com",
      visitDateMicros: (baseMs + 1 * 60 * 1000) * 1000, // v2
    },
    {
      url: "https://example.com/0",
      title: "Zero",
      domain: "example.com",
      visitDateMicros: baseMs * 1000, // v1
    },
    {
      url: "https://example.com/2",
      title: "Second",
      domain: "example.com",
      visitDateMicros: (baseMs + 30 * 60 * 1000) * 1000, // v3
    },
  ];

  const sessionized = sessionizeVisits(rows);

  Assert.equal(sessionized.length, 3, "All rows kept");
  // Sorted ascending by time
  Assert.ok(
    sessionized[0].visitDateMicros <= sessionized[1].visitDateMicros &&
      sessionized[1].visitDateMicros <= sessionized[2].visitDateMicros,
    "Sessionized rows sorted by ascending visit time"
  );

  const [r0, r1, r2] = sessionized;

  // First two within 1 minute -> same session_id
  Assert.strictEqual(
    r0.session_id,
    r1.session_id,
    "First two visits should share a session"
  );

  // Third 30 min later -> different session_id with default 15 min gap
  Assert.notStrictEqual(
    r1.session_id,
    r2.session_id,
    "Third visit should start a new session"
  );

  // Required session fields present
  for (const r of sessionized) {
    Assert.strictEqual(typeof r.session_id, "number", "session_id is a number");
    Assert.strictEqual(
      typeof r.session_start_ms,
      "number",
      "session_start_ms is a number"
    );
    Assert.strictEqual(
      typeof r.session_start_iso,
      "string",
      "session_start_iso is a string"
    );
    // session_start_iso should be a valid ISO string matching session_start_ms
    const parsed = new Date(r.session_start_iso);
    Assert.ok(
      Number.isFinite(parsed.getTime()),
      "session_start_iso parses as a valid Date"
    );
    Assert.equal(
      parsed.toISOString(),
      r.session_start_iso,
      "session_start_iso is in canonical ISO 8601 format"
    );
    // Also ensure ms and iso are consistent
    const fromMs = new Date(r.session_start_ms).toISOString();
    Assert.equal(
      fromMs,
      r.session_start_iso,
      "session_start_iso matches session_start_ms"
    );
  }
});

add_task(async function test_sinceMicros_cutoff_and_overrides_days() {
  await PlacesUtils.history.clear();
  const nowMs = Date.now();

  const early = makeVisit(
    "https://early.example.com/",
    "Early",
    nowMs,
    -60 * 60 * 1000 // 1 hour ago
  );
  const late = makeVisit(
    "https://late.example.com/",
    "Late",
    nowMs,
    -5 * 60 * 1000 // 5 minutes ago
  );

  await PlacesUtils.history.insertMany([early, late]);

  // Get the raw visitDateMicros so we can compute a watermark between them.
  const allRows = await getRecentHistory({ days: 1, maxResults: 10 });
  const byUrl = new Map(allRows.map(r => [r.url, r]));
  const earlyVisit = byUrl.get(early.url);
  const lateVisit = byUrl.get(late.url);

  Assert.ok(earlyVisit && lateVisit, "Both visits present in initial fetch");

  const midMicros =
    (earlyVisit.visitDateMicros + lateVisit.visitDateMicros) / 2;

  // Get visits since midMicros. No `days` value is passed here, so the
  // cutoff applied must come from sinceMicros alone.
  const rowsSince = await getRecentHistory({
    sinceMicros: midMicros,
    maxResults: 10,
  });

  const urlsSince = rowsSince.map(r => r.url);
  Assert.ok(
    urlsSince.includes(late.url),
    "Late visit included when sinceMicros is between early and late"
  );
  Assert.ok(
    !urlsSince.includes(early.url),
    "Early visit excluded by sinceMicros cutoff"
  );
});

add_task(function test_sessionizeVisits_empty_and_invalid() {
  // Empty input -> empty output
  let sessionized = sessionizeVisits([]);
  Assert.ok(Array.isArray(sessionized), "Empty input returns array");
  Assert.equal(sessionized.length, 0, "Empty input yields empty output");

  // Non-finite visitDateMicros should be filtered out
  const rows = [
    { url: "https://example.com/a", visitDateMicros: NaN },
    { url: "https://example.com/b", visitDateMicros: Infinity },
    { url: "https://example.com/c", visitDateMicros: -Infinity },
  ];
  sessionized = sessionizeVisits(rows);
  Assert.equal(
    sessionized.length,
    0,
    "Rows with non-finite visitDateMicros are filtered"
  );
});

add_task(function test_sessionizeVisits_custom_gap() {
  const baseMs = Date.now();

  // Two visits 20 minutes apart.
  const rows = [
    {
      url: "https://example.com/0",
      visitDateMicros: baseMs * 1000,
    },
    {
      url: "https://example.com/1",
      visitDateMicros: (baseMs + 20 * 60 * 1000) * 1000,
    },
  ];

  // With a huge gapSec, they should stay in one session.
  const sessionizedLoose = sessionizeVisits(rows, { gapSec: 3600 });
  Assert.equal(
    sessionizedLoose[0].session_id,
    sessionizedLoose[1].session_id,
    "Custom large gap keeps visits in one session"
  );

  // With a tiny gapSec, they should split.
  const sessionizedTight = sessionizeVisits(rows, { gapSec: 60 });
  Assert.notStrictEqual(
    sessionizedTight[0].session_id,
    sessionizedTight[1].session_id,
    "Custom small gap splits sessions"
  );
});

add_task(function test_generateProfileInputs_shapes() {
  const rows = makeSyntheticSessionRows();
  const prepared = generateProfileInputs(rows);

  // session_id set should be preserved. Sort numerically: the default
  // Array.prototype.sort compares elements as strings.
  const byNumber = (a, b) => a - b;
  const originalSessionIds = new Set(rows.map(r => r.session_id));
  const preparedSessionIds = new Set(prepared.map(r => r.session_id));
  Assert.deepEqual(
    [...preparedSessionIds].sort(byNumber),
    [...originalSessionIds].sort(byNumber),
    "generateProfileInputs preserves session_id set"
  );

  Assert.equal(prepared.length, 2, "Two sessions -> two prepared records");

  const bySession = new Map(prepared.map(r => [r.session_id, r]));
  const sess1 = bySession.get(1);
  const sess2 = bySession.get(2);

  Assert.ok(sess1, "Session 1 present");
  Assert.ok(sess2, "Session 2 present");

  // Session 1: has title/domain scores and search_events
  Assert.greater(
    Object.keys(sess1.title_scores).length,
    0,
    "Session 1 has title_scores"
  );
  Assert.greater(
    Object.keys(sess1.domain_scores).length,
    0,
    "Session 1 has domain_scores"
  );
  Assert.ok(
    sess1.search_events &&
      typeof sess1.search_events.search_count === "number" &&
      Array.isArray(sess1.search_events.search_titles),
    "Session 1 has search_events summary"
  );

  // Session 2: no search events
  Assert.equal(
    Object.keys(sess2.search_events).length,
    0,
    "Session 2 has empty search_events"
  );

  // Start/end times should be normalized to seconds or null
  for (const sess of prepared) {
    Assert.ok(
      sess.session_start_time === null ||
        Number.isFinite(sess.session_start_time),
      "session_start_time is null or finite"
    );
    Assert.ok(
      sess.session_end_time === null || Number.isFinite(sess.session_end_time),
      "session_end_time is null or finite"
    );
  }
});

add_task(function test_generateProfileInputs_search_only_and_missing_scores() {
  const baseMicros = Date.now() * 1000;

  const rows = [
    // Session 1: search-only, with frequency/domainFrequency missing
    {
      session_id: 1,
      url: "https://www.google.com/search?q=onlysearch",
      title: "Google search: onlysearch",
      domain: "www.google.com",
      visitDateMicros: baseMicros,
      source: "search",
      // frequencyPct and domainFrequencyPct intentionally omitted
    },

    // Session 2: one history visit with scores
    {
      session_id: 2,
      url: "https://example.com/",
      title: "Example",
      domain: "example.com",
      visitDateMicros: baseMicros + 1000,
      frequencyPct: 50,
      domainFrequencyPct: 60,
      source: "history",
    },
  ];

  const prepared = generateProfileInputs(rows);
  const bySession = new Map(prepared.map(r => [r.session_id, r]));
  const sess1 = bySession.get(1);
  const sess2 = bySession.get(2);

  // Guard the lookups so a missing session fails as an assertion rather
  // than as a TypeError on the property accesses below.
  Assert.ok(sess1, "Session 1 present");
  Assert.ok(sess2, "Session 2 present");

  // Session 1: no scores because frecency fields missing, but has search_events
  Assert.deepEqual(
    sess1.title_scores,
    {},
    "Search-only session without frecency has empty title_scores"
  );
  Assert.deepEqual(
    sess1.domain_scores,
    {},
    "Search-only session without frecency has empty domain_scores"
  );
  Assert.ok(
    sess1.search_events &&
      sess1.search_events.search_count === 1 &&
      Array.isArray(sess1.search_events.search_titles),
    "Search-only session still has search_events"
  );

  // Session 2: has scores, but no search_events
  Assert.greater(
    Object.keys(sess2.title_scores).length,
    0,
    "History session has title_scores"
  );
  Assert.greater(
    Object.keys(sess2.domain_scores).length,
    0,
    "History session has domain_scores"
  );
  Assert.equal(
    Object.keys(sess2.search_events).length,
    0,
    "History-only session has empty search_events"
  );
});

add_task(function test_aggregateSessions_basic() {
  const rows = makeSyntheticSessionRows();
  const preparedInputs = generateProfileInputs(rows);

  const [domainAgg, titleAgg, searchAgg] = aggregateSessions(preparedInputs);

  // Object keys are strings, so convert back to numbers before comparing
  // against the numeric session_ids.
  const preparedSessionIds = new Set(preparedInputs.map(p => p.session_id));
  const searchAggIds = new Set(Object.keys(searchAgg).map(id => Number(id)));

  Assert.ok(
    [...searchAggIds].every(id => preparedSessionIds.has(id)),
    "searchAgg keys correspond to prepared session_ids"
  );

  // Domains
  const domainKeys = Object.keys(domainAgg).sort();
  Assert.deepEqual(
    domainKeys,
    ["example.com", "mozilla.org", "www.google.com"].sort(),
    "Domain aggregate keys as expected"
  );

  const exampleDomain = domainAgg["example.com"];
  Assert.ok(exampleDomain, "example.com aggregate present");
  Assert.equal(
    exampleDomain.num_sessions,
    1,
    "example.com appears in one session"
  );
  Assert.greater(
    exampleDomain.session_importance,
    0,
    "example.com has session_importance"
  );
  Assert.greaterOrEqual(
    exampleDomain.last_seen,
    0,
    "example.com last_seen is non-negative"
  );

  const mozillaDomain = domainAgg["mozilla.org"];
  Assert.ok(mozillaDomain, "mozilla.org aggregate present");
  Assert.equal(
    mozillaDomain.num_sessions,
    1,
    "mozilla.org appears in one session"
  );

  const googleDomain = domainAgg["www.google.com"];
  Assert.ok(googleDomain, "www.google.com aggregate present");
  Assert.equal(
    googleDomain.num_sessions,
    1,
    "www.google.com appears in one session"
  );

  // Titles
  Assert.ok(
    Object.prototype.hasOwnProperty.call(titleAgg, "Example A1"),
    "Title Example A1 aggregated"
  );
  Assert.ok(
    Object.prototype.hasOwnProperty.call(titleAgg, "Example A2"),
    "Title Example A2 aggregated"
  );

  const titleA2 = titleAgg["Example A2"];
  Assert.equal(titleA2.num_sessions, 1, "Example A2 appears in one session");

  // Searches
  Assert.ok(
    Object.prototype.hasOwnProperty.call(searchAgg, 1),
    "Search aggregate for session 1 present"
  );
  Assert.ok(
    !Object.prototype.hasOwnProperty.call(searchAgg, 2),
    "No search aggregate for session 2"
  );

  const search1 = searchAgg[1];
  Assert.equal(search1.search_count, 1, "search_count aggregated");
  // Sort a copy so the aggregate under test is not mutated by the check.
  Assert.deepEqual(
    [...search1.search_titles].sort(),
    ["Google search: test"].sort(),
    "search_titles aggregated and deduplicated"
  );
  Assert.greater(
    search1.last_searched,
    0,
    "last_searched converted to seconds and > 0"
  );
});

add_task(function test_aggregateSessions_empty() {
  const [domainAgg, titleAgg, searchAgg] = aggregateSessions([]);

  Assert.deepEqual(
    Object.keys(domainAgg),
    [],
    "Empty input -> no domain aggregates"
  );
  Assert.deepEqual(
    Object.keys(titleAgg),
    [],
    "Empty input -> no title aggregates"
  );
  Assert.deepEqual(
    Object.keys(searchAgg),
    [],
    "Empty input -> no search aggregates"
  );
});

add_task(function test_topkAggregates_recency_and_ranking() {
  const nowSec = Math.floor(Date.now() / 1000);

  // Two domains with identical score/num_sessions/session_importance, so
  // only last_seen differs between them:
  //  - old.com: very old
  //  - fresh.com: very recent
  const aggDomains = {
    "old.com": {
      score: 100,
      last_seen: nowSec - 60 * 60 * 24 * 60, // 60 days ago
      num_sessions: 1,
      session_importance: 1,
    },
    "fresh.com": {
      score: 100,
      last_seen: nowSec - 60 * 60, // 1 hour ago
      num_sessions: 1,
      session_importance: 1,
    },
  };

  const [domainItems] = topkAggregates(
    aggDomains,
    {},
    {},
    {
      k_domains: 2,
      k_titles: 0,
      k_searches: 0,
      now: nowSec,
    }
  );

  // Expect fresh.com to outrank old.com due to recency decay.
  const [firstDomain, secondDomain] = domainItems.map(([key]) => key);
  Assert.equal(
    firstDomain,
    "fresh.com",
    "More recent domain outranks older one"
  );
  Assert.equal(secondDomain, "old.com", "Older domain comes second");
});