SERPCategorization.sys.mjs (51436B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 /** 6 * Functionality related to categorizing SERPs. 7 */ 8 9 import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs"; 10 11 const lazy = {}; 12 13 ChromeUtils.defineESModuleGetters(lazy, { 14 EnrollmentType: "resource://nimbus/ExperimentAPI.sys.mjs", 15 NimbusFeatures: "resource://nimbus/ExperimentAPI.sys.mjs", 16 Region: "resource://gre/modules/Region.sys.mjs", 17 RemoteSettings: "resource://services-settings/remote-settings.sys.mjs", 18 SearchUtils: "moz-src:///toolkit/components/search/SearchUtils.sys.mjs", 19 Sqlite: "resource://gre/modules/Sqlite.sys.mjs", 20 }); 21 22 ChromeUtils.defineLazyGetter(lazy, "gCryptoHash", () => { 23 return Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash); 24 }); 25 26 const CATEGORIZATION_PREF = 27 "browser.search.serpEventTelemetryCategorization.enabled"; 28 const CATEGORIZATION_REGION_PREF = 29 "browser.search.serpEventTelemetryCategorization.regionEnabled"; 30 31 XPCOMUtils.defineLazyPreferenceGetter( 32 lazy, 33 "serpEventTelemetryCategorization", 34 CATEGORIZATION_PREF, 35 false, 36 (aPreference, previousValue, newValue) => { 37 if (newValue) { 38 SERPCategorization.init(); 39 } else { 40 SERPCategorization.uninit({ deleteMap: true }); 41 } 42 } 43 ); 44 45 ChromeUtils.defineLazyGetter(lazy, "logConsole", () => { 46 return console.createInstance({ 47 prefix: "SearchTelemetry", 48 maxLogLevel: lazy.SearchUtils.loggingEnabled ? "Debug" : "Warn", 49 }); 50 }); 51 52 XPCOMUtils.defineLazyPreferenceGetter( 53 lazy, 54 "activityLimit", 55 "telemetry.fog.test.activity_limit", 56 120 57 ); 58 59 export const TELEMETRY_CATEGORIZATION_KEY = "search-categorization"; 60 export const TELEMETRY_CATEGORIZATION_DOWNLOAD_SETTINGS = { 61 // Units are in milliseconds. 62 base: 3600000, 63 minAdjust: 60000, 64 maxAdjust: 600000, 65 maxTriesPerSession: 2, 66 }; 67 68 export const CATEGORIZATION_SETTINGS = { 69 STORE_SCHEMA: 1, 70 STORE_FILE: "domain_to_categories.sqlite", 71 STORE_NAME: "domain_to_categories", 72 MAX_DOMAINS_TO_CATEGORIZE: 10, 73 MINIMUM_SCORE: 0, 74 STARTING_RANK: 2, 75 IDLE_TIMEOUT_SECONDS: 60 * 60, 76 WAKE_TIMEOUT_MS: 60 * 60 * 1000, 77 PING_SUBMISSION_THRESHOLD: 10, 78 HAS_MATCHING_REGION: "SearchTelemetry:HasMatchingRegion", 79 INCONCLUSIVE: 0, 80 }; 81 82 /** 83 * @typedef {object} CategorizationResult 84 * @property {string} organic_category 85 * The category for the organic result. 86 * @property {string} organic_num_domains 87 * The number of domains examined to determine the organic category result. 88 * @property {string} organic_num_inconclusive 89 * The number of inconclusive domains when determining the organic result. 90 * @property {string} organic_num_unknown 91 * The number of unknown domains when determining the organic result. 92 * @property {string} sponsored_category 93 * The category for the organic result. 94 * @property {string} sponsored_num_domains 95 * The number of domains examined to determine the sponsored category. 96 * @property {string} sponsored_num_inconclusive 97 * The number of inconclusive domains when determining the sponsored category. 98 * @property {string} sponsored_num_unknown 99 * The category for the sponsored result. 100 * @property {string} mappings_version 101 * The category mapping version used to determine the categories. 102 */ 103 104 /** 105 * @typedef {object} CategorizationExtraParams 106 * @property {string} num_ads_clicked 107 * The total number of ads clicked on a SERP. 108 * @property {string} num_ads_hidden 109 * The total number of ads hidden from the user when categorization occured. 110 * @property {string} num_ads_loaded 111 * The total number of ads loaded when categorization occured. 112 * @property {string} num_ads_visible 113 * The total number of ads visible to the user when categorization occured. 114 */ 115 116 /* eslint-disable jsdoc/valid-types */ 117 /** 118 * @typedef {CategorizationResult & CategorizationExtraParams} RecordCategorizationParameters 119 */ 120 /* eslint-enable jsdoc/valid-types */ 121 122 /** 123 * Categorizes SERPs. 124 */ 125 class Categorizer { 126 async init() { 127 if (this.enabled) { 128 lazy.logConsole.debug("Initialize SERP categorizer."); 129 await SERPDomainToCategoriesMap.init(); 130 SERPCategorizationEventScheduler.init(); 131 SERPCategorizationRecorder.init(); 132 } 133 } 134 135 async uninit({ deleteMap = false } = {}) { 136 lazy.logConsole.debug("Uninit SERP categorizer."); 137 await SERPDomainToCategoriesMap.uninit(deleteMap); 138 SERPCategorizationEventScheduler.uninit(); 139 SERPCategorizationRecorder.uninit(); 140 } 141 142 get enabled() { 143 return lazy.serpEventTelemetryCategorization; 144 } 145 146 /** 147 * Categorizes domains extracted from SERPs. Note that we don't process 148 * domains if the domain-to-categories map is empty (if the client couldn't 149 * download Remote Settings attachments, for example). 150 * 151 * @param {Set} nonAdDomains 152 * Domains from organic results extracted from the page. 153 * @param {Set} adDomains 154 * Domains from ad results extracted from the page. 155 * @returns {Promise<?CategorizationResult>} 156 * The final categorization result. Returns null if the map was empty. 157 */ 158 async maybeCategorizeSERP(nonAdDomains, adDomains) { 159 // Per DS, if the map was empty (e.g. because of a technical issue 160 // downloading the data), we shouldn't report telemetry. 161 // Thus, there is no point attempting to categorize the SERP. 162 if (SERPDomainToCategoriesMap.empty) { 163 SERPCategorizationRecorder.recordMissingImpressionTelemetry(); 164 return null; 165 } 166 /** @type {CategorizationResult} */ 167 let resultsToReport = {}; 168 169 let results = await this.applyCategorizationLogic(nonAdDomains); 170 resultsToReport.organic_category = results.category; 171 resultsToReport.organic_num_domains = results.num_domains; 172 resultsToReport.organic_num_unknown = results.num_unknown; 173 resultsToReport.organic_num_inconclusive = results.num_inconclusive; 174 175 results = await this.applyCategorizationLogic(adDomains); 176 resultsToReport.sponsored_category = results.category; 177 resultsToReport.sponsored_num_domains = results.num_domains; 178 resultsToReport.sponsored_num_unknown = results.num_unknown; 179 resultsToReport.sponsored_num_inconclusive = results.num_inconclusive; 180 181 resultsToReport.mappings_version = 182 SERPDomainToCategoriesMap.version.toString(); 183 184 return resultsToReport; 185 } 186 187 /** 188 * Applies the logic for reducing extracted domains to a single category for 189 * the SERP. 190 * 191 * @param {Set} domains 192 * The domains extracted from the page. 193 * @returns {Promise<object>} resultsToReport 194 * The final categorization results. Keys are: "category", "num_domains", 195 * "num_unknown" and "num_inconclusive". 196 */ 197 async applyCategorizationLogic(domains) { 198 let domainInfo = {}; 199 let domainsCount = 0; 200 let unknownsCount = 0; 201 let inconclusivesCount = 0; 202 203 for (let domain of domains) { 204 domainsCount++; 205 206 let categoryCandidates = await SERPDomainToCategoriesMap.get(domain); 207 208 if (!categoryCandidates.length) { 209 unknownsCount++; 210 continue; 211 } 212 213 // Inconclusive domains do not have more than one category candidate. 214 if ( 215 categoryCandidates[0].category == CATEGORIZATION_SETTINGS.INCONCLUSIVE 216 ) { 217 inconclusivesCount++; 218 continue; 219 } 220 221 domainInfo[domain] = categoryCandidates; 222 } 223 224 let finalCategory; 225 let topCategories = []; 226 // Determine if all domains were unknown or inconclusive. 227 if (unknownsCount + inconclusivesCount == domainsCount) { 228 finalCategory = CATEGORIZATION_SETTINGS.INCONCLUSIVE; 229 } else { 230 let maxScore = CATEGORIZATION_SETTINGS.MINIMUM_SCORE; 231 let rank = CATEGORIZATION_SETTINGS.STARTING_RANK; 232 for (let categoryCandidates of Object.values(domainInfo)) { 233 for (let { category, score } of categoryCandidates) { 234 let adjustedScore = score / Math.log2(rank); 235 if (adjustedScore > maxScore) { 236 maxScore = adjustedScore; 237 topCategories = [category]; 238 } else if (adjustedScore == maxScore) { 239 topCategories.push(Number(category)); 240 } 241 rank++; 242 } 243 } 244 finalCategory = 245 topCategories.length > 1 246 ? this.#chooseRandomlyFrom(topCategories) 247 : topCategories[0]; 248 } 249 250 return { 251 category: finalCategory.toString(), 252 num_domains: domainsCount.toString(), 253 num_unknown: unknownsCount.toString(), 254 num_inconclusive: inconclusivesCount.toString(), 255 }; 256 } 257 258 #chooseRandomlyFrom(categories) { 259 let randIdx = Math.floor(Math.random() * categories.length); 260 return categories[randIdx]; 261 } 262 } 263 264 /** 265 * Contains outstanding categorizations of browser objects that have yet to be 266 * scheduled to be reported into a Glean event. 267 * They are kept here until one of the conditions are met: 268 * 1. The browser that was tracked is no longer being tracked. 269 * 2. A user has been idle for IDLE_TIMEOUT_SECONDS 270 * 3. The user has awoken their computer and the time elapsed from the last 271 * categorization event exceeds WAKE_TIMEOUT_MS. 272 */ 273 class CategorizationEventScheduler { 274 /** 275 * A WeakMap containing browser objects mapped to a callback. 276 * 277 * @type {WeakMap | null} 278 */ 279 #browserToCallbackMap = null; 280 281 /** 282 * An instance of user idle service. Cached for testing purposes. 283 * 284 * @type {nsIUserIdleService | null} 285 */ 286 #idleService = null; 287 288 /** 289 * Whether it has been initialized. 290 * 291 * @type {boolean} 292 */ 293 #init = false; 294 295 /** 296 * The last Date.now() of a callback insertion. 297 * 298 * @type {number | null} 299 */ 300 #mostRecentMs = null; 301 302 init() { 303 if (this.#init) { 304 return; 305 } 306 307 lazy.logConsole.debug("Initializing categorization event scheduler."); 308 309 this.#browserToCallbackMap = new WeakMap(); 310 311 // In tests, we simulate idleness as it is more reliable and easier than 312 // trying to replicate idleness. The way to do is so it by creating 313 // an mock idle service and having the component subscribe to it. If we 314 // used a lazy instantiation of idle service, the test could only ever be 315 // subscribed to the real one. 316 this.#idleService = Cc["@mozilla.org/widget/useridleservice;1"].getService( 317 Ci.nsIUserIdleService 318 ); 319 320 this.#idleService.addIdleObserver( 321 this, 322 CATEGORIZATION_SETTINGS.IDLE_TIMEOUT_SECONDS 323 ); 324 325 Services.obs.addObserver(this, "quit-application"); 326 Services.obs.addObserver(this, "wake_notification"); 327 328 this.#init = true; 329 } 330 331 uninit() { 332 if (!this.#init) { 333 return; 334 } 335 336 this.#browserToCallbackMap = null; 337 338 lazy.logConsole.debug("Un-initializing categorization event scheduler."); 339 this.#idleService.removeIdleObserver( 340 this, 341 CATEGORIZATION_SETTINGS.IDLE_TIMEOUT_SECONDS 342 ); 343 344 Services.obs.removeObserver(this, "quit-application"); 345 Services.obs.removeObserver(this, "wake_notification"); 346 347 this.#idleService = null; 348 this.#init = false; 349 } 350 351 observe(subject, topic) { 352 switch (topic) { 353 case "idle": 354 lazy.logConsole.debug("Triggering all callbacks due to idle."); 355 this.#sendAllCallbacks(); 356 break; 357 case "quit-application": 358 this.uninit(); 359 break; 360 case "wake_notification": 361 if ( 362 this.#mostRecentMs && 363 Date.now() - this.#mostRecentMs >= 364 CATEGORIZATION_SETTINGS.WAKE_TIMEOUT_MS 365 ) { 366 lazy.logConsole.debug( 367 "Triggering all callbacks due to a wake notification." 368 ); 369 this.#sendAllCallbacks(); 370 } 371 break; 372 } 373 } 374 375 addCallback(browser, callback) { 376 lazy.logConsole.debug("Adding callback to queue."); 377 this.#mostRecentMs = Date.now(); 378 this.#browserToCallbackMap?.set(browser, callback); 379 } 380 381 sendCallback(browser) { 382 let callback = this.#browserToCallbackMap?.get(browser); 383 if (callback) { 384 lazy.logConsole.debug("Triggering callback."); 385 callback(); 386 Services.obs.notifyObservers( 387 null, 388 "recorded-single-categorization-event" 389 ); 390 this.#browserToCallbackMap.delete(browser); 391 } 392 } 393 394 #sendAllCallbacks() { 395 let browsers = ChromeUtils.nondeterministicGetWeakMapKeys( 396 this.#browserToCallbackMap 397 ); 398 if (browsers) { 399 lazy.logConsole.debug("Triggering all callbacks."); 400 for (let browser of browsers) { 401 this.sendCallback(browser); 402 } 403 } 404 this.#mostRecentMs = null; 405 Services.obs.notifyObservers(null, "recorded-all-categorization-events"); 406 } 407 } 408 409 /** 410 * Handles reporting SERP categorization telemetry to Glean. 411 */ 412 class CategorizationRecorder { 413 #init = false; 414 415 // The number of SERP categorizations that have been recorded but not yet 416 // reported in a Glean ping. 417 #serpCategorizationsCount = 0; 418 419 // When the user started interacting with the SERP. 420 #userInteractionStartTime = null; 421 422 async init() { 423 if (this.#init) { 424 return; 425 } 426 427 Services.obs.addObserver(this, "user-interaction-active"); 428 Services.obs.addObserver(this, "user-interaction-inactive"); 429 this.#init = true; 430 this.#serpCategorizationsCount = Services.prefs.getIntPref( 431 "browser.search.serpMetricsRecordedCounter", 432 0 433 ); 434 Services.prefs.setIntPref("browser.search.serpMetricsRecordedCounter", 0); 435 this.submitPing("startup"); 436 Services.obs.notifyObservers(null, "categorization-recorder-init"); 437 } 438 439 uninit() { 440 if (this.#init) { 441 Services.obs.removeObserver(this, "user-interaction-active"); 442 Services.obs.removeObserver(this, "user-interaction-inactive"); 443 Services.prefs.setIntPref( 444 "browser.search.serpMetricsRecordedCounter", 445 this.#serpCategorizationsCount 446 ); 447 448 this.#resetCategorizationRecorderData(); 449 this.#init = false; 450 } 451 } 452 453 observe(subject, topic, _data) { 454 switch (topic) { 455 case "user-interaction-active": { 456 // If the user is already active, we don't want to overwrite the start 457 // time. 458 if (this.#userInteractionStartTime == null) { 459 this.#userInteractionStartTime = Date.now(); 460 } 461 break; 462 } 463 case "user-interaction-inactive": { 464 let currentTime = Date.now(); 465 let activityLimitInMs = lazy.activityLimit * 1000; 466 if ( 467 this.#userInteractionStartTime && 468 currentTime - this.#userInteractionStartTime >= activityLimitInMs 469 ) { 470 this.submitPing("inactivity"); 471 } 472 this.#userInteractionStartTime = null; 473 break; 474 } 475 } 476 } 477 478 /** 479 * Helper function for recording the SERP categorization event. 480 * 481 * @param {RecordCategorizationParameters} resultToReport 482 * The object containing all the data required to report. 483 */ 484 recordCategorizationTelemetry(resultToReport) { 485 lazy.logConsole.debug( 486 "Reporting the following categorization result:", 487 resultToReport 488 ); 489 Glean.serp.categorization.record(resultToReport); 490 491 this.#incrementCategorizationsCount(); 492 } 493 494 /** 495 * Helper function for recording Glean telemetry when issues with the 496 * domain-to-categories map cause the categorization and impression not to be 497 * recorded. 498 */ 499 recordMissingImpressionTelemetry() { 500 lazy.logConsole.debug( 501 "Recording a missing impression due to an issue with the domain-to-categories map." 502 ); 503 Glean.serp.categorizationNoMapFound.add(); 504 this.#incrementCategorizationsCount(); 505 } 506 507 /** 508 * Adds a Glean object metric to the custom SERP categorization ping if info 509 * about a single experiment has been requested via Nimbus config. 510 */ 511 maybeExtractAndRecordExperimentInfo() { 512 let targetExperiment = 513 lazy.NimbusFeatures.search.getVariable("targetExperiment"); 514 if (!targetExperiment) { 515 lazy.logConsole.debug("No targetExperiment found."); 516 return; 517 } 518 519 lazy.logConsole.debug("Found targetExperiment:", targetExperiment); 520 521 let metadata = lazy.NimbusFeatures.search.getEnrollmentMetadata( 522 lazy.EnrollmentType.EXPERIMENT 523 ); 524 if (metadata?.slug !== targetExperiment) { 525 metadata = lazy.NimbusFeatures.search.getEnrollmentMetadata( 526 lazy.EnrollmentType.ROLLOUT 527 ); 528 529 if (metadata?.slug !== targetExperiment) { 530 lazy.logConsole.debug( 531 "No experiment or rollout found that matches targetExperiment." 532 ); 533 return; 534 } 535 } 536 537 let experimentToRecord = { 538 slug: metadata.slug, 539 branch: metadata.branch, 540 }; 541 lazy.logConsole.debug("Experiment data:", experimentToRecord); 542 Glean.serp.experimentInfo.set(experimentToRecord); 543 } 544 545 submitPing(reason) { 546 if (!this.#serpCategorizationsCount) { 547 return; 548 } 549 550 // If experiment info has been requested via Nimbus config, we want to 551 // record it just before submitting the ping. 552 this.maybeExtractAndRecordExperimentInfo(); 553 lazy.logConsole.debug("Submitting SERP categorization ping:", reason); 554 GleanPings.serpCategorization.submit(reason); 555 556 this.#serpCategorizationsCount = 0; 557 } 558 559 /** 560 * Tests are able to clear telemetry on demand. When that happens, we need to 561 * ensure we're doing to the same here or else the internal count in tests 562 * will be inaccurate. 563 */ 564 testReset() { 565 if (Cu.isInAutomation) { 566 this.#resetCategorizationRecorderData(); 567 } 568 } 569 570 #incrementCategorizationsCount() { 571 this.#serpCategorizationsCount++; 572 573 if ( 574 this.#serpCategorizationsCount >= 575 CATEGORIZATION_SETTINGS.PING_SUBMISSION_THRESHOLD 576 ) { 577 this.submitPing("threshold_reached"); 578 } 579 } 580 581 #resetCategorizationRecorderData() { 582 this.#serpCategorizationsCount = 0; 583 this.#userInteractionStartTime = null; 584 } 585 } 586 587 /** 588 * @typedef {object} DomainToCategoriesRecord 589 * @property {boolean} isDefault 590 * Whether the record is a default if the user's region does not contain a 591 * more specific set of mappings. 592 * @property {string[]} includeRegions 593 * The region codes to include. If left blank, it applies to all regions. 594 * @property {string[]} excludeRegions 595 * The region codes to exclude. 596 * @property {number} version 597 * The version of the record. 598 */ 599 600 /** 601 * @typedef {object} DomainCategoryScore 602 * @property {number} category 603 * The index of the category. 604 * @property {number} score 605 * The score associated with the category. 606 */ 607 608 /** 609 * Maps domain to categories. Data is downloaded from Remote Settings and 610 * stored inside DomainToCategoriesStore. 611 */ 612 class DomainToCategoriesMap { 613 /** 614 * Latest version number of the attachments. 615 * 616 * @type {number | null} 617 */ 618 #version = null; 619 620 /** 621 * The Remote Settings client. 622 * 623 * @type {object | null} 624 */ 625 #client = null; 626 627 /** 628 * Whether this is synced with Remote Settings. 629 * 630 * @type {boolean} 631 */ 632 #init = false; 633 634 /** 635 * Callback when Remote Settings syncs. 636 * 637 * @type {Function | null} 638 */ 639 #onSettingsSync = null; 640 641 /** 642 * When downloading an attachment from Remote Settings fails, this will 643 * contain a timer which will eventually attempt to retry downloading 644 * attachments. 645 */ 646 #downloadTimer = null; 647 648 /** 649 * Number of times this has attempted to try another download. Will reset 650 * if the categorization preference has been toggled, or a sync event has 651 * been detected. 652 * 653 * @type {number} 654 */ 655 #downloadRetries = 0; 656 657 /** 658 * A reference to the data store. 659 * 660 * @type {DomainToCategoriesStore | null} 661 */ 662 #store = null; 663 664 /** 665 * Runs at application startup with startup idle tasks. If the SERP 666 * categorization preference is enabled, it creates a Remote Settings 667 * client to listen to updates, and populates the store. 668 */ 669 async init() { 670 if (this.#init) { 671 return; 672 } 673 lazy.logConsole.debug("Initializing domain-to-categories map."); 674 675 // Set early to allow un-init from an initialization. 676 this.#init = true; 677 678 try { 679 await this.#setupClientAndStore(); 680 } catch (ex) { 681 lazy.logConsole.error(ex); 682 await this.uninit(); 683 return; 684 } 685 686 // If we don't have a client and store, it likely means an un-init process 687 // started during the initialization process. 688 if (this.#client && this.#store) { 689 lazy.logConsole.debug("Initialized domain-to-categories map."); 690 Services.obs.notifyObservers(null, "domain-to-categories-map-init"); 691 } 692 } 693 694 async uninit(shouldDeleteStore) { 695 if (this.#init) { 696 lazy.logConsole.debug("Un-initializing domain-to-categories map."); 697 this.#clearClient(); 698 this.#cancelAndNullifyTimer(); 699 700 if (this.#store) { 701 if (shouldDeleteStore) { 702 try { 703 await this.#store.dropData(); 704 } catch (ex) { 705 lazy.logConsole.error(ex); 706 } 707 } 708 await this.#store.uninit(); 709 this.#store = null; 710 } 711 712 lazy.logConsole.debug("Un-initialized domain-to-categories map."); 713 this.#init = false; 714 Services.obs.notifyObservers(null, "domain-to-categories-map-uninit"); 715 } 716 } 717 718 /** 719 * Given a domain, find categories and relevant scores. 720 * 721 * @param {string} domain Domain to lookup. 722 * @returns {Promise<DomainCategoryScore[]>} 723 * An array containing categories and their respective score. If no record 724 * for the domain is available, return an empty array. 725 */ 726 async get(domain) { 727 if (!this.#store || this.#store.empty || !this.#store.ready) { 728 return []; 729 } 730 lazy.gCryptoHash.init(lazy.gCryptoHash.SHA256); 731 let bytes = new TextEncoder().encode(domain); 732 lazy.gCryptoHash.update(bytes, domain.length); 733 let hash = lazy.gCryptoHash.finish(true); 734 let rawValues = await this.#store.getCategories(hash); 735 if (rawValues?.length) { 736 let output = []; 737 // Transform data into a more readable format. 738 // [x, y] => { category: x, score: y } 739 for (let i = 0; i < rawValues.length; i += 2) { 740 output.push({ category: rawValues[i], score: rawValues[i + 1] }); 741 } 742 return output; 743 } 744 return []; 745 } 746 747 /** 748 * If the map was initialized, returns the version number for the data. 749 * The version number is determined by the record with the highest version 750 * number. Even if the records have different versions, only records from the 751 * latest version should be available. Returns null if the map was not 752 * initialized. 753 * 754 * @returns {null | number} The version number. 755 */ 756 get version() { 757 return this.#version; 758 } 759 760 /** 761 * Whether the store is empty of data. 762 * 763 * @returns {boolean} 764 */ 765 get empty() { 766 if (!this.#store) { 767 return true; 768 } 769 return this.#store.empty; 770 } 771 772 /** 773 * Unit test-only function, used to override the domainToCategoriesMap so 774 * that tests can set it to easy to test values. 775 * 776 * @param {object} domainToCategoriesMap 777 * An object where the key is a hashed domain and the value is an array 778 * containing an arbitrary number of DomainCategoryScores. 779 * @param {number} version 780 * The version number for the store. 781 * @param {boolean} isDefault 782 * Whether the records should be considered default. 783 */ 784 async overrideMapForTests( 785 domainToCategoriesMap, 786 version = 1, 787 isDefault = false 788 ) { 789 if (Cu.isInAutomation || Services.env.exists("XPCSHELL_TEST_PROFILE_DIR")) { 790 await this.#store.init(); 791 await this.#store.dropData(); 792 await this.#store.insertObject(domainToCategoriesMap, version, isDefault); 793 } 794 } 795 796 /** 797 * Given a list of records from Remote Settings, determine which ones should 798 * be matched based on the region. 799 * 800 * - If a set of records match the region, they should be derived from one 801 * source JSON file. The reason why it is split up is to make it less 802 * onerous to download and parse, though testing might find a single 803 * file to be sufficient. 804 * - If more than one set of records match the region, it would be from one 805 * set of records belonging to default mappings that apply to many regions. 806 * The more specific collection should override the default set. 807 * 808 * @param {DomainToCategoriesRecord[]} records 809 * The records from Remote Settings. 810 * @param {string|null} region 811 * The region to match. 812 * @returns {object|null} 813 */ 814 findRecordsForRegion(records, region) { 815 if (!region || !records?.length) { 816 return null; 817 } 818 819 let regionSpecificRecords = []; 820 let defaultRecords = []; 821 for (let record of records) { 822 if (this.recordMatchesRegion(record, region)) { 823 if (record.isDefault) { 824 defaultRecords.push(record); 825 } else { 826 regionSpecificRecords.push(record); 827 } 828 } 829 } 830 831 if (regionSpecificRecords.length) { 832 return { records: regionSpecificRecords, isDefault: false }; 833 } 834 835 if (defaultRecords.length) { 836 return { records: defaultRecords, isDefault: true }; 837 } 838 839 return null; 840 } 841 842 /** 843 * Checks the record matches the region. 844 * 845 * @param {DomainToCategoriesRecord} record 846 * The record to check. 847 * @param {string|null} region 848 * The region the record to be matched against. 849 * @returns {boolean} 850 */ 851 recordMatchesRegion(record, region) { 852 if (!region || !record) { 853 return false; 854 } 855 856 if (record.excludeRegions?.includes(region)) { 857 return false; 858 } 859 860 if (record.isDefault) { 861 return true; 862 } 863 864 if (!record.includeRegions?.includes(region)) { 865 return false; 866 } 867 868 return true; 869 } 870 871 async syncMayModifyStore(syncData, region) { 872 if (!syncData || !region) { 873 return false; 874 } 875 876 let currentResult = this.findRecordsForRegion(syncData?.current, region); 877 if (this.#store.empty && !currentResult) { 878 lazy.logConsole.debug("Store was empty and there were no results."); 879 return false; 880 } 881 882 if (!this.#store.empty && !currentResult) { 883 return true; 884 } 885 886 let storeHasDefault = await this.#store.isDefault(); 887 if (storeHasDefault != currentResult.isDefault) { 888 return true; 889 } 890 891 const recordsDifferFromStore = records => { 892 let result = this.findRecordsForRegion(records, region); 893 return result?.records.length && storeHasDefault == result.isDefault; 894 }; 895 896 if ( 897 recordsDifferFromStore(syncData.created) || 898 recordsDifferFromStore(syncData.deleted) || 899 recordsDifferFromStore(syncData.updated.map(obj => obj.new)) 900 ) { 901 return true; 902 } 903 904 return false; 905 } 906 907 /** 908 * Connect with Remote Settings and retrieve the records associated with 909 * categorization. Then, check if the records match the store version. If 910 * no records exist, return early. If records exist but the version stored 911 * on the records differ from the store version, then attempt to 912 * empty the store and fill it with data from downloaded attachments. Only 913 * reuse the store if the version in each record matches the store. 914 */ 915 async #setupClientAndStore() { 916 return; 917 // eslint-disable-next-line no-unreachable 918 if (this.#client && !this.empty) { 919 return; 920 } 921 lazy.logConsole.debug("Setting up domain-to-categories map."); 922 this.#client = lazy.RemoteSettings(TELEMETRY_CATEGORIZATION_KEY); 923 924 this.#onSettingsSync = event => this.#sync(event.data); 925 this.#client.on("sync", this.#onSettingsSync); 926 927 this.#store = new DomainToCategoriesStore(); 928 await this.#store.init(); 929 930 let records = await this.#client.get(); 931 // Even though records don't exist, we still consider the store initialized 932 // since a sync event from Remote Settings could populate the store with 933 // records eligible for the client to download. 934 if (!records.length) { 935 lazy.logConsole.debug("No records found for domain-to-categories map."); 936 return; 937 } 938 939 // At least one of the records must be eligible for the region. 940 let result = this.findRecordsForRegion(records, lazy.Region.home); 941 let matchingRecords = result?.records; 942 let matchingRecordsAreDefault = result?.isDefault; 943 let hasMatchingRecords = !!matchingRecords?.length; 944 Services.prefs.setBoolPref(CATEGORIZATION_REGION_PREF, hasMatchingRecords); 945 946 if (!hasMatchingRecords) { 947 lazy.logConsole.debug( 948 "No domain-to-category records match the current region:", 949 lazy.Region.home 950 ); 951 // If no matching record was found but the store is not empty, 952 // the user changed their home region. 953 if (!this.#store.empty) { 954 lazy.logConsole.debug( 955 "Drop store because it no longer matches the home region." 956 ); 957 await this.#store.dropData(); 958 } 959 return; 960 } 961 962 this.#version = this.#retrieveLatestVersion(matchingRecords); 963 let storeVersion = await this.#store.getVersion(); 964 let storeIsDefault = await this.#store.isDefault(); 965 if ( 966 storeVersion == this.#version && 967 !this.#store.empty && 968 storeIsDefault == matchingRecordsAreDefault 969 ) { 970 lazy.logConsole.debug("Reuse existing domain-to-categories map."); 971 Services.obs.notifyObservers( 972 null, 973 "domain-to-categories-map-update-complete" 974 ); 975 return; 976 } 977 978 await this.#clearAndPopulateStore(records); 979 } 980 981 #clearClient() { 982 if (this.#client) { 983 lazy.logConsole.debug("Removing Remote Settings client."); 984 this.#client.off("sync", this.#onSettingsSync); 985 this.#client = null; 986 this.#onSettingsSync = null; 987 this.#downloadRetries = 0; 988 } 989 } 990 991 /** 992 * Inspects a list of records from the categorization domain bucket and finds 993 * the maximum version score from the set of records. Each record should have 994 * the same version number but if for any reason one entry has a lower 995 * version number, the latest version can be used to filter it out. 996 * 997 * @param {DomainToCategoriesRecord[]} records 998 * An array containing the records from a Remote Settings collection. 999 * @returns {number} 1000 */ 1001 #retrieveLatestVersion(records) { 1002 return records.reduce((version, record) => { 1003 if (record.version > version) { 1004 return record.version; 1005 } 1006 return version; 1007 }, 0); 1008 } 1009 1010 /** 1011 * Callback when Remote Settings has indicated the collection has been 1012 * synced. Determine if the records changed should result in updating the map, 1013 * as some of the records changed might not affect the user's region. 1014 * Additionally, delete any attachment for records that no longer exist. 1015 * 1016 * @param {object} data 1017 * Object containing records that are current, deleted, created, or updated. 1018 */ 1019 async #sync(data) { 1020 lazy.logConsole.debug("Syncing domain-to-categories with Remote Settings."); 1021 1022 // Remove local files of deleted records. 1023 let toDelete = data?.deleted.filter(d => d.attachment); 1024 await Promise.all( 1025 toDelete.map(record => this.#client.attachments.deleteDownloaded(record)) 1026 ); 1027 1028 let couldModify = await this.syncMayModifyStore(data, lazy.Region.home); 1029 if (!couldModify) { 1030 lazy.logConsole.debug( 1031 "Domain-to-category records had no changes that matched the region." 1032 ); 1033 return; 1034 } 1035 1036 this.#downloadRetries = 0; 1037 1038 try { 1039 await this.#clearAndPopulateStore(data?.current); 1040 } catch (ex) { 1041 lazy.logConsole.error("Error populating map: ", ex); 1042 await this.uninit(); 1043 } 1044 } 1045 1046 /** 1047 * Clear the existing store and populate it with attachments found in the 1048 * records. If no attachments are found, or no record containing an 1049 * attachment contained the latest version, then nothing will change. 1050 * 1051 * @param {DomainToCategoriesRecord[]} records 1052 * The records containing attachments. 1053 * @throws {Error} 1054 * Will throw if it was not able to drop the store data, or it was unable 1055 * to insert data into the store. 1056 */ 1057 async #clearAndPopulateStore(records) { 1058 // If we don't have a handle to a store, it would mean that it was removed 1059 // during an uninitialization process. 1060 if (!this.#store) { 1061 lazy.logConsole.debug( 1062 "Could not populate store because no store was available." 1063 ); 1064 return; 1065 } 1066 1067 if (!this.#store.ready) { 1068 lazy.logConsole.debug( 1069 "Could not populate store because it was not ready." 1070 ); 1071 return; 1072 } 1073 1074 // Empty table so that if there are errors in the download process, callers 1075 // querying the map won't use information we know is probably outdated. 1076 await this.#store.dropData(); 1077 1078 this.#version = null; 1079 this.#cancelAndNullifyTimer(); 1080 1081 let result = this.findRecordsForRegion(records, lazy.Region.home); 1082 let recordsMatchingRegion = result?.records; 1083 let isDefault = result?.isDefault; 1084 let hasMatchingRecords = !!recordsMatchingRegion?.length; 1085 Services.prefs.setBoolPref(CATEGORIZATION_REGION_PREF, hasMatchingRecords); 1086 1087 // A collection with no records is still a valid init state. 1088 if (!records?.length) { 1089 lazy.logConsole.debug("No records found for domain-to-categories map."); 1090 return; 1091 } 1092 1093 if (!hasMatchingRecords) { 1094 lazy.logConsole.debug( 1095 "No domain-to-category records match the current region:", 1096 lazy.Region.home 1097 ); 1098 return; 1099 } 1100 1101 let fileContents = []; 1102 let start = ChromeUtils.now(); 1103 for (let record of recordsMatchingRegion) { 1104 let fetchedAttachment; 1105 // Downloading attachments can fail. 1106 try { 1107 fetchedAttachment = await this.#client.attachments.download(record); 1108 } catch (ex) { 1109 lazy.logConsole.error("Could not download file:", ex); 1110 this.#createTimerToPopulateMap(); 1111 return; 1112 } 1113 fileContents.push(fetchedAttachment.buffer); 1114 } 1115 ChromeUtils.addProfilerMarker( 1116 "SERPCategorization.#clearAndPopulateStore", 1117 start, 1118 "Download attachments." 1119 ); 1120 1121 this.#version = this.#retrieveLatestVersion(recordsMatchingRegion); 1122 if (!this.#version) { 1123 lazy.logConsole.debug("Could not find a version number for any record."); 1124 return; 1125 } 1126 1127 await this.#store.insertFileContents( 1128 fileContents, 1129 this.#version, 1130 isDefault 1131 ); 1132 1133 lazy.logConsole.debug("Finished updating domain-to-categories store."); 1134 Services.obs.notifyObservers( 1135 null, 1136 "domain-to-categories-map-update-complete" 1137 ); 1138 } 1139 1140 #cancelAndNullifyTimer() { 1141 if (this.#downloadTimer) { 1142 lazy.logConsole.debug("Cancel and nullify download timer."); 1143 this.#downloadTimer.cancel(); 1144 this.#downloadTimer = null; 1145 } 1146 } 1147 1148 #createTimerToPopulateMap() { 1149 if ( 1150 this.#downloadRetries >= 1151 TELEMETRY_CATEGORIZATION_DOWNLOAD_SETTINGS.maxTriesPerSession || 1152 !this.#client 1153 ) { 1154 return; 1155 } 1156 if (!this.#downloadTimer) { 1157 this.#downloadTimer = Cc["@mozilla.org/timer;1"].createInstance( 1158 Ci.nsITimer 1159 ); 1160 } 1161 lazy.logConsole.debug("Create timer to retry downloading attachments."); 1162 let delay = 1163 TELEMETRY_CATEGORIZATION_DOWNLOAD_SETTINGS.base + 1164 randomInteger( 1165 TELEMETRY_CATEGORIZATION_DOWNLOAD_SETTINGS.minAdjust, 1166 TELEMETRY_CATEGORIZATION_DOWNLOAD_SETTINGS.maxAdjust 1167 ); 1168 this.#downloadTimer.initWithCallback( 1169 async () => { 1170 this.#downloadRetries += 1; 1171 let records = await this.#client.get(); 1172 try { 1173 await this.#clearAndPopulateStore(records); 1174 } catch (ex) { 1175 lazy.logConsole.error("Error populating store: ", ex); 1176 await this.uninit(); 1177 } 1178 }, 1179 delay, 1180 Ci.nsITimer.TYPE_ONE_SHOT 1181 ); 1182 } 1183 } 1184 1185 /** 1186 * Handles the storage of data containing domains to categories. 1187 */ 1188 export class DomainToCategoriesStore { 1189 #init = false; 1190 1191 /** 1192 * The connection to the store. 1193 * 1194 * @type {object | null} 1195 */ 1196 #connection = null; 1197 1198 /** 1199 * Reference for the shutdown blocker in case we need to remove it before 1200 * shutdown. 1201 * 1202 * @type {Function | null} 1203 */ 1204 #asyncShutdownBlocker = null; 1205 1206 /** 1207 * Whether the store is empty of data. 1208 * 1209 * @type {boolean} 1210 */ 1211 #empty = true; 1212 1213 /** 1214 * For a particular subset of errors, we'll attempt to rebuild the database 1215 * from scratch. 1216 */ 1217 #rebuildableErrors = ["NS_ERROR_FILE_CORRUPTED"]; 1218 1219 /** 1220 * Initializes the store. If the store is initialized it should have cached 1221 * a connection to the store and ensured the store exists. 1222 */ 1223 async init() { 1224 if (this.#init) { 1225 return; 1226 } 1227 lazy.logConsole.debug("Initializing domain-to-categories store."); 1228 1229 // Attempts to cache a connection to the store. 1230 // If a failure occured, try to re-build the store. 1231 let rebuiltStore = false; 1232 try { 1233 await this.#initConnection(); 1234 } catch (ex1) { 1235 lazy.logConsole.error(`Error initializing a connection: ${ex1}`); 1236 if (this.#rebuildableErrors.includes(ex1.name)) { 1237 try { 1238 await this.#rebuildStore(); 1239 } catch (ex2) { 1240 await this.#closeConnection(); 1241 lazy.logConsole.error(`Could not rebuild store: ${ex2}`); 1242 return; 1243 } 1244 rebuiltStore = true; 1245 } 1246 } 1247 1248 // If we don't have a connection, bail because the browser could be 1249 // shutting down ASAP, or re-creating the store is impossible. 1250 if (!this.#connection) { 1251 lazy.logConsole.debug( 1252 "Bailing from DomainToCategoriesStore.init because connection doesn't exist." 1253 ); 1254 return; 1255 } 1256 1257 // If we weren't forced to re-build the store, we only have the connection. 1258 // We want to ensure the store exists so calls to public methods can pass 1259 // without throwing errors due to the absence of the store. 1260 if (!rebuiltStore) { 1261 try { 1262 await this.#initSchema(); 1263 } catch (ex) { 1264 lazy.logConsole.error(`Error trying to create store: ${ex}`); 1265 await this.#closeConnection(); 1266 return; 1267 } 1268 } 1269 1270 lazy.logConsole.debug("Initialized domain-to-categories store."); 1271 this.#init = true; 1272 } 1273 1274 async uninit() { 1275 if (this.#init) { 1276 lazy.logConsole.debug("Un-initializing domain-to-categories store."); 1277 await this.#closeConnection(); 1278 this.#asyncShutdownBlocker = null; 1279 lazy.logConsole.debug("Un-initialized domain-to-categories store."); 1280 } 1281 } 1282 1283 /** 1284 * Whether the store has an open connection to the physical store. 1285 * 1286 * @returns {boolean} 1287 */ 1288 get ready() { 1289 return this.#init; 1290 } 1291 1292 /** 1293 * Whether the store is devoid of data. 1294 * 1295 * @returns {boolean} 1296 */ 1297 get empty() { 1298 return this.#empty; 1299 } 1300 1301 /** 1302 * Clears information in the store. If dropping data encountered a failure, 1303 * try to delete the file containing the store and re-create it. 1304 * 1305 * @throws {Error} Will throw if it was unable to clear information from the 1306 * store. 1307 */ 1308 async dropData() { 1309 if (!this.#connection) { 1310 return; 1311 } 1312 let tableExists = await this.#connection.tableExists( 1313 CATEGORIZATION_SETTINGS.STORE_NAME 1314 ); 1315 if (tableExists) { 1316 lazy.logConsole.debug("Drop domain_to_categories."); 1317 // This can fail if the permissions of the store are read-only. 1318 await this.#connection.executeTransaction(async () => { 1319 await this.#connection.execute(`DROP TABLE domain_to_categories`); 1320 const createDomainToCategoriesTable = ` 1321 CREATE TABLE IF NOT EXISTS 1322 domain_to_categories ( 1323 string_id 1324 TEXT PRIMARY KEY NOT NULL, 1325 categories 1326 TEXT 1327 ); 1328 `; 1329 await this.#connection.execute(createDomainToCategoriesTable); 1330 await this.#connection.execute(`DELETE FROM moz_meta`); 1331 await this.#connection.executeCached( 1332 ` 1333 INSERT INTO 1334 moz_meta (key, value) 1335 VALUES 1336 (:key, :value) 1337 ON CONFLICT DO UPDATE SET 1338 value = :value 1339 `, 1340 { key: "version", value: 0 } 1341 ); 1342 }); 1343 1344 this.#empty = true; 1345 } 1346 } 1347 1348 /** 1349 * Given file contents, try moving them into the store. If a failure occurs, 1350 * it will attempt to drop existing data to ensure callers aren't accessing 1351 * a partially filled store. 1352 * 1353 * @param {ArrayBufferLike[]} fileContents 1354 * Contents to convert. 1355 * @param {number} version 1356 * The version for the store. 1357 * @param {boolean} isDefault 1358 * Whether the file contents are from a default collection. 1359 * @throws {Error} 1360 * Will throw if the insertion failed and dropData was unable to run 1361 * successfully. 1362 */ 1363 async insertFileContents(fileContents, version, isDefault = false) { 1364 if (!this.#init || !fileContents?.length || !version) { 1365 return; 1366 } 1367 1368 try { 1369 await this.#insert(fileContents, version, isDefault); 1370 } catch (ex) { 1371 lazy.logConsole.error(`Could not insert file contents: ${ex}`); 1372 await this.dropData(); 1373 } 1374 } 1375 1376 /** 1377 * Convenience function to make it trivial to insert Javascript objects into 1378 * the store. This avoids having to set up the collection in Remote Settings. 1379 * 1380 * @param {object} domainToCategoriesMap 1381 * An object whose keys should be hashed domains with values containing 1382 * an array of integers. 1383 * @param {number} version 1384 * The version for the store. 1385 * @param {boolean} isDefault 1386 * Whether the mappings are from a default record. 1387 * @returns {Promise<boolean>} 1388 * Whether the operation was successful. 1389 */ 1390 async insertObject(domainToCategoriesMap, version, isDefault) { 1391 if (!Cu.isInAutomation || !this.#init) { 1392 return false; 1393 } 1394 let buffer = new TextEncoder().encode( 1395 JSON.stringify(domainToCategoriesMap) 1396 ).buffer; 1397 await this.insertFileContents([buffer], version, isDefault); 1398 return true; 1399 } 1400 1401 /** 1402 * Retrieves domains mapped to the key. 1403 * 1404 * @param {string} key 1405 * The value to lookup in the store. 1406 * @returns {Promise<number[]>} 1407 * An array of numbers corresponding to the category and score. If the key 1408 * does not exist in the store or the store is having issues retrieving the 1409 * value, returns an empty array. 1410 */ 1411 async getCategories(key) { 1412 if (!this.#init) { 1413 return []; 1414 } 1415 1416 let rows; 1417 try { 1418 rows = await this.#connection.executeCached( 1419 ` 1420 SELECT 1421 categories 1422 FROM 1423 domain_to_categories 1424 WHERE 1425 string_id = :key 1426 `, 1427 { 1428 key, 1429 } 1430 ); 1431 } catch (ex) { 1432 lazy.logConsole.error(`Could not retrieve from the store: ${ex}`); 1433 return []; 1434 } 1435 1436 if (!rows.length) { 1437 return []; 1438 } 1439 return JSON.parse(rows[0].getResultByName("categories")) ?? []; 1440 } 1441 1442 /** 1443 * Retrieves the version number of the store. 1444 * 1445 * @returns {Promise<number>} 1446 * The version number. Returns 0 if the version was never set or if there 1447 * was an issue accessing the version number. 1448 */ 1449 async getVersion() { 1450 if (this.#connection) { 1451 let rows; 1452 try { 1453 rows = await this.#connection.executeCached( 1454 ` 1455 SELECT 1456 value 1457 FROM 1458 moz_meta 1459 WHERE 1460 key = "version" 1461 ` 1462 ); 1463 } catch (ex) { 1464 lazy.logConsole.error(`Could not retrieve version of the store: ${ex}`); 1465 return 0; 1466 } 1467 if (rows.length) { 1468 return parseInt(rows[0].getResultByName("value")) ?? 0; 1469 } 1470 } 1471 return 0; 1472 } 1473 1474 /** 1475 * Whether the data inside the store was derived from a default set of 1476 * records. 1477 * 1478 * @returns {Promise<boolean>} 1479 */ 1480 async isDefault() { 1481 if (this.#connection) { 1482 let rows; 1483 try { 1484 rows = await this.#connection.executeCached( 1485 ` 1486 SELECT 1487 value 1488 FROM 1489 moz_meta 1490 WHERE 1491 key = "is_default" 1492 ` 1493 ); 1494 } catch (ex) { 1495 lazy.logConsole.error( 1496 `Could not retrieve if the store is using default records: ${ex}` 1497 ); 1498 return false; 1499 } 1500 if (rows.length && parseInt(rows[0].getResultByName("value")) == 1) { 1501 return true; 1502 } 1503 } 1504 return false; 1505 } 1506 1507 /** 1508 * Test only function allowing tests to delete the store. 1509 */ 1510 async testDelete() { 1511 if (Cu.isInAutomation) { 1512 await this.#closeConnection(); 1513 await this.#delete(); 1514 } 1515 } 1516 1517 /** 1518 * If a connection is available, close it and remove shutdown blockers. 1519 */ 1520 async #closeConnection() { 1521 this.#init = false; 1522 this.#empty = true; 1523 if (this.#asyncShutdownBlocker) { 1524 lazy.Sqlite.shutdown.removeBlocker(this.#asyncShutdownBlocker); 1525 this.#asyncShutdownBlocker = null; 1526 } 1527 1528 if (this.#connection) { 1529 lazy.logConsole.debug("Closing connection."); 1530 // An error could occur while closing the connection. We suppress the 1531 // error since it is not a critical part of the browser. 1532 try { 1533 await this.#connection.close(); 1534 } catch (ex) { 1535 lazy.logConsole.error(ex); 1536 } 1537 this.#connection = null; 1538 } 1539 } 1540 1541 /** 1542 * Initialize the schema for the store. 1543 * 1544 * @throws {Error} 1545 * Will throw if a permissions error prevents creating the store. 1546 */ 1547 async #initSchema() { 1548 if (!this.#connection) { 1549 return; 1550 } 1551 lazy.logConsole.debug("Create store."); 1552 // Creation can fail if the store is read only. 1553 await this.#connection.executeTransaction(async () => { 1554 // Let outer try block handle the exception. 1555 const createDomainToCategoriesTable = ` 1556 CREATE TABLE IF NOT EXISTS 1557 domain_to_categories ( 1558 string_id 1559 TEXT PRIMARY KEY NOT NULL, 1560 categories 1561 TEXT 1562 ) WITHOUT ROWID; 1563 `; 1564 await this.#connection.execute(createDomainToCategoriesTable); 1565 const createMetaTable = ` 1566 CREATE TABLE IF NOT EXISTS 1567 moz_meta ( 1568 key 1569 TEXT PRIMARY KEY NOT NULL, 1570 value 1571 INTEGER 1572 ) WITHOUT ROWID; 1573 `; 1574 await this.#connection.execute(createMetaTable); 1575 await this.#connection.setSchemaVersion( 1576 CATEGORIZATION_SETTINGS.STORE_SCHEMA 1577 ); 1578 }); 1579 1580 let rows = await this.#connection.executeCached( 1581 "SELECT count(*) = 0 FROM domain_to_categories" 1582 ); 1583 this.#empty = !!rows[0].getResultByIndex(0); 1584 } 1585 1586 /** 1587 * Attempt to delete the store. 1588 * 1589 * @throws {Error} 1590 * Will throw if the permissions for the file prevent its deletion. 1591 */ 1592 async #delete() { 1593 lazy.logConsole.debug("Attempt to delete the store."); 1594 try { 1595 await IOUtils.remove( 1596 PathUtils.join( 1597 PathUtils.profileDir, 1598 CATEGORIZATION_SETTINGS.STORE_FILE 1599 ), 1600 { ignoreAbsent: true } 1601 ); 1602 } catch (ex) { 1603 lazy.logConsole.error(ex); 1604 } 1605 this.#empty = true; 1606 lazy.logConsole.debug("Store was deleted."); 1607 } 1608 1609 /** 1610 * Tries to establish a connection to the store. 1611 * 1612 * @throws {Error} 1613 * Will throw if there was an issue establishing a connection or adding 1614 * adding a shutdown blocker. 1615 */ 1616 async #initConnection() { 1617 if (this.#connection) { 1618 return; 1619 } 1620 1621 // This could fail if the store is corrupted. 1622 this.#connection = await lazy.Sqlite.openConnection({ 1623 path: PathUtils.join( 1624 PathUtils.profileDir, 1625 CATEGORIZATION_SETTINGS.STORE_FILE 1626 ), 1627 }); 1628 1629 await this.#connection.execute("PRAGMA journal_mode = TRUNCATE"); 1630 1631 this.#asyncShutdownBlocker = async () => { 1632 await this.#connection.close(); 1633 this.#connection = null; 1634 }; 1635 1636 // This could fail if we're adding it during shutdown. In this case, 1637 // don't throw but close the connection. 1638 try { 1639 lazy.Sqlite.shutdown.addBlocker( 1640 "SERPCategorization:DomainToCategoriesSqlite closing", 1641 this.#asyncShutdownBlocker 1642 ); 1643 } catch (ex) { 1644 lazy.logConsole.error(ex); 1645 await this.#closeConnection(); 1646 } 1647 } 1648 1649 /** 1650 * Inserts into the store. 1651 * 1652 * @param {ArrayBufferLike[]} fileContents 1653 * The data that should be converted and inserted into the store. 1654 * @param {number} version 1655 * The version number that should be inserted into the store. 1656 * @param {boolean} isDefault 1657 * Whether the file contents are a default set of records. 1658 * @throws {Error} 1659 * Will throw if a connection is not present, if the store is not 1660 * able to be updated (permissions error, corrupted file), or there is 1661 * something wrong with the file contents. 1662 */ 1663 async #insert(fileContents, version, isDefault) { 1664 let start = ChromeUtils.now(); 1665 await this.#connection.executeTransaction(async () => { 1666 lazy.logConsole.debug("Insert into domain_to_categories table."); 1667 for (let fileContent of fileContents) { 1668 await this.#connection.executeCached( 1669 ` 1670 INSERT INTO 1671 domain_to_categories (string_id, categories) 1672 SELECT 1673 json_each.key AS string_id, 1674 json_each.value AS categories 1675 FROM 1676 json_each(json(:obj)) 1677 `, 1678 { 1679 obj: new TextDecoder().decode(fileContent), 1680 } 1681 ); 1682 } 1683 // Once the insertions have successfully completed, update the version. 1684 await this.#connection.executeCached( 1685 ` 1686 INSERT INTO 1687 moz_meta (key, value) 1688 VALUES 1689 (:key, :value) 1690 ON CONFLICT DO UPDATE SET 1691 value = :value 1692 `, 1693 { key: "version", value: version } 1694 ); 1695 if (isDefault) { 1696 await this.#connection.executeCached( 1697 ` 1698 INSERT INTO 1699 moz_meta (key, value) 1700 VALUES 1701 (:key, :value) 1702 ON CONFLICT DO UPDATE SET 1703 value = :value 1704 `, 1705 { key: "is_default", value: 1 } 1706 ); 1707 } 1708 }); 1709 ChromeUtils.addProfilerMarker( 1710 "DomainToCategoriesSqlite.#insert", 1711 start, 1712 "Move file contents into table." 1713 ); 1714 1715 if (fileContents?.length) { 1716 this.#empty = false; 1717 } 1718 } 1719 1720 /** 1721 * Deletes and re-build's the store. Used in cases where we encounter a 1722 * failure and we want to try fixing the error by starting with an 1723 * entirely fresh store. 1724 * 1725 * @throws {Error} 1726 * Will throw if a connection could not be established, if it was 1727 * unable to delete the store, or it was unable to build a new store. 1728 */ 1729 async #rebuildStore() { 1730 lazy.logConsole.debug("Try rebuilding store."); 1731 // Step 1. Close all connections. 1732 await this.#closeConnection(); 1733 1734 // Step 2. Delete the existing store. 1735 await this.#delete(); 1736 1737 // Step 3. Re-establish the connection. 1738 await this.#initConnection(); 1739 1740 // Step 4. If a connection exists, try creating the store. 1741 await this.#initSchema(); 1742 } 1743 } 1744 1745 function randomInteger(min, max) { 1746 return Math.floor(Math.random() * (max - min + 1)) + min; 1747 } 1748 1749 export var SERPDomainToCategoriesMap = new DomainToCategoriesMap(); 1750 export var SERPCategorization = new Categorizer(); 1751 export var SERPCategorizationRecorder = new CategorizationRecorder(); 1752 export var SERPCategorizationEventScheduler = 1753 new CategorizationEventScheduler();