SearchSERPTelemetry.sys.mjs (73371B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 const lazy = {}; 6 7 ChromeUtils.defineESModuleGetters(lazy, { 8 BrowserSearchTelemetry: 9 "moz-src:///browser/components/search/BrowserSearchTelemetry.sys.mjs", 10 BrowserWindowTracker: "resource:///modules/BrowserWindowTracker.sys.mjs", 11 Region: "resource://gre/modules/Region.sys.mjs", 12 RemoteSettings: "resource://services-settings/remote-settings.sys.mjs", 13 SearchUtils: "moz-src:///toolkit/components/search/SearchUtils.sys.mjs", 14 SERPCategorization: 15 "moz-src:///browser/components/search/SERPCategorization.sys.mjs", 16 SERPCategorizationRecorder: 17 "moz-src:///browser/components/search/SERPCategorization.sys.mjs", 18 SERPCategorizationEventScheduler: 19 "moz-src:///browser/components/search/SERPCategorization.sys.mjs", 20 }); 21 22 // Exported for tests. 23 export const ADLINK_CHECK_TIMEOUT_MS = 1000; 24 // Unlike the standard adlink check, the timeout for single page apps is not 25 // based on a content event within the page, like DOMContentLoaded or load. 26 // Thus, we aim for a longer timeout to account for when the server might be 27 // slow to update the content on the page. 28 export const SPA_ADLINK_CHECK_TIMEOUT_MS = 2500; 29 export const TELEMETRY_SETTINGS_KEY = "search-telemetry-v2"; 30 31 export const SEARCH_TELEMETRY_SHARED = { 32 PROVIDER_INFO: "SearchTelemetry:ProviderInfo", 33 LOAD_TIMEOUT: "SearchTelemetry:LoadTimeout", 34 SPA_LOAD_TIMEOUT: "SearchTelemetry:SPALoadTimeout", 35 }; 36 37 const impressionIdsWithoutEngagementsSet = new Set(); 38 39 ChromeUtils.defineLazyGetter(lazy, "logConsole", () => { 40 return console.createInstance({ 41 prefix: "SearchTelemetry", 42 maxLogLevel: lazy.SearchUtils.loggingEnabled ? 
"Debug" : "Warn", 43 }); 44 }); 45 46 export const SearchSERPTelemetryUtils = { 47 ACTIONS: { 48 CLICKED: "clicked", 49 // specific to cookie banner 50 CLICKED_ACCEPT: "clicked_accept", 51 CLICKED_REJECT: "clicked_reject", 52 CLICKED_MORE_OPTIONS: "clicked_more_options", 53 EXPANDED: "expanded", 54 SUBMITTED: "submitted", 55 }, 56 COMPONENTS: { 57 AD_CAROUSEL: "ad_carousel", 58 AD_IMAGE_ROW: "ad_image_row", 59 AD_LINK: "ad_link", 60 AD_SIDEBAR: "ad_sidebar", 61 AD_SITELINK: "ad_sitelink", 62 AD_UNCATEGORIZED: "ad_uncategorized", 63 COOKIE_BANNER: "cookie_banner", 64 INCONTENT_SEARCHBOX: "incontent_searchbox", 65 NON_ADS_LINK: "non_ads_link", 66 REFINED_SEARCH_BUTTONS: "refined_search_buttons", 67 SHOPPING_TAB: "shopping_tab", 68 }, 69 ABANDONMENTS: { 70 NAVIGATION: "navigation", 71 TAB_CLOSE: "tab_close", 72 WINDOW_CLOSE: "window_close", 73 }, 74 INCONTENT_SOURCES: { 75 OPENED_IN_NEW_TAB: "opened_in_new_tab", 76 REFINE_ON_SERP: "follow_on_from_refine_on_SERP", 77 SEARCHBOX: "follow_on_from_refine_on_incontent_search", 78 }, 79 }; 80 81 const AD_COMPONENTS = [ 82 SearchSERPTelemetryUtils.COMPONENTS.AD_CAROUSEL, 83 SearchSERPTelemetryUtils.COMPONENTS.AD_IMAGE_ROW, 84 SearchSERPTelemetryUtils.COMPONENTS.AD_LINK, 85 SearchSERPTelemetryUtils.COMPONENTS.AD_SIDEBAR, 86 SearchSERPTelemetryUtils.COMPONENTS.AD_SITELINK, 87 SearchSERPTelemetryUtils.COMPONENTS.AD_UNCATEGORIZED, 88 ]; 89 90 /** 91 * @typedef {object} FollowOnCookies 92 * 93 * @property {string} codeParamName 94 * The parameter name within the cookie. 95 * @property {string} extraCodeParamName 96 * The query parameter name in the URL that indicates this might be a 97 * follow-on search. 98 * @property {string[]} extraCodePrefixes 99 * Possible values for the query parameter in the URL that indicates this 100 * might be a follow-on search. 101 * @property {string} host 102 * The hostname on which the cookie is stored. 103 * @property {string} name 104 * The name of the cookie to check. 
105 */ 106 107 /** 108 * @typedef {object} SignedInCookies 109 * 110 * @property {string} host 111 * The host associated with a given cookie. 112 * @property {string} name 113 * The name associated with a given cookie. 114 */ 115 116 /** 117 * @typedef {object} ShoppingTab 118 * 119 * @property {boolean} inspectRegexpInSERP 120 * Whether the regexp should be used against hrefs the selector matches 121 * against. 122 * @property {RegExp} regexp 123 * The regular expression to match against a possible shopping tab. Must be 124 * provided if using this feature. 125 * @property {string} selector 126 * The elements on the page to inspect for the shopping tab. Should be anchor 127 * elements. 128 */ 129 130 /** 131 * @typedef {object} PageTypeConfig 132 * 133 * @property {string} name 134 * The name of the page type. 135 * @property {string[]} values 136 * The possible page types (ex: 'web', 'shopping' or 'images'). 137 * @property {string} target 138 * The target to be recorded in telemetry. 139 * @property {boolean} enabled 140 * Whether we should track this page type. 141 * @property {boolean} [isDefault=false] 142 * Whether this page type represents a default SERP. We fall back to this 143 * property in case, upon a page load, there is a delay in adding the page 144 * type param to the URL. 145 */ 146 147 /** 148 * @typedef {object} PageTypeParam 149 * 150 * @property {boolean} enableSPAHandling 151 * If true, process the SERP using the logic for single page apps. 152 * @property {string[]} keys 153 * A list of possible keys that may indicate the page type. 154 * @property {PageTypeConfig[]} pageTypes 155 * An array of potential page type configurations to match against. 156 */ 157 158 /** 159 * @typedef {object} ProviderInfo 160 * 161 * @property {string} codeParamName 162 * The name of the query parameter for the partner code. 163 * @property {object[]} components 164 * An array of components that could be on the SERP. 
165 * @property {{key: string, value: string}} defaultPageQueryParam 166 * Default page query parameter. This was deprecated in Fx 142 and should no 167 * longer be used. 168 * @property {string[]} expectedOrganicCodes 169 * An array of partner codes to match against the parameters in the url. 170 * Matching these codes will report the SERP as organic:none which means the 171 * user has done a search through the search engine's website rather than 172 * through a SAP. 173 * @property {RegExp[]} extraAdServersRegexps 174 * An array of regular expressions that match URLs of potential ad servers. 175 * @property {FollowOnCookies[]} followOnCookies 176 * An array of cookie details that are used to identify follow-on searches. 177 * @property {string[]} followOnParamNames 178 * An array of query parameter names that are used when a follow-on search 179 * occurs. 180 * @property {boolean} isSPA 181 * Whether the provider is a single page app. This was deprecated in Fx 142 182 * and should no longer be used. 183 * @property {string[]} organicCodes 184 * An array of partner codes to match against the parameters in the url. 185 * Matching these codes will report the SERP as organic:<partner code>, which 186 * means the search was performed organically rather than through a SAP. 187 * @property {PageTypeParam} pageTypeParam 188 * The configuration for possible page type parameters. 189 * @property {string[]} queryParamNames 190 * An array of query parameters that may be used for the user's search string. 191 * @property {SignedInCookies[]} signedInCookies 192 * An array of cookie details that are used to determine whether a client is 193 * signed in to a provider's account. 194 * @property {ShoppingTab} shoppingTab 195 * Shopping page parameter. 196 * @property {string[]} taggedCodes 197 * An array of partner codes to match against the parameters in the url. 198 * Matching one of these codes will report the SERP as tagged. 
* @property {string} telemetryId
 *   The telemetry identifier for the provider.
 */

/**
 * @typedef {object} ChannelClickMetadata
 *
 * @property {boolean} _adClickRecorded
 *   Whether an ad click has already been recorded for the channel.
 * @property {boolean} _recordedClick
 *   Whether any click has been recorded for the channel. This is distinct from
 *   _adClickRecorded as not all clicks are ad related.
 */

/**
 * @typedef {ChannelWrapper & ChannelClickMetadata} TrackedChannel
 */

/**
 * TelemetryHandler is the main class handling Search Engine Result Page (SERP)
 * telemetry. It primarily deals with tracking of what pages are loaded into tabs.
 *
 * It handles the *in-content:sap* keys of the SEARCH_COUNTS histogram.
 */
class TelemetryHandler {
  // Whether or not this class is initialised.
  _initialized = false;

  // An instance of ContentHandler.
  _contentHandler;

  // The original provider information, mainly used for tests.
  _originalProviderInfo = null;

  // The current search provider info.
  _searchProviderInfo = null;

  // An instance of remote settings that is used to access the provider info.
  _telemetrySettings;

  // Callback used when syncing telemetry settings.
  #telemetrySettingsSync;

  // _browserInfoByURL is a map of tracked search urls to objects containing:
  // * {object} info
  //   the search provider information associated with the url.
  // * {WeakMap} browserTelemetryStateMap
  //   a weak map of browsers that have the url loaded, their ad report state,
  //   and their impression id.
  // * {integer} count
  //   a manual count of browsers logged.
  // We keep a weak map of browsers, in case we miss something on our counts
  // and cause a memory leak - worst case our map is slightly bigger than it
  // needs to be.
  // The manual count is because WeakMap doesn't give us size/length
  // information, but we want to know when we can clean up our associated
  // entry.
  _browserInfoByURL = new Map();

  // Browser objects mapped to the info in _browserInfoByURL.
  #browserToItemMap = new WeakMap();

  // An array of regular expressions that match urls that could be subframes
  // on SERPs.
  #subframeRegexps = [];

  // _browserSourceMap is a map of the latest search source for a particular
  // browser - one of the KNOWN_SEARCH_SOURCES in BrowserSearchTelemetry.
  _browserSourceMap = new WeakMap();

  /**
   * A WeakMap whose key is a browser with value of a source type found in
   * INCONTENT_SOURCES. Kept separate to avoid overlapping with legacy
   * search sources. These sources are specific to the content of a search
   * provider page rather than something from within the browser itself.
   */
  #browserContentSourceMap = new WeakMap();

  /**
   * Sets the source of a SERP visit from something that occurred in content
   * rather than from the browser. Any source previously stored for the same
   * browser is overwritten.
   *
   * @param {MozBrowser} browser
   *   The browser object associated with the page that should be a SERP.
   * @param {string} source
   *   The source that started the load. One of
   *   SearchSERPTelemetryUtils.COMPONENTS.INCONTENT_SEARCHBOX,
   *   SearchSERPTelemetryUtils.INCONTENT_SOURCES.OPENED_IN_NEW_TAB or
   *   SearchSERPTelemetryUtils.INCONTENT_SOURCES.REFINE_ON_SERP.
   */
  setBrowserContentSource(browser, source) {
    this.#browserContentSourceMap.set(browser, source);
  }

  // _browserNewtabSessionMap is a map of the newtab session id for particular
  // browsers.
295 _browserNewtabSessionMap = new WeakMap(); 296 297 constructor() { 298 this._contentHandler = new ContentHandler({ 299 browserInfoByURL: this._browserInfoByURL, 300 findBrowserItemForURL: this._findBrowserItemForURL.bind(this), 301 checkURLForSerpMatch: this._checkURLForSerpMatch.bind(this), 302 findItemForBrowser: this.findItemForBrowser.bind(this), 303 urlIsKnownSERPSubframe: this.urlIsKnownSERPSubframe.bind(this), 304 }); 305 } 306 307 /** 308 * Initializes the TelemetryHandler and its ContentHandler. It will add 309 * appropriate listeners to the window so that window opening and closing 310 * can be tracked. 311 */ 312 async init() { 313 // eslint-disable-next-line no-constant-condition 314 if (this._initialized || true) { 315 return; 316 } 317 318 this._telemetrySettings = lazy.RemoteSettings(TELEMETRY_SETTINGS_KEY); 319 let rawProviderInfo = []; 320 try { 321 rawProviderInfo = await this._telemetrySettings.get(); 322 } catch (ex) { 323 lazy.logConsole.error("Could not get settings:", ex); 324 } 325 326 this.#telemetrySettingsSync = event => this.#onSettingsSync(event); 327 this._telemetrySettings.on("sync", this.#telemetrySettingsSync); 328 329 // Send the provider info to the child handler. 330 this._contentHandler.init(rawProviderInfo); 331 this._originalProviderInfo = rawProviderInfo; 332 333 // Now convert the regexps into 334 this._setSearchProviderInfo(rawProviderInfo); 335 336 for (let win of Services.wm.getEnumerator("navigator:browser")) { 337 this._registerWindow(win); 338 } 339 Services.wm.addListener(this); 340 341 this._initialized = true; 342 } 343 344 async #onSettingsSync(event) { 345 let current = event.data?.current; 346 if (current) { 347 lazy.logConsole.debug( 348 "Update provider info due to Remote Settings sync." 
349 ); 350 this._originalProviderInfo = current; 351 this._setSearchProviderInfo(current); 352 Services.ppmm.sharedData.set( 353 SEARCH_TELEMETRY_SHARED.PROVIDER_INFO, 354 current 355 ); 356 Services.ppmm.sharedData.flush(); 357 } else { 358 lazy.logConsole.debug( 359 "Ignoring Remote Settings sync data due to missing records." 360 ); 361 } 362 Services.obs.notifyObservers(null, "search-telemetry-v2-synced"); 363 } 364 365 /** 366 * Uninitializes the TelemetryHandler and its ContentHandler. 367 */ 368 uninit() { 369 if (!this._initialized) { 370 return; 371 } 372 373 this._contentHandler.uninit(); 374 375 for (let win of Services.wm.getEnumerator("navigator:browser")) { 376 this._unregisterWindow(win); 377 } 378 Services.wm.removeListener(this); 379 380 try { 381 this._telemetrySettings.off("sync", this.#telemetrySettingsSync); 382 } catch (ex) { 383 lazy.logConsole.error( 384 "Failed to shutdown SearchSERPTelemetry Remote Settings.", 385 ex 386 ); 387 } 388 this._telemetrySettings = null; 389 this.#telemetrySettingsSync = null; 390 391 this._initialized = false; 392 } 393 394 /** 395 * Records the search source for particular browsers, in case it needs 396 * to be associated with a SERP. 397 * 398 * @param {MozBrowser} browser 399 * The browser where the search originated. 400 * @param {string} source 401 * Where the search originated from. 402 */ 403 recordBrowserSource(browser, source) { 404 this._browserSourceMap.set(browser, source); 405 } 406 407 /** 408 * Records the newtab source for particular browsers, in case it needs 409 * to be associated with a SERP. 410 * 411 * @param {MozBrowser} browser 412 * The browser where the search originated. 413 * @param {string} newtabSessionId 414 * The sessionId of the newtab session the search originated from. 
415 */ 416 recordBrowserNewtabSession(browser, newtabSessionId) { 417 this._browserNewtabSessionMap.set(browser, newtabSessionId); 418 } 419 420 /** 421 * Helper function for recording the reason for a Glean abandonment event. 422 * 423 * @param {string} impressionId 424 * The impression id for the abandonment event about to be recorded. 425 * @param {string} reason 426 * The reason the SERP is deemed abandoned. 427 * One of SearchSERPTelemetryUtils.ABANDONMENTS. 428 */ 429 recordAbandonmentTelemetry(impressionId, reason) { 430 impressionIdsWithoutEngagementsSet.delete(impressionId); 431 432 lazy.logConsole.debug( 433 `Recording an abandonment event for impression id ${impressionId} with reason: ${reason}` 434 ); 435 436 Glean.serp.abandonment.record({ 437 impression_id: impressionId, 438 reason, 439 }); 440 } 441 442 /** 443 * Handles the TabClose event received from the listeners. 444 * 445 * @param {object} event 446 * The event object provided by the listener. 447 */ 448 handleEvent(event) { 449 if (event.type != "TabClose") { 450 console.error("Received unexpected event type", event.type); 451 return; 452 } 453 454 this._browserNewtabSessionMap.delete(event.target.linkedBrowser); 455 this.stopTrackingBrowser( 456 event.target.linkedBrowser, 457 SearchSERPTelemetryUtils.ABANDONMENTS.TAB_CLOSE 458 ); 459 } 460 461 /** 462 * Test-only function, used to override the provider information, so that 463 * unit tests can set it to easy to test values. 464 * 465 * @param {Array} providerInfo 466 * See {@link https://searchfox.org/mozilla-central/search?q=search-telemetry-v2-schema.json} 467 * for type information. 468 */ 469 overrideSearchTelemetryForTests(providerInfo) { 470 let info = providerInfo ? providerInfo : this._originalProviderInfo; 471 this._contentHandler.overrideSearchTelemetryForTests(info); 472 this._setSearchProviderInfo(info); 473 } 474 475 /** 476 * Used to set the local version of the search provider information. 
477 * This automatically maps the regexps to RegExp objects so that 478 * we don't have to create a new instance each time. 479 * 480 * @param {Array} providerInfo 481 * A raw array of provider information to set. 482 */ 483 _setSearchProviderInfo(providerInfo) { 484 this.#subframeRegexps = []; 485 this._searchProviderInfo = providerInfo.map(provider => { 486 let newProvider = { 487 ...provider, 488 searchPageRegexp: new RegExp(provider.searchPageRegexp), 489 }; 490 if (provider.extraAdServersRegexps) { 491 newProvider.extraAdServersRegexps = provider.extraAdServersRegexps.map( 492 r => new RegExp(r) 493 ); 494 } 495 496 newProvider.ignoreLinkRegexps = provider.ignoreLinkRegexps?.length 497 ? provider.ignoreLinkRegexps.map(r => new RegExp(r)) 498 : []; 499 500 newProvider.nonAdsLinkRegexps = provider.nonAdsLinkRegexps?.length 501 ? provider.nonAdsLinkRegexps.map(r => new RegExp(r)) 502 : []; 503 if (provider.shoppingTab?.regexp) { 504 newProvider.shoppingTab = { 505 selector: provider.shoppingTab.selector, 506 regexp: new RegExp(provider.shoppingTab.regexp), 507 }; 508 } 509 510 newProvider.nonAdsLinkQueryParamNames = 511 provider.nonAdsLinkQueryParamNames ?? []; 512 513 newProvider.subframes = 514 provider.subframes?.map(obj => { 515 let regexp = new RegExp(obj.regexp); 516 // Also add the Regexp to the list of urls to observe. 517 this.#subframeRegexps.push(regexp); 518 return { ...obj, regexp }; 519 }) ?? 
[]; 520 521 return newProvider; 522 }); 523 this._contentHandler._searchProviderInfo = this._searchProviderInfo; 524 } 525 526 reportPageAction(info, browser) { 527 this._contentHandler._reportPageAction(info, browser); 528 } 529 530 reportPageWithAds(info, browser) { 531 this._contentHandler._reportPageWithAds(info, browser); 532 } 533 534 reportPageWithAdImpressions(info, browser) { 535 this._contentHandler._reportPageWithAdImpressions(info, browser); 536 } 537 538 async reportPageDomains(info, browser) { 539 await this._contentHandler._reportPageDomains(info, browser); 540 } 541 542 reportPageImpression(info, browser) { 543 this._contentHandler._reportPageImpression(info, browser); 544 } 545 546 /** 547 * This may start tracking a tab based on the URL. If the URL matches a search 548 * partner, and it has a code, then we'll start tracking it. This will aid 549 * determining if it is a page we should be tracking for adverts. 550 * 551 * @param {object} browser 552 * The browser associated with the page. 553 * @param {string} url 554 * The url that was loaded in the browser. 555 * @param {nsIDocShell.LoadCommand} loadType 556 * The load type associated with the page load. 
557 */ 558 updateTrackingStatus(browser, url, loadType) { 559 if ( 560 !lazy.BrowserSearchTelemetry.shouldRecordSearchCount( 561 browser.getTabBrowser() 562 ) 563 ) { 564 return; 565 } 566 let info = this._checkURLForSerpMatch(url); 567 if (!info) { 568 this._browserNewtabSessionMap.delete(browser); 569 this.stopTrackingBrowser(browser); 570 return; 571 } 572 573 let source = "unknown"; 574 if (loadType & Ci.nsIDocShell.LOAD_CMD_RELOAD) { 575 source = "reload"; 576 } else if (loadType & Ci.nsIDocShell.LOAD_CMD_HISTORY) { 577 source = "tabhistory"; 578 } else if (this._browserSourceMap.has(browser)) { 579 source = this._browserSourceMap.get(browser); 580 this._browserSourceMap.delete(browser); 581 } 582 583 let newtabSessionId; 584 if (this._browserNewtabSessionMap.has(browser)) { 585 newtabSessionId = this._browserNewtabSessionMap.get(browser); 586 // We leave the newtabSessionId in the map for this browser 587 // until we stop loading SERP pages or the tab is closed. 588 } 589 590 // Generate metadata for the SERP impression. 591 let { impressionId, impressionInfo } = this._generateImpressionInfo( 592 browser, 593 url, 594 info, 595 source 596 ); 597 598 this._reportSerpPage(info, source, url); 599 600 // For single page apps, we store the page by its original URI so the 601 // network observers can recover the browser in a context when they only 602 // have access to the originURL. 603 let urlKey = 604 info.isSPA && browser.originalURI?.spec ? 
browser.originalURI.spec : url; 605 let item = this._browserInfoByURL.get(urlKey); 606 607 if (item) { 608 item.browserTelemetryStateMap.set(browser, { 609 adsReported: false, 610 adImpressionsReported: false, 611 impressionId, 612 urlToComponentMap: null, 613 impressionInfo, 614 impressionRecorded: false, 615 searchBoxSubmitted: false, 616 categorizationInfo: null, 617 adsClicked: 0, 618 adsHidden: 0, 619 adsLoaded: 0, 620 adsVisible: 0, 621 searchQuery: info.searchQuery, 622 currentPageType: info.pageType, 623 }); 624 item.count++; 625 item.source = source; 626 item.newtabSessionId = newtabSessionId; 627 } else { 628 item = { 629 browserTelemetryStateMap: new WeakMap().set(browser, { 630 adsReported: false, 631 adImpressionsReported: false, 632 impressionId, 633 urlToComponentMap: null, 634 impressionInfo, 635 impressionRecorded: false, 636 searchBoxSubmitted: false, 637 categorizationInfo: null, 638 adsClicked: 0, 639 adsHidden: 0, 640 adsLoaded: 0, 641 adsVisible: 0, 642 searchQuery: info.searchQuery, 643 currentPageType: info.pageType, 644 }), 645 info, 646 count: 1, 647 source, 648 newtabSessionId, 649 majorVersion: parseInt(Services.appinfo.version), 650 channel: lazy.SearchUtils.MODIFIED_APP_CHANNEL, 651 region: lazy.Region.home, 652 isSPA: info.isSPA, 653 }; 654 // For single page apps, we store the page by its original URI so that 655 // network observers can recover the browser in a context when they only 656 // have the originURL to work with. 657 this._browserInfoByURL.set(urlKey, item); 658 } 659 this.#browserToItemMap.set(browser, item); 660 } 661 662 /** 663 * Determines whether or not a browser should be untracked or tracked for 664 * SERPs who have single page app behaviour. 665 * 666 * The over-arching logic: 667 * 1. Only inspect the browser if the url matches a SERP that is a SPA. 668 * 2. Recording an engagement if we're tracking the browser and we're going 669 * to another page. 670 * 3. 
Untrack the browser if we're tracking it and switching pages. 671 * 4. Track the browser if we're now on a default search page. 672 * 673 * @param {MozBrowser} browser 674 * The browser element related to the request. 675 * @param {string} url 676 * The url of the request. 677 * @param {number} loadType 678 * The loadtype of a the request. 679 */ 680 async updateTrackingSinglePageApp(browser, url, loadType) { 681 let providerInfo = this._getProviderInfoForURL(url); 682 if (!providerInfo?.pageTypeParam?.enableSPAHandling) { 683 return; 684 } 685 686 let item = this.findItemForBrowser(browser); 687 let telemetryState = item?.browserTelemetryStateMap.get(browser); 688 689 let previousSearchTerm = telemetryState?.searchQuery ?? ""; 690 let searchTerm = this.urlSearchTerms(url, providerInfo); 691 let searchTermChanged = previousSearchTerm !== searchTerm; 692 693 // Get the current and previous page types. 694 let pageType = this._getPageTypeFromUrl(url, providerInfo); 695 let previousPageType = telemetryState?.currentPageType ?? ""; 696 // If both previous and current page types are empty, they are untracked 697 // and we should do nothing. 698 if (previousPageType === "" && pageType === "") { 699 return; 700 } 701 let pageTypeChanged = previousPageType != pageType; 702 703 let browserIsTracked = !!telemetryState; 704 let isTabHistory = loadType & Ci.nsIDocShell.LOAD_CMD_HISTORY; 705 706 // Step 1: Maybe record engagement. 707 if ( 708 browserIsTracked && 709 !isTabHistory && 710 (pageTypeChanged || searchTermChanged) 711 ) { 712 let shouldRecordEngagement = false; 713 if (pageTypeChanged) { 714 shouldRecordEngagement = true; 715 } else if (searchTermChanged) { 716 // User did a new search or navigated away from the SERP. Check if it 717 // was a submission event. 
718 let actor = browser.browsingContext.currentWindowGlobal.getActor( 719 "SearchSERPTelemetry" 720 ); 721 // If we've changed to another SERP, it could have been caused by a 722 // submission event inside the content process. The event is sent to 723 // the parent and stored as `telemetryState.searchBoxSubmitted`, but if 724 // we check now, it may be too early. Instead we check with the content 725 // process directly to see if it recorded a submission event. 726 let didSubmit = await actor.sendQuery("SearchSERPTelemetry:DidSubmit"); 727 if (!telemetryState.searchBoxSubmitted && !didSubmit) { 728 shouldRecordEngagement = true; 729 } 730 } 731 732 if (shouldRecordEngagement) { 733 impressionIdsWithoutEngagementsSet.delete(telemetryState.impressionId); 734 let target = 735 providerInfo.pageTypeParam.pageTypes.find(p => p.name == pageType) 736 ?.target ?? SearchSERPTelemetryUtils.COMPONENTS.NON_ADS_LINK; 737 738 Glean.serp.engagement.record({ 739 impression_id: telemetryState.impressionId, 740 action: SearchSERPTelemetryUtils.ACTIONS.CLICKED, 741 target, 742 }); 743 lazy.logConsole.debug("Counting click:", { 744 impressionId: telemetryState.impressionId, 745 type: SearchSERPTelemetryUtils.COMPONENTS.NON_ADS_LINK, 746 URL: url, 747 }); 748 } 749 } 750 751 // Step 2: Maybe untrack the browser. 752 let shouldUntrack = false; 753 let abandonmentReason = ""; 754 755 if (browserIsTracked) { 756 // If we have to untrack it, it might be due to the user using the 757 // back/forward button. 
758 if (isTabHistory) { 759 shouldUntrack = true; 760 abandonmentReason = SearchSERPTelemetryUtils.ABANDONMENTS.NAVIGATION; 761 } else if (searchTermChanged || pageTypeChanged) { 762 shouldUntrack = true; 763 } 764 } 765 766 if (shouldUntrack) { 767 let actor = browser.browsingContext.currentWindowGlobal.getActor( 768 "SearchSERPTelemetry" 769 ); 770 actor.sendAsyncMessage("SearchSERPTelemetry:StopTrackingDocument"); 771 this.stopTrackingBrowser(browser, abandonmentReason); 772 browserIsTracked = false; 773 } 774 775 // Step 3: Maybe track the browser. 776 if ( 777 this._isTrackablePageType(pageType, providerInfo) && 778 !browserIsTracked 779 ) { 780 this.updateTrackingStatus(browser, url, loadType); 781 let actor = browser.browsingContext.currentWindowGlobal.getActor( 782 "SearchSERPTelemetry" 783 ); 784 actor.sendAsyncMessage("SearchSERPTelemetry:WaitForSPAPageLoad"); 785 } 786 } 787 788 /** 789 * Determines the page type (ex: 'web', 'shopping' or 'images') by extracting 790 * a param from the url. 791 * 792 * @param {string} url 793 * The url for the request. 794 * @param {object} providerInfo 795 * The providerInfo associated with the url. 796 * @returns {string} 797 * The page type or if none is found, an empty string. 798 */ 799 _getPageTypeFromUrl(url, providerInfo) { 800 let pageTypeParam = providerInfo?.pageTypeParam; 801 if (!pageTypeParam) { 802 return ""; 803 } 804 805 let parsedUrl = new URL(url); 806 let paramValue; 807 for (let key of pageTypeParam.keys) { 808 paramValue = parsedUrl.searchParams.get(key); 809 if (paramValue) { 810 for (let pageType of pageTypeParam.pageTypes) { 811 if (pageType.values.includes(paramValue)) { 812 return pageType.name; 813 } 814 } 815 } 816 } 817 818 let defaultConfig = pageTypeParam.pageTypes.find( 819 pageType => pageType.isDefault 820 ); 821 if (defaultConfig) { 822 return defaultConfig.name; 823 } 824 825 return ""; 826 } 827 828 /** 829 * Determines whether we need to track a given page type. 
830 * 831 * @param {string} pageType 832 * The page type associated with a url (ex: 'web', 'shopping' or 'images'). 833 * @param {object} providerInfo 834 * The providerInfo associated with the url. 835 * @returns {boolean} 836 * Whether we should track the given page type. 837 */ 838 _isTrackablePageType(pageType, providerInfo) { 839 if (!providerInfo?.pageTypeParam || !pageType) { 840 return false; 841 } 842 843 let config = providerInfo.pageTypeParam.pageTypes.find( 844 pageTypeConfig => pageTypeConfig.name == pageType 845 ); 846 return config?.enabled ?? false; 847 } 848 849 /** 850 * Stops tracking of a tab, for example the tab has loaded a different URL. 851 * Also records a Glean abandonment event if appropriate. 852 * 853 * @param {object} browser The browser associated with the tab to stop being 854 * tracked. 855 * @param {string} [abandonmentReason] 856 * An optional parameter that specifies why the browser is deemed abandoned. 857 * The reason will be recorded as part of Glean abandonment telemetry. 858 * One of SearchSERPTelemetryUtils.ABANDONMENTS. 
859 */ 860 stopTrackingBrowser(browser, abandonmentReason) { 861 for (let [url, item] of this._browserInfoByURL) { 862 if (item.browserTelemetryStateMap.has(browser)) { 863 let telemetryState = item.browserTelemetryStateMap.get(browser); 864 let impressionId = telemetryState.impressionId; 865 866 if ( 867 telemetryState.impressionInfo && 868 !telemetryState.impressionRecorded 869 ) { 870 this._contentHandler._recordFallbackPageImpression(telemetryState); 871 } 872 873 if (impressionIdsWithoutEngagementsSet.has(impressionId)) { 874 this.recordAbandonmentTelemetry(impressionId, abandonmentReason); 875 } 876 877 if ( 878 lazy.SERPCategorization.enabled && 879 telemetryState.categorizationInfo 880 ) { 881 lazy.SERPCategorizationEventScheduler.sendCallback(browser); 882 } 883 884 item.browserTelemetryStateMap.delete(browser); 885 item.count--; 886 } 887 888 if (!item.count) { 889 this._browserInfoByURL.delete(url); 890 } 891 } 892 this.#browserToItemMap.delete(browser); 893 } 894 895 /** 896 * Calculate how close two urls are in equality. 897 * 898 * The scoring system: 899 * - If the URLs look exactly the same, including the ordering of query 900 * parameters, the score is Infinity. 901 * - If the origin is the same, the score is increased by 1. Otherwise the 902 * score is 0. 903 * - If the path is the same, the score is increased by 1. 904 * - For each query parameter, if the key exists the score is increased by 1. 905 * Likewise if the query parameter values match. 906 * - If the hash is the same, the score is increased by 1. This includes if 907 * the hash is missing in both URLs. 908 * 909 * @param {URL} url1 910 * Url to compare. 911 * @param {URL} url2 912 * Other url to compare. Ordering shouldn't matter. 913 * @param {object} [matchOptions] 914 * Options for checking equality. 915 * @param {boolean} [matchOptions.path] 916 * Whether the path must match. Default to false. 
917 * @param {boolean} [matchOptions.paramValues] 918 * Whether the values of the query parameters must match if the query 919 * parameter key exists in the other. Defaults to false. 920 * @returns {number} 921 * A score of how closely the two URLs match. Returns 0 if there is no 922 * match or the equality check failed for an enabled match option. 923 */ 924 compareUrls(url1, url2, matchOptions = {}) { 925 // In case of an exact match, well, that's an obvious winner. 926 if (url1.href == url2.href) { 927 return Infinity; 928 } 929 930 // Each step we get closer to the two URLs being the same, we increase the 931 // score. The consumer of this method will use these scores to see which 932 // of the URLs is the best match. 933 let score = 0; 934 if (url1.origin == url2.origin) { 935 ++score; 936 if (url1.pathname == url2.pathname) { 937 ++score; 938 for (let [key1, value1] of url1.searchParams) { 939 // Let's not fuss about the ordering of search params, since the 940 // score effect will solve that. 941 if (url2.searchParams.has(key1)) { 942 ++score; 943 if (url2.searchParams.get(key1) == value1) { 944 ++score; 945 } else if (matchOptions.paramValues) { 946 return 0; 947 } 948 } 949 } 950 if (url1.hash == url2.hash) { 951 ++score; 952 } 953 } else if (matchOptions.path) { 954 return 0; 955 } 956 } 957 return score; 958 } 959 960 /** 961 * Extracts the search terms from the URL based on the provider info. 962 * 963 * @param {string} url 964 * The URL to inspect. 965 * @param {object} providerInfo 966 * The providerInfo associated with the URL. 967 * @returns {string} 968 * The search term or if none is found, a blank string. 
969 */ 970 urlSearchTerms(url, providerInfo) { 971 if (providerInfo?.queryParamNames?.length) { 972 let { searchParams } = new URL(url); 973 for (let queryParamName of providerInfo.queryParamNames) { 974 let value = searchParams.get(queryParamName); 975 if (value) { 976 return value; 977 } 978 } 979 } 980 return ""; 981 } 982 983 /** 984 * Finds any SERP data associated with the given browser. 985 * 986 * @param {object} browser 987 * @returns {object} 988 */ 989 findItemForBrowser(browser) { 990 return this.#browserToItemMap.get(browser); 991 } 992 993 /** 994 * Parts of the URL, like search params and hashes, may be mutated by scripts 995 * on a page we're tracking. Since we don't want to keep track of that 996 * ourselves in order to keep the list of browser objects a weak-referenced 997 * set, we do optional fuzzy matching of URLs to fetch the most relevant item 998 * that contains tracking information. 999 * 1000 * @param {string} urlString URL to fetch the tracking data for. 1001 * @returns {object} Map containing the following members: 1002 * - {WeakMap} browsers 1003 * Map of browser elements that belong to `url` and their ad report state. 1004 * - {object} info 1005 * Info dictionary as returned by `_checkURLForSerpMatch`. 1006 * - {number} count 1007 * The number of browser element we can most accurately tell we're 1008 * tracking, since they're inside a WeakMap. 1009 */ 1010 _findBrowserItemForURL(urlString) { 1011 let url = URL.parse(urlString); 1012 if (!url) { 1013 return null; 1014 } 1015 1016 let item; 1017 let currentBestMatch = 0; 1018 for (let [trackingURL, candidateItem] of this._browserInfoByURL) { 1019 if (currentBestMatch === Infinity) { 1020 break; 1021 } 1022 // Make sure to cache the parsed URL object, since there's no reason to 1023 // do it twice. 
1024 trackingURL = 1025 candidateItem._trackingURL || 1026 (candidateItem._trackingURL = URL.parse(trackingURL)); 1027 if (!trackingURL) { 1028 continue; 1029 } 1030 let score = this.compareUrls(url, trackingURL); 1031 if (score > currentBestMatch) { 1032 item = candidateItem; 1033 currentBestMatch = score; 1034 } 1035 } 1036 1037 return item; 1038 } 1039 1040 // nsIWindowMediatorListener 1041 1042 /** 1043 * This is called when a new window is opened, and handles registration of 1044 * that window if it is a browser window. 1045 * 1046 * @param {nsIAppWindow} appWin The xul window that was opened. 1047 */ 1048 onOpenWindow(appWin) { 1049 // Bug 1954851 - domWindow returns a proxy interface that references the 1050 // outer window and doesn't currently expose the real API surface in 1051 // Typescript. 1052 let win = /** @type {ChromeWindow} */ (appWin.docShell.domWindow); 1053 win.addEventListener( 1054 "load", 1055 () => { 1056 if ( 1057 win.document.documentElement.getAttribute("windowtype") != 1058 "navigator:browser" 1059 ) { 1060 return; 1061 } 1062 1063 this._registerWindow(win); 1064 }, 1065 { once: true } 1066 ); 1067 } 1068 1069 /** 1070 * Listener that is called when a window is closed, and handles deregistration of 1071 * that window if it is a browser window. 1072 * 1073 * @param {nsIAppWindow} appWin The xul window that was closed. 1074 */ 1075 onCloseWindow(appWin) { 1076 // Bug 1954851 - domWindow returns a proxy interface that references the 1077 // outer window and doesn't currently expose the real API surface in 1078 // Typescript. 1079 let win = /** @type {ChromeWindow} */ (appWin.docShell.domWindow); 1080 1081 if ( 1082 win.document.documentElement.getAttribute("windowtype") != 1083 "navigator:browser" 1084 ) { 1085 return; 1086 } 1087 1088 this._unregisterWindow(win); 1089 } 1090 1091 /** 1092 * Determines if a URL to be in this SERP's subframes. 
1093 * 1094 * @param {string} url 1095 */ 1096 urlIsKnownSERPSubframe(url) { 1097 if (url) { 1098 for (let regexp of this.#subframeRegexps) { 1099 if (regexp.test(url)) { 1100 return true; 1101 } 1102 } 1103 } 1104 return false; 1105 } 1106 1107 /** 1108 * Adds event listeners for the window and registers it with the content handler. 1109 * 1110 * @param {object} win The window to register. 1111 */ 1112 _registerWindow(win) { 1113 win.gBrowser.tabContainer.addEventListener("TabClose", this); 1114 } 1115 1116 /** 1117 * Removes event listeners for the window and unregisters it with the content 1118 * handler. 1119 * 1120 * @param {object} win The window to unregister. 1121 */ 1122 _unregisterWindow(win) { 1123 for (let tab of win.gBrowser.tabs) { 1124 this.stopTrackingBrowser( 1125 tab.linkedBrowser, 1126 SearchSERPTelemetryUtils.ABANDONMENTS.WINDOW_CLOSE 1127 ); 1128 } 1129 1130 win.gBrowser.tabContainer.removeEventListener("TabClose", this); 1131 } 1132 1133 /** 1134 * Searches for provider information for a given url. 1135 * 1136 * @param {string} url 1137 * The url to match for a provider. 1138 * @returns {?ProviderInfo} 1139 * Returns a provider or undefined if no provider was found for the url. 1140 */ 1141 _getProviderInfoForURL(url) { 1142 return this._searchProviderInfo?.find(info => 1143 info.searchPageRegexp.test(url) 1144 ); 1145 } 1146 1147 /** 1148 * Checks to see if a url is a search partner location, and determines the 1149 * provider and codes used. 1150 * 1151 * @param {string} url The url to match. 1152 * @returns {null|object} Returns null if there is no match found. Otherwise, 1153 * returns an object of strings for provider, code, type, search query used, 1154 * whether it's a single page app, page type and search mode. 
1155 */ 1156 _checkURLForSerpMatch(url) { 1157 let searchProviderInfo = this._getProviderInfoForURL(url); 1158 if (!searchProviderInfo) { 1159 return null; 1160 } 1161 1162 let queries = new URL(url).searchParams; 1163 queries.forEach((v, k) => { 1164 queries.set(k.toLowerCase(), v); 1165 }); 1166 1167 let isSPA = !!searchProviderInfo.pageTypeParam?.enableSPAHandling; 1168 let pageType; 1169 if (isSPA) { 1170 pageType = this._getPageTypeFromUrl(url, searchProviderInfo); 1171 let isValidPageType = this._isTrackablePageType( 1172 pageType, 1173 searchProviderInfo 1174 ); 1175 1176 if (!isValidPageType) { 1177 return null; 1178 } 1179 } 1180 1181 // Some URLs can match provider info but also be the provider's homepage 1182 // instead of a SERP. 1183 // e.g. https://example.com/ vs. https://example.com/?foo=bar 1184 // Look for the presence of the query parameter that contains a search term. 1185 let hasQuery = false; 1186 let searchQuery = ""; 1187 for (let queryParamName of searchProviderInfo.queryParamNames) { 1188 searchQuery = queries.get(queryParamName); 1189 if (searchQuery) { 1190 hasQuery = true; 1191 break; 1192 } 1193 } 1194 if (!hasQuery) { 1195 return null; 1196 } 1197 // Default to organic to simplify things. 1198 // We override type in the sap cases. 1199 let type = "organic"; 1200 let code; 1201 if (searchProviderInfo.codeParamName) { 1202 code = queries.get(searchProviderInfo.codeParamName.toLowerCase()); 1203 if (code) { 1204 // The code is only included if it matches one of the specific ones. 
1205 if (searchProviderInfo.taggedCodes.includes(code)) { 1206 type = "tagged"; 1207 if ( 1208 searchProviderInfo.followOnParamNames && 1209 searchProviderInfo.followOnParamNames.some(p => queries.has(p)) 1210 ) { 1211 type += "-follow-on"; 1212 } 1213 } else if (searchProviderInfo.organicCodes.includes(code)) { 1214 type = "organic"; 1215 } else if (searchProviderInfo.expectedOrganicCodes?.includes(code)) { 1216 code = "none"; 1217 } else { 1218 code = "other"; 1219 } 1220 } else if (searchProviderInfo.followOnCookies) { 1221 // Especially Bing requires lots of extra work related to cookies. 1222 for (let followOnCookie of searchProviderInfo.followOnCookies) { 1223 if (followOnCookie.extraCodeParamName) { 1224 let eCode = queries.get( 1225 followOnCookie.extraCodeParamName.toLowerCase() 1226 ); 1227 if ( 1228 !eCode || 1229 !followOnCookie.extraCodePrefixes.some(p => eCode.startsWith(p)) 1230 ) { 1231 continue; 1232 } 1233 } 1234 1235 // If this cookie is present, it's probably an SAP follow-on. 1236 // This might be an organic follow-on in the same session, but there 1237 // is no way to tell the difference. 1238 for (let cookie of Services.cookies.getCookiesFromHost( 1239 followOnCookie.host, 1240 {} 1241 )) { 1242 if (cookie.name != followOnCookie.name) { 1243 continue; 1244 } 1245 1246 // Cookie values may take the form of "foo=bar&baz=1". 
1247 let cookieItems = cookie.value 1248 ?.split("&") 1249 .map(p => p.split("=")) 1250 .filter(p => p[0] == followOnCookie.codeParamName); 1251 if (cookieItems.length == 1) { 1252 let cookieValue = cookieItems[0][1]; 1253 if (searchProviderInfo.taggedCodes.includes(cookieValue)) { 1254 type = "tagged-follow-on"; 1255 code = cookieValue; 1256 break; 1257 } 1258 } 1259 } 1260 } 1261 } 1262 } 1263 1264 /** @type {?string} */ 1265 let searchMode; 1266 if (searchProviderInfo.searchMode) { 1267 for (let [param, paramMode] of Object.entries( 1268 searchProviderInfo.searchMode 1269 )) { 1270 if (queries.has(param)) { 1271 searchMode = paramMode; 1272 } 1273 } 1274 } 1275 1276 return { 1277 provider: searchProviderInfo.telemetryId, 1278 type, 1279 code, 1280 searchQuery, 1281 isSPA, 1282 pageType, 1283 searchMode, 1284 }; 1285 } 1286 1287 /** 1288 * Logs telemetry for a search provider visit. 1289 * 1290 * @param {object} info The search provider information. 1291 * @param {string} info.provider The name of the provider. 1292 * @param {string} info.type The type of search. 1293 * @param {string} [info.code] The code for the provider. 1294 * @param {string} source Where the search originated from. 1295 * @param {string} url The url that was matched (for debug logging only). 1296 */ 1297 _reportSerpPage(info, source, url) { 1298 let payload = `${info.provider}:${info.type}:${info.code || "none"}`; 1299 let name = source.replace(/_([a-z])/g, (m, p) => p.toUpperCase()); 1300 Glean.browserSearchContent[name][payload].add(1); 1301 lazy.logConsole.debug("Impression:", payload, url); 1302 } 1303 1304 /** 1305 * @typedef {object} ImpressionInfo 1306 * @property {string} provider The name of the provider for the impression. 1307 * @property {boolean} tagged Whether the search has partner tags. 1308 * @property {string} source The search access point. 1309 * @property {boolean} isShoppingPage Whether the page is shopping. 
1310 * @property {boolean} isPrivate Whether the SERP is in a private tab. 1311 * @property {boolean} isSignedIn Whether the user is signed on to the SERP. 1312 */ 1313 1314 /** 1315 * @typedef {object} ImpressionInfoResult 1316 * @property {string | null} impressionId The unique id of the impression. 1317 * @property {ImpressionInfo | null} impressionInfo General impresison info. 1318 */ 1319 1320 /** 1321 * If applicable for a tracked SERP provider, generates a unique id and 1322 * caches information that shouldn't be changed during the lifetime of the 1323 * impression. 1324 * 1325 * @param {MozBrowser} browser 1326 * The browser associated with the SERP. 1327 * @param {string} url 1328 * The URL of the SERP. 1329 * @param {object} info 1330 * General information about the tracked SERP. 1331 * @param {string} source 1332 * The originator of the SERP load. 1333 * @returns {ImpressionInfoResult} The result when attempting to generate 1334 * impression info. 1335 */ 1336 _generateImpressionInfo(browser, url, info, source) { 1337 let searchProviderInfo = this._getProviderInfoForURL(url); 1338 let data = { 1339 impressionId: null, 1340 impressionInfo: null, 1341 }; 1342 1343 if (!searchProviderInfo?.components?.length) { 1344 return data; 1345 } 1346 1347 // The UUID generated by Services.uuid contains leading and trailing braces. 1348 // Need to trim them first. 1349 data.impressionId = Services.uuid.generateUUID().toString().slice(1, -1); 1350 impressionIdsWithoutEngagementsSet.add(data.impressionId); 1351 1352 // If it's a SERP but doesn't have a browser source, the source might be 1353 // from something that happened in content. 
1354 if (this.#browserContentSourceMap.has(browser)) { 1355 source = this.#browserContentSourceMap.get(browser); 1356 this.#browserContentSourceMap.delete(browser); 1357 } 1358 1359 let partnerCode = ""; 1360 if (info.code != "none" && info.code != null) { 1361 partnerCode = info.code; 1362 } 1363 1364 let isShoppingPage = false; 1365 if (searchProviderInfo.shoppingTab?.regexp) { 1366 isShoppingPage = searchProviderInfo.shoppingTab.regexp.test(url); 1367 } 1368 1369 let isPrivate = 1370 browser.contentPrincipal.originAttributes.privateBrowsingId > 0; 1371 1372 let isSignedIn = false; 1373 // Signed-in status should not be recorded when the client is in a private 1374 // window. 1375 if (!isPrivate && searchProviderInfo.signedInCookies) { 1376 isSignedIn = searchProviderInfo.signedInCookies.some(cookieObj => { 1377 return Services.cookies 1378 .getCookiesFromHost( 1379 cookieObj.host, 1380 browser.contentPrincipal.originAttributes 1381 ) 1382 .some(c => c.name == cookieObj.name); 1383 }); 1384 } 1385 1386 data.impressionInfo = { 1387 provider: info.provider, 1388 tagged: info.type.startsWith("tagged"), 1389 partnerCode, 1390 source, 1391 searchMode: info.searchMode, 1392 isShoppingPage, 1393 isPrivate, 1394 isSignedIn, 1395 }; 1396 1397 return data; 1398 } 1399 } 1400 1401 /** 1402 * ContentHandler deals with handling telemetry of the content within a tab - 1403 * when ads detected and when they are selected. 1404 */ 1405 class ContentHandler { 1406 /** @type {ProviderInfo[]} */ 1407 _searchProviderInfo = null; 1408 1409 /** 1410 * Constructor. 1411 * 1412 * @param {object} options 1413 * The options for the handler. 1414 * @param {Map} options.browserInfoByURL 1415 * The map of urls from TelemetryHandler. 1416 * @param {(urlString: string) => object} options.findBrowserItemForURL 1417 * The function for finding a browser item for the URL. 
1418 * @param {(url: string) => null|object} options.checkURLForSerpMatch 1419 * The function for checking a URL for a SERP match. 1420 * @param {(browser: object) => object} options.findItemForBrowser 1421 * The function for finding an item for the browser. 1422 * @param {(url: string) => boolean} options.urlIsKnownSERPSubframe 1423 * The function for determining if a URL is a known SERP sub frame. 1424 */ 1425 constructor(options) { 1426 this._browserInfoByURL = options.browserInfoByURL; 1427 this._findBrowserItemForURL = options.findBrowserItemForURL; 1428 this._checkURLForSerpMatch = options.checkURLForSerpMatch; 1429 this._findItemForBrowser = options.findItemForBrowser; 1430 this._urlIsKnownSERPSubframe = options.urlIsKnownSERPSubframe; 1431 } 1432 1433 /** 1434 * Initializes the content handler. This will also set up the shared data that is 1435 * shared with the SearchTelemetryChild actor. 1436 * 1437 * @param {Array} providerInfo 1438 * The provider information for the search telemetry to record. 1439 */ 1440 init(providerInfo) { 1441 Services.ppmm.sharedData.set( 1442 SEARCH_TELEMETRY_SHARED.PROVIDER_INFO, 1443 providerInfo 1444 ); 1445 Services.ppmm.sharedData.set( 1446 SEARCH_TELEMETRY_SHARED.LOAD_TIMEOUT, 1447 ADLINK_CHECK_TIMEOUT_MS 1448 ); 1449 Services.ppmm.sharedData.set( 1450 SEARCH_TELEMETRY_SHARED.SPA_LOAD_TIMEOUT, 1451 SPA_ADLINK_CHECK_TIMEOUT_MS 1452 ); 1453 1454 Services.obs.addObserver(this, "http-on-examine-response"); 1455 Services.obs.addObserver(this, "http-on-examine-cached-response"); 1456 } 1457 1458 /** 1459 * Uninitializes the content handler. 1460 */ 1461 uninit() { 1462 Services.obs.removeObserver(this, "http-on-examine-response"); 1463 Services.obs.removeObserver(this, "http-on-examine-cached-response"); 1464 } 1465 1466 /** 1467 * Test-only function to override the search provider information for use 1468 * with tests. Passes it to the SearchTelemetryChild actor. 
1469 * 1470 * @param {object} providerInfo @see SEARCH_PROVIDER_INFO for type information. 1471 */ 1472 overrideSearchTelemetryForTests(providerInfo) { 1473 Services.ppmm.sharedData.set("SearchTelemetry:ProviderInfo", providerInfo); 1474 } 1475 1476 observe(aSubject, aTopic) { 1477 switch (aTopic) { 1478 case "http-on-examine-response": 1479 case "http-on-examine-cached-response": 1480 this.observeActivity(aSubject); 1481 break; 1482 } 1483 } 1484 1485 /** 1486 * Listener that observes network activity, so that we can determine if a link 1487 * from a search provider page was followed, and if then if that link was an 1488 * ad click or not. 1489 * 1490 * @param {nsIChannel} channel The channel that generated the activity. 1491 */ 1492 observeActivity(channel) { 1493 if (!(channel instanceof Ci.nsIChannel)) { 1494 return; 1495 } 1496 1497 // We augment the channel wrapper with additional data specific to SERP 1498 // telemetry. 1499 /** @type {TrackedChannel} */ 1500 let wrappedChannel = /** @type {any} */ (ChannelWrapper.get(channel)); 1501 1502 // The channel we're observing might be a redirect of a channel we've 1503 // observed before. 1504 if (wrappedChannel._adClickRecorded) { 1505 lazy.logConsole.debug("Ad click already recorded"); 1506 return; 1507 } 1508 1509 Services.tm.dispatchToMainThread(() => { 1510 // We suspect that No Content (204) responses are used to transfer or 1511 // update beacons. They used to lead to double-counting ad-clicks, so let's 1512 // ignore them. 1513 if (wrappedChannel.statusCode == 204) { 1514 lazy.logConsole.debug("Ignoring activity from ambiguous responses"); 1515 return; 1516 } 1517 1518 // The wrapper is consistent across redirects, so we can use it to track state. 
1519 let originURL = wrappedChannel.originURI && wrappedChannel.originURI.spec; 1520 if (!originURL) { 1521 return; 1522 } 1523 1524 let eligibleSubframeUrl = this.#getSerpUrlFromPossibleSubframeUrl( 1525 originURL, 1526 wrappedChannel 1527 ); 1528 let item = this._findBrowserItemForURL(eligibleSubframeUrl || originURL); 1529 if (!item) { 1530 return; 1531 } 1532 1533 let url = wrappedChannel.finalURL; 1534 1535 let providerInfo = item.info.provider; 1536 let info = this._searchProviderInfo.find(provider => { 1537 return provider.telemetryId == providerInfo; 1538 }); 1539 1540 // If an error occurs with Glean SERP telemetry logic, avoid 1541 // disrupting legacy telemetry. 1542 try { 1543 this.#maybeRecordSERPTelemetry(wrappedChannel, item, info); 1544 } catch (ex) { 1545 lazy.logConsole.error(ex); 1546 } 1547 1548 if (!info.extraAdServersRegexps?.some(regex => regex.test(url))) { 1549 return; 1550 } 1551 1552 try { 1553 let name = item.source.replace(/_([a-z])/g, (m, p) => p.toUpperCase()); 1554 Glean.browserSearchAdclicks[name][ 1555 `${info.telemetryId}:${item.info.type}` 1556 ].add(1); 1557 wrappedChannel._adClickRecorded = true; 1558 if (item.newtabSessionId) { 1559 Glean.newtabSearchAd.click.record({ 1560 newtab_visit_id: item.newtabSessionId, 1561 search_access_point: item.source, 1562 is_follow_on: item.info.type.endsWith("follow-on"), 1563 is_tagged: item.info.type.startsWith("tagged"), 1564 telemetry_id: item.info.provider, 1565 }); 1566 } 1567 1568 lazy.logConsole.debug("Counting ad click in page for:", { 1569 source: item.source, 1570 originURL, 1571 URL: url, 1572 }); 1573 } catch (e) { 1574 console.error(e); 1575 } 1576 }); 1577 } 1578 1579 /** 1580 * Checks if a request should record an ad click if it can be traced to a 1581 * browser containing an observed SERP. 1582 * 1583 * @param {TrackedChannel} wrappedChannel 1584 * The wrapped channel. 1585 * @param {object} item 1586 * The browser item associated with the origin URL of the request. 
1587 * @param {object} info 1588 * The search provider info associated with the item. 1589 */ 1590 #maybeRecordSERPTelemetry(wrappedChannel, item, info) { 1591 if (wrappedChannel._recordedClick) { 1592 lazy.logConsole.debug("Click already recorded."); 1593 return; 1594 } 1595 1596 let originURL = wrappedChannel.originURI?.spec; 1597 let url = wrappedChannel.finalURL; 1598 1599 if (info.ignoreLinkRegexps.some(r => r.test(url))) { 1600 lazy.logConsole.debug("Ignore url."); 1601 return; 1602 } 1603 1604 // Some channels re-direct by loading pages that return 200. The result 1605 // is the channel will have an originURL that changes from the SERP to 1606 // either a nonAdsRegexp or an extraAdServersRegexps. This is typical 1607 // for loading a page in a new tab. The channel will have changed so any 1608 // properties attached to them to record state (e.g. _recordedClick) 1609 // won't be present. 1610 if ( 1611 info.nonAdsLinkRegexps.some(r => r.test(originURL)) || 1612 info.extraAdServersRegexps.some(r => r.test(originURL)) 1613 ) { 1614 lazy.logConsole.debug("Expecting redirect."); 1615 return; 1616 } 1617 1618 // A click event is recorded if a user loads a resource from an 1619 // originURL that is a SERP. 1620 // 1621 // Typically, we only want top level loads containing documents to avoid 1622 // recording any event on an in-page resource a SERP might load 1623 // (e.g. CSS files). 1624 // 1625 // The exception to this is if a subframe loads a resource that matches 1626 // a non ad link. Some SERPs encode non ad search results with a URL 1627 // that gets loaded into an iframe, which then tells the container of 1628 // the iframe to change the location of the page. 
1629 if ( 1630 wrappedChannel.channel.isDocument && 1631 (wrappedChannel.channel.loadInfo.isTopLevelLoad || 1632 info.nonAdsLinkRegexps.some(r => r.test(url))) 1633 ) { 1634 let browser = /** @type {MozBrowser} */ (wrappedChannel.browserElement); 1635 1636 // If the load is from history, don't record an event. 1637 if ( 1638 browser?.browsingContext.webProgress?.loadType & 1639 Ci.nsIDocShell.LOAD_CMD_HISTORY 1640 ) { 1641 lazy.logConsole.debug("Ignoring load from history"); 1642 return; 1643 } 1644 1645 // Step 1: Check if the browser associated with the request was a 1646 // tracked SERP. 1647 let start = ChromeUtils.now(); 1648 let telemetryState; 1649 let isFromNewtab = false; 1650 if (item.browserTelemetryStateMap.has(browser)) { 1651 // If the map contains the browser, then it means that the request is 1652 // the SERP is going from one page to another. We know this because 1653 // previous conditions prevent non-top level loads from occuring here. 1654 telemetryState = item.browserTelemetryStateMap.get(browser); 1655 } else if (browser) { 1656 // Alternatively, it could be the case that the request is occuring in 1657 // a new tab but was triggered by one of the browsers in the state map. 1658 // If only one browser exists in the state map, it must be that one. 1659 if (item.count === 1) { 1660 let sourceBrowsers = ChromeUtils.nondeterministicGetWeakMapKeys( 1661 item.browserTelemetryStateMap 1662 ); 1663 if (sourceBrowsers?.length) { 1664 telemetryState = item.browserTelemetryStateMap.get( 1665 sourceBrowsers[0] 1666 ); 1667 } 1668 } else if (item.count > 1) { 1669 // If the count is more than 1, then multiple open SERPs contain the 1670 // same search term, so try to find the specific browser that opened 1671 // the request. 1672 let tabBrowser = browser.getTabBrowser(); 1673 let tab = tabBrowser.getTabForBrowser(browser).openerTab; 1674 // A tab will not always have an openerTab, as first tabs in new 1675 // windows don't have an openerTab. 
1676 // Bug 1867582: We should also handle the case where multiple tabs 1677 // contain the same search term. 1678 if (tab) { 1679 telemetryState = item.browserTelemetryStateMap.get( 1680 tab.linkedBrowser 1681 ); 1682 } 1683 } 1684 if (telemetryState) { 1685 isFromNewtab = true; 1686 } 1687 } 1688 1689 lazy.logConsole.debug("Telemetry state:", telemetryState); 1690 1691 // Step 2: If we have telemetryState, the browser object must be 1692 // associated with another browser that is tracked. Try to find the 1693 // component type on the SERP responsible for the request. 1694 // Exceptions: 1695 // - If a searchbox was used to initiate the load, don't record another 1696 // engagement because the event was logged elsewhere. 1697 // - If the ad impression hasn't been recorded yet, we have no way of 1698 // knowing precisely what kind of component was selected. 1699 let isSerp = false; 1700 if ( 1701 telemetryState && 1702 telemetryState.adImpressionsReported && 1703 !telemetryState.searchBoxSubmitted 1704 ) { 1705 if (info.searchPageRegexp?.test(originURL)) { 1706 isSerp = true; 1707 } 1708 1709 let startFindComponent = ChromeUtils.now(); 1710 let parsedUrl = new URL(url); 1711 1712 // Organic links may contain query param values mapped to links shown 1713 // on the SERP at page load. If a stored component depends on that 1714 // value, we need to be able to recover it or else we'll always consider 1715 // it a non_ads_link. 1716 if ( 1717 info.nonAdsLinkQueryParamNames.length && 1718 info.nonAdsLinkRegexps.some(r => r.test(url)) 1719 ) { 1720 for (let key of info.nonAdsLinkQueryParamNames) { 1721 let paramValue = parsedUrl.searchParams.get(key); 1722 if (paramValue) { 1723 let newParsedUrl = /^https?:\/\//.test(paramValue) 1724 ? URL.parse(paramValue) 1725 : URL.parse(paramValue, parsedUrl.origin); 1726 if (newParsedUrl) { 1727 parsedUrl = newParsedUrl; 1728 break; 1729 } 1730 } 1731 } 1732 } 1733 1734 // Determine the component type of the link. 
1735 let type; 1736 for (let [ 1737 storedUrl, 1738 componentType, 1739 ] of telemetryState.urlToComponentMap.entries()) { 1740 // The URL we're navigating to may have more query parameters if 1741 // the provider adds query parameters when the user clicks on a link. 1742 // On the other hand, the URL we are navigating to may have have 1743 // fewer query parameters because of query param stripping. 1744 // Thus, if a query parameter is missing, a match can still be made 1745 // provided keys that exist in both URLs contain equal values. 1746 let score = SearchSERPTelemetry.compareUrls(storedUrl, parsedUrl, { 1747 paramValues: true, 1748 path: true, 1749 }); 1750 if (score) { 1751 type = componentType; 1752 break; 1753 } 1754 } 1755 ChromeUtils.addProfilerMarker( 1756 "SearchSERPTelemetry._observeActivity", 1757 startFindComponent, 1758 "Find component for URL" 1759 ); 1760 1761 // If no component was found, it's possible the link was added after 1762 // components were categorized. 1763 if (!type) { 1764 let isAd = info.extraAdServersRegexps?.some(regex => regex.test(url)); 1765 type = isAd 1766 ? SearchSERPTelemetryUtils.COMPONENTS.AD_UNCATEGORIZED 1767 : SearchSERPTelemetryUtils.COMPONENTS.NON_ADS_LINK; 1768 } 1769 1770 if ( 1771 type == SearchSERPTelemetryUtils.COMPONENTS.REFINED_SEARCH_BUTTONS 1772 ) { 1773 SearchSERPTelemetry.setBrowserContentSource( 1774 browser, 1775 SearchSERPTelemetryUtils.INCONTENT_SOURCES.REFINE_ON_SERP 1776 ); 1777 } else if (isSerp && isFromNewtab) { 1778 SearchSERPTelemetry.setBrowserContentSource( 1779 browser, 1780 SearchSERPTelemetryUtils.INCONTENT_SOURCES.OPENED_IN_NEW_TAB 1781 ); 1782 } 1783 1784 // Step 3: Record the engagement. 
1785 impressionIdsWithoutEngagementsSet.delete(telemetryState.impressionId); 1786 if (AD_COMPONENTS.includes(type)) { 1787 telemetryState.adsClicked += 1; 1788 } 1789 Glean.serp.engagement.record({ 1790 impression_id: telemetryState.impressionId, 1791 action: SearchSERPTelemetryUtils.ACTIONS.CLICKED, 1792 target: type, 1793 }); 1794 lazy.logConsole.debug("Counting click:", { 1795 impressionId: telemetryState.impressionId, 1796 type, 1797 URL: url, 1798 }); 1799 // Prevent re-directed channels from being examined more than once. 1800 wrappedChannel._recordedClick = true; 1801 } 1802 ChromeUtils.addProfilerMarker( 1803 "SearchSERPTelemetry._observeActivity", 1804 start, 1805 "Maybe record user engagement." 1806 ); 1807 } 1808 } 1809 1810 /** 1811 * Checks if the url associated with a request is actually coming from a 1812 * subframe within a SERP. If so, try to find the best url associated with 1813 * the frame. 1814 * 1815 * @param {string} originURL 1816 * The url associated with the request. 1817 * @param {ChannelWrapper} wrappedChannel 1818 * The wrapped channel. 1819 * @returns {string?} 1820 * The url associated with the subframe. 1821 */ 1822 #getSerpUrlFromPossibleSubframeUrl(originURL, wrappedChannel) { 1823 if (!this._urlIsKnownSERPSubframe(originURL)) { 1824 return null; 1825 } 1826 1827 // The sponsored link could be opened in a new tab, in which case the 1828 // browser URI may not match a SERP. Thus, try to find a tab that contains 1829 // a URI matching a SERP. 1830 let browser = /** @type {MozBrowser} */ (wrappedChannel.browserElement); 1831 if (browser?.currentURI.spec == "about:blank") { 1832 let tabBrowser = browser.getTabBrowser(); 1833 let tab = tabBrowser.getTabForBrowser(browser).openerTab; 1834 if (tab) { 1835 return tab.linkedBrowser.currentURI.spec; 1836 } 1837 // If no opener tab was found, we're likely looking at the first tab of 1838 // a new window. 
As a last resort, check if the window below the newly 1839 // opened window contains a tab with a matching SERP. 1840 let windows = lazy.BrowserWindowTracker.orderedWindows; 1841 let win = windows.at(1); 1842 if (win) { 1843 let url = win.gBrowser.selectedBrowser.originalURI?.spec; 1844 if (url) { 1845 return url; 1846 } 1847 } 1848 // If we couldn't find a matching tab or window, then return null to 1849 // indicate to the caller we weren't able to find an appropriate SERP. 1850 return null; 1851 } 1852 1853 return browser?.currentURI.spec; 1854 } 1855 1856 /** 1857 * Logs telemetry for a page with adverts, if it is one of the partner search 1858 * provider pages that we're tracking. 1859 * 1860 * @param {object} info 1861 * The search provider information for the page. 1862 * @param {boolean} info.hasAds 1863 * Whether or not the page has adverts. 1864 * @param {string} info.url 1865 * The url of the page. 1866 * @param {object} browser 1867 * The browser associated with the page. 1868 */ 1869 _reportPageWithAds(info, browser) { 1870 let item = this._findItemForBrowser(browser); 1871 if (!item) { 1872 lazy.logConsole.warn( 1873 "Expected to report URI for", 1874 info.url, 1875 "with ads but couldn't find the information" 1876 ); 1877 return; 1878 } 1879 1880 let telemetryState = item.browserTelemetryStateMap.get(browser); 1881 if (telemetryState.adsReported) { 1882 lazy.logConsole.debug( 1883 "Ad was previously reported for browser with URI", 1884 info.url 1885 ); 1886 return; 1887 } 1888 1889 lazy.logConsole.debug( 1890 "Counting ads in page for", 1891 item.info.provider, 1892 item.info.type, 1893 item.source, 1894 info.url 1895 ); 1896 let name = item.source.replace(/_([a-z])/g, (m, p) => p.toUpperCase()); 1897 Glean.browserSearchWithads[name][ 1898 `${item.info.provider}:${item.info.type}` 1899 ].add(1); 1900 Services.obs.notifyObservers(null, "reported-page-with-ads"); 1901 1902 telemetryState.adsReported = true; 1903 1904 if (item.newtabSessionId) { 1905 
Glean.newtabSearchAd.impression.record({ 1906 newtab_visit_id: item.newtabSessionId, 1907 search_access_point: item.source, 1908 is_follow_on: item.info.type.endsWith("follow-on"), 1909 is_tagged: item.info.type.startsWith("tagged"), 1910 telemetry_id: item.info.provider, 1911 }); 1912 } 1913 } 1914 1915 /** 1916 * Logs ad impression telemetry for a page with adverts, if it is 1917 * one of the partner search provider pages that we're tracking. 1918 * 1919 * @param {object} info 1920 * The search provider information for the page. 1921 * @param {string} info.url 1922 * The url of the page. 1923 * @param {Map<string, object>} info.adImpressions 1924 * A map of ad impressions found for the page, where the key 1925 * is the type of ad component and the value is an object 1926 * containing the number of ads that were loaded, visible, 1927 * and hidden. 1928 * @param {Map<string, string>} info.hrefToComponentMap 1929 * A map of hrefs to their component type. Contains both ads 1930 * and non-ads. 1931 * @param {object} browser 1932 * The browser associated with the page. 1933 */ 1934 _reportPageWithAdImpressions(info, browser) { 1935 let item = this._findItemForBrowser(browser); 1936 if (!item) { 1937 return; 1938 } 1939 let telemetryState = item.browserTelemetryStateMap.get(browser); 1940 if ( 1941 info.adImpressions && 1942 telemetryState && 1943 !telemetryState.adImpressionsReported 1944 ) { 1945 for (let [componentType, data] of info.adImpressions.entries()) { 1946 // Not all ad impressions are sponsored. 
1947 if (AD_COMPONENTS.includes(componentType)) { 1948 telemetryState.adsHidden += data.adsHidden; 1949 telemetryState.adsLoaded += data.adsLoaded; 1950 telemetryState.adsVisible += data.adsVisible; 1951 } 1952 1953 lazy.logConsole.debug("Counting ad:", { type: componentType, ...data }); 1954 Glean.serp.adImpression.record({ 1955 impression_id: telemetryState.impressionId, 1956 component: componentType, 1957 ads_loaded: data.adsLoaded, 1958 ads_visible: data.adsVisible, 1959 ads_hidden: data.adsHidden, 1960 }); 1961 } 1962 // Convert hrefToComponentMap to a urlToComponentMap in order to cache 1963 // the query parameters of the href. 1964 let urlToComponentMap = new Map(); 1965 for (let [href, adType] of info.hrefToComponentMap) { 1966 urlToComponentMap.set(new URL(href), adType); 1967 } 1968 telemetryState.urlToComponentMap = urlToComponentMap; 1969 telemetryState.adImpressionsReported = true; 1970 Services.obs.notifyObservers(null, "reported-page-with-ad-impressions"); 1971 } 1972 } 1973 1974 /** 1975 * Records a page action from a SERP page. Normally, actions are tracked in 1976 * parent process by observing network events but some actions are not 1977 * possible to detect outside of subscribing to the child process. 1978 * 1979 * @param {object} info 1980 * The search provider infomation for the page. 1981 * @param {string} info.target 1982 * The target component that was interacted with. 1983 * @param {string} info.action 1984 * The action taken on the page. 1985 * @param {object} browser 1986 * The browser associated with the page. 
1987 */ 1988 _reportPageAction(info, browser) { 1989 let item = this._findItemForBrowser(browser); 1990 if (!item) { 1991 return; 1992 } 1993 let telemetryState = item.browserTelemetryStateMap.get(browser); 1994 let impressionId = telemetryState?.impressionId; 1995 if (info.target && impressionId) { 1996 lazy.logConsole.debug(`Recorded page action:`, { 1997 impressionId: telemetryState.impressionId, 1998 target: info.target, 1999 action: info.action, 2000 }); 2001 Glean.serp.engagement.record({ 2002 impression_id: impressionId, 2003 action: info.action, 2004 target: info.target, 2005 }); 2006 impressionIdsWithoutEngagementsSet.delete(impressionId); 2007 // In-content searches are not be categorized with a type, so they will 2008 // not be picked up in the network processes. 2009 if ( 2010 info.target == 2011 SearchSERPTelemetryUtils.COMPONENTS.INCONTENT_SEARCHBOX && 2012 info.action == SearchSERPTelemetryUtils.ACTIONS.SUBMITTED 2013 ) { 2014 telemetryState.searchBoxSubmitted = true; 2015 SearchSERPTelemetry.setBrowserContentSource( 2016 browser, 2017 SearchSERPTelemetryUtils.INCONTENT_SOURCES.SEARCHBOX 2018 ); 2019 } 2020 Services.obs.notifyObservers(null, "reported-page-with-action"); 2021 } else { 2022 lazy.logConsole.warn( 2023 "Expected to report a", 2024 info.action, 2025 "engagement but couldn't find an impression id." 2026 ); 2027 } 2028 } 2029 2030 _reportPageImpression(info, browser) { 2031 let item = this._findItemForBrowser(browser); 2032 let telemetryState = item?.browserTelemetryStateMap.get(browser); 2033 if (!telemetryState?.impressionInfo) { 2034 lazy.logConsole.debug( 2035 "Could not find telemetry state or impression info." 
2036 ); 2037 return; 2038 } 2039 let impressionId = telemetryState.impressionId; 2040 if (impressionId && !telemetryState.impressionRecorded) { 2041 let impressionInfo = telemetryState.impressionInfo; 2042 Glean.serp.impression.record({ 2043 impression_id: impressionId, 2044 provider: impressionInfo.provider, 2045 tagged: impressionInfo.tagged, 2046 partner_code: impressionInfo.partnerCode, 2047 search_mode: impressionInfo.searchMode, 2048 source: impressionInfo.source, 2049 shopping_tab_displayed: info.shoppingTabDisplayed, 2050 is_shopping_page: impressionInfo.isShoppingPage, 2051 is_private: impressionInfo.isPrivate, 2052 is_signed_in: impressionInfo.isSignedIn, 2053 }); 2054 2055 telemetryState.impressionRecorded = true; 2056 2057 lazy.logConsole.debug(`Reported Impression:`, { 2058 impressionId, 2059 ...impressionInfo, 2060 shoppingTabDisplayed: info.shoppingTabDisplayed, 2061 searchMode: impressionInfo.searchMode, 2062 }); 2063 Services.obs.notifyObservers(null, "reported-page-with-impression"); 2064 } else if (telemetryState.impressionRecorded) { 2065 lazy.logConsole.debug("Impression already recorded for browser."); 2066 } else { 2067 lazy.logConsole.debug("Could not find an impression id."); 2068 } 2069 } 2070 2071 _recordFallbackPageImpression(telemetryState) { 2072 if (!telemetryState?.impressionInfo) { 2073 return; 2074 } 2075 let impressionInfo = telemetryState.impressionInfo; 2076 Glean.serp.impression.record({ 2077 impression_id: telemetryState.impressionId, 2078 provider: impressionInfo.provider, 2079 tagged: impressionInfo.tagged, 2080 partner_code: impressionInfo.partnerCode, 2081 search_mode: impressionInfo.searchMode, 2082 source: impressionInfo.source, 2083 shopping_tab_displayed: false, 2084 is_shopping_page: impressionInfo.isShoppingPage, 2085 is_private: impressionInfo.isPrivate, 2086 is_signed_in: impressionInfo.isSignedIn, 2087 }); 2088 2089 telemetryState.impressionRecorded = true; 2090 2091 lazy.logConsole.debug(`Reported Impression:`, { 
2092 impressionId: telemetryState.impressionId, 2093 ...impressionInfo, 2094 shoppingTabDisplayed: false, 2095 search_mode: impressionInfo.searchMode, 2096 }); 2097 Services.obs.notifyObservers(null, "reported-page-with-impression"); 2098 } 2099 2100 /** 2101 * Initiates the categorization and reporting of domains extracted from 2102 * SERPs. 2103 * 2104 * @param {object} info 2105 * The search provider infomation for the page. 2106 * @param {Set} info.nonAdDomains 2107 The non-ad domains extracted from the page. 2108 * @param {Set} info.adDomains 2109 The ad domains extracted from the page. 2110 * @param {object} browser 2111 * The browser associated with the page. 2112 */ 2113 async _reportPageDomains(info, browser) { 2114 let item = this._findItemForBrowser(browser); 2115 let telemetryState = item?.browserTelemetryStateMap.get(browser); 2116 if (lazy.SERPCategorization.enabled && telemetryState) { 2117 lazy.logConsole.debug("Ad domains:", Array.from(info.adDomains)); 2118 lazy.logConsole.debug("Non ad domains:", Array.from(info.nonAdDomains)); 2119 let result = await lazy.SERPCategorization.maybeCategorizeSERP( 2120 info.nonAdDomains, 2121 info.adDomains 2122 ); 2123 if (result) { 2124 telemetryState.categorizationInfo = result; 2125 let callback = () => { 2126 let impressionInfo = telemetryState.impressionInfo; 2127 lazy.SERPCategorizationRecorder.recordCategorizationTelemetry({ 2128 ...telemetryState.categorizationInfo, 2129 app_version: item.majorVersion, 2130 channel: item.channel, 2131 region: item.region, 2132 partner_code: impressionInfo.partnerCode, 2133 provider: impressionInfo.provider, 2134 tagged: impressionInfo.tagged, 2135 is_shopping_page: impressionInfo.isShoppingPage, 2136 num_ads_clicked: telemetryState.adsClicked, 2137 num_ads_hidden: telemetryState.adsHidden, 2138 num_ads_loaded: telemetryState.adsLoaded, 2139 num_ads_visible: telemetryState.adsVisible, 2140 }); 2141 }; 2142 lazy.SERPCategorizationEventScheduler.addCallback(browser, 
callback); 2143 } 2144 } 2145 Services.obs.notifyObservers( 2146 null, 2147 "reported-page-with-categorized-domains" 2148 ); 2149 } 2150 } 2151 2152 export var SearchSERPTelemetry = new TelemetryHandler();