SearchSERPTelemetryChild.sys.mjs (56216B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs"; 6 7 const lazy = {}; 8 9 ChromeUtils.defineESModuleGetters(lazy, { 10 clearTimeout: "resource://gre/modules/Timer.sys.mjs", 11 SearchUtils: "moz-src:///toolkit/components/search/SearchUtils.sys.mjs", 12 setTimeout: "resource://gre/modules/Timer.sys.mjs", 13 }); 14 15 XPCOMUtils.defineLazyPreferenceGetter( 16 lazy, 17 "serpEventTelemetryCategorization", 18 "browser.search.serpEventTelemetryCategorization.enabled", 19 false 20 ); 21 22 XPCOMUtils.defineLazyPreferenceGetter( 23 lazy, 24 "serpEventTelemetryCategorizationRegionEnabled", 25 "browser.search.serpEventTelemetryCategorization.regionEnabled", 26 false 27 ); 28 29 ChromeUtils.defineLazyGetter(lazy, "logConsole", () => { 30 return console.createInstance({ 31 prefix: "SearchTelemetry", 32 maxLogLevel: lazy.SearchUtils.loggingEnabled ? "Debug" : "Warn", 33 }); 34 }); 35 36 export const CATEGORIZATION_SETTINGS = { 37 MAX_DOMAINS_TO_CATEGORIZE: 10, 38 HAS_MATCHING_REGION: "SearchTelemetry:HasMatchingRegion", 39 }; 40 41 // Duplicated from SearchSERPTelemetry to avoid loading the module on content 42 // startup. 43 const SEARCH_TELEMETRY_SHARED = { 44 PROVIDER_INFO: "SearchTelemetry:ProviderInfo", 45 LOAD_TIMEOUT: "SearchTelemetry:LoadTimeout", 46 SPA_LOAD_TIMEOUT: "SearchTelemetry:SPALoadTimeout", 47 }; 48 49 /** 50 * Standard events mapped to the telemetry action. 51 */ 52 const EVENT_TYPE_TO_ACTION = { 53 click: "clicked", 54 }; 55 56 /** 57 * A map of object conditions mapped to the condition that should be run when 58 * an event is triggered. The condition name is referenced in Remote Settings 59 * under the optional `condition` string for an event listener. 60 */ 61 const CONDITIONS = { 62 keydownEnter: event => event.key == "Enter", 63 }; 64 65 export const VISIBILITY_THRESHOLD = 0.5; 66 67 /** 68 * SearchProviders looks after keeping track of the search provider information 69 * received from the main process. 70 * 71 * It is separate to SearchTelemetryChild so that it is not constructed for each 72 * tab, but once per process. 73 */ 74 class SearchProviders { 75 constructor() { 76 this._searchProviderInfo = null; 77 Services.cpmm.sharedData.addEventListener("change", this); 78 } 79 80 /** 81 * Gets the search provider information for any provider with advert information. 82 * If there is nothing in the cache, it will obtain it from shared data. 83 * 84 * @returns {object} Returns the search provider information. 85 * @see SearchTelemetry.sys.mjs 86 */ 87 get info() { 88 if (this._searchProviderInfo) { 89 return this._searchProviderInfo; 90 } 91 92 this._searchProviderInfo = Services.cpmm.sharedData.get( 93 SEARCH_TELEMETRY_SHARED.PROVIDER_INFO 94 ); 95 96 if (!this._searchProviderInfo) { 97 return null; 98 } 99 100 this._searchProviderInfo = this._searchProviderInfo 101 // Filter-out non-ad providers so that we're not trying to match against 102 // those unnecessarily. 103 .filter(p => "extraAdServersRegexps" in p) 104 // Pre-build the regular expressions. 105 .map(p => { 106 p.adServerAttributes = p.adServerAttributes ?? []; 107 if (p.shoppingTab?.inspectRegexpInSERP) { 108 p.shoppingTab.regexp = new RegExp(p.shoppingTab.regexp); 109 } 110 let subframes = 111 p.subframes 112 ?.filter(obj => obj.inspectRegexpInSERP) 113 .map(obj => { 114 return { ...obj, regexp: new RegExp(obj.regexp) }; 115 }) ?? []; 116 return { 117 ...p, 118 searchPageRegexp: new RegExp(p.searchPageRegexp), 119 extraAdServersRegexps: p.extraAdServersRegexps.map( 120 r => new RegExp(r) 121 ), 122 subframes, 123 }; 124 }); 125 126 return this._searchProviderInfo; 127 } 128 129 /** 130 * Handles events received from sharedData notifications. 131 * 132 * @param {object} event The event details. 133 */ 134 handleEvent(event) { 135 switch (event.type) { 136 case "change": { 137 if (event.changedKeys.includes(SEARCH_TELEMETRY_SHARED.PROVIDER_INFO)) { 138 // Just null out the provider information for now, we'll fetch it next 139 // time we need it. 140 this._searchProviderInfo = null; 141 } 142 break; 143 } 144 } 145 } 146 } 147 148 /** 149 * @typedef {object} EventListenerParam 150 * @property {string} eventType 151 * The type of event the listener should listen for. If the event type is 152 * is non-standard, it should correspond to a definition in 153 * CUSTOM_EVENT_TYPE_TO_DATA that will re-map it to a standard type. TODO 154 * @property {string} target 155 * The type of component that was the source of the event. 156 * @property {string | null} action 157 * The action that should be reported in telemetry. 158 */ 159 160 /** 161 * Provides a way to add listeners to elements, as well as unload them. 162 */ 163 class ListenerHelper { 164 /** 165 * Adds each event listener in an array of event listeners to each element 166 * in an array of elements, and sets their unloading. 167 * 168 * @param {Array<Element>} elements 169 * DOM elements to add event listeners to. 170 * @param {Array<EventListenerParam>} eventListenerParams 171 * The type of event to add the listener to. 172 * @param {string} target 173 */ 174 static addListeners(elements, eventListenerParams, target) { 175 if (!elements?.length || !eventListenerParams?.length) { 176 return; 177 } 178 179 let document = elements[0].ownerGlobal.document; 180 let callback = documentToEventCallbackMap.get(document); 181 if (!callback) { 182 return; 183 } 184 185 // The map might have entries from previous callers, so we must ensure 186 // we don't discard existing event listener callbacks. 187 let removeListenerCallbacks = []; 188 if (documentToRemoveEventListenersMap.has(document)) { 189 removeListenerCallbacks = documentToRemoveEventListenersMap.get(document); 190 } 191 192 for (let params of eventListenerParams) { 193 let removeListeners = ListenerHelper.addListener( 194 elements, 195 params, 196 target, 197 callback 198 ); 199 removeListenerCallbacks = removeListenerCallbacks.concat(removeListeners); 200 } 201 202 documentToRemoveEventListenersMap.set(document, removeListenerCallbacks); 203 } 204 205 /** 206 * Add an event listener to each element in an array of elements. 207 * 208 * @param {Array<Element>} elements 209 * DOM elements to add event listeners to. 210 * @param {EventListenerParam} eventListenerParam 211 * @param {string} target 212 * @param {Function} callback 213 * @returns {Array<Function>} Array of remove event listener functions. 214 */ 215 static addListener(elements, eventListenerParam, target, callback) { 216 let { action, eventType, target: customTarget } = eventListenerParam; 217 218 if (customTarget) { 219 target = customTarget; 220 } 221 222 if (!action) { 223 action = EVENT_TYPE_TO_ACTION[eventType]; 224 if (!action) { 225 return []; 226 } 227 } 228 229 // Some events might have specific conditions we want to check before 230 // registering an engagement event. 231 let eventCallback; 232 if (eventListenerParam.condition) { 233 if (CONDITIONS[eventListenerParam.condition]) { 234 let condition = CONDITIONS[eventListenerParam.condition]; 235 eventCallback = async event => { 236 let start = ChromeUtils.now(); 237 if (condition(event)) { 238 callback({ action, target }); 239 } 240 ChromeUtils.addProfilerMarker( 241 "SearchSERPTelemetryChild._eventCallback", 242 start, 243 "Call cached function before callback." 244 ); 245 }; 246 } else { 247 // If a component included a condition, but it wasn't found it is 248 // due to the fact that it was added in a more recent Firefox version 249 // than what is provided via search-telemetry-v2. Since the version of 250 // Firefox the user is using doesn't include this condition, 251 // we shouldn't add the event. 252 return []; 253 } 254 } else { 255 eventCallback = () => { 256 callback({ action, target }); 257 }; 258 } 259 260 let removeListenerCallbacks = []; 261 for (let element of elements) { 262 element.addEventListener(eventType, eventCallback); 263 removeListenerCallbacks.push(() => { 264 element.removeEventListener(eventType, eventCallback); 265 }); 266 } 267 return removeListenerCallbacks; 268 } 269 } 270 271 /** 272 * Scans SERPs for ad components. 273 */ 274 class SearchAdImpression { 275 /** 276 * A reference to ad component information that is used if an anchor 277 * element could not be categorized to a specific ad component. 278 * 279 * @type {object} 280 */ 281 #defaultComponent = null; 282 283 /** 284 * Maps DOM elements to AdData. 285 * 286 * @type {Map<Element, AdData>} 287 * 288 * @typedef AdData 289 * @type {object} 290 * @property {string} type 291 * The type of ad component. 292 * @property {number} adsLoaded 293 * The number of ads counted as loaded for the component. 294 * @property {boolean} countChildren 295 * Whether all the children were counted for the component. 296 */ 297 #elementToAdDataMap = new Map(); 298 299 /** 300 * An array of components to do a top-down search. 301 */ 302 #topDownComponents = []; 303 304 /** 305 * A reference the providerInfo for this SERP. 306 * 307 * @type {object} 308 */ 309 #providerInfo = null; 310 311 set providerInfo(providerInfo) { 312 if (this.#providerInfo?.telemetryId == providerInfo.telemetryId) { 313 return; 314 } 315 316 this.#providerInfo = providerInfo; 317 318 // Reset values. 319 this.#topDownComponents = []; 320 321 for (let component of this.#providerInfo.components) { 322 if (component.default) { 323 this.#defaultComponent = component; 324 continue; 325 } 326 if (component.topDown) { 327 this.#topDownComponents.push(component); 328 } 329 } 330 } 331 332 /** 333 * Check if the page has a shopping tab. 334 * 335 * @param {Document} document 336 * @return {boolean} 337 * Whether the page has a shopping tab. Defaults to false. 338 */ 339 hasShoppingTab(document) { 340 if (!this.#providerInfo?.shoppingTab) { 341 return false; 342 } 343 344 // If a provider has the inspectRegexpInSERP, we assume there must be an 345 // associated regexp that must be used on any hrefs matched by the elements 346 // found using the selector. If inspectRegexpInSERP is false, then check if 347 // the number of items found using the selector matches exactly one element 348 // to ensure we've used a fine-grained search. 349 let elements = document.querySelectorAll( 350 this.#providerInfo.shoppingTab.selector 351 ); 352 if (this.#providerInfo.shoppingTab.inspectRegexpInSERP) { 353 let regexp = this.#providerInfo.shoppingTab.regexp; 354 for (let element of elements) { 355 let href = element.getAttribute("href"); 356 if (href && regexp.test(href)) { 357 this.#recordElementData(element, { 358 type: "shopping_tab", 359 count: 1, 360 }); 361 return true; 362 } 363 } 364 } else if (elements.length == 1) { 365 this.#recordElementData(elements[0], { 366 type: "shopping_tab", 367 count: 1, 368 }); 369 return true; 370 } 371 return false; 372 } 373 374 /** 375 * Examine the list of anchors and the document object and find components 376 * on the page. 377 * 378 * With the list of anchors, go through each and find the component it 379 * belongs to and save it in elementToAdDataMap. 380 * 381 * Then, with the document object find components and save the results to 382 * elementToAdDataMap. 383 * 384 * Lastly, combine the results together in a new Map that contains the number 385 * of loaded, visible, and blocked results for the component. 386 * 387 * @param {HTMLCollectionOf<HTMLAnchorElement>} anchors 388 * @param {Document} document 389 * 390 * @returns {Map<string, object>} 391 * A map where the key is a string containing the type of ad component 392 * and the value is an object containing the number of adsLoaded, 393 * adsVisible, and adsHidden within the component. 394 */ 395 categorize(anchors, document) { 396 // Used for various functions to make relative URLs absolute. 397 let origin = new URL(document.documentURI).origin; 398 399 // Bottom up approach. 400 this.#categorizeAnchors(anchors, origin); 401 402 // Top down approach. 403 this.#categorizeDocument(document); 404 405 let componentToVisibilityMap = new Map(); 406 let hrefToComponentMap = new Map(); 407 408 let innerWindowHeight = document.ownerGlobal.innerHeight; 409 let scrollY = document.ownerGlobal.scrollY; 410 411 // Iterate over the results: 412 // - If it's searchbox add event listeners. 413 // - If it is a non_ads_link, map its href to component type. 414 // - For others, map its component type and check visibility. 415 for (let [element, data] of this.#elementToAdDataMap.entries()) { 416 if (data.type == "incontent_searchbox") { 417 // Bug 1880413: Deprecate hard coding the incontent search box. 418 // If searchbox has child elements, observe those, otherwise 419 // fallback to its parent element. 420 let searchElements = data.childElements.length 421 ? data.childElements 422 : [element]; 423 ListenerHelper.addListeners( 424 searchElements, 425 [ 426 { eventType: "click", target: data.type }, 427 { 428 eventType: "keydown", 429 target: data.type, 430 action: "submitted", 431 condition: "keydownEnter", 432 }, 433 ], 434 data.type 435 ); 436 continue; 437 } 438 if (data.childElements.length) { 439 for (let child of data.childElements) { 440 let href = this.#extractHref(child, origin); 441 if (href) { 442 hrefToComponentMap.set(href, data.type); 443 } 444 } 445 } else { 446 let href = this.#extractHref(element, origin); 447 if (href) { 448 hrefToComponentMap.set(href, data.type); 449 } 450 } 451 452 // If the component is a non_ads_link, skip visibility checks. 453 if (data.type == "non_ads_link") { 454 continue; 455 } 456 457 // If proxy children were found, check the visibility of all of them 458 // otherwise just check the visiblity of the first child. 459 let childElements; 460 if (data.proxyChildElements.length) { 461 childElements = data.proxyChildElements; 462 } else if (data.childElements.length) { 463 childElements = [data.childElements[0]]; 464 } 465 466 let count = this.#countVisibleAndHiddenAds( 467 element, 468 data.adsLoaded, 469 childElements, 470 innerWindowHeight, 471 scrollY 472 ); 473 if (componentToVisibilityMap.has(data.type)) { 474 let componentInfo = componentToVisibilityMap.get(data.type); 475 componentInfo.adsLoaded += data.adsLoaded; 476 componentInfo.adsVisible += count.adsVisible; 477 componentInfo.adsHidden += count.adsHidden; 478 } else { 479 componentToVisibilityMap.set(data.type, { 480 adsLoaded: data.adsLoaded, 481 adsVisible: count.adsVisible, 482 adsHidden: count.adsHidden, 483 }); 484 } 485 } 486 487 // Release the DOM elements from the Map. 488 this.#elementToAdDataMap.clear(); 489 490 return { componentToVisibilityMap, hrefToComponentMap }; 491 } 492 493 /** 494 * Given an element, find the href that is most likely to make the request if 495 * the element is clicked. If the element contains a specific data attribute 496 * known to contain the url used to make the initial request, use it, 497 * otherwise use its href. Specific character conversions are done to mimic 498 * conversions likely to take place when urls are observed in network 499 * activity. 500 * 501 * @param {Element} element 502 * The element to inspect. 503 * @param {string} origin 504 * The origin for relative urls. 505 * @returns {string} 506 * The href of the element. 507 */ 508 #extractHref(element, origin) { 509 let href; 510 // Prioritize the href from a known data attribute value instead of 511 // its href property, as the former is the initial url the page will 512 // navigate to before being re-directed to the href. 513 for (let name of this.#providerInfo.adServerAttributes) { 514 if ( 515 element.dataset[name] && 516 this.#providerInfo.extraAdServersRegexps.some(regexp => 517 regexp.test(element.dataset[name]) 518 ) 519 ) { 520 href = element.dataset[name]; 521 break; 522 } 523 } 524 // If a data attribute value was not found, fallback to the href. 525 href = href ?? element.getAttribute("href"); 526 if (!href) { 527 return ""; 528 } 529 530 let url = URL.parse(href, origin); 531 if (!url || (url.protocol !== "https:" && url.protocol !== "http:")) { 532 return ""; 533 } 534 535 return url.href; 536 } 537 538 /** 539 * Given a list of anchor elements, group them into ad components. 540 * 541 * The first step in the process is to check if the anchor should be 542 * inspected. This is based on whether it contains an href or a 543 * data-attribute values that matches an ad link, or if it contains a 544 * pattern caught by a components included regular expression. 545 * 546 * Determine which component it belongs to and the number of matches for 547 * the component. The heuristic is described in findDataForAnchor. 548 * If there was a result and we haven't seen it before, save it in 549 * elementToAdDataMap. 550 * 551 * @param {HTMLCollectionOf<HTMLAnchorElement>} anchors 552 * The list of anchors to inspect. 553 * @param {string} origin 554 * The origin of the document the anchors belong to. 555 */ 556 #categorizeAnchors(anchors, origin) { 557 for (let anchor of anchors) { 558 if (this.#shouldInspectAnchor(anchor, origin)) { 559 let result; 560 try { 561 // We use a schema to ensure the values for each search provider 562 // aligns to what is expected, but tests don't enforce the schema 563 // and thus, can technically input faulty values. 564 result = this.#findDataForAnchor(anchor); 565 } catch (ex) { 566 lazy.logConsole.error("Could not find data for anchor:", ex); 567 continue; 568 } 569 if (result) { 570 this.#recordElementData(result.element, { 571 type: result.type, 572 count: result.count, 573 proxyChildElements: result.proxyChildElements, 574 childElements: result.childElements, 575 }); 576 } 577 if (result?.relatedElements?.length) { 578 // Bug 1880413: Deprecate related elements. 579 // Bottom-up approach with related elements are only used for 580 // non-link elements related to ads, like carousel arrows. 581 ListenerHelper.addListeners( 582 result.relatedElements, 583 [ 584 { 585 action: "expanded", 586 eventType: "click", 587 }, 588 ], 589 result.type 590 ); 591 } 592 } 593 } 594 } 595 596 /** 597 * Find components from the document object. This is mostly relevant for 598 * components that are non-ads and don't have an obvious regular expression 599 * that could match the pattern of the href. 600 * 601 * @param {Document} document 602 */ 603 #categorizeDocument(document) { 604 // using the subset of components that are top down, 605 // go through each one. 606 for (let component of this.#topDownComponents) { 607 // Top-down searches must have the topDown attribute. 608 if (!component.topDown) { 609 continue; 610 } 611 // Top down searches must include a parent. 612 if (!component.included?.parent) { 613 continue; 614 } 615 let parents = document.querySelectorAll( 616 component.included.parent.selector 617 ); 618 if (parents.length) { 619 let eventListeners = component.included.parent.eventListeners; 620 if (eventListeners?.length) { 621 ListenerHelper.addListeners(parents, eventListeners, component.type); 622 } 623 for (let parent of parents) { 624 // Bug 1880413: Deprecate related elements. 625 // Top-down related elements are either used for auto-suggested 626 // elements of a searchbox, or elements on a page which we can't 627 // find through a bottom up approach but we want an add a listener, 628 // like carousels with arrows. 629 if (component.included.related?.selector) { 630 let relatedElements = parent.querySelectorAll( 631 component.included.related.selector 632 ); 633 if (relatedElements.length) { 634 // For the search box, related elements with event listeners are 635 // auto-suggested terms. For everything else (e.g. carousels) 636 // they are expanded. 637 ListenerHelper.addListeners( 638 relatedElements, 639 [ 640 { 641 action: 642 component.type == "incontent_searchbox" 643 ? "submitted" 644 : "expanded", 645 eventType: "click", 646 }, 647 ], 648 component.type 649 ); 650 } 651 } 652 if (component.included.children) { 653 for (let child of component.included.children) { 654 let childElements = parent.querySelectorAll(child.selector); 655 if (childElements.length) { 656 if (child.eventListeners) { 657 childElements = Array.from(childElements); 658 ListenerHelper.addListeners( 659 childElements, 660 child.eventListeners, 661 child.type ?? component.type 662 ); 663 } 664 if (!child.skipCount) { 665 this.#recordElementData(parent, { 666 type: component.type, 667 childElements: Array.from(childElements), 668 }); 669 } 670 } 671 } 672 } else if (!component.included.parent.skipCount) { 673 this.#recordElementData(parent, { 674 type: component.type, 675 }); 676 } 677 } 678 } 679 } 680 } 681 682 /** 683 * Evaluates whether an anchor should be inspected based on matching 684 * regular expressions on either its href or specified data-attribute values. 685 * 686 * @param {HTMLAnchorElement} anchor 687 * @param {string} origin 688 * @returns {boolean} 689 */ 690 #shouldInspectAnchor(anchor, origin) { 691 let href = anchor.getAttribute("href"); 692 if (!href) { 693 return false; 694 } 695 696 // Some hrefs might be relative. 697 if (!href.startsWith("https://") && !href.startsWith("http://")) { 698 href = origin + href; 699 } 700 701 let regexps = this.#providerInfo.extraAdServersRegexps; 702 // Anchors can contain ad links in a data-attribute. 703 for (let name of this.#providerInfo.adServerAttributes) { 704 let attributeValue = anchor.dataset[name]; 705 if ( 706 attributeValue && 707 regexps.some(regexp => regexp.test(attributeValue)) 708 ) { 709 return true; 710 } 711 } 712 // Anchors can contain ad links in a specific href. 713 if (regexps.some(regexp => regexp.test(href))) { 714 return true; 715 } 716 return false; 717 } 718 719 /** 720 * Find the component data for an anchor. 721 * 722 * To categorize the anchor, we iterate over the list of possible components 723 * the anchor could be categorized. If the component is default, we skip 724 * checking because the fallback option for all anchor links is the default. 725 * 726 * First, get the "parent" of the anchor which best represents the DOM element 727 * that contains the anchor links for the component and no other component. 728 * This parent will be cached so that other anchors that share the same 729 * parent can be counted together. 730 * 731 * The check for a parent is a loop because we can define more than one best 732 * parent since on certain SERPs, it's possible for a "better" DOM element 733 * parent to appear occassionally. 734 * 735 * If no parent is found, skip this component. 736 * 737 * If a parent was found, check for specific child elements. 738 * 739 * Finding child DOM elements of a parent is optional. One reason to do so is 740 * to use child elements instead of anchor links to count the number of ads for 741 * a component via the `countChildren` property. This is provided because some ads 742 * (i.e. carousels) have multiple ad links in a single child element that go to the 743 * same location. In this scenario, all instances of the child are recorded as ads. 744 * Subsequent anchor elements that map to the same parent are ignored. 745 * 746 * Whether or not a child was found, return the information that was found, 747 * including whether or not all child elements were counted instead of anchors. 748 * 749 * If another anchor belonging to a parent that was previously recorded is the input 750 * for this function, we either increment the ad count by 1 or don't increment the ad 751 * count because the parent used `countChildren` completed the calculation in a 752 * previous step. 753 * 754 * @param {HTMLAnchorElement} anchor 755 * The anchor to be inspected. 756 * @returns {object | null} 757 * An object containing the element representing the root DOM element for 758 * the component, the type of component, how many ads were counted, 759 * and whether or not the count was of all the children. 760 * @throws {Error} 761 * Will throw an error if certain properties of a component are missing. 762 * Required properties are listed in search-telemetry-v2-schema.json. 763 */ 764 #findDataForAnchor(anchor) { 765 for (let component of this.#providerInfo.components) { 766 // First, check various conditions for skipping a component. 767 768 // A component should always have at least one included statement. 769 if (!component.included) { 770 continue; 771 } 772 773 // Top down searches are done after the bottom up search. 774 if (component.topDown) { 775 continue; 776 } 777 778 // The default component doesn't need to be checked, 779 // as it will be the fallback option. 780 if (component.default) { 781 continue; 782 } 783 784 // The anchor shouldn't belong to an excluded parent component if one 785 // is provided. 786 if ( 787 component.excluded?.parent?.selector && 788 anchor.closest(component.excluded.parent.selector) 789 ) { 790 continue; 791 } 792 793 // All components with included should have a parent entry. 794 if (!component.included.parent) { 795 continue; 796 } 797 798 // Find the parent of the anchor. 799 let parent = anchor.closest(component.included.parent.selector); 800 801 if (!parent) { 802 continue; 803 } 804 805 // If a parent was found, we may want to ignore reporting the element 806 // to telemetry. 807 if (component.included.parent.skipCount) { 808 return null; 809 } 810 811 // If we've already inspected the parent, add the child element to the 812 // list of anchors. Don't increment the ads loaded count, as we only care 813 // about grouping the anchor with the correct parent. 814 if (this.#elementToAdDataMap.has(parent)) { 815 return { 816 element: parent, 817 childElements: [anchor], 818 }; 819 } 820 821 let relatedElements = []; 822 if (component.included.related?.selector) { 823 relatedElements = parent.querySelectorAll( 824 component.included.related.selector 825 ); 826 } 827 828 // If the component has no defined children, return the parent element. 829 if (component.included.children) { 830 // Look for the first instance of a matching child selector. 831 for (let child of component.included.children) { 832 // If counting by child, get all of them at once. 833 if (child.countChildren) { 834 let proxyChildElements = parent.querySelectorAll(child.selector); 835 if (child.skipCount) { 836 return null; 837 } 838 if (proxyChildElements.length) { 839 return { 840 element: parent, 841 type: child.type ?? component.type, 842 proxyChildElements: Array.from(proxyChildElements), 843 count: proxyChildElements.length, 844 childElements: [anchor], 845 relatedElements, 846 }; 847 } 848 } else if (parent.querySelector(child.selector)) { 849 if (child.skipCount) { 850 return null; 851 } 852 return { 853 element: parent, 854 type: child.type ?? component.type, 855 childElements: [anchor], 856 relatedElements, 857 }; 858 } 859 } 860 } 861 // If no children were defined for this component, or none were found 862 // in the DOM, use the default definition. 863 return { 864 element: parent, 865 type: component.type, 866 childElements: [anchor], 867 relatedElements, 868 }; 869 } 870 // If no component was found, use default values. 871 return { 872 element: anchor, 873 type: this.#defaultComponent.type, 874 }; 875 } 876 877 /** 878 * Determines whether or not an ad was visible or hidden. 879 * 880 * An ad is considered visible if the parent element containing the 881 * component has non-zero dimensions, and all child element in the 882 * component have non-zero dimensions and mostly (50% height) fits within 883 * the window at the time when the impression was taken. If the element is to 884 * the left of the visible area, we also consider it viewed as it's possible 885 * the user interacted with a carousel which typically scrolls new content 886 * leftward. 887 * 888 * For some components, like text ads, we don't send every child 889 * element for visibility, just the first text ad. For other components 890 * like carousels, we send all child elements because we do care about 891 * counting how many elements of the carousel were visible. 892 * 893 * @param {Element} element 894 * Element to be inspected 895 * @param {number} adsLoaded 896 * Number of ads initially determined to be loaded for this element. 897 * @param {Array<Element>} childElements 898 * List of children belonging to element. 899 * @param {number} innerWindowHeight 900 * Current height of the window containing the elements. 901 * @param {number} scrollY 902 * Current distance the window has been scrolled. 903 * @returns {object} 904 * Contains adsVisible which is the number of ads shown for the element 905 * and adsHidden, the number of ads not visible to the user. 906 */ 907 #countVisibleAndHiddenAds( 908 element, 909 adsLoaded, 910 childElements, 911 innerWindowHeight, 912 scrollY 913 ) { 914 let elementRect = 915 element.ownerGlobal.windowUtils.getBoundsWithoutFlushing(element); 916 917 // If the parent element is not visible, assume all ads within are 918 // also not visible. 919 if ( 920 !element.checkVisibility({ 921 visibilityProperty: true, 922 opacityProperty: true, 923 }) 924 ) { 925 Glean.serp.adsBlockedCount.hidden_parent.add(); 926 return { 927 adsVisible: 0, 928 adsHidden: adsLoaded, 929 }; 930 } 931 932 // If an ad is far above the possible visible area of a window, an 933 // adblocker might be doing it as a workaround for blocking the ad. 934 if ( 935 elementRect.bottom < 0 && 936 innerWindowHeight + scrollY + elementRect.bottom < 0 937 ) { 938 Glean.serp.adsBlockedCount.beyond_viewport.add(); 939 return { 940 adsVisible: 0, 941 adsHidden: adsLoaded, 942 }; 943 } 944 945 // If the element has no child elements, check if the element 946 // was ever viewed by the user at this moment. 947 if (!childElements?.length) { 948 // Most ads don't require horizontal scrolling to view it. Thus, we only 949 // check if it could've appeared with some vertical scrolling. 950 let visible = VisibilityHelper.elementWasVisibleVertically( 951 elementRect, 952 innerWindowHeight, 953 VISIBILITY_THRESHOLD 954 ); 955 return { 956 adsVisible: visible ? 1 : 0, 957 adsHidden: 0, 958 }; 959 } 960 961 let adsVisible = 0; 962 let adsHidden = 0; 963 for (let child of childElements) { 964 if ( 965 !child.checkVisibility({ 966 visibilityProperty: true, 967 opacityProperty: true, 968 }) 969 ) { 970 adsHidden += 1; 971 Glean.serp.adsBlockedCount.hidden_child.add(); 972 continue; 973 } 974 975 let itemRect = 976 child.ownerGlobal.windowUtils.getBoundsWithoutFlushing(child); 977 // If the child element is to the right of the containing element and 978 // can't be viewed, skip it. We do this check because some elements like 979 // carousels can hide additional content horizontally. We don't apply the 980 // same logic if the element is to the left because we assume carousels 981 // scroll elements to the left when the user wants to see more contents. 982 // Thus, the elements to the left must've been visible. 983 if ( 984 !VisibilityHelper.childElementWasVisibleHorizontally( 985 elementRect, 986 itemRect, 987 VISIBILITY_THRESHOLD 988 ) 989 ) { 990 continue; 991 } 992 993 // If the height of child element is not visible, skip it. 994 if ( 995 !VisibilityHelper.elementWasVisibleVertically( 996 itemRect, 997 innerWindowHeight, 998 VISIBILITY_THRESHOLD 999 ) 1000 ) { 1001 continue; 1002 } 1003 ++adsVisible; 1004 } 1005 1006 return { 1007 adsVisible, 1008 adsHidden, 1009 }; 1010 } 1011 1012 /** 1013 * Caches ad data for a DOM element. The key of the map is by Element rather 1014 * than Component for fast lookup on whether an Element has been already been 1015 * categorized as a component. Subsequent calls to this passing the same 1016 * element will update the list of child elements. 1017 * 1018 * @param {Element} element 1019 * The element considered to be the root for the component. 1020 * @param {object} params 1021 * Various parameters that can be recorded. Whether the input values exist 1022 * or not depends on which component was found, which heuristic should be used 1023 * to determine whether an ad was visible, and whether we've already seen this 1024 * element. 1025 * @param {string | null} params.type 1026 * The type of component. 1027 * @param {number} params.count 1028 * The number of ads found for a component. The number represents either 1029 * the number of elements that match an ad expression or the number of DOM 1030 * elements containing an ad link. 1031 * @param {Array<Element>} params.proxyChildElements 1032 * An array of DOM elements that should be inspected for visibility instead 1033 * of the actual child elements, possibly because they are grouped. 1034 * @param {Array<Element>} params.childElements 1035 * An array of DOM elements to inspect. 1036 */ 1037 #recordElementData( 1038 element, 1039 { type, count = 1, proxyChildElements = [], childElements = [] } = {} 1040 ) { 1041 if (this.#elementToAdDataMap.has(element)) { 1042 let recordedValues = this.#elementToAdDataMap.get(element); 1043 if (childElements.length) { 1044 recordedValues.childElements = 1045 recordedValues.childElements.concat(childElements); 1046 } 1047 } else { 1048 this.#elementToAdDataMap.set(element, { 1049 type, 1050 adsLoaded: count, 1051 proxyChildElements, 1052 childElements, 1053 }); 1054 } 1055 } 1056 } 1057 1058 export class VisibilityHelper { 1059 /** 1060 * Whether the element was vertically visible. It assumes elements above the 1061 * viewable area were visible at some point in time. 1062 * 1063 * @param {DOMRect} rect 1064 * The bounds of the element. 1065 * @param {number} innerWindowHeight 1066 * The height of the window. 1067 * @param {number} threshold 1068 * What percentage of the element should vertically be visible. 1069 * @returns {boolean} 1070 * Whether the element was visible. 1071 */ 1072 static elementWasVisibleVertically(rect, innerWindowHeight, threshold) { 1073 return rect.top + rect.height * threshold <= innerWindowHeight; 1074 } 1075 1076 /** 1077 * Whether the child element was horizontally visible. It assumes elements to 1078 * the left were visible at some point in time. 1079 * 1080 * @param {DOMRect} parentRect 1081 * The bounds of the element that contains the child. 1082 * @param {DOMRect} childRect 1083 * The bounds of the child element. 1084 * @param {number} threshold 1085 * What percentage of the child element should horizontally be visible. 1086 * @returns {boolean} 1087 * Whether the child element was visible. 1088 */ 1089 static childElementWasVisibleHorizontally(parentRect, childRect, threshold) { 1090 return ( 1091 childRect.left + childRect.width * threshold <= 1092 parentRect.left + parentRect.width 1093 ); 1094 } 1095 } 1096 1097 /** 1098 * An object indicating which elements to examine for domains to extract and 1099 * which heuristic technique to use to extract that element's domain. 1100 * 1101 * @typedef {object} ExtractorInfo 1102 * @property {string} selectors 1103 * A string representing the CSS selector that targets the elements on the 1104 * page that contain domains we want to extract. 1105 * @property {string} method 1106 * A string representing which domain extraction heuristic to use. 1107 * One of: "href", "dataAttribute" or "textContent". 1108 * @property {object | null} options 1109 * Options related to the domain extraction heuristic used. 1110 * @property {string | null} options.dataAttributeKey 1111 * The key name of the data attribute to lookup. 1112 * @property {string | null} options.queryParamKey 1113 * The key name of the query param value to lookup. 1114 * @property {boolean | null} options.queryParamValueIsHref 1115 * Whether the query param value is expected to contain an href. 1116 */ 1117 1118 /** 1119 * DomainExtractor examines elements on a page to retrieve the domains. 1120 */ 1121 class DomainExtractor { 1122 /** 1123 * Extract domains from the page using an array of information pertaining to 1124 * the SERP. 1125 * 1126 * @param {Document} document 1127 * The document for the SERP we are extracting domains from. 1128 * @param {Array<ExtractorInfo>} extractorInfos 1129 * Information used to target the domains we need to extract. 1130 * @param {string} providerName 1131 * Name of the search provider. 1132 * @return {Set<string>} 1133 * A set of the domains extracted from the page. 1134 */ 1135 extractDomainsFromDocument(document, extractorInfos, providerName) { 1136 let extractedDomains = new Set(); 1137 if (!extractorInfos?.length) { 1138 return extractedDomains; 1139 } 1140 1141 for (let extractorInfo of extractorInfos) { 1142 if (!extractorInfo.selectors) { 1143 continue; 1144 } 1145 1146 let elements = document.querySelectorAll(extractorInfo.selectors); 1147 if (!elements.length) { 1148 continue; 1149 } 1150 1151 switch (extractorInfo.method) { 1152 case "href": { 1153 // Origin is used in case a URL needs to be made absolute. 1154 let origin = new URL(document.documentURI).origin; 1155 this.#fromElementsConvertHrefsIntoDomains( 1156 elements, 1157 origin, 1158 providerName, 1159 extractedDomains, 1160 extractorInfo.options?.queryParamKey, 1161 extractorInfo.options?.queryParamValueIsHref 1162 ); 1163 break; 1164 } 1165 case "dataAttribute": { 1166 this.#fromElementsRetrieveDataAttributeValues( 1167 elements, 1168 providerName, 1169 extractorInfo.options?.dataAttributeKey, 1170 extractedDomains 1171 ); 1172 break; 1173 } 1174 case "textContent": { 1175 this.#fromElementsRetrieveTextContent( 1176 elements, 1177 extractedDomains, 1178 providerName 1179 ); 1180 break; 1181 } 1182 } 1183 } 1184 1185 return extractedDomains; 1186 } 1187 1188 /** 1189 * Given a list of elements, extract domains using href attributes. If the 1190 * URL in the href includes the specified query param, the domain will be 1191 * that query param's value. Otherwise it will be the hostname of the href 1192 * attribute's URL. 1193 * 1194 * @param {NodeList<Element>} elements 1195 * A list of elements from the page whose href attributes we want to 1196 * inspect. 1197 * @param {string} origin 1198 * Origin of the current page. 1199 * @param {string} providerName 1200 * The name of the search provider. 1201 * @param {Set<string>} extractedDomains 1202 * The result set of domains extracted from the page. 1203 * @param {string | null} queryParam 1204 * An optional query param to search for in an element's href attribute. 1205 * @param {boolean | null} queryParamValueIsHref 1206 * Whether the query param value is expected to contain an href. 1207 */ 1208 #fromElementsConvertHrefsIntoDomains( 1209 elements, 1210 origin, 1211 providerName, 1212 extractedDomains, 1213 queryParam, 1214 queryParamValueIsHref 1215 ) { 1216 for (let element of elements) { 1217 if (this.#exceedsThreshold(extractedDomains.size)) { 1218 return; 1219 } 1220 1221 let href = element.getAttribute("href"); 1222 1223 let url = URL.parse(href, origin); 1224 if (!url) { 1225 continue; 1226 } 1227 1228 // Ignore non-standard protocols. 1229 if (url.protocol != "https:" && url.protocol != "http:") { 1230 continue; 1231 } 1232 1233 if (queryParam) { 1234 let paramValue = url.searchParams.get(queryParam); 1235 if (queryParamValueIsHref) { 1236 paramValue = URL.parse(paramValue)?.hostname; 1237 if (!paramValue) { 1238 continue; 1239 } 1240 paramValue = this.#processDomain(paramValue, providerName); 1241 } 1242 if (paramValue && !extractedDomains.has(paramValue)) { 1243 extractedDomains.add(paramValue); 1244 } 1245 } else if (url.hostname) { 1246 let processedHostname = this.#processDomain(url.hostname, providerName); 1247 if (processedHostname && !extractedDomains.has(processedHostname)) { 1248 extractedDomains.add(processedHostname); 1249 } 1250 } 1251 } 1252 } 1253 1254 /** 1255 * Given a list of elements, examine each for the specified data attribute. 1256 * If found, add that data attribute's value to the result set of extracted 1257 * domains as is. 1258 * 1259 * @param {NodeList<Element>} elements 1260 * A list of elements from the page whose data attributes we want to 1261 * inspect. 1262 * @param {string} providerName 1263 * The name of the search provider. 1264 * @param {string} attribute 1265 * The name of a data attribute to search for within an element. 1266 * @param {Set<string>} extractedDomains 1267 * The result set of domains extracted from the page. 1268 */ 1269 #fromElementsRetrieveDataAttributeValues( 1270 elements, 1271 providerName, 1272 attribute, 1273 extractedDomains 1274 ) { 1275 for (let element of elements) { 1276 if (this.#exceedsThreshold(extractedDomains.size)) { 1277 return; 1278 } 1279 let value = element.dataset[attribute]; 1280 value = this.#processDomain(value, providerName); 1281 if (value && !extractedDomains.has(value)) { 1282 extractedDomains.add(value); 1283 } 1284 } 1285 } 1286 1287 /** 1288 * Given a list of elements, examine the text content for each element, which 1289 * may be 1) a URL from which we can extract a domain or 2) text we can fix 1290 * up to create a best guess as to a URL. If either condition is met, we add 1291 * the domain to the result set. 1292 * 1293 * @param {NodeList<Element>} elements 1294 * A list of elements from the page whose text content we want to inspect. 1295 * @param {Set<string>} extractedDomains 1296 * The result set of domains extracted from the page. 1297 * @param {string} providerName 1298 * The name of the search provider. 1299 */ 1300 #fromElementsRetrieveTextContent(elements, extractedDomains, providerName) { 1301 // Not an exhaustive regex, but it fits our purpose for this method. 1302 const LOOSE_URL_REGEX = 1303 /^(?:https?:\/\/)?(?:www\.)?(?:[\w\-]+\.)+(?:[\w\-]{2,})/i; 1304 1305 // Known but acceptable limitations to this function, where the return 1306 // value won't be correctly fixed up: 1307 // 1) A url is embedded within other text. Ex: "xkcd.com is cool." 1308 // 2) The url contains legal but unusual characters. Ex: $ ! * ' 1309 function fixup(textContent) { 1310 return textContent 1311 .toLowerCase() 1312 .replaceAll(" ", "") 1313 .replace(/\.$/, "") 1314 .concat(".com"); 1315 } 1316 1317 for (let element of elements) { 1318 if (this.#exceedsThreshold(extractedDomains.size)) { 1319 return; 1320 } 1321 let textContent = element.textContent; 1322 if (!textContent) { 1323 continue; 1324 } 1325 1326 let domain; 1327 if (LOOSE_URL_REGEX.test(textContent)) { 1328 // Creating a new URL object will throw if the protocol is missing. 1329 if (!/^https?:\/\//.test(textContent)) { 1330 textContent = "https://" + textContent; 1331 } 1332 1333 domain = URL.parse(textContent)?.hostname; 1334 if (!domain) { 1335 domain = fixup(textContent); 1336 } 1337 } else { 1338 domain = fixup(textContent); 1339 } 1340 1341 let processedDomain = this.#processDomain(domain, providerName); 1342 if (processedDomain && !extractedDomains.has(processedDomain)) { 1343 extractedDomains.add(processedDomain); 1344 } 1345 } 1346 } 1347 1348 /** 1349 * Processes a raw domain extracted from the SERP into its final form before 1350 * categorization. 1351 * 1352 * @param {string} domain 1353 * The domain extracted from the page. 1354 * @param {string} providerName 1355 * The provider associated with the page. 1356 * @returns {string} 1357 * The domain without any subdomains. 1358 */ 1359 #processDomain(domain, providerName) { 1360 if ( 1361 domain.startsWith(`${providerName}.`) || 1362 domain.includes(`.${providerName}.`) 1363 ) { 1364 return ""; 1365 } 1366 return this.#stripDomainOfSubdomains(domain); 1367 } 1368 1369 /** 1370 * Helper to strip domains of any subdomains. 1371 * 1372 * @param {string} domain 1373 * The domain to strip of any subdomains. 1374 * @returns {object} browser 1375 * The given domain with any subdomains removed. 1376 */ 1377 #stripDomainOfSubdomains(domain) { 1378 let tld; 1379 // Can throw an exception if the input has too few domain levels. 1380 try { 1381 tld = Services.eTLD.getKnownPublicSuffixFromHost(domain); 1382 } catch (ex) { 1383 return ""; 1384 } 1385 1386 let domainWithoutTLD = domain.substring(0, domain.length - tld.length); 1387 let secondLevelDomain = domainWithoutTLD.split(".").at(-2); 1388 1389 return secondLevelDomain ? `${secondLevelDomain}.${tld}` : ""; 1390 } 1391 1392 /** 1393 * Per a request from Data Science, we need to limit the number of domains 1394 * categorized to 10 non-ad domains and 10 ad domains. 1395 * 1396 * @param {number} nDomains The number of domains processed. 1397 * @returns {boolean} Whether or not the threshold was exceeded. 1398 */ 1399 #exceedsThreshold(nDomains) { 1400 return nDomains >= CATEGORIZATION_SETTINGS.MAX_DOMAINS_TO_CATEGORIZE; 1401 } 1402 } 1403 1404 export const domainExtractor = new DomainExtractor(); 1405 const searchProviders = new SearchProviders(); 1406 const searchAdImpression = new SearchAdImpression(); 1407 1408 const documentToEventCallbackMap = new WeakMap(); 1409 const documentToRemoveEventListenersMap = new WeakMap(); 1410 const documentToSubmitMap = new WeakMap(); 1411 1412 /** 1413 * SearchTelemetryChild monitors for pages that are partner searches, and 1414 * looks through them to find links which looks like adverts and sends back 1415 * a notification to SearchTelemetry for possible telemetry reporting. 1416 * 1417 * Only the partner details and the fact that at least one ad was found on the 1418 * page are returned to SearchTelemetry. If no ads are found, no notification is 1419 * given. 1420 */ 1421 export class SearchSERPTelemetryChild extends JSWindowActorChild { 1422 /** 1423 * Amount of time to wait after a page event before examining the page 1424 * for ads. 1425 * 1426 * @type {number | null} 1427 */ 1428 #adTimeout; 1429 1430 /** 1431 * Determines if there is a provider that matches the supplied URL and returns 1432 * the information associated with that provider. 1433 * 1434 * @param {string} url The url to check 1435 * @returns {Array | null} Returns null if there's no match, otherwise an array 1436 * of provider name and the provider information. 1437 */ 1438 _getProviderInfoForUrl(url) { 1439 return searchProviders.info?.find(info => info.searchPageRegexp.test(url)); 1440 } 1441 1442 /** 1443 * Checks to see if the page is a partner and has an ad link within it. If so, 1444 * it will notify SearchTelemetry. 1445 */ 1446 _checkForAdLink(eventType) { 1447 try { 1448 if (!this.contentWindow) { 1449 return; 1450 } 1451 } catch (ex) { 1452 // unload occurred before the timer expired 1453 return; 1454 } 1455 1456 let doc = this.document; 1457 let url = doc.documentURI; 1458 let providerInfo = this._getProviderInfoForUrl(url); 1459 if (!providerInfo) { 1460 return; 1461 } 1462 1463 let regexps = providerInfo.extraAdServersRegexps; 1464 let anchors = doc.getElementsByTagName("a"); 1465 let hasAds = false; 1466 for (let anchor of anchors) { 1467 if (!anchor.href) { 1468 continue; 1469 } 1470 for (let name of providerInfo.adServerAttributes) { 1471 hasAds = regexps.some(regexp => regexp.test(anchor.dataset[name])); 1472 if (hasAds) { 1473 break; 1474 } 1475 } 1476 if (!hasAds) { 1477 hasAds = regexps.some(regexp => regexp.test(anchor.href)); 1478 } 1479 if (hasAds) { 1480 break; 1481 } 1482 } 1483 1484 // If there are no ads in hrefs, they could be present in a subframe. 1485 if (!hasAds) { 1486 hasAds = this.#checkForSponsoredSubframes(this.document, providerInfo); 1487 } 1488 1489 if (hasAds) { 1490 this.sendAsyncMessage("SearchTelemetry:PageInfo", { 1491 hasAds, 1492 url, 1493 }); 1494 } 1495 1496 if ( 1497 providerInfo.components?.length && 1498 (eventType == "load" || eventType == "pageshow") 1499 ) { 1500 // Start performance measurements. 1501 let start = ChromeUtils.now(); 1502 let timerId = Glean.serp.categorizationDuration.start(); 1503 1504 let pageActionCallback = info => { 1505 if (info.action == "submitted") { 1506 documentToSubmitMap.set(doc, true); 1507 } 1508 this.sendAsyncMessage("SearchTelemetry:Action", { 1509 target: info.target, 1510 action: info.action, 1511 }); 1512 }; 1513 documentToEventCallbackMap.set(this.document, pageActionCallback); 1514 1515 let componentToVisibilityMap, hrefToComponentMap; 1516 try { 1517 let result = searchAdImpression.categorize(anchors, doc); 1518 componentToVisibilityMap = result.componentToVisibilityMap; 1519 hrefToComponentMap = result.hrefToComponentMap; 1520 } catch (e) { 1521 // Cancel the timer if an error encountered. 1522 Glean.serp.categorizationDuration.cancel(timerId); 1523 } 1524 1525 if (componentToVisibilityMap && hrefToComponentMap) { 1526 // End measurements. 1527 ChromeUtils.addProfilerMarker( 1528 "SearchSERPTelemetryChild._checkForAdLink", 1529 start, 1530 "Checked anchors for visibility" 1531 ); 1532 Glean.serp.categorizationDuration.stopAndAccumulate(timerId); 1533 this.sendAsyncMessage("SearchTelemetry:AdImpressions", { 1534 adImpressions: componentToVisibilityMap, 1535 hrefToComponentMap, 1536 url, 1537 }); 1538 } 1539 } 1540 1541 if ( 1542 lazy.serpEventTelemetryCategorization && 1543 lazy.serpEventTelemetryCategorizationRegionEnabled && 1544 providerInfo.domainExtraction && 1545 (eventType == "load" || eventType == "pageshow") 1546 ) { 1547 let start = ChromeUtils.now(); 1548 let nonAdDomains = domainExtractor.extractDomainsFromDocument( 1549 doc, 1550 providerInfo.domainExtraction.nonAds, 1551 providerInfo.telemetryId 1552 ); 1553 1554 let adDomains = domainExtractor.extractDomainsFromDocument( 1555 doc, 1556 providerInfo.domainExtraction.ads, 1557 providerInfo.telemetryId 1558 ); 1559 1560 this.sendAsyncMessage("SearchTelemetry:Domains", { 1561 url, 1562 nonAdDomains, 1563 adDomains, 1564 }); 1565 1566 ChromeUtils.addProfilerMarker( 1567 "SearchSERPTelemetryChild._checkForAdLink", 1568 start, 1569 "Extract domains from elements" 1570 ); 1571 } 1572 } 1573 1574 /** 1575 * Checks for the presence of certain components on the page that are 1576 * required for recording the page impression. 1577 */ 1578 #checkForPageImpressionComponents() { 1579 let url = this.document.documentURI; 1580 let providerInfo = this._getProviderInfoForUrl(url); 1581 if (providerInfo.components?.length) { 1582 searchAdImpression.providerInfo = providerInfo; 1583 let start = ChromeUtils.now(); 1584 let shoppingTabDisplayed = searchAdImpression.hasShoppingTab( 1585 this.document 1586 ); 1587 ChromeUtils.addProfilerMarker( 1588 "SearchSERPTelemetryChild.#recordImpression", 1589 start, 1590 "Checked for shopping tab" 1591 ); 1592 this.sendAsyncMessage("SearchTelemetry:PageImpression", { 1593 url, 1594 shoppingTabDisplayed, 1595 }); 1596 } 1597 } 1598 1599 #checkForSponsoredSubframes(document, providerInfo) { 1600 if (!providerInfo.subframes?.length) { 1601 return false; 1602 } 1603 1604 let subframes = document.querySelectorAll("iframe"); 1605 for (let subframe of subframes) { 1606 let foundMatch = providerInfo.subframes.some(obj => 1607 obj.regexp?.test(subframe.src) 1608 ); 1609 if ( 1610 foundMatch && 1611 subframe.checkVisibility({ 1612 visibilityProperty: true, 1613 contentVisibilityAuto: true, 1614 }) 1615 ) { 1616 return true; 1617 } 1618 } 1619 1620 return false; 1621 } 1622 1623 #removeEventListeners() { 1624 let callbacks = documentToRemoveEventListenersMap.get(this.document); 1625 if (callbacks) { 1626 for (let callback of callbacks) { 1627 callback(); 1628 } 1629 documentToRemoveEventListenersMap.delete(this.document); 1630 } 1631 } 1632 1633 /** 1634 * Handles events received from the actor child notifications. 1635 * 1636 * @param {object} event The event details. 1637 */ 1638 handleEvent(event) { 1639 if (!this.#urlIsSERP()) { 1640 return; 1641 } 1642 switch (event.type) { 1643 case "pageshow": { 1644 // If a page is loaded from the bfcache, we won't get a "DOMContentLoaded" 1645 // event, so we need to rely on "pageshow" in this case. Note: we do this 1646 // so that we remain consistent with the *.in-content:sap* count for the 1647 // SEARCH_COUNTS histogram. 1648 if (event.persisted) { 1649 this.#checkForPageImpressionComponents(); 1650 this.#check(event.type); 1651 } 1652 break; 1653 } 1654 case "DOMContentLoaded": { 1655 this.#check(event.type); 1656 break; 1657 } 1658 case "load": { 1659 this.#checkForPageImpressionComponents(); 1660 // We check both DOMContentLoaded and load in case the page has 1661 // taken a long time to load and the ad is only detected on load. 1662 // We still check at DOMContentLoaded because if the page hasn't 1663 // finished loading and the user navigates away, we still want to know 1664 // if there were ads on the page or not at that time. 1665 this.#check(event.type); 1666 break; 1667 } 1668 case "pagehide": { 1669 let callbacks = documentToRemoveEventListenersMap.get(this.document); 1670 if (callbacks) { 1671 for (let removeEventListenerCallback of callbacks) { 1672 removeEventListenerCallback(); 1673 } 1674 documentToRemoveEventListenersMap.delete(this.document); 1675 } 1676 this.#cancelCheck(); 1677 break; 1678 } 1679 } 1680 } 1681 1682 async receiveMessage(message) { 1683 switch (message.name) { 1684 case "SearchSERPTelemetry:WaitForSPAPageLoad": 1685 lazy.setTimeout(() => { 1686 this.#checkForPageImpressionComponents(); 1687 this._checkForAdLink("load"); 1688 }, Services.cpmm.sharedData.get(SEARCH_TELEMETRY_SHARED.SPA_LOAD_TIMEOUT)); 1689 break; 1690 case "SearchSERPTelemetry:StopTrackingDocument": 1691 this.#removeDocumentFromSubmitMap(); 1692 this.#removeEventListeners(); 1693 break; 1694 case "SearchSERPTelemetry:DidSubmit": 1695 return this.#didSubmit(); 1696 } 1697 return null; 1698 } 1699 1700 #didSubmit() { 1701 return documentToSubmitMap.get(this.document); 1702 } 1703 1704 #removeDocumentFromSubmitMap() { 1705 documentToSubmitMap.delete(this.document); 1706 } 1707 1708 #urlIsSERP() { 1709 let provider = this._getProviderInfoForUrl(this.document.documentURI); 1710 if (provider) { 1711 // Some URLs can match provider info but also be the provider's homepage 1712 // instead of a SERP. 1713 // e.g. https://example.com/ vs. https://example.com/?foo=bar 1714 // To check this, we look for the presence of the query parameter 1715 // that contains a search term. 1716 let queries = URL.fromURI(this.document.documentURIObject).searchParams; 1717 for (let queryParamName of provider.queryParamNames) { 1718 if (queries.get(queryParamName)) { 1719 return true; 1720 } 1721 } 1722 } 1723 return false; 1724 } 1725 1726 #cancelCheck() { 1727 if (this._waitForContentTimeout) { 1728 lazy.clearTimeout(this._waitForContentTimeout); 1729 } 1730 } 1731 1732 #check(eventType) { 1733 if (!this.#adTimeout) { 1734 this.#adTimeout = Services.cpmm.sharedData.get( 1735 SEARCH_TELEMETRY_SHARED.LOAD_TIMEOUT 1736 ); 1737 } 1738 this.#cancelCheck(); 1739 this._waitForContentTimeout = lazy.setTimeout(() => { 1740 this._checkForAdLink(eventType); 1741 }, this.#adTimeout); 1742 } 1743 }