tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

SearchSERPTelemetryChild.sys.mjs (56216B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
      6 
      7 const lazy = {};
      8 
      9 ChromeUtils.defineESModuleGetters(lazy, {
     10  clearTimeout: "resource://gre/modules/Timer.sys.mjs",
     11  SearchUtils: "moz-src:///toolkit/components/search/SearchUtils.sys.mjs",
     12  setTimeout: "resource://gre/modules/Timer.sys.mjs",
     13 });
     14 
     15 XPCOMUtils.defineLazyPreferenceGetter(
     16  lazy,
     17  "serpEventTelemetryCategorization",
     18  "browser.search.serpEventTelemetryCategorization.enabled",
     19  false
     20 );
     21 
     22 XPCOMUtils.defineLazyPreferenceGetter(
     23  lazy,
     24  "serpEventTelemetryCategorizationRegionEnabled",
     25  "browser.search.serpEventTelemetryCategorization.regionEnabled",
     26  false
     27 );
     28 
     29 ChromeUtils.defineLazyGetter(lazy, "logConsole", () => {
     30  return console.createInstance({
     31    prefix: "SearchTelemetry",
     32    maxLogLevel: lazy.SearchUtils.loggingEnabled ? "Debug" : "Warn",
     33  });
     34 });
     35 
     36 export const CATEGORIZATION_SETTINGS = {
     37  MAX_DOMAINS_TO_CATEGORIZE: 10,
     38  HAS_MATCHING_REGION: "SearchTelemetry:HasMatchingRegion",
     39 };
     40 
     41 // Duplicated from SearchSERPTelemetry to avoid loading the module on content
     42 // startup.
     43 const SEARCH_TELEMETRY_SHARED = {
     44  PROVIDER_INFO: "SearchTelemetry:ProviderInfo",
     45  LOAD_TIMEOUT: "SearchTelemetry:LoadTimeout",
     46  SPA_LOAD_TIMEOUT: "SearchTelemetry:SPALoadTimeout",
     47 };
     48 
     49 /**
     50 * Standard events mapped to the telemetry action.
     51 */
     52 const EVENT_TYPE_TO_ACTION = {
     53  click: "clicked",
     54 };
     55 
     56 /**
     57 * A map of object conditions mapped to the condition that should be run when
     58 * an event is triggered. The condition name is referenced in Remote Settings
     59 * under the optional `condition` string for an event listener.
     60 */
     61 const CONDITIONS = {
     62  keydownEnter: event => event.key == "Enter",
     63 };
     64 
     65 export const VISIBILITY_THRESHOLD = 0.5;
     66 
     67 /**
     68 * SearchProviders looks after keeping track of the search provider information
     69 * received from the main process.
     70 *
     71 * It is separate to SearchTelemetryChild so that it is not constructed for each
     72 * tab, but once per process.
     73 */
     74 class SearchProviders {
     75  constructor() {
     76    this._searchProviderInfo = null;
     77    Services.cpmm.sharedData.addEventListener("change", this);
     78  }
     79 
     80  /**
     81   * Gets the search provider information for any provider with advert information.
     82   * If there is nothing in the cache, it will obtain it from shared data.
     83   *
     84   * @returns {object} Returns the search provider information.
     85   * @see SearchTelemetry.sys.mjs
     86   */
     87  get info() {
     88    if (this._searchProviderInfo) {
     89      return this._searchProviderInfo;
     90    }
     91 
     92    this._searchProviderInfo = Services.cpmm.sharedData.get(
     93      SEARCH_TELEMETRY_SHARED.PROVIDER_INFO
     94    );
     95 
     96    if (!this._searchProviderInfo) {
     97      return null;
     98    }
     99 
    100    this._searchProviderInfo = this._searchProviderInfo
    101      // Filter-out non-ad providers so that we're not trying to match against
    102      // those unnecessarily.
    103      .filter(p => "extraAdServersRegexps" in p)
    104      // Pre-build the regular expressions.
    105      .map(p => {
    106        p.adServerAttributes = p.adServerAttributes ?? [];
    107        if (p.shoppingTab?.inspectRegexpInSERP) {
    108          p.shoppingTab.regexp = new RegExp(p.shoppingTab.regexp);
    109        }
    110        let subframes =
    111          p.subframes
    112            ?.filter(obj => obj.inspectRegexpInSERP)
    113            .map(obj => {
    114              return { ...obj, regexp: new RegExp(obj.regexp) };
    115            }) ?? [];
    116        return {
    117          ...p,
    118          searchPageRegexp: new RegExp(p.searchPageRegexp),
    119          extraAdServersRegexps: p.extraAdServersRegexps.map(
    120            r => new RegExp(r)
    121          ),
    122          subframes,
    123        };
    124      });
    125 
    126    return this._searchProviderInfo;
    127  }
    128 
    129  /**
    130   * Handles events received from sharedData notifications.
    131   *
    132   * @param {object} event The event details.
    133   */
    134  handleEvent(event) {
    135    switch (event.type) {
    136      case "change": {
    137        if (event.changedKeys.includes(SEARCH_TELEMETRY_SHARED.PROVIDER_INFO)) {
    138          // Just null out the provider information for now, we'll fetch it next
    139          // time we need it.
    140          this._searchProviderInfo = null;
    141        }
    142        break;
    143      }
    144    }
    145  }
    146 }
    147 
    148 /**
    149 * @typedef {object} EventListenerParam
    150 * @property {string} eventType
    151 *  The type of event the listener should listen for. If the event type is
    152 *  non-standard, it should correspond to a definition in
    153 *  CUSTOM_EVENT_TYPE_TO_DATA that will re-map it to a standard type. TODO
    154 * @property {string} target
    155 *  The type of component that was the source of the event.
    156 * @property {string | null} action
    157 *  The action that should be reported in telemetry.
    158 */
    159 
    160 /**
    161 * Provides a way to add listeners to elements, as well as unload them.
    162 */
    163 class ListenerHelper {
    164  /**
    165   * Adds each event listener in an array of event listeners to each element
    166   * in an array of elements, and sets their unloading.
    167   *
    168   * @param {Array<Element>} elements
    169   *  DOM elements to add event listeners to.
    170   * @param {Array<EventListenerParam>} eventListenerParams
    171   *  The type of event to add the listener to.
    172   * @param {string} target
    173   */
    174  static addListeners(elements, eventListenerParams, target) {
    175    if (!elements?.length || !eventListenerParams?.length) {
    176      return;
    177    }
    178 
    179    let document = elements[0].ownerGlobal.document;
    180    let callback = documentToEventCallbackMap.get(document);
    181    if (!callback) {
    182      return;
    183    }
    184 
    185    // The map might have entries from previous callers, so we must ensure
    186    // we don't discard existing event listener callbacks.
    187    let removeListenerCallbacks = [];
    188    if (documentToRemoveEventListenersMap.has(document)) {
    189      removeListenerCallbacks = documentToRemoveEventListenersMap.get(document);
    190    }
    191 
    192    for (let params of eventListenerParams) {
    193      let removeListeners = ListenerHelper.addListener(
    194        elements,
    195        params,
    196        target,
    197        callback
    198      );
    199      removeListenerCallbacks = removeListenerCallbacks.concat(removeListeners);
    200    }
    201 
    202    documentToRemoveEventListenersMap.set(document, removeListenerCallbacks);
    203  }
    204 
    205  /**
    206   * Add an event listener to each element in an array of elements.
    207   *
    208   * @param {Array<Element>} elements
    209   *  DOM elements to add event listeners to.
    210   * @param {EventListenerParam} eventListenerParam
    211   * @param {string} target
    212   * @param {Function} callback
    213   * @returns {Array<Function>} Array of remove event listener functions.
    214   */
    215  static addListener(elements, eventListenerParam, target, callback) {
    216    let { action, eventType, target: customTarget } = eventListenerParam;
    217 
    218    if (customTarget) {
    219      target = customTarget;
    220    }
    221 
    222    if (!action) {
    223      action = EVENT_TYPE_TO_ACTION[eventType];
    224      if (!action) {
    225        return [];
    226      }
    227    }
    228 
    229    // Some events might have specific conditions we want to check before
    230    // registering an engagement event.
    231    let eventCallback;
    232    if (eventListenerParam.condition) {
    233      if (CONDITIONS[eventListenerParam.condition]) {
    234        let condition = CONDITIONS[eventListenerParam.condition];
    235        eventCallback = async event => {
    236          let start = ChromeUtils.now();
    237          if (condition(event)) {
    238            callback({ action, target });
    239          }
    240          ChromeUtils.addProfilerMarker(
    241            "SearchSERPTelemetryChild._eventCallback",
    242            start,
    243            "Call cached function before callback."
    244          );
    245        };
    246      } else {
    247        // If a component included a condition, but it wasn't found it is
    248        // due to the fact that it was added in a more recent Firefox version
    249        // than what is provided via search-telemetry-v2. Since the version of
    250        // Firefox the user is using doesn't include this condition,
    251        // we shouldn't add the event.
    252        return [];
    253      }
    254    } else {
    255      eventCallback = () => {
    256        callback({ action, target });
    257      };
    258    }
    259 
    260    let removeListenerCallbacks = [];
    261    for (let element of elements) {
    262      element.addEventListener(eventType, eventCallback);
    263      removeListenerCallbacks.push(() => {
    264        element.removeEventListener(eventType, eventCallback);
    265      });
    266    }
    267    return removeListenerCallbacks;
    268  }
    269 }
    270 
    271 /**
    272 * Scans SERPs for ad components.
    273 */
    274 class SearchAdImpression {
    275  /**
    276   * A reference to ad component information that is used if an anchor
    277   * element could not be categorized to a specific ad component.
    278   *
    279   * @type {object}
    280   */
    281  #defaultComponent = null;
    282 
    283  /**
    284   * Maps DOM elements to AdData.
    285   *
    286   * @type {Map<Element, AdData>}
    287   *
    288   * @typedef AdData
    289   * @type {object}
    290   * @property {string} type
    291   *  The type of ad component.
    292   * @property {number} adsLoaded
    293   *  The number of ads counted as loaded for the component.
    294   * @property {boolean} countChildren
    295   *  Whether all the children were counted for the component.
    296   */
    297  #elementToAdDataMap = new Map();
    298 
    299  /**
    300   * An array of components to do a top-down search.
    301   */
    302  #topDownComponents = [];
    303 
    304  /**
    305   * A reference the providerInfo for this SERP.
    306   *
    307   * @type {object}
    308   */
    309  #providerInfo = null;
    310 
    311  set providerInfo(providerInfo) {
    312    if (this.#providerInfo?.telemetryId == providerInfo.telemetryId) {
    313      return;
    314    }
    315 
    316    this.#providerInfo = providerInfo;
    317 
    318    // Reset values.
    319    this.#topDownComponents = [];
    320 
    321    for (let component of this.#providerInfo.components) {
    322      if (component.default) {
    323        this.#defaultComponent = component;
    324        continue;
    325      }
    326      if (component.topDown) {
    327        this.#topDownComponents.push(component);
    328      }
    329    }
    330  }
    331 
    332  /**
    333   * Check if the page has a shopping tab.
    334   *
    335   * @param {Document} document
    336   * @return {boolean}
    337   *   Whether the page has a shopping tab. Defaults to false.
    338   */
    339  hasShoppingTab(document) {
    340    if (!this.#providerInfo?.shoppingTab) {
    341      return false;
    342    }
    343 
    344    // If a provider has the inspectRegexpInSERP, we assume there must be an
    345    // associated regexp that must be used on any hrefs matched by the elements
    346    // found using the selector. If inspectRegexpInSERP is false, then check if
    347    // the number of items found using the selector matches exactly one element
    348    // to ensure we've used a fine-grained search.
    349    let elements = document.querySelectorAll(
    350      this.#providerInfo.shoppingTab.selector
    351    );
    352    if (this.#providerInfo.shoppingTab.inspectRegexpInSERP) {
    353      let regexp = this.#providerInfo.shoppingTab.regexp;
    354      for (let element of elements) {
    355        let href = element.getAttribute("href");
    356        if (href && regexp.test(href)) {
    357          this.#recordElementData(element, {
    358            type: "shopping_tab",
    359            count: 1,
    360          });
    361          return true;
    362        }
    363      }
    364    } else if (elements.length == 1) {
    365      this.#recordElementData(elements[0], {
    366        type: "shopping_tab",
    367        count: 1,
    368      });
    369      return true;
    370    }
    371    return false;
    372  }
    373 
    374  /**
    375   * Examine the list of anchors and the document object and find components
    376   * on the page.
    377   *
    378   * With the list of anchors, go through each and find the component it
    379   * belongs to and save it in elementToAdDataMap.
    380   *
    381   * Then, with the document object find components and save the results to
    382   * elementToAdDataMap.
    383   *
    384   * Lastly, combine the results together in a new Map that contains the number
    385   * of loaded, visible, and blocked results for the component.
    386   *
    387   * @param {HTMLCollectionOf<HTMLAnchorElement>} anchors
    388   * @param {Document} document
    389   *
    390   * @returns {Map<string, object>}
    391   *  A map where the key is a string containing the type of ad component
    392   *  and the value is an object containing the number of adsLoaded,
    393   *  adsVisible, and adsHidden within the component.
    394   */
    395  categorize(anchors, document) {
    396    // Used for various functions to make relative URLs absolute.
    397    let origin = new URL(document.documentURI).origin;
    398 
    399    // Bottom up approach.
    400    this.#categorizeAnchors(anchors, origin);
    401 
    402    // Top down approach.
    403    this.#categorizeDocument(document);
    404 
    405    let componentToVisibilityMap = new Map();
    406    let hrefToComponentMap = new Map();
    407 
    408    let innerWindowHeight = document.ownerGlobal.innerHeight;
    409    let scrollY = document.ownerGlobal.scrollY;
    410 
    411    // Iterate over the results:
    412    // - If it's searchbox add event listeners.
    413    // - If it is a non_ads_link, map its href to component type.
    414    // - For others, map its component type and check visibility.
    415    for (let [element, data] of this.#elementToAdDataMap.entries()) {
    416      if (data.type == "incontent_searchbox") {
    417        // Bug 1880413: Deprecate hard coding the incontent search box.
    418        // If searchbox has child elements, observe those, otherwise
    419        // fallback to its parent element.
    420        let searchElements = data.childElements.length
    421          ? data.childElements
    422          : [element];
    423        ListenerHelper.addListeners(
    424          searchElements,
    425          [
    426            { eventType: "click", target: data.type },
    427            {
    428              eventType: "keydown",
    429              target: data.type,
    430              action: "submitted",
    431              condition: "keydownEnter",
    432            },
    433          ],
    434          data.type
    435        );
    436        continue;
    437      }
    438      if (data.childElements.length) {
    439        for (let child of data.childElements) {
    440          let href = this.#extractHref(child, origin);
    441          if (href) {
    442            hrefToComponentMap.set(href, data.type);
    443          }
    444        }
    445      } else {
    446        let href = this.#extractHref(element, origin);
    447        if (href) {
    448          hrefToComponentMap.set(href, data.type);
    449        }
    450      }
    451 
    452      // If the component is a non_ads_link, skip visibility checks.
    453      if (data.type == "non_ads_link") {
    454        continue;
    455      }
    456 
    457      // If proxy children were found, check the visibility of all of them
    458      // otherwise just check the visiblity of the first child.
    459      let childElements;
    460      if (data.proxyChildElements.length) {
    461        childElements = data.proxyChildElements;
    462      } else if (data.childElements.length) {
    463        childElements = [data.childElements[0]];
    464      }
    465 
    466      let count = this.#countVisibleAndHiddenAds(
    467        element,
    468        data.adsLoaded,
    469        childElements,
    470        innerWindowHeight,
    471        scrollY
    472      );
    473      if (componentToVisibilityMap.has(data.type)) {
    474        let componentInfo = componentToVisibilityMap.get(data.type);
    475        componentInfo.adsLoaded += data.adsLoaded;
    476        componentInfo.adsVisible += count.adsVisible;
    477        componentInfo.adsHidden += count.adsHidden;
    478      } else {
    479        componentToVisibilityMap.set(data.type, {
    480          adsLoaded: data.adsLoaded,
    481          adsVisible: count.adsVisible,
    482          adsHidden: count.adsHidden,
    483        });
    484      }
    485    }
    486 
    487    // Release the DOM elements from the Map.
    488    this.#elementToAdDataMap.clear();
    489 
    490    return { componentToVisibilityMap, hrefToComponentMap };
    491  }
    492 
    493  /**
    494   * Given an element, find the href that is most likely to make the request if
    495   * the element is clicked. If the element contains a specific data attribute
    496   * known to contain the url used to make the initial request, use it,
    497   * otherwise use its href. Specific character conversions are done to mimic
    498   * conversions likely to take place when urls are observed in network
    499   * activity.
    500   *
    501   * @param {Element} element
    502   *  The element to inspect.
    503   * @param {string} origin
    504   *  The origin for relative urls.
    505   * @returns {string}
    506   *   The href of the element.
    507   */
    508  #extractHref(element, origin) {
    509    let href;
    510    // Prioritize the href from a known data attribute value instead of
    511    // its href property, as the former is the initial url the page will
    512    // navigate to before being re-directed to the href.
    513    for (let name of this.#providerInfo.adServerAttributes) {
    514      if (
    515        element.dataset[name] &&
    516        this.#providerInfo.extraAdServersRegexps.some(regexp =>
    517          regexp.test(element.dataset[name])
    518        )
    519      ) {
    520        href = element.dataset[name];
    521        break;
    522      }
    523    }
    524    // If a data attribute value was not found, fallback to the href.
    525    href = href ?? element.getAttribute("href");
    526    if (!href) {
    527      return "";
    528    }
    529 
    530    let url = URL.parse(href, origin);
    531    if (!url || (url.protocol !== "https:" && url.protocol !== "http:")) {
    532      return "";
    533    }
    534 
    535    return url.href;
    536  }
    537 
    538  /**
    539   * Given a list of anchor elements, group them into ad components.
    540   *
    541   * The first step in the process is to check if the anchor should be
    542   * inspected. This is based on whether it contains an href or a
    543   * data-attribute values that matches an ad link, or if it contains a
    544   * pattern caught by a components included regular expression.
    545   *
    546   * Determine which component it belongs to and the number of matches for
    547   * the component. The heuristic is described in findDataForAnchor.
    548   * If there was a result and we haven't seen it before, save it in
    549   * elementToAdDataMap.
    550   *
    551   * @param {HTMLCollectionOf<HTMLAnchorElement>} anchors
    552   *  The list of anchors to inspect.
    553   * @param {string} origin
    554   *  The origin of the document the anchors belong to.
    555   */
    556  #categorizeAnchors(anchors, origin) {
    557    for (let anchor of anchors) {
    558      if (this.#shouldInspectAnchor(anchor, origin)) {
    559        let result;
    560        try {
    561          // We use a schema to ensure the values for each search provider
    562          // aligns to what is expected, but tests don't enforce the schema
    563          // and thus, can technically input faulty values.
    564          result = this.#findDataForAnchor(anchor);
    565        } catch (ex) {
    566          lazy.logConsole.error("Could not find data for anchor:", ex);
    567          continue;
    568        }
    569        if (result) {
    570          this.#recordElementData(result.element, {
    571            type: result.type,
    572            count: result.count,
    573            proxyChildElements: result.proxyChildElements,
    574            childElements: result.childElements,
    575          });
    576        }
    577        if (result?.relatedElements?.length) {
    578          // Bug 1880413: Deprecate related elements.
    579          // Bottom-up approach with related elements are only used for
    580          // non-link elements related to ads, like carousel arrows.
    581          ListenerHelper.addListeners(
    582            result.relatedElements,
    583            [
    584              {
    585                action: "expanded",
    586                eventType: "click",
    587              },
    588            ],
    589            result.type
    590          );
    591        }
    592      }
    593    }
    594  }
    595 
    596  /**
    597   * Find components from the document object. This is mostly relevant for
    598   * components that are non-ads and don't have an obvious regular expression
    599   * that could match the pattern of the href.
    600   *
    601   * @param {Document} document
    602   */
    603  #categorizeDocument(document) {
    604    // using the subset of components that are top down,
    605    // go through each one.
    606    for (let component of this.#topDownComponents) {
    607      // Top-down searches must have the topDown attribute.
    608      if (!component.topDown) {
    609        continue;
    610      }
    611      // Top down searches must include a parent.
    612      if (!component.included?.parent) {
    613        continue;
    614      }
    615      let parents = document.querySelectorAll(
    616        component.included.parent.selector
    617      );
    618      if (parents.length) {
    619        let eventListeners = component.included.parent.eventListeners;
    620        if (eventListeners?.length) {
    621          ListenerHelper.addListeners(parents, eventListeners, component.type);
    622        }
    623        for (let parent of parents) {
    624          // Bug 1880413: Deprecate related elements.
    625          // Top-down related elements are either used for auto-suggested
    626          // elements of a searchbox, or elements on a page which we can't
    627          // find through a bottom up approach but we want an add a listener,
    628          // like carousels with arrows.
    629          if (component.included.related?.selector) {
    630            let relatedElements = parent.querySelectorAll(
    631              component.included.related.selector
    632            );
    633            if (relatedElements.length) {
    634              // For the search box, related elements with event listeners are
    635              // auto-suggested terms. For everything else (e.g. carousels)
    636              // they are expanded.
    637              ListenerHelper.addListeners(
    638                relatedElements,
    639                [
    640                  {
    641                    action:
    642                      component.type == "incontent_searchbox"
    643                        ? "submitted"
    644                        : "expanded",
    645                    eventType: "click",
    646                  },
    647                ],
    648                component.type
    649              );
    650            }
    651          }
    652          if (component.included.children) {
    653            for (let child of component.included.children) {
    654              let childElements = parent.querySelectorAll(child.selector);
    655              if (childElements.length) {
    656                if (child.eventListeners) {
    657                  childElements = Array.from(childElements);
    658                  ListenerHelper.addListeners(
    659                    childElements,
    660                    child.eventListeners,
    661                    child.type ?? component.type
    662                  );
    663                }
    664                if (!child.skipCount) {
    665                  this.#recordElementData(parent, {
    666                    type: component.type,
    667                    childElements: Array.from(childElements),
    668                  });
    669                }
    670              }
    671            }
    672          } else if (!component.included.parent.skipCount) {
    673            this.#recordElementData(parent, {
    674              type: component.type,
    675            });
    676          }
    677        }
    678      }
    679    }
    680  }
    681 
    682  /**
    683   * Evaluates whether an anchor should be inspected based on matching
    684   * regular expressions on either its href or specified data-attribute values.
    685   *
    686   * @param {HTMLAnchorElement} anchor
    687   * @param {string} origin
    688   * @returns {boolean}
    689   */
    690  #shouldInspectAnchor(anchor, origin) {
    691    let href = anchor.getAttribute("href");
    692    if (!href) {
    693      return false;
    694    }
    695 
    696    // Some hrefs might be relative.
    697    if (!href.startsWith("https://") && !href.startsWith("http://")) {
    698      href = origin + href;
    699    }
    700 
    701    let regexps = this.#providerInfo.extraAdServersRegexps;
    702    // Anchors can contain ad links in a data-attribute.
    703    for (let name of this.#providerInfo.adServerAttributes) {
    704      let attributeValue = anchor.dataset[name];
    705      if (
    706        attributeValue &&
    707        regexps.some(regexp => regexp.test(attributeValue))
    708      ) {
    709        return true;
    710      }
    711    }
    712    // Anchors can contain ad links in a specific href.
    713    if (regexps.some(regexp => regexp.test(href))) {
    714      return true;
    715    }
    716    return false;
    717  }
    718 
    719  /**
    720   * Find the component data for an anchor.
    721   *
    722   * To categorize the anchor, we iterate over the list of possible components
    723   * the anchor could be categorized. If the component is default, we skip
    724   * checking because the fallback option for all anchor links is the default.
    725   *
    726   * First, get the "parent" of the anchor which best represents the DOM element
    727   * that contains the anchor links for the component and no other component.
    728   * This parent will be cached so that other anchors that share the same
    729   * parent can be counted together.
    730   *
    731   * The check for a parent is a loop because we can define more than one best
    732   * parent since on certain SERPs, it's possible for a "better" DOM element
    733   * parent to appear occasionally.
    734   *
    735   * If no parent is found, skip this component.
    736   *
    737   * If a parent was found, check for specific child elements.
    738   *
    739   * Finding child DOM elements of a parent is optional. One reason to do so is
    740   * to use child elements instead of anchor links to count the number of ads for
    741   * a component via the `countChildren` property. This is provided because some ads
    742   * (i.e. carousels) have multiple ad links in a single child element that go to the
    743   * same location. In this scenario, all instances of the child are recorded as ads.
    744   * Subsequent anchor elements that map to the same parent are ignored.
    745   *
    746   * Whether or not a child was found, return the information that was found,
    747   * including whether or not all child elements were counted instead of anchors.
    748   *
    749   * If another anchor belonging to a parent that was previously recorded is the input
    750   * for this function, we either increment the ad count by 1 or don't increment the ad
    751   * count because the parent used `countChildren` completed the calculation in a
    752   * previous step.
    753   *
    754   * @param {HTMLAnchorElement} anchor
    755   *  The anchor to be inspected.
    756   * @returns {object | null}
    757   *  An object containing the element representing the root DOM element for
    758   *  the component, the type of component, how many ads were counted,
    759   *  and whether or not the count was of all the children.
    760   * @throws {Error}
    761   *  Will throw an error if certain properties of a component are missing.
    762   *  Required properties are listed in search-telemetry-v2-schema.json.
    763   */
  #findDataForAnchor(anchor) {
    // Components are tried in the order given by the provider info. The first
    // bottom-up component whose parent selector matches an ancestor of the
    // anchor (or the anchor itself, per `closest`) decides the result; the
    // loop body always returns once a parent has been found.
    for (let component of this.#providerInfo.components) {
      // First, check various conditions for skipping a component.

      // A component should always have at least one included statement.
      if (!component.included) {
        continue;
      }

      // Top down searches are done after the bottom up search.
      if (component.topDown) {
        continue;
      }

      // The default component doesn't need to be checked,
      // as it will be the fallback option.
      if (component.default) {
        continue;
      }

      // The anchor shouldn't belong to an excluded parent component if one
      // is provided.
      if (
        component.excluded?.parent?.selector &&
        anchor.closest(component.excluded.parent.selector)
      ) {
        continue;
      }

      // All components with included should have a parent entry.
      if (!component.included.parent) {
        continue;
      }

      // Find the parent of the anchor.
      let parent = anchor.closest(component.included.parent.selector);

      if (!parent) {
        continue;
      }

      // If a parent was found, we may want to ignore reporting the element
      // to telemetry.
      if (component.included.parent.skipCount) {
        return null;
      }

      // If we've already inspected the parent, add the child element to the
      // list of anchors. Don't increment the ads loaded count, as we only care
      // about grouping the anchor with the correct parent. Note that this
      // result deliberately carries no `type` or `count`.
      if (this.#elementToAdDataMap.has(parent)) {
        return {
          element: parent,
          childElements: [anchor],
        };
      }

      // Elements related to the component, looked up inside the parent. This
      // is an empty array when no related selector is configured and a
      // NodeList otherwise.
      let relatedElements = [];
      if (component.included.related?.selector) {
        relatedElements = parent.querySelectorAll(
          component.included.related.selector
        );
      }

      // If the component defines children, the first child definition that
      // matches inside the parent decides the type (and possibly the count).
      if (component.included.children) {
        // Look for the first instance of a matching child selector.
        for (let child of component.included.children) {
          // If counting by child, get all of them at once.
          if (child.countChildren) {
            let proxyChildElements = parent.querySelectorAll(child.selector);
            // NOTE(review): in this branch `skipCount` returns null even when
            // the selector matched nothing, whereas the non-counting branch
            // below only honours `skipCount` after a match. Confirm whether
            // the asymmetry is intended.
            if (child.skipCount) {
              return null;
            }
            if (proxyChildElements.length) {
              return {
                element: parent,
                // A child definition may override the component's type.
                type: child.type ?? component.type,
                proxyChildElements: Array.from(proxyChildElements),
                count: proxyChildElements.length,
                childElements: [anchor],
                relatedElements,
              };
            }
          } else if (parent.querySelector(child.selector)) {
            if (child.skipCount) {
              return null;
            }
            return {
              element: parent,
              type: child.type ?? component.type,
              childElements: [anchor],
              relatedElements,
            };
          }
        }
      }
      // If no children were defined for this component, or none were found
      // in the DOM, use the default definition.
      return {
        element: parent,
        type: component.type,
        childElements: [anchor],
        relatedElements,
      };
    }
    // If no component was found, use default values. The anchor itself
    // becomes the component's root element.
    return {
      element: anchor,
      type: this.#defaultComponent.type,
    };
  }
    876 
    877  /**
    878   * Determines whether or not an ad was visible or hidden.
    879   *
    880   * An ad is considered visible if the parent element containing the
    881   * component has non-zero dimensions, and all child element in the
    882   * component have non-zero dimensions and mostly (50% height) fits within
    883   * the window at the time when the impression was taken. If the element is to
    884   * the left of the visible area, we also consider it viewed as it's possible
    885   * the user interacted with a carousel which typically scrolls new content
    886   * leftward.
    887   *
    888   * For some components, like text ads, we don't send every child
    889   * element for visibility, just the first text ad. For other components
    890   * like carousels, we send all child elements because we do care about
    891   * counting how many elements of the carousel were visible.
    892   *
    893   * @param {Element} element
    894   *  Element to be inspected
    895   * @param {number} adsLoaded
    896   *  Number of ads initially determined to be loaded for this element.
    897   * @param {Array<Element>} childElements
    898   *  List of children belonging to element.
    899   * @param {number} innerWindowHeight
    900   *  Current height of the window containing the elements.
    901   * @param {number} scrollY
    902   *  Current distance the window has been scrolled.
    903   * @returns {object}
    904   *  Contains adsVisible which is the number of ads shown for the element
    905   *  and adsHidden, the number of ads not visible to the user.
    906   */
    907  #countVisibleAndHiddenAds(
    908    element,
    909    adsLoaded,
    910    childElements,
    911    innerWindowHeight,
    912    scrollY
    913  ) {
    914    let elementRect =
    915      element.ownerGlobal.windowUtils.getBoundsWithoutFlushing(element);
    916 
    917    // If the parent element is not visible, assume all ads within are
    918    // also not visible.
    919    if (
    920      !element.checkVisibility({
    921        visibilityProperty: true,
    922        opacityProperty: true,
    923      })
    924    ) {
    925      Glean.serp.adsBlockedCount.hidden_parent.add();
    926      return {
    927        adsVisible: 0,
    928        adsHidden: adsLoaded,
    929      };
    930    }
    931 
    932    // If an ad is far above the possible visible area of a window, an
    933    // adblocker might be doing it as a workaround for blocking the ad.
    934    if (
    935      elementRect.bottom < 0 &&
    936      innerWindowHeight + scrollY + elementRect.bottom < 0
    937    ) {
    938      Glean.serp.adsBlockedCount.beyond_viewport.add();
    939      return {
    940        adsVisible: 0,
    941        adsHidden: adsLoaded,
    942      };
    943    }
    944 
    945    // If the element has no child elements, check if the element
    946    // was ever viewed by the user at this moment.
    947    if (!childElements?.length) {
    948      // Most ads don't require horizontal scrolling to view it. Thus, we only
    949      // check if it could've appeared with some vertical scrolling.
    950      let visible = VisibilityHelper.elementWasVisibleVertically(
    951        elementRect,
    952        innerWindowHeight,
    953        VISIBILITY_THRESHOLD
    954      );
    955      return {
    956        adsVisible: visible ? 1 : 0,
    957        adsHidden: 0,
    958      };
    959    }
    960 
    961    let adsVisible = 0;
    962    let adsHidden = 0;
    963    for (let child of childElements) {
    964      if (
    965        !child.checkVisibility({
    966          visibilityProperty: true,
    967          opacityProperty: true,
    968        })
    969      ) {
    970        adsHidden += 1;
    971        Glean.serp.adsBlockedCount.hidden_child.add();
    972        continue;
    973      }
    974 
    975      let itemRect =
    976        child.ownerGlobal.windowUtils.getBoundsWithoutFlushing(child);
    977      // If the child element is to the right of the containing element and
    978      // can't be viewed, skip it. We do this check because some elements like
    979      // carousels can hide additional content horizontally. We don't apply the
    980      // same logic if the element is to the left because we assume carousels
    981      // scroll elements to the left when the user wants to see more contents.
    982      // Thus, the elements to the left must've been visible.
    983      if (
    984        !VisibilityHelper.childElementWasVisibleHorizontally(
    985          elementRect,
    986          itemRect,
    987          VISIBILITY_THRESHOLD
    988        )
    989      ) {
    990        continue;
    991      }
    992 
    993      // If the height of child element is not visible, skip it.
    994      if (
    995        !VisibilityHelper.elementWasVisibleVertically(
    996          itemRect,
    997          innerWindowHeight,
    998          VISIBILITY_THRESHOLD
    999        )
   1000      ) {
   1001        continue;
   1002      }
   1003      ++adsVisible;
   1004    }
   1005 
   1006    return {
   1007      adsVisible,
   1008      adsHidden,
   1009    };
   1010  }
   1011 
   1012  /**
   1013   * Caches ad data for a DOM element. The key of the map is by Element rather
   1014   * than Component for fast lookup on whether an Element has been already been
   1015   * categorized as a component. Subsequent calls to this passing the same
   1016   * element will update the list of child elements.
   1017   *
   1018   * @param {Element} element
   1019   *  The element considered to be the root for the component.
   1020   * @param {object} params
   1021   *  Various parameters that can be recorded. Whether the input values exist
   1022   *  or not depends on which component was found, which heuristic should be used
   1023   *  to determine whether an ad was visible, and whether we've already seen this
   1024   *  element.
   1025   * @param {string | null} params.type
   1026   *  The type of component.
   1027   * @param {number} params.count
   1028   *  The number of ads found for a component. The number represents either
   1029   *  the number of elements that match an ad expression or the number of DOM
   1030   *  elements containing an ad link.
   1031   * @param {Array<Element>} params.proxyChildElements
   1032   *  An array of DOM elements that should be inspected for visibility instead
   1033   *  of the actual child elements, possibly because they are grouped.
   1034   * @param {Array<Element>} params.childElements
   1035   *  An array of DOM elements to inspect.
   1036   */
   1037  #recordElementData(
   1038    element,
   1039    { type, count = 1, proxyChildElements = [], childElements = [] } = {}
   1040  ) {
   1041    if (this.#elementToAdDataMap.has(element)) {
   1042      let recordedValues = this.#elementToAdDataMap.get(element);
   1043      if (childElements.length) {
   1044        recordedValues.childElements =
   1045          recordedValues.childElements.concat(childElements);
   1046      }
   1047    } else {
   1048      this.#elementToAdDataMap.set(element, {
   1049        type,
   1050        adsLoaded: count,
   1051        proxyChildElements,
   1052        childElements,
   1053      });
   1054    }
   1055  }
   1056 }
   1057 
   1058 export class VisibilityHelper {
   1059  /**
   1060   * Whether the element was vertically visible. It assumes elements above the
   1061   * viewable area were visible at some point in time.
   1062   *
   1063   * @param {DOMRect} rect
   1064   *   The bounds of the element.
   1065   * @param {number} innerWindowHeight
   1066   *   The height of the window.
   1067   * @param {number} threshold
   1068   *   What percentage of the element should vertically be visible.
   1069   * @returns {boolean}
   1070   *   Whether the element was visible.
   1071   */
   1072  static elementWasVisibleVertically(rect, innerWindowHeight, threshold) {
   1073    return rect.top + rect.height * threshold <= innerWindowHeight;
   1074  }
   1075 
   1076  /**
   1077   * Whether the child element was horizontally visible. It assumes elements to
   1078   * the left were visible at some point in time.
   1079   *
   1080   * @param {DOMRect} parentRect
   1081   *   The bounds of the element that contains the child.
   1082   * @param {DOMRect} childRect
   1083   *   The bounds of the child element.
   1084   * @param {number} threshold
   1085   *   What percentage of the child element should horizontally be visible.
   1086   * @returns {boolean}
   1087   *   Whether the child element was visible.
   1088   */
   1089  static childElementWasVisibleHorizontally(parentRect, childRect, threshold) {
   1090    return (
   1091      childRect.left + childRect.width * threshold <=
   1092      parentRect.left + parentRect.width
   1093    );
   1094  }
   1095 }
   1096 
   1097 /**
   1098 * An object indicating which elements to examine for domains to extract and
   1099 * which heuristic technique to use to extract that element's domain.
   1100 *
   1101 * @typedef {object} ExtractorInfo
   1102 * @property {string} selectors
   1103 *  A string representing the CSS selector that targets the elements on the
   1104 *  page that contain domains we want to extract.
   1105 * @property {string} method
   1106 *  A string representing which domain extraction heuristic to use.
   1107 *  One of: "href", "dataAttribute" or "textContent".
   1108 * @property {object | null} options
   1109 *  Options related to the domain extraction heuristic used.
   1110 * @property {string | null} options.dataAttributeKey
   1111 *  The key name of the data attribute to lookup.
   1112 * @property {string | null} options.queryParamKey
   1113 *  The key name of the query param value to lookup.
   1114 * @property {boolean | null} options.queryParamValueIsHref
   1115 *  Whether the query param value is expected to contain an href.
   1116 */
   1117 
   1118 /**
   1119 * DomainExtractor examines elements on a page to retrieve the domains.
   1120 */
   1121 class DomainExtractor {
   1122  /**
   1123   * Extract domains from the page using an array of information pertaining to
   1124   * the SERP.
   1125   *
   1126   * @param {Document} document
   1127   *  The document for the SERP we are extracting domains from.
   1128   * @param {Array<ExtractorInfo>} extractorInfos
   1129   *  Information used to target the domains we need to extract.
   1130   * @param {string} providerName
   1131   *  Name of the search provider.
   1132   * @return {Set<string>}
   1133   *  A set of the domains extracted from the page.
   1134   */
   1135  extractDomainsFromDocument(document, extractorInfos, providerName) {
   1136    let extractedDomains = new Set();
   1137    if (!extractorInfos?.length) {
   1138      return extractedDomains;
   1139    }
   1140 
   1141    for (let extractorInfo of extractorInfos) {
   1142      if (!extractorInfo.selectors) {
   1143        continue;
   1144      }
   1145 
   1146      let elements = document.querySelectorAll(extractorInfo.selectors);
   1147      if (!elements.length) {
   1148        continue;
   1149      }
   1150 
   1151      switch (extractorInfo.method) {
   1152        case "href": {
   1153          // Origin is used in case a URL needs to be made absolute.
   1154          let origin = new URL(document.documentURI).origin;
   1155          this.#fromElementsConvertHrefsIntoDomains(
   1156            elements,
   1157            origin,
   1158            providerName,
   1159            extractedDomains,
   1160            extractorInfo.options?.queryParamKey,
   1161            extractorInfo.options?.queryParamValueIsHref
   1162          );
   1163          break;
   1164        }
   1165        case "dataAttribute": {
   1166          this.#fromElementsRetrieveDataAttributeValues(
   1167            elements,
   1168            providerName,
   1169            extractorInfo.options?.dataAttributeKey,
   1170            extractedDomains
   1171          );
   1172          break;
   1173        }
   1174        case "textContent": {
   1175          this.#fromElementsRetrieveTextContent(
   1176            elements,
   1177            extractedDomains,
   1178            providerName
   1179          );
   1180          break;
   1181        }
   1182      }
   1183    }
   1184 
   1185    return extractedDomains;
   1186  }
   1187 
   1188  /**
   1189   * Given a list of elements, extract domains using href attributes. If the
   1190   * URL in the href includes the specified query param, the domain will be
   1191   * that query param's value. Otherwise it will be the hostname of the href
   1192   * attribute's URL.
   1193   *
   1194   * @param {NodeList<Element>} elements
   1195   *  A list of elements from the page whose href attributes we want to
   1196   *  inspect.
   1197   * @param {string} origin
   1198   *  Origin of the current page.
   1199   * @param {string} providerName
   1200   *  The name of the search provider.
   1201   * @param {Set<string>} extractedDomains
   1202   *  The result set of domains extracted from the page.
   1203   * @param {string | null} queryParam
   1204   *  An optional query param to search for in an element's href attribute.
   1205   * @param {boolean | null} queryParamValueIsHref
   1206   *  Whether the query param value is expected to contain an href.
   1207   */
   1208  #fromElementsConvertHrefsIntoDomains(
   1209    elements,
   1210    origin,
   1211    providerName,
   1212    extractedDomains,
   1213    queryParam,
   1214    queryParamValueIsHref
   1215  ) {
   1216    for (let element of elements) {
   1217      if (this.#exceedsThreshold(extractedDomains.size)) {
   1218        return;
   1219      }
   1220 
   1221      let href = element.getAttribute("href");
   1222 
   1223      let url = URL.parse(href, origin);
   1224      if (!url) {
   1225        continue;
   1226      }
   1227 
   1228      // Ignore non-standard protocols.
   1229      if (url.protocol != "https:" && url.protocol != "http:") {
   1230        continue;
   1231      }
   1232 
   1233      if (queryParam) {
   1234        let paramValue = url.searchParams.get(queryParam);
   1235        if (queryParamValueIsHref) {
   1236          paramValue = URL.parse(paramValue)?.hostname;
   1237          if (!paramValue) {
   1238            continue;
   1239          }
   1240          paramValue = this.#processDomain(paramValue, providerName);
   1241        }
   1242        if (paramValue && !extractedDomains.has(paramValue)) {
   1243          extractedDomains.add(paramValue);
   1244        }
   1245      } else if (url.hostname) {
   1246        let processedHostname = this.#processDomain(url.hostname, providerName);
   1247        if (processedHostname && !extractedDomains.has(processedHostname)) {
   1248          extractedDomains.add(processedHostname);
   1249        }
   1250      }
   1251    }
   1252  }
   1253 
   1254  /**
   1255   * Given a list of elements, examine each for the specified data attribute.
   1256   * If found, add that data attribute's value to the result set of extracted
   1257   * domains as is.
   1258   *
   1259   * @param {NodeList<Element>} elements
   1260   *  A list of elements from the page whose data attributes we want to
   1261   *  inspect.
   1262   * @param {string} providerName
   1263   *  The name of the search provider.
   1264   * @param {string} attribute
   1265   *  The name of a data attribute to search for within an element.
   1266   * @param {Set<string>} extractedDomains
   1267   *  The result set of domains extracted from the page.
   1268   */
   1269  #fromElementsRetrieveDataAttributeValues(
   1270    elements,
   1271    providerName,
   1272    attribute,
   1273    extractedDomains
   1274  ) {
   1275    for (let element of elements) {
   1276      if (this.#exceedsThreshold(extractedDomains.size)) {
   1277        return;
   1278      }
   1279      let value = element.dataset[attribute];
   1280      value = this.#processDomain(value, providerName);
   1281      if (value && !extractedDomains.has(value)) {
   1282        extractedDomains.add(value);
   1283      }
   1284    }
   1285  }
   1286 
   1287  /**
   1288   * Given a list of elements, examine the text content for each element, which
   1289   * may be 1) a URL from which we can extract a domain or 2) text we can fix
   1290   * up to create a best guess as to a URL. If either condition is met, we add
   1291   * the domain to the result set.
   1292   *
   1293   * @param {NodeList<Element>} elements
   1294   *  A list of elements from the page whose text content we want to inspect.
   1295   * @param {Set<string>} extractedDomains
   1296   *  The result set of domains extracted from the page.
   1297   * @param {string} providerName
   1298   *  The name of the search provider.
   1299   */
   1300  #fromElementsRetrieveTextContent(elements, extractedDomains, providerName) {
   1301    // Not an exhaustive regex, but it fits our purpose for this method.
   1302    const LOOSE_URL_REGEX =
   1303      /^(?:https?:\/\/)?(?:www\.)?(?:[\w\-]+\.)+(?:[\w\-]{2,})/i;
   1304 
   1305    // Known but acceptable limitations to this function, where the return
   1306    // value won't be correctly fixed up:
   1307    //   1) A url is embedded within other text. Ex: "xkcd.com is cool."
   1308    //   2) The url contains legal but unusual characters. Ex: $ ! * '
   1309    function fixup(textContent) {
   1310      return textContent
   1311        .toLowerCase()
   1312        .replaceAll(" ", "")
   1313        .replace(/\.$/, "")
   1314        .concat(".com");
   1315    }
   1316 
   1317    for (let element of elements) {
   1318      if (this.#exceedsThreshold(extractedDomains.size)) {
   1319        return;
   1320      }
   1321      let textContent = element.textContent;
   1322      if (!textContent) {
   1323        continue;
   1324      }
   1325 
   1326      let domain;
   1327      if (LOOSE_URL_REGEX.test(textContent)) {
   1328        // Creating a new URL object will throw if the protocol is missing.
   1329        if (!/^https?:\/\//.test(textContent)) {
   1330          textContent = "https://" + textContent;
   1331        }
   1332 
   1333        domain = URL.parse(textContent)?.hostname;
   1334        if (!domain) {
   1335          domain = fixup(textContent);
   1336        }
   1337      } else {
   1338        domain = fixup(textContent);
   1339      }
   1340 
   1341      let processedDomain = this.#processDomain(domain, providerName);
   1342      if (processedDomain && !extractedDomains.has(processedDomain)) {
   1343        extractedDomains.add(processedDomain);
   1344      }
   1345    }
   1346  }
   1347 
   1348  /**
   1349   * Processes a raw domain extracted from the SERP into its final form before
   1350   * categorization.
   1351   *
   1352   * @param {string} domain
   1353   *   The domain extracted from the page.
   1354   * @param {string} providerName
   1355   *   The provider associated with the page.
   1356   * @returns {string}
   1357   *   The domain without any subdomains.
   1358   */
   1359  #processDomain(domain, providerName) {
   1360    if (
   1361      domain.startsWith(`${providerName}.`) ||
   1362      domain.includes(`.${providerName}.`)
   1363    ) {
   1364      return "";
   1365    }
   1366    return this.#stripDomainOfSubdomains(domain);
   1367  }
   1368 
   1369  /**
   1370   * Helper to strip domains of any subdomains.
   1371   *
   1372   * @param {string} domain
   1373   *   The domain to strip of any subdomains.
   1374   * @returns {object} browser
   1375   *   The given domain with any subdomains removed.
   1376   */
   1377  #stripDomainOfSubdomains(domain) {
   1378    let tld;
   1379    // Can throw an exception if the input has too few domain levels.
   1380    try {
   1381      tld = Services.eTLD.getKnownPublicSuffixFromHost(domain);
   1382    } catch (ex) {
   1383      return "";
   1384    }
   1385 
   1386    let domainWithoutTLD = domain.substring(0, domain.length - tld.length);
   1387    let secondLevelDomain = domainWithoutTLD.split(".").at(-2);
   1388 
   1389    return secondLevelDomain ? `${secondLevelDomain}.${tld}` : "";
   1390  }
   1391 
   1392  /**
   1393   * Per a request from Data Science, we need to limit the number of domains
   1394   * categorized to 10 non-ad domains and 10 ad domains.
   1395   *
   1396   * @param {number} nDomains The number of domains processed.
   1397   * @returns {boolean} Whether or not the threshold was exceeded.
   1398   */
   1399  #exceedsThreshold(nDomains) {
   1400    return nDomains >= CATEGORIZATION_SETTINGS.MAX_DOMAINS_TO_CATEGORIZE;
   1401  }
   1402 }
   1403 
// Module-level singletons shared by every actor instance created from this
// module. `domainExtractor` is exported.
export const domainExtractor = new DomainExtractor();
const searchProviders = new SearchProviders();
const searchAdImpression = new SearchAdImpression();

// Per-document state, keyed weakly by document.
// Callback that forwards page actions for the document to the parent.
const documentToEventCallbackMap = new WeakMap();
// Collection of callbacks that remove the event listeners added for the
// document.
const documentToRemoveEventListenersMap = new WeakMap();
// Set to true once a "submitted" action has been observed for the document.
const documentToSubmitMap = new WeakMap();
   1411 
   1412 /**
   1413 * SearchTelemetryChild monitors for pages that are partner searches, and
 * looks through them to find links which look like adverts and sends back
   1415 * a notification to SearchTelemetry for possible telemetry reporting.
   1416 *
   1417 * Only the partner details and the fact that at least one ad was found on the
   1418 * page are returned to SearchTelemetry. If no ads are found, no notification is
   1419 * given.
   1420 */
   1421 export class SearchSERPTelemetryChild extends JSWindowActorChild {
   1422  /**
   1423   * Amount of time to wait after a page event before examining the page
   1424   * for ads.
   1425   *
   1426   * @type {number | null}
   1427   */
   1428  #adTimeout;
   1429 
   1430  /**
   1431   * Determines if there is a provider that matches the supplied URL and returns
   1432   * the information associated with that provider.
   1433   *
   1434   * @param {string} url The url to check
   1435   * @returns {Array | null} Returns null if there's no match, otherwise an array
   1436   *   of provider name and the provider information.
   1437   */
   1438  _getProviderInfoForUrl(url) {
   1439    return searchProviders.info?.find(info => info.searchPageRegexp.test(url));
   1440  }
   1441 
   1442  /**
   1443   * Checks to see if the page is a partner and has an ad link within it. If so,
   1444   * it will notify SearchTelemetry.
   1445   */
   1446  _checkForAdLink(eventType) {
   1447    try {
   1448      if (!this.contentWindow) {
   1449        return;
   1450      }
   1451    } catch (ex) {
   1452      // unload occurred before the timer expired
   1453      return;
   1454    }
   1455 
   1456    let doc = this.document;
   1457    let url = doc.documentURI;
   1458    let providerInfo = this._getProviderInfoForUrl(url);
   1459    if (!providerInfo) {
   1460      return;
   1461    }
   1462 
   1463    let regexps = providerInfo.extraAdServersRegexps;
   1464    let anchors = doc.getElementsByTagName("a");
   1465    let hasAds = false;
   1466    for (let anchor of anchors) {
   1467      if (!anchor.href) {
   1468        continue;
   1469      }
   1470      for (let name of providerInfo.adServerAttributes) {
   1471        hasAds = regexps.some(regexp => regexp.test(anchor.dataset[name]));
   1472        if (hasAds) {
   1473          break;
   1474        }
   1475      }
   1476      if (!hasAds) {
   1477        hasAds = regexps.some(regexp => regexp.test(anchor.href));
   1478      }
   1479      if (hasAds) {
   1480        break;
   1481      }
   1482    }
   1483 
   1484    // If there are no ads in hrefs, they could be present in a subframe.
   1485    if (!hasAds) {
   1486      hasAds = this.#checkForSponsoredSubframes(this.document, providerInfo);
   1487    }
   1488 
   1489    if (hasAds) {
   1490      this.sendAsyncMessage("SearchTelemetry:PageInfo", {
   1491        hasAds,
   1492        url,
   1493      });
   1494    }
   1495 
   1496    if (
   1497      providerInfo.components?.length &&
   1498      (eventType == "load" || eventType == "pageshow")
   1499    ) {
   1500      // Start performance measurements.
   1501      let start = ChromeUtils.now();
   1502      let timerId = Glean.serp.categorizationDuration.start();
   1503 
   1504      let pageActionCallback = info => {
   1505        if (info.action == "submitted") {
   1506          documentToSubmitMap.set(doc, true);
   1507        }
   1508        this.sendAsyncMessage("SearchTelemetry:Action", {
   1509          target: info.target,
   1510          action: info.action,
   1511        });
   1512      };
   1513      documentToEventCallbackMap.set(this.document, pageActionCallback);
   1514 
   1515      let componentToVisibilityMap, hrefToComponentMap;
   1516      try {
   1517        let result = searchAdImpression.categorize(anchors, doc);
   1518        componentToVisibilityMap = result.componentToVisibilityMap;
   1519        hrefToComponentMap = result.hrefToComponentMap;
   1520      } catch (e) {
   1521        // Cancel the timer if an error encountered.
   1522        Glean.serp.categorizationDuration.cancel(timerId);
   1523      }
   1524 
   1525      if (componentToVisibilityMap && hrefToComponentMap) {
   1526        // End measurements.
   1527        ChromeUtils.addProfilerMarker(
   1528          "SearchSERPTelemetryChild._checkForAdLink",
   1529          start,
   1530          "Checked anchors for visibility"
   1531        );
   1532        Glean.serp.categorizationDuration.stopAndAccumulate(timerId);
   1533        this.sendAsyncMessage("SearchTelemetry:AdImpressions", {
   1534          adImpressions: componentToVisibilityMap,
   1535          hrefToComponentMap,
   1536          url,
   1537        });
   1538      }
   1539    }
   1540 
   1541    if (
   1542      lazy.serpEventTelemetryCategorization &&
   1543      lazy.serpEventTelemetryCategorizationRegionEnabled &&
   1544      providerInfo.domainExtraction &&
   1545      (eventType == "load" || eventType == "pageshow")
   1546    ) {
   1547      let start = ChromeUtils.now();
   1548      let nonAdDomains = domainExtractor.extractDomainsFromDocument(
   1549        doc,
   1550        providerInfo.domainExtraction.nonAds,
   1551        providerInfo.telemetryId
   1552      );
   1553 
   1554      let adDomains = domainExtractor.extractDomainsFromDocument(
   1555        doc,
   1556        providerInfo.domainExtraction.ads,
   1557        providerInfo.telemetryId
   1558      );
   1559 
   1560      this.sendAsyncMessage("SearchTelemetry:Domains", {
   1561        url,
   1562        nonAdDomains,
   1563        adDomains,
   1564      });
   1565 
   1566      ChromeUtils.addProfilerMarker(
   1567        "SearchSERPTelemetryChild._checkForAdLink",
   1568        start,
   1569        "Extract domains from elements"
   1570      );
   1571    }
   1572  }
   1573 
   1574  /**
   1575   * Checks for the presence of certain components on the page that are
   1576   * required for recording the page impression.
   1577   */
   1578  #checkForPageImpressionComponents() {
   1579    let url = this.document.documentURI;
   1580    let providerInfo = this._getProviderInfoForUrl(url);
   1581    if (providerInfo.components?.length) {
   1582      searchAdImpression.providerInfo = providerInfo;
   1583      let start = ChromeUtils.now();
   1584      let shoppingTabDisplayed = searchAdImpression.hasShoppingTab(
   1585        this.document
   1586      );
   1587      ChromeUtils.addProfilerMarker(
   1588        "SearchSERPTelemetryChild.#recordImpression",
   1589        start,
   1590        "Checked for shopping tab"
   1591      );
   1592      this.sendAsyncMessage("SearchTelemetry:PageImpression", {
   1593        url,
   1594        shoppingTabDisplayed,
   1595      });
   1596    }
   1597  }
   1598 
   1599  #checkForSponsoredSubframes(document, providerInfo) {
   1600    if (!providerInfo.subframes?.length) {
   1601      return false;
   1602    }
   1603 
   1604    let subframes = document.querySelectorAll("iframe");
   1605    for (let subframe of subframes) {
   1606      let foundMatch = providerInfo.subframes.some(obj =>
   1607        obj.regexp?.test(subframe.src)
   1608      );
   1609      if (
   1610        foundMatch &&
   1611        subframe.checkVisibility({
   1612          visibilityProperty: true,
   1613          contentVisibilityAuto: true,
   1614        })
   1615      ) {
   1616        return true;
   1617      }
   1618    }
   1619 
   1620    return false;
   1621  }
   1622 
   1623  #removeEventListeners() {
   1624    let callbacks = documentToRemoveEventListenersMap.get(this.document);
   1625    if (callbacks) {
   1626      for (let callback of callbacks) {
   1627        callback();
   1628      }
   1629      documentToRemoveEventListenersMap.delete(this.document);
   1630    }
   1631  }
   1632 
   1633  /**
   1634   * Handles events received from the actor child notifications.
   1635   *
   1636   * @param {object} event The event details.
   1637   */
   1638  handleEvent(event) {
   1639    if (!this.#urlIsSERP()) {
   1640      return;
   1641    }
   1642    switch (event.type) {
   1643      case "pageshow": {
   1644        // If a page is loaded from the bfcache, we won't get a "DOMContentLoaded"
   1645        // event, so we need to rely on "pageshow" in this case. Note: we do this
   1646        // so that we remain consistent with the *.in-content:sap* count for the
   1647        // SEARCH_COUNTS histogram.
   1648        if (event.persisted) {
   1649          this.#checkForPageImpressionComponents();
   1650          this.#check(event.type);
   1651        }
   1652        break;
   1653      }
   1654      case "DOMContentLoaded": {
   1655        this.#check(event.type);
   1656        break;
   1657      }
   1658      case "load": {
   1659        this.#checkForPageImpressionComponents();
   1660        // We check both DOMContentLoaded and load in case the page has
   1661        // taken a long time to load and the ad is only detected on load.
   1662        // We still check at DOMContentLoaded because if the page hasn't
   1663        // finished loading and the user navigates away, we still want to know
   1664        // if there were ads on the page or not at that time.
   1665        this.#check(event.type);
   1666        break;
   1667      }
   1668      case "pagehide": {
   1669        let callbacks = documentToRemoveEventListenersMap.get(this.document);
   1670        if (callbacks) {
   1671          for (let removeEventListenerCallback of callbacks) {
   1672            removeEventListenerCallback();
   1673          }
   1674          documentToRemoveEventListenersMap.delete(this.document);
   1675        }
   1676        this.#cancelCheck();
   1677        break;
   1678      }
   1679    }
   1680  }
   1681 
   1682  async receiveMessage(message) {
   1683    switch (message.name) {
   1684      case "SearchSERPTelemetry:WaitForSPAPageLoad":
   1685        lazy.setTimeout(() => {
   1686          this.#checkForPageImpressionComponents();
   1687          this._checkForAdLink("load");
   1688        }, Services.cpmm.sharedData.get(SEARCH_TELEMETRY_SHARED.SPA_LOAD_TIMEOUT));
   1689        break;
   1690      case "SearchSERPTelemetry:StopTrackingDocument":
   1691        this.#removeDocumentFromSubmitMap();
   1692        this.#removeEventListeners();
   1693        break;
   1694      case "SearchSERPTelemetry:DidSubmit":
   1695        return this.#didSubmit();
   1696    }
   1697    return null;
   1698  }
   1699 
   1700  #didSubmit() {
   1701    return documentToSubmitMap.get(this.document);
   1702  }
   1703 
   1704  #removeDocumentFromSubmitMap() {
   1705    documentToSubmitMap.delete(this.document);
   1706  }
   1707 
   1708  #urlIsSERP() {
   1709    let provider = this._getProviderInfoForUrl(this.document.documentURI);
   1710    if (provider) {
   1711      // Some URLs can match provider info but also be the provider's homepage
   1712      // instead of a SERP.
   1713      // e.g. https://example.com/ vs. https://example.com/?foo=bar
   1714      // To check this, we look for the presence of the query parameter
   1715      // that contains a search term.
   1716      let queries = URL.fromURI(this.document.documentURIObject).searchParams;
   1717      for (let queryParamName of provider.queryParamNames) {
   1718        if (queries.get(queryParamName)) {
   1719          return true;
   1720        }
   1721      }
   1722    }
   1723    return false;
   1724  }
   1725 
   1726  #cancelCheck() {
   1727    if (this._waitForContentTimeout) {
   1728      lazy.clearTimeout(this._waitForContentTimeout);
   1729    }
   1730  }
   1731 
   1732  #check(eventType) {
   1733    if (!this.#adTimeout) {
   1734      this.#adTimeout = Services.cpmm.sharedData.get(
   1735        SEARCH_TELEMETRY_SHARED.LOAD_TIMEOUT
   1736      );
   1737    }
   1738    this.#cancelCheck();
   1739    this._waitForContentTimeout = lazy.setTimeout(() => {
   1740      this._checkForAdLink(eventType);
   1741    }, this.#adTimeout);
   1742  }
   1743 }