tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

UrlbarSearchUtils.sys.mjs (13637B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 /*
      6 * Search service utilities for urlbar.  The only reason these functions aren't
      7 * a part of UrlbarUtils is that we want O(1) case-insensitive lookup for search
      8 * aliases, and to do that we need to observe the search service, persistent
      9 * state, and an init method.  A separate object is easier.
     10 */
     11 
     12 /**
     13 * @typedef {typeof import("UrlbarUtils.sys.mjs").UrlbarUtils.RESULT_SOURCE} RESULT_SOURCE
     14 */
     15 
     16 import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
     17 
     18 const lazy = XPCOMUtils.declareLazy({
     19  UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs",
     20  UrlbarUtils: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs",
     21  separatePrivateDefaultUIEnabled: {
     22    pref: "browser.search.separatePrivateDefault.ui.enabled",
     23    default: false,
     24  },
     25  separatePrivateDefault: {
     26    pref: "browser.search.separatePrivateDefault",
     27    default: false,
     28  },
     29 });
     30 
     31 const SEARCH_ENGINE_TOPIC = "browser-search-engine-modified";
     32 
     33 /**
     34 * Search service utilities for urlbar.
     35 */
     36 class SearchUtils {
     37  constructor() {
     38    this._refreshEnginesByAliasPromise = Promise.resolve();
     39    this.QueryInterface = ChromeUtils.generateQI([
     40      "nsIObserver",
     41      "nsISupportsWeakReference",
     42    ]);
     43  }
     44 
     45  /**
     46   * Initializes the instance and also Services.search.
     47   */
     48  async init() {
     49    if (!this._initPromise) {
     50      this._initPromise = this._initInternal();
     51    }
     52    await this._initPromise;
     53  }
     54 
     55  /**
     56   * Gets the engines whose domains match a given prefix.
     57   *
     58   * @param {string} prefix
     59   *   String containing the first part of the matching domain name(s).
     60   * @param {object} [options]
     61   *   Options object.
     62   * @param {boolean} [options.matchAllDomainLevels]
     63   *   Match at each sub domain, for example "a.b.c.com" will be matched at
     64   *   "a.b.c.com", "b.c.com", and "c.com". Partial matches are always returned
     65   *   after perfect matches.
     66   * @returns {Promise<nsISearchEngine[]>}
     67   *   An array of all matching engines. An empty array if there are none.
     68   */
     69  async enginesForDomainPrefix(prefix, { matchAllDomainLevels = false } = {}) {
     70    try {
     71      await this.init();
     72    } catch {
     73      return [];
     74    }
     75    prefix = prefix.toLowerCase();
     76 
     77    // Array of partially matched engines, added through matchPrefix().
     78    let partialMatchEngines = [];
     79    function matchPrefix(engine, engineHost) {
     80      let parts = engineHost.split(".");
     81      for (let i = 1; i < parts.length - 1; ++i) {
     82        if (parts.slice(i).join(".").startsWith(prefix)) {
     83          partialMatchEngines.push(engine);
     84        }
     85      }
     86    }
     87 
     88    // Array of perfectly matched engines. We also keep a Set for O(1) lookup.
     89    let perfectMatchEngines = [];
     90    let perfectMatchEngineSet = new Set();
     91    for (let engine of await Services.search.getVisibleEngines()) {
     92      if (engine.hideOneOffButton) {
     93        continue;
     94      }
     95      let domain = engine.searchUrlDomain;
     96      if (domain.startsWith(prefix) || domain.startsWith("www." + prefix)) {
     97        perfectMatchEngines.push(engine);
     98        perfectMatchEngineSet.add(engine);
     99      }
    100 
    101      if (matchAllDomainLevels) {
    102        // The prefix may or may not contain part of the public suffix. If
    103        // it contains a dot, we must match with and without the public suffix,
    104        // otherwise it's sufficient to just match without it.
    105        if (prefix.includes(".")) {
    106          matchPrefix(engine, domain);
    107        }
    108        matchPrefix(
    109          engine,
    110          domain.substr(0, domain.length - engine.searchUrlPublicSuffix.length)
    111        );
    112      }
    113    }
    114 
    115    // Build the final list of matching engines. Partial matches come after
    116    // perfect matches. Partial matches may be included in the perfect matches
    117    // list, so be careful not to include the same engine more than once.
    118    let engines = perfectMatchEngines;
    119    let engineSet = perfectMatchEngineSet;
    120    for (let engine of partialMatchEngines) {
    121      if (!engineSet.has(engine)) {
    122        engineSet.add(engine);
    123        engines.push(engine);
    124      }
    125    }
    126    return engines;
    127  }
    128 
    129  /**
    130   * Gets the engine with a given alias.
    131   *
    132   * Note: engines returned from this list may be updated at any time. If you
    133   * are caching the icon or other fields for more than a single engagement of
    134   * the urlbar, consider observing the SEARCH_ENGINE_TOPIC.
    135   *
    136   * @param {string} alias
    137   *   A search engine alias.  The alias string comparison is case insensitive.
    138   * @param {string} [searchString]
    139   *   Optional. If provided, we also enforce that there must be a space after
    140   *   the alias in the search string.
    141   * @returns {Promise<nsISearchEngine>}
    142   *   The matching engine or null if there isn't one.
    143   */
    144  async engineForAlias(alias, searchString = null) {
    145    try {
    146      await Promise.all([this.init(), this._refreshEnginesByAliasPromise]);
    147    } catch {
    148      return null;
    149    }
    150 
    151    let engine = this._enginesByAlias.get(alias.toLocaleLowerCase());
    152    if (engine && searchString) {
    153      let query = lazy.UrlbarUtils.substringAfter(searchString, alias);
    154      // Match an alias only when it has a space after it.  If there's no trailing
    155      // space, then continue to treat it as part of the search string.
    156      if (!lazy.UrlUtils.REGEXP_SPACES_START.test(query)) {
    157        return null;
    158      }
    159    }
    160    return engine || null;
    161  }
    162 
    163  /**
    164   * The list of engines with token ("@") aliases. May be empty if the search
    165   * service has not initialized.
    166   */
    167  async tokenAliasEngines() {
    168    try {
    169      await this.init();
    170    } catch {
    171      return [];
    172    }
    173 
    174    let tokenAliasEngines = [];
    175    for (let engine of await Services.search.getVisibleEngines()) {
    176      let tokenAliases = this._aliasesForEngine(engine).filter(a =>
    177        a.startsWith("@")
    178      );
    179      if (tokenAliases.length) {
    180        tokenAliasEngines.push({ engine, tokenAliases });
    181      }
    182    }
    183    return tokenAliasEngines;
    184  }
    185 
    186  /**
    187   * @param {nsISearchEngine} engine
    188   *   The engine to get the root domain of
    189   * @returns {string}
    190   *   The root domain of a search engine. e.g. If `engine` has the domain
    191   *   www.subdomain.rootdomain.com, `rootdomain` is returned. Returns the
    192   *   engine's domain if the engine's URL does not have a valid TLD.
    193   */
    194  getRootDomainFromEngine(engine) {
    195    let domain = engine.searchUrlDomain;
    196    let suffix = engine.searchUrlPublicSuffix;
    197    if (!suffix) {
    198      if (domain.endsWith(".test")) {
    199        suffix = "test";
    200      } else {
    201        return domain;
    202      }
    203    }
    204    domain = domain.substr(
    205      0,
    206      // -1 to remove the trailing dot.
    207      domain.length - suffix.length - 1
    208    );
    209    let domainParts = domain.split(".");
    210    return domainParts.pop();
    211  }
    212 
    213  /**
    214   * @param {boolean} [isPrivate]
    215   *   True if in a private context.
    216   * @returns {nsISearchEngine}
    217   *   The default engine or null if SearchService has not initialized.
    218   */
    219  getDefaultEngine(isPrivate = false) {
    220    if (!Services.search.hasSuccessfullyInitialized) {
    221      return null;
    222    }
    223 
    224    return lazy.separatePrivateDefaultUIEnabled &&
    225      lazy.separatePrivateDefault &&
    226      isPrivate
    227      ? Services.search.defaultPrivateEngine
    228      : Services.search.defaultEngine;
    229  }
    230 
    231  /**
    232   * Returns true if the UI is enabled for allowing a separate default search
    233   * engine in private windows.
    234   */
    235  get separatePrivateDefaultUIEnabled() {
    236    return lazy.separatePrivateDefaultUIEnabled;
    237  }
    238 
    239  /**
    240   * Returns true if there is potentially a different engine set for searches
    241   * in private windows.
    242   */
    243  get separatePrivateDefault() {
    244    return lazy.separatePrivateDefault;
    245  }
    246 
    247  /**
    248   * To make analysis easier, we sanitize some engine names when
    249   * recording telemetry about search mode. This function returns the sanitized
    250   * key name to record in telemetry.
    251   *
    252   * @param {object} searchMode
    253   *   A search mode object. See UrlbarInput.setSearchMode.
    254   * @returns {string}
    255   *   A sanitized scalar key, used to access Telemetry data.
    256   */
    257  getSearchModeScalarKey(searchMode) {
    258    let scalarKey;
    259    if (searchMode.engineName) {
    260      let engine = Services.search.getEngineByName(searchMode.engineName);
    261      let resultDomain = engine.searchUrlDomain;
    262      // For config engines, sanitize the data in a few special cases to make
    263      // analysis easier.
    264      if (!engine.isConfigEngine) {
    265        scalarKey = "other";
    266      } else if (resultDomain.includes("amazon.")) {
    267        // Group all the localized Amazon sites together.
    268        scalarKey = "Amazon";
    269      } else if (resultDomain.endsWith("wikipedia.org")) {
    270        // Group all the localized Wikipedia sites together.
    271        scalarKey = "Wikipedia";
    272      } else {
    273        scalarKey = searchMode.engineName;
    274      }
    275    } else if (searchMode.source) {
    276      scalarKey =
    277        lazy.UrlbarUtils.getResultSourceName(searchMode.source) || "other";
    278      scalarKey += searchMode.restrictType ? `_${searchMode.restrictType}` : "";
    279    }
    280 
    281    return scalarKey;
    282  }
    283 
    284  /**
    285   *
    286   * @param {UrlbarResult} result
    287   *   The result to evaluate
    288   * @param {Array<RESULT_SOURCE>} [allowedSources]
    289   *   Array of allowed result sources. if defined, the result must be from one
    290   *   of these sources to be evaluated as a SERP, otherwise this will return
    291   *   false.
    292   *
    293   * @returns {boolean} Whether it may be a SERP
    294   */
    295  resultIsSERP(result, allowedSources = null) {
    296    if (allowedSources && !allowedSources?.includes(result.source)) {
    297      return false;
    298    }
    299    try {
    300      return !!Services.search.parseSubmissionURL(result.payload.url)?.engine;
    301    } catch (ex) {
    302      return false;
    303    }
    304  }
    305 
    306  async _initInternal() {
    307    await Services.search.init();
    308    await this._refreshEnginesByAlias();
    309    Services.obs.addObserver(this, SEARCH_ENGINE_TOPIC, true);
    310  }
    311 
    312  async _refreshEnginesByAlias() {
    313    // See the comment at the top of this file.  The only reason we need this
    314    // class is for O(1) case-insensitive lookup for search aliases, which is
    315    // facilitated by _enginesByAlias.
    316    this._enginesByAlias = new Map();
    317    for (let engine of await Services.search.getVisibleEngines()) {
    318      if (!engine.hidden) {
    319        for (let alias of this._aliasesForEngine(engine)) {
    320          this._enginesByAlias.set(alias, engine);
    321        }
    322      }
    323    }
    324  }
    325 
    326  /**
    327   * Compares the query parameters of two SERPs to see if one is equivalent to
    328   * the other. URL `x` is equivalent to URL `y` if
    329   *   (a) `y` contains at least all the query parameters contained in `x`, and
    330   *   (b) The values of the query parameters contained in both `x` and `y `are
    331   *       the same.
    332   *
    333   * This function does not compare the SERPs' origins or pathnames.
    334   * `historySerp` can have a different origin and/or pathname than
    335   * `generatedSerp` and still be considered equivalent.
    336   *
    337   * @param {string} historySerp
    338   *   The SERP from history whose params should be contained in
    339   *   `generatedSerp`.
    340   * @param {string} generatedSerp
    341   *   The search URL we would generate for a search result with the same search
    342   *   string used in `historySerp`.
    343   * @param {Array} [ignoreParams]
    344   *   A list of params to ignore in the matching, i.e. params that can be
    345   *   contained in `historySerp` but not be in `generatedSerp`.
    346   * @returns {boolean} True if `historySerp` can be deduped by `generatedSerp`.
    347   */
    348  serpsAreEquivalent(historySerp, generatedSerp, ignoreParams = []) {
    349    let historyParams = new URL(historySerp).searchParams;
    350    let generatedParams = new URL(generatedSerp).searchParams;
    351    if (
    352      !Array.from(historyParams.entries()).every(
    353        ([key, value]) =>
    354          ignoreParams.includes(key) || value === generatedParams.get(key)
    355      )
    356    ) {
    357      return false;
    358    }
    359 
    360    return true;
    361  }
    362 
    363  /**
    364   * Gets the aliases of an engine.  For the user's convenience, we recognize
    365   * token versions of all non-token aliases.  For example, if the user has an
    366   * alias of "foo", then we recognize both "foo" and "@foo" as aliases for
    367   * foo's engine.  The returned list is therefore a superset of
    368   * `engine.aliases`.  Additionally, the returned aliases will be lower-cased
    369   * to make lookups and comparisons easier.
    370   *
    371   * @param {nsISearchEngine} engine
    372   *   The aliases of this search engine will be returned.
    373   * @returns {Array}
    374   *   An array of lower-cased string aliases as described above.
    375   */
    376  _aliasesForEngine(engine) {
    377    return engine.aliases.reduce((aliases, aliasWithCase) => {
    378      // We store lower-cased aliases to make lookups and comparisons easier.
    379      let alias = aliasWithCase.toLocaleLowerCase();
    380      aliases.push(alias);
    381      if (!alias.startsWith("@")) {
    382        aliases.push("@" + alias);
    383      }
    384      return aliases;
    385    }, []);
    386  }
    387 
    388  /**
    389   * @param {string} engineName
    390   *   Name of the search engine.
    391   * @returns {nsISearchEngine}
    392   *   The engine based on engineName or null if SearchService has not
    393   *   initialized.
    394   */
    395  getEngineByName(engineName) {
    396    if (!Services.search.hasSuccessfullyInitialized) {
    397      return null;
    398    }
    399 
    400    return Services.search.getEngineByName(engineName);
    401  }
    402 
    403  observe(subject, topic, data) {
    404    switch (data) {
    405      case "engine-added":
    406      case "engine-changed":
    407      case "engine-removed":
    408      case "engine-default":
    409        this._refreshEnginesByAliasPromise = this._refreshEnginesByAlias();
    410        break;
    411    }
    412  }
    413 }
    414 
    415 export var UrlbarSearchUtils = new SearchUtils();