tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

UrlbarSearchTermsPersistence.sys.mjs (15725B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 const lazy = {};
      6 
      7 import { UrlbarUtils } from "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs";
      8 
      9 ChromeUtils.defineESModuleGetters(lazy, {
     10  RemoteSettings: "resource://services-settings/remote-settings.sys.mjs",
     11 });
     12 
     13 /**
     14 * @import {RemoteSettingsClient} from "resource://services-settings/RemoteSettingsClient.sys.mjs"
     15 */
     16 
     17 /**
     18 * @typedef {object} PersistedTermsProviderInfo
     19 * @property {string} providerId
     20 *   The search engine provider id associated with the persisted terms.
     21 * @property {RegExp} [searchPageRegexp]
     22 *   The regular expression for determining if the search page URL matches.
     23 * @property {{key: string, values: string[], canBeMissing: boolean}[]} includeParams
     24 *   The parameters that should be included in determining if the search page URL matches.
     25 * @property {{key: string, values: string[]}[]} excludeParams
     26 *   The parameters that should be excluded in determining if the search page URL matches.
     27 */
     28 
     29 ChromeUtils.defineLazyGetter(lazy, "logger", () =>
     30  UrlbarUtils.getLogger({ prefix: "UrlbarSearchTermsPersistence" })
     31 );
     32 
     33 const URLBAR_PERSISTENCE_SETTINGS_KEY = "urlbar-persisted-search-terms";
     34 
     35 /**
     36 * Provides utilities to manage and validate search terms persistence in the URL
     37 * bar. This class is designed to handle the identification of default search
     38 * engine results pages (SERPs), retrieval of search terms, and validation of
     39 * conditions for persisting search terms based on predefined provider
     40 * information.
     41 */
     42 class _UrlbarSearchTermsPersistence {
     43  // Whether or not this class is initialised.
     44  #initialized = false;
     45 
     46  // The original provider information, mainly used for tests.
     47  #originalProviderInfo = [];
     48 
     49  /**
     50   * @type {PersistedTermsProviderInfo[]}
     51   *  The current search provider info.
     52   */
     53  #searchProviderInfo = [];
     54 
     55  /**
     56   * @type {RemoteSettingsClient}
     57   * An instance of remote settings that is used to access the provider info.
     58   */
     59  #urlbarSearchTermsPersistenceSettings;
     60 
     61  // Callback used when syncing Urlbar Search Terms Persistence config settings.
     62  #urlbarSearchTermsPersistenceSettingsSync;
     63 
     64  async init() {
     65    if (this.#initialized) {
     66      return;
     67    }
     68 
     69    this.#urlbarSearchTermsPersistenceSettings = lazy.RemoteSettings(
     70      URLBAR_PERSISTENCE_SETTINGS_KEY
     71    );
     72    let rawProviderInfo = [];
     73    try {
     74      rawProviderInfo = await this.#urlbarSearchTermsPersistenceSettings.get();
     75    } catch (ex) {
     76      lazy.logger.error("Could not get settings:", ex);
     77    }
     78 
     79    this.#urlbarSearchTermsPersistenceSettingsSync = event =>
     80      this.#onSettingsSync(event);
     81    this.#urlbarSearchTermsPersistenceSettings.on(
     82      "sync",
     83      this.#urlbarSearchTermsPersistenceSettingsSync
     84    );
     85 
     86    this.#originalProviderInfo = rawProviderInfo;
     87    this.#setSearchProviderInfo(rawProviderInfo);
     88 
     89    this.#initialized = true;
     90  }
     91 
     92  uninit() {
     93    if (!this.#initialized) {
     94      return;
     95    }
     96 
     97    try {
     98      this.#urlbarSearchTermsPersistenceSettings.off(
     99        "sync",
    100        this.#urlbarSearchTermsPersistenceSettingsSync
    101      );
    102    } catch (ex) {
    103      lazy.logger.error(
    104        "Failed to shutdown UrlbarSearchTermsPersistence Remote Settings.",
    105        ex
    106      );
    107    }
    108    this.#urlbarSearchTermsPersistenceSettings = null;
    109    this.#urlbarSearchTermsPersistenceSettingsSync = null;
    110 
    111    this.#initialized = false;
    112  }
    113 
    114  getSearchProviderInfo() {
    115    return this.#searchProviderInfo;
    116  }
    117 
    118  /**
    119   * Test-only function, used to override the provider information, so that
    120   * unit tests can set it to easy to test values.
    121   *
    122   * @param {Array} providerInfo
    123   *   An array of provider information to set.
    124   */
    125  overrideSearchTermsPersistenceForTests(providerInfo) {
    126    let info = providerInfo ? providerInfo : this.#originalProviderInfo;
    127    this.#setSearchProviderInfo(info);
    128  }
    129 
    130  /**
    131   * Determines if the URIs represent an config search engine
    132   * results page (SERP) and retrieves the search terms used.
    133   *
    134   * @param {nsIURI} uri
    135   *   The primary URI that is checked to determine if it matches the expected
    136   *   structure of a default SERP.
    137   * @returns {string}
    138   *   The search terms used.
    139   *   Will return an empty string if it's not a default SERP, the search term
    140   *   looks too similar to a URL, the string exceeds the maximum characters,
    141   *   or the default engine hasn't been initialized.
    142   */
    143  getSearchTerm(uri) {
    144    if (!Services.search.hasSuccessfullyInitialized || !uri?.spec) {
    145      return "";
    146    }
    147 
    148    // Avoid inspecting URIs if they are non-http(s).
    149    if (!/^https?:\/\//.test(uri.spec)) {
    150      return "";
    151    }
    152 
    153    let searchTerm = "";
    154 
    155    // If we have a provider, we have specific rules for dealing and can
    156    // understand changes to params.
    157    let provider = this.#getProviderInfoForURL(uri.spec);
    158    if (provider) {
    159      let result = Services.search.parseSubmissionURL(uri.spec);
    160      if (
    161        !result.engine?.isConfigEngine ||
    162        !this.isDefaultPage(uri, provider)
    163      ) {
    164        return "";
    165      }
    166      searchTerm = result.terms;
    167    } else {
    168      let result = Services.search.parseSubmissionURL(uri.spec);
    169      if (!result.engine?.isConfigEngine) {
    170        return "";
    171      }
    172      searchTerm = result.engine.searchTermFromResult(uri);
    173    }
    174 
    175    if (!searchTerm || searchTerm.length > UrlbarUtils.MAX_TEXT_LENGTH) {
    176      return "";
    177    }
    178 
    179    let searchTermWithSpacesRemoved = searchTerm.replaceAll(/\s/g, "");
    180 
    181    // Check if the search string uses a commonly used URL protocol. This
    182    // avoids doing a fixup if we already know it matches a URL. Additionally,
    183    // it ensures neither http:// nor https:// will appear by themselves in
    184    // UrlbarInput. This is important because http:// can be trimmed, which in
    185    // the Persisted Search Terms case, will cause the UrlbarInput to appear
    186    // blank.
    187    if (
    188      searchTermWithSpacesRemoved.startsWith("https://") ||
    189      searchTermWithSpacesRemoved.startsWith("http://")
    190    ) {
    191      return "";
    192    }
    193 
    194    // We pass the search term to URIFixup to determine if it could be
    195    // interpreted as a URL, including typos in the scheme and/or the domain
    196    // suffix. This is to prevent search terms from persisting in the Urlbar if
    197    // they look too similar to a URL, but still allow phrases with periods
    198    // that are unlikely to be a URL.
    199    try {
    200      let info = Services.uriFixup.getFixupURIInfo(
    201        searchTermWithSpacesRemoved,
    202        Ci.nsIURIFixup.FIXUP_FLAG_FIX_SCHEME_TYPOS |
    203          Ci.nsIURIFixup.FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP
    204      );
    205      if (info.keywordAsSent) {
    206        return searchTerm;
    207      }
    208    } catch (e) {}
    209 
    210    return "";
    211  }
    212 
    213  shouldPersist(state, { uri, isSameDocument, userTypedValue, firstView }) {
    214    let persist = state.persist;
    215    if (!persist) {
    216      return false;
    217    }
    218 
    219    // Don't persist if there are no search terms to show.
    220    if (!persist.searchTerms) {
    221      return false;
    222    }
    223 
    224    // If there is a userTypedValue and it differs from the search terms, the
    225    // user must've modified the text.
    226    if (userTypedValue && userTypedValue !== persist.searchTerms) {
    227      return false;
    228    }
    229 
    230    // For some search engines, particularly single page applications, check
    231    // if the URL matches a default search results page as page changes will
    232    // occur within the same document.
    233    if (
    234      isSameDocument &&
    235      state.persist.provider &&
    236      !this.isDefaultPage(uri, state.persist.provider)
    237    ) {
    238      return false;
    239    }
    240 
    241    // The first page view will set the search mode but after that, the search
    242    // mode could differ. Since persisting the search guarantees the correct
    243    // search mode is shown, we don't want to undo changes the user could've
    244    // done, like removing/adding the search mode.
    245    if (
    246      !firstView &&
    247      !this.searchModeMatchesState(state.searchModes?.confirmed, state)
    248    ) {
    249      return false;
    250    }
    251 
    252    let origin, pathname;
    253    try {
    254      let url = URL.fromURI(uri);
    255      origin = url.origin;
    256      pathname = url.pathname;
    257    } catch (ex) {
    258      return false;
    259    }
    260 
    261    // Bug 1972464: Prevent search terms from persisting across different origin
    262    // or pathnames. This should be refactored later to be simplified.
    263    if (
    264      origin !== state.persist.origin ||
    265      pathname !== state.persist.pathname
    266    ) {
    267      return false;
    268    }
    269 
    270    return true;
    271  }
    272 
    273  // Resets and assigns initial values for Search Terms Persistence state.
    274  setPersistenceState(state, uri) {
    275    state.persist = {
    276      // Whether the engine that loaded the URI is the default search engine.
    277      isDefaultEngine: null,
    278 
    279      // Temporary until we resolve Bug 1972464 - refactor the architecture.
    280      origin: null,
    281 
    282      // The name of the engine that was used to load the URI.
    283      originalEngineName: null,
    284 
    285      // Temporary until we resolve Bug 1972464 - refactor the architecture.
    286      originalURI: null,
    287 
    288      // Temporary until we resolve Bug 1972464 - refactor the architecture.
    289      path: null,
    290 
    291      // The search provider associated with the URI. If one exists, it means
    292      // we have custom rules for this search provider to determine whether or
    293      // not the URI corresponds to a default search engine results page.
    294      provider: null,
    295 
    296      // The search string within the URI.
    297      searchTerms: "",
    298 
    299      // Whether the search terms should persist.
    300      shouldPersist: null,
    301    };
    302 
    303    let origin, pathname;
    304    try {
    305      let url = URL.fromURI(uri);
    306      origin = url.origin;
    307      pathname = url.pathname;
    308    } catch (ex) {
    309      return;
    310    }
    311 
    312    let searchTerms = this.getSearchTerm(uri);
    313    // Avoid setting state if either are missing.
    314    if (!searchTerms || !origin || !pathname) {
    315      return;
    316    }
    317 
    318    state.persist.origin = origin;
    319    state.persist.searchTerms = searchTerms;
    320    state.persist.pathname = pathname;
    321    state.persist.originalURI = uri;
    322 
    323    let provider = this.#getProviderInfoForURL(uri?.spec);
    324    // If we have specific Remote Settings defined providers for the URL,
    325    // it's because changing the page won't clear the search terms unless we
    326    // observe changes of the params in the URL.
    327    if (provider) {
    328      state.persist.provider = provider;
    329    }
    330 
    331    let result = this.#searchModeForUrl(uri.spec);
    332    state.persist.originalEngineName = result.engineName;
    333    state.persist.isDefaultEngine = result.isDefaultEngine;
    334  }
    335 
    336  /**
    337   * Determines if search mode is in alignment with the persisted
    338   * search state. Returns true in either of these cases:
    339   *
    340   * - The search mode engine is the same as the persisted engine.
    341   * - There's no search mode, but the persisted engine is a default engine.
    342   *
    343   * @param {object} searchMode
    344   *   The search mode for the address bar.
    345   * @param {object} state
    346   *   The address bar state associated with the browser.
    347   * @returns {boolean}
    348   */
    349  searchModeMatchesState(searchMode, state) {
    350    if (searchMode?.engineName === state.persist?.originalEngineName) {
    351      return true;
    352    }
    353    if (!searchMode && state.persist?.isDefaultEngine) {
    354      return true;
    355    }
    356    return false;
    357  }
    358 
    359  onSearchModeChanged(window) {
    360    let urlbar = window.gURLBar;
    361    if (!urlbar) {
    362      return;
    363    }
    364    let state = urlbar.getBrowserState(window.gBrowser.selectedBrowser);
    365    if (!state?.persist) {
    366      return;
    367    }
    368 
    369    // Exit search terms persistence when search mode changes and it's not
    370    // consistent with the persisted engine.
    371    if (
    372      state.persist.shouldPersist &&
    373      !this.searchModeMatchesState(state.searchModes?.confirmed, state)
    374    ) {
    375      state.persist.shouldPersist = false;
    376      urlbar.removeAttribute("persistsearchterms");
    377    }
    378  }
    379 
    380  async #onSettingsSync(event) {
    381    let current = event.data?.current;
    382    if (current) {
    383      lazy.logger.debug("Update provider info due to Remote Settings sync.");
    384      this.#originalProviderInfo = current;
    385      this.#setSearchProviderInfo(current);
    386    } else {
    387      lazy.logger.debug(
    388        "Ignoring Remote Settings sync data due to missing records."
    389      );
    390    }
    391    Services.obs.notifyObservers(null, "urlbar-persisted-search-terms-synced");
    392  }
    393 
    394  /**
    395   * Gets the search mode for a URL, if it matches an engine.
    396   *
    397   * @param {string} url
    398   */
    399  #searchModeForUrl(url) {
    400    // If there's no default engine, no engines are available.
    401    if (!Services.search.defaultEngine) {
    402      return null;
    403    }
    404    let result = Services.search.parseSubmissionURL(url);
    405    if (!result.engine?.isConfigEngine) {
    406      return null;
    407    }
    408    return {
    409      engineName: result.engine.name,
    410      isDefaultEngine: result.engine === Services.search.defaultEngine,
    411    };
    412  }
    413 
    414  /**
    415   * Used to set the local version of the search provider information.
    416   * This automatically maps the regexps to RegExp objects so that
    417   * we don't have to create a new instance each time.
    418   *
    419   * @param {Array} providerInfo
    420   *   A raw array of provider information to set.
    421   */
    422  #setSearchProviderInfo(providerInfo) {
    423    this.#searchProviderInfo = providerInfo.map(provider => {
    424      let newProvider = {
    425        ...provider,
    426        searchPageRegexp: new RegExp(provider.searchPageRegexp),
    427      };
    428      return newProvider;
    429    });
    430  }
    431 
    432  /**
    433   * Searches for provider information for a given url.
    434   *
    435   * @param {string} url
    436   *   The url to match for a provider.
    437   * @returns {PersistedTermsProviderInfo|null}
    438   *   Returns the provider information.
    439   */
    440  #getProviderInfoForURL(url) {
    441    return this.#searchProviderInfo.find(info =>
    442      info.searchPageRegexp.test(url)
    443    );
    444  }
    445 
    446  /**
    447   * Determines whether the search terms in the provided URL should be persisted
    448   * based on whether we find it's a default web SERP.
    449   *
    450   * @param {nsIURI} currentURI
    451   *   The current URI
    452   * @param {PersistedTermsProviderInfo} provider
    453   *   An array of provider information
    454   * @returns {boolean}
    455   *   Returns true if the parameteres match, null otherwise.
    456   */
    457  isDefaultPage(currentURI, provider) {
    458    let { searchParams } = URL.fromURI(currentURI);
    459    if (!searchParams.size) {
    460      return false;
    461    }
    462 
    463    if (provider.includeParams?.length) {
    464      let foundMatch = false;
    465      for (let param of provider.includeParams) {
    466        // The param might not be present on page load.
    467        if (param.canBeMissing && !searchParams.has(param.key)) {
    468          foundMatch = true;
    469          break;
    470        }
    471 
    472        // If we didn't provide a specific param value,
    473        // the presence of the name is sufficient.
    474        if (searchParams.has(param.key) && !param.values?.length) {
    475          foundMatch = true;
    476          break;
    477        }
    478 
    479        let value = searchParams.get(param.key);
    480        // The param name and value must be present.
    481        if (value && param?.values.includes(value)) {
    482          foundMatch = true;
    483          break;
    484        }
    485      }
    486      if (!foundMatch) {
    487        return false;
    488      }
    489    }
    490 
    491    if (provider.excludeParams) {
    492      for (let param of provider.excludeParams) {
    493        let value = searchParams.get(param.key);
    494        // If we found a value for a key but didn't
    495        // provide a specific value to match.
    496        if (!param.values?.length && value) {
    497          return false;
    498        }
    499        // If we provided a value and it was present.
    500        if (param.values?.includes(value)) {
    501          return false;
    502        }
    503      }
    504    }
    505    return true;
    506  }
    507 }
    508 
    509 export var UrlbarSearchTermsPersistence = new _UrlbarSearchTermsPersistence();