tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

MerinoClient.sys.mjs (18765B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
      6 
      // Module-level imports declared through `XPCOMUtils.declareLazy`. Each key is
      // the name used on `lazy` and each value is the URL of the module expected
      // to provide it. Note that `SkippableTimer` and `UrlbarUtils` both come from
      // UrlbarUtils.sys.mjs.
      // NOTE(review): presumably each module is only loaded on first access of the
      // corresponding `lazy` property -- confirm against `declareLazy`.
      7 const lazy = XPCOMUtils.declareLazy({
      8  ObliviousHTTP: "resource://gre/modules/ObliviousHTTP.sys.mjs",
      9  SkippableTimer: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs",
     10  UrlbarPrefs: "moz-src:///browser/components/urlbar/UrlbarPrefs.sys.mjs",
     11  UrlbarUtils: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs",
     12 });
     13 
     14 /**
     15 * @import {SkippableTimer} from "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs"
     16 * @import {OHTTPResponse} from "resource://gre/modules/ObliviousHTTP.sys.mjs"
     17 */
     18 
     19 /**
     20 * @typedef {object} MerinoClientBaseSuggestion
     21 * @property {string} request_id
     22 *   The request id associated with the suggestion.
     23 * @property {string} source
     24 *   The source of the suggestion.
     25 *
     26 * @typedef {{[key: string]:any} & MerinoClientBaseSuggestion} MerinoClientSuggestion
     27 *   Details of a suggestion received from Merino. Whilst the base properties are
     28 *   consistent the suggestion properties may vary depending on the provider.
     29 */
     30 
     // Names of the URL search params that `MerinoClient.fetch()` sets on the
     // request URL. The object is frozen; `MerinoClient.SEARCH_PARAMS` hands out
     // copies of it.
     31 const SEARCH_PARAMS = Object.freeze({
     32  CLIENT_VARIANTS: "client_variants",
     33  PROVIDERS: "providers",
     34  QUERY: "q",
     35  SEQUENCE_NUMBER: "seq",
     36  SESSION_ID: "sid",
     37 });
     38 
     // Default session timeout in milliseconds. It is the initial value of each
     // client's `sessionTimeoutMs`, which can be overridden per instance.
     39 const SESSION_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
     40 
     41 /**
     42 * Client class for querying the Merino server. Each instance maintains its own
     43 * session state including a session ID and sequence number that is included in
     44 * its requests to Merino.
        *
        * Each instance also keeps track of its most recent fetch (the abort
        * controller, the timeout timer, and the recorded status) and, when caching
        * is enabled, a single-entry cache of the most recent successful response.
        * Starting a new fetch aborts the previous one if it is still in flight.
     45 */
     46 export class MerinoClient {
         // Per-instance values declared through `XPCOMUtils.declareLazy`. The
         // logger's prefix includes this client's name.
     47  #lazy = XPCOMUtils.declareLazy({
     48    logger: () =>
     49      lazy.UrlbarUtils.getLogger({ prefix: `MerinoClient [${this.#name}]` }),
     50  });
     51 
     52  /**
     53   * The names of URL search params.
          *
          * A new shallow copy is returned on each access, so modifying the returned
          * object does not affect the params the client uses.
     54   */
     55  static get SEARCH_PARAMS() {
     56    return { ...SEARCH_PARAMS };
     57  }
     58 
     59  /**
     60   * @param {string} [name]
     61   *   An optional name for the client. It will be included in log messages.
     62   * @param {object} [options]
     63   *   Options object
     64   * @param {boolean} [options.allowOhttp]
     65   *   Whether the client is allowed to make its requests using OHTTP. When true
     66   *   and the following prefs are defined, all requests made by the client will
     67   *   use OHTTP:
     68   *
     69   *   browser.urlbar.merino.ohttpConfigURL (Nimbus: merinoOhttpConfigURL)
     70   *   browser.urlbar.merino.ohttpRelayURL (Nimbus: merinoOhttpRelayURL)
     71   *
     72   * @param {number} [options.cachePeriodMs]
     73   *   Enables caching when nonzero. The client will cache the response
     74   *   suggestions from its most recent successful request for the specified
     75   *   period. The client will serve the cached suggestions for all fetches for
     76   *   the same URL until either the cache period elapses or a successful fetch
     77   *   for a different URL is made (ignoring session-related URL params like
     78   *   session ID and sequence number). Caching is per `MerinoClient` instance
     79   *   and is not shared across instances.
     80   *
     81   *   WARNING: Cached suggestions are only ever evicted when new suggestions
     82   *   are cached. They are not evicted on a timer. If the client has cached
     83   *   some suggestions and no further fetches are made, they'll stay cached
     84   *   indefinitely. If your request URLs contain sensitive data that should not
     85   *   stick around in the object graph indefinitely, you should either not use
     86   *   caching or you should implement an eviction mechanism.
     87   *
     88   *   This cache strategy is intentionally simplistic and designed to be used
     89   *   by the urlbar with very short cache periods to make sure Firefox doesn't
     90   *   repeatedly call the same Merino URL on each keystroke in a urlbar
     91   *   session, which is wasteful and can cause a suggestion to flicker out of
     92   *   and into the urlbar panel as the user matches it again and again,
     93   *   especially when Merino latency is high. It is not designed to be a
     94   *   general caching mechanism. If you need more complex or long-lived
     95   *   caching, try working with the Merino team to add cache headers to the
     96   *   relevant responses so you can leverage Firefox's HTTP cache.
     97   */
     98  constructor(
     99    name = "anonymous",
    100    { allowOhttp = false, cachePeriodMs = 0 } = {}
    101  ) {
    102    this.#name = name;
    103    this.#allowOhttp = allowOhttp;
    104    this.#cachePeriodMs = cachePeriodMs;
    105  }
    106 
    107  /**
    108   * @returns {string}
    109   *   The name of the client.
    110   */
    111  get name() {
    112    return this.#name;
    113  }
    114 
    115  /**
    116   * @returns {number}
    117   *   If `resetSession()` is not called within this timeout period after a
    118   *   session starts, the session will time out and the next fetch will begin a
    119   *   new session.
          *
          *   The value is read when a session starts, so setting it does not change
          *   the timer of a session that is already in progress.
    120   */
    121  get sessionTimeoutMs() {
    122    return this.#sessionTimeoutMs;
    123  }
    124  set sessionTimeoutMs(value) {
    125    this.#sessionTimeoutMs = value;
    126  }
    127 
    128  // Note: Cannot be JSDoc due to https://github.com/pyodide/sphinx-js/issues/242
    129  // The current session ID. Null when there is no active session.
    130  get sessionID() {
    131    return this.#sessionID;
    132  }
    133 
    134  /**
    135   * @returns {number}
    136   *   The current sequence number in the current session. Zero when there is no
    137   *   active session.
    138   */
    139  get sequenceNumber() {
    140    return this.#sequenceNumber;
    141  }
    142 
    143  // Note: Cannot be JSDoc due to https://github.com/pyodide/sphinx-js/issues/242
    144  // A string that indicates the status of the last fetch. Possible values:
    145  // success, no_suggestion, timeout, network_error, http_error
         // Null until a fetch records a status. Fetches that are served from the
         // cache, that are aborted, or that return early because the endpoint URL is
         // empty or invalid do not update it.
    146  get lastFetchStatus() {
    147    return this.#lastFetchStatus;
    148  }
    149 
    150  /**
    151   * Fetches Merino suggestions.
    152   *
    153   * @param {object} options
    154   *   Options object
    155   * @param {string} options.query
    156   *   The search string.
    157   * @param {string[]} options.providers
    158   *   Array of provider names to request from Merino. If this is given it will
    159   *   override the `merinoProviders` Nimbus variable and its fallback pref
    160   *   `browser.urlbar.merino.providers`.
    161   * @param {number} options.timeoutMs
    162   *   Timeout in milliseconds. This method will return once the timeout
    163   *   elapses, a response is received, or an error occurs, whichever happens
    164   *   first.
    165   * @param {{[key: string]: string}} options.otherParams
    166   *   If specified, the otherParams will be added as a query params. Currently
    167   *   used for accuweather's location autocomplete endpoint
    168   * @returns {Promise<MerinoClientSuggestion[]>}
    169   *   The Merino suggestions, each with `request_id` and `source: "merino"`
    170   *   added, or an empty array if there's an error or unexpected response.
          *   An empty array is also returned when the endpoint URL pref is empty or
          *   invalid, when the fetch times out or is aborted, and when the response
          *   contains no suggestions.
          * @throws {Error}
          *   If `providers` is given but is not an array.
    171   */
    172  async fetch({
    173    query,
    174    providers = null,
    175    timeoutMs = lazy.UrlbarPrefs.get("merinoTimeoutMs"),
    176    otherParams = {},
    177  }) {
    178    this.#lazy.logger.debug("Fetch start", { query });
    179 
    180    // Get the endpoint URL. It's empty by default when running tests so they
    181    // don't hit the network.
    182    let endpointString = lazy.UrlbarPrefs.get("merinoEndpointURL");
    183    if (!endpointString) {
    184      return [];
    185    }
    186    let url = URL.parse(endpointString);
    187    if (!url) {
    188      let error = new Error(`${endpointString} is not a valid URL`);
    189      this.#lazy.logger.error("Error creating endpoint URL", error);
    190      return [];
    191    }
    192 
    193    // Start setting search params. Leave session-related params for last.
    194    url.searchParams.set(SEARCH_PARAMS.QUERY, query);
    195 
    196    let clientVariants = lazy.UrlbarPrefs.get("merinoClientVariants");
    197    if (clientVariants) {
    198      url.searchParams.set(SEARCH_PARAMS.CLIENT_VARIANTS, clientVariants);
    199    }
    200 
    201    let providersString;
    202    if (providers != null) {
    203      if (!Array.isArray(providers)) {
    204        throw new Error("providers must be an array if given");
    205      }
    206      providersString = providers.join(",");
    207    } else {
    208      let value = lazy.UrlbarPrefs.get("merinoProviders");
    209      if (value) {
    210        // The Nimbus variable/pref is used only if it's a non-empty string.
    211        providersString = value;
    212      }
    213    }
    214 
    215    // An empty providers string is a valid value and means Merino should
    216    // receive the request but not return any suggestions, so do not do a simple
    217    // `if (providersString)` here.
    218    if (typeof providersString == "string") {
    219      url.searchParams.set(SEARCH_PARAMS.PROVIDERS, providersString);
    220    }
    221 
    222    // if otherParams are present add them to the url
           // `set()` is used, so a key in otherParams that matches a param set above
           // replaces that param's value.
    223    for (const [param, value] of Object.entries(otherParams)) {
    224      url.searchParams.set(param, value);
    225    }
    226 
    227    // At this point, all search params should be set except for session-related
    228    // params.
    229 
           // Snapshot of the request used for logging. `url` here does not include the
           // session-related params.
    230    let details = { query, providers, timeoutMs, url: url.toString() };
    231 
    232    // If caching is enabled, generate the cache key for this request URL.
    233    let cacheKey;
    234    if (this.#cachePeriodMs && !MerinoClient._test_disableCache) {
             // Sort the params so the key does not depend on the order in which they
             // were set.
    235      url.searchParams.sort();
    236      cacheKey = url.toString();
    237 
    238      // If we have cached suggestions and they're still valid, return them.
             // This returns before any session state is touched and without
             // recording a fetch status.
    239      if (
    240        this.#cache.suggestions &&
    241        Date.now() < this.#cache.dateMs + this.#cachePeriodMs &&
    242        this.#cache.key == cacheKey
    243      ) {
    244        this.#lazy.logger.debug("Fetch served from cache", details);
    245        return this.#cache.suggestions;
    246      }
    247    }
    248 
    249    // At this point, we're calling Merino.
    250 
    251    // Set up the Merino session ID and related state. The session ID is a UUID
    252    // without leading and trailing braces.
    253    if (!this.#sessionID) {
    254      let uuid = Services.uuid.generateUUID().toString();
    255      this.#sessionID = uuid.substring(1, uuid.length - 1);
    256      this.#sequenceNumber = 0;
    257      this.#sessionTimer?.cancel();
    258 
    259      // Per spec, for the user's privacy, the session should time out and a new
    260      // session ID should be used if the engagement does not end soon.
    261      this.#sessionTimer = new lazy.SkippableTimer({
    262        name: "Merino session timeout",
    263        time: this.#sessionTimeoutMs,
    264        logger: this.#lazy.logger,
    265        callback: () => this.resetSession(),
    266      });
    267    }
    268    url.searchParams.set(SEARCH_PARAMS.SESSION_ID, this.#sessionID);
    269    url.searchParams.set(
    270      SEARCH_PARAMS.SEQUENCE_NUMBER,
    271      this.#sequenceNumber.toString()
    272    );
           // The request carries the current sequence number; the next request in the
           // same session will carry the incremented one.
    273    this.#sequenceNumber++;
    274 
    275    this.#lazy.logger.debug("Fetch details", {
    276      ...details,
    277      url: url.toString(),
    278    });
    279 
           // Records the status of this fetch at most once: the function sets its own
           // binding to null after the first call, so every later
           // `recordResponse?.()` call for this fetch is a no-op.
    280    /** @type {(category: string) => void} */
    281    let recordResponse = category => {
    282      this.#lazy.logger.debug("Fetch done", { status: category });
    283      this.#lastFetchStatus = category;
    284      recordResponse = null;
    285    };
    286 
    287    // Set up the timeout timer.
    288    let timer = (this.#timeoutTimer = new lazy.SkippableTimer({
    289      name: "Merino timeout",
    290      time: timeoutMs,
    291      logger: this.#lazy.logger,
    292      callback: () => {
    293        // The fetch timed out.
    294        this.#lazy.logger.debug("Fetch timed out", { timeoutMs });
    295        recordResponse?.("timeout");
    296      },
    297    }));
    298 
    299    // If there's an ongoing fetch, abort it so there's only one at a time. By
    300    // design we do not abort fetches on timeout or when the query is canceled
    301    // so we can record their latency.
    302    try {
    303      this.#fetchController?.abort();
    304    } catch (error) {
    305      this.#lazy.logger.error("Error aborting previous fetch", error);
    306    }
    307 
    308    // Do the fetch.
    309    /** @type {?OHTTPResponse|?Response} */
    310    let response;
    311    let controller = (this.#fetchController = new AbortController());
    312    await Promise.race([
    313      timer.promise,
    314      (async () => {
    315        try {
    316          // Canceling the timer below resolves its promise, which can resolve
    317          // the outer promise created by `Promise.race`. This inner async
    318          // function happens not to await anything after canceling the timer,
    319          // but if it did, `timer.promise` could win the race and resolve the
    320          // outer promise without a value. For that reason, we declare
    321          // `response` in the outer scope and set it here instead of returning
    322          // the response from this inner function and assuming it will also be
    323          // returned by `Promise.race`.
    324          let result = await this.#fetch(url, { signal: controller.signal });
    325          response = result?.response;
    326          this.#lazy.logger.debug("Got response", {
    327            status: response?.status,
    328            elapsedMs: result ? result.elapsedMs : "n/a",
    329            ...details,
    330          });
                 // This also covers the case where `#fetch()` returned null, which
                 // leaves `response` undefined.
    331          if (!response?.ok) {
    332            recordResponse?.("http_error");
    333          }
    334        } catch (error) {
                 // Aborted fetches are deliberately not recorded as network errors.
    335          if (error.name != "AbortError") {
    336            this.#lazy.logger.error("Fetch error", error);
    337            recordResponse?.("network_error");
    338          }
    339        } finally {
    340          // Now that the fetch is done, cancel the timeout timer so it doesn't
    341          // fire and record a timeout. If it already fired, which it would have
    342          // on timeout, or was already canceled, this is a no-op.
    343          timer.cancel();
                 // Only clear the controller if a newer fetch hasn't replaced it.
    344          if (controller == this.#fetchController) {
    345            this.#fetchController = null;
    346          }
                 // Settle any pending `waitForNextResponse()` promise. `response` is
                 // undefined here if the fetch threw or `#fetch()` returned null.
    347          this.#nextResponseDeferred?.resolve(response);
    348          this.#nextResponseDeferred = null;
    349        }
    350      })(),
    351    ]);
           // Only clear the timer if a newer fetch hasn't replaced it.
    352    if (timer == this.#timeoutTimer) {
    353      this.#timeoutTimer = null;
    354    }
    355 
    356    if (!response?.ok) {
    357      // If a status applies, `recordResponse()` was already called above (HTTP
             // error, network error, or timeout). No status is recorded when the fetch
             // was aborted or the race ended before a response was set.
    358      return [];
    359    }
    360 
    361    if (response.status == 204) {
    362      // No content. We check for this because `response.json()` (below) throws
    363      // in this case, and since we log the error it can spam the console.
    364      recordResponse?.("no_suggestion");
    365      return [];
    366    }
    367 
    368    // Get the response body as an object.
    369    /** @type {{suggestions: MerinoClientSuggestion[], request_id: string }} */
    370    let body;
    371    try {
    372      body = /** @type {any} */ (await response.json());
    373    } catch (error) {
             // `body` stays undefined, which is handled as "no suggestions" below.
    374      this.#lazy.logger.error("Error getting response as JSON", error);
    375    }
    376 
    377    if (body) {
    378      this.#lazy.logger.debug("Response body", body);
    379    }
    380 
    381    if (!body?.suggestions?.length) {
    382      recordResponse?.("no_suggestion");
    383      return [];
    384    }
    385 
    386    let { suggestions, request_id } = body;
    387    if (!Array.isArray(suggestions)) {
    388      this.#lazy.logger.error("Unexpected response", body);
    389      recordResponse?.("no_suggestion");
    390      return [];
    391    }
    392 
    393    recordResponse?.("success");
           // Copy each suggestion and stamp it with the response's request ID and the
           // "merino" source. These two values replace same-named properties on the
           // original suggestion, if any.
    394    suggestions = suggestions.map(suggestion => ({
    395      ...suggestion,
    396      request_id,
    397      source: "merino",
    398    }));
    399 
           // `cacheKey` is only defined when caching is enabled. The returned array
           // is the same object that is stored in the cache.
    400    if (cacheKey) {
    401      this.#cache = {
    402        suggestions,
    403        key: cacheKey,
    404        dateMs: Date.now(),
    405      };
    406    }
    407 
    408    return suggestions;
    409  }
    410 
    411  /**
    412   * Resets the Merino session ID and related state.
          *
          * Also cancels the session timer and resolves any pending promise returned
          * by `waitForNextSessionReset()`.
    413   */
    414  resetSession() {
    415    this.#sessionID = null;
    416    this.#sequenceNumber = 0;
    417    this.#sessionTimer?.cancel();
    418    this.#sessionTimer = null;
    419    this.#nextSessionResetDeferred?.resolve();
    420    this.#nextSessionResetDeferred = null;
    421  }
    422 
    423  /**
    424   * Cancels the timeout timer.
          *
          * This is a no-op when there is no current timeout timer.
    425   */
    426  cancelTimeoutTimer() {
    427    this.#timeoutTimer?.cancel();
    428  }
    429 
    430  /**
    431   * Returns a promise that's resolved when the next response is received or a
    432   * network error occurs.
          *
          * Repeated calls before the next response return the same promise.
    433   *
    434   * @returns {Promise<?Response|?OHTTPResponse>}
    435   *   The promise is resolved with the `Response` object or undefined if a
    436   *   network error occurred, the fetch was aborted, or the fetch couldn't be
          *   started.
    437   */
    438  waitForNextResponse() {
    439    if (!this.#nextResponseDeferred) {
    440      this.#nextResponseDeferred = Promise.withResolvers();
    441    }
    442    return this.#nextResponseDeferred.promise;
    443  }
    444 
    445  /**
    446   * Returns a promise that's resolved when the session is next reset, including
    447   * on session timeout.
          *
          * Repeated calls before the next reset return the same promise.
    448   *
    449   * @returns {Promise<void>}
    450   */
    451  waitForNextSessionReset() {
    452    if (!this.#nextSessionResetDeferred) {
    453      this.#nextSessionResetDeferred = Promise.withResolvers();
    454    }
    455    return this.#nextSessionResetDeferred.promise;
    456  }
    457 
    458  /**
    459   * Sends the Merino request. Uses OHTTP if `allowOhttp` is true and the Merino
    460   * OHTTP prefs are defined.
          *
          * The elapsed time of the request is recorded in the
          * `urlbarMerino.latencyByResponseStatus` Glean metric.
    461   *
    462   * @param {URL} url
    463   *   The request URL.
    464   * @param {object} options
    465   *   Options object.
    466   * @param {AbortSignal} options.signal
    467   *   An `AbortController.signal` for the fetch.
    468   * @returns {Promise<?FetchResult>}
    469   *   The fetch result, or null if the fetch couldn't be started, which
          *   happens when OHTTP is used and its config can't be retrieved.
    470   *
    471   * @typedef {object} FetchResult
    472   * @property {OHTTPResponse|Response} response
    473   *   The response object.
    474   * @property {number} elapsedMs
    475   *   The duration of the fetch in ms.
    476   */
    477  async #fetch(url, { signal }) {
    478    let configUrl;
    479    let relayUrl;
    480    if (this.#allowOhttp) {
    481      configUrl = lazy.UrlbarPrefs.get("merinoOhttpConfigURL");
    482      relayUrl = lazy.UrlbarPrefs.get("merinoOhttpRelayURL");
    483    }
    484 
           // OHTTP is used only when it is allowed and both URLs are set.
    485    let useOhttp = configUrl && relayUrl;
    486 
    487    let response;
           // The clock starts before the OHTTP config is retrieved, so for OHTTP
           // requests `elapsedMs` includes the time spent getting the config.
    488    let startMs = ChromeUtils.now();
    489    if (!useOhttp) {
    490      response = await fetch(url, { signal });
    491    } else {
    492      let config = await lazy.ObliviousHTTP.getOHTTPConfig(configUrl);
    493      if (!config) {
    494        this.#lazy.logger.error("Couldn't get OHTTP config");
    495        return null;
    496      }
    497 
    498      this.#lazy.logger.debug("Sending request using OHTTP", { url });
    499      response = await lazy.ObliviousHTTP.ohttpRequest(relayUrl, config, url, {
    500        signal,
    501        headers: {},
    502      });
    503    }
    504 
    505    let elapsedMs = ChromeUtils.now() - startMs;
           // The metric label is the HTTP status code, with "_ohttp" appended for
           // requests made using OHTTP.
    506    let label = response.status.toString();
    507    if (useOhttp) {
    508      label += "_ohttp";
    509    }
    510    Glean.urlbarMerino.latencyByResponseStatus[label].accumulateSamples([
    511      elapsedMs,
    512    ]);
    513 
    514    return { response, elapsedMs };
    515  }
    516 
         // When true, `fetch()` neither reads nor writes the cache on any instance.
         // NOTE(review): the `_test_` prefix suggests this is for tests only.
    517  static _test_disableCache = false;
    518 
         // The `_test_*` getters below expose private state.
         // NOTE(review): presumably for use by tests only, going by the prefix.
    519  get _test_sessionTimer() {
    520    return this.#sessionTimer;
    521  }
    522 
    523  get _test_timeoutTimer() {
    524    return this.#timeoutTimer;
    525  }
    526 
    527  get _test_fetchController() {
    528    return this.#fetchController;
    529  }
    530 
    531  // State related to the current session.
    532  /** @type {string} */
    533  #sessionID = null;
    534  #sequenceNumber = 0;
    535  /** @type {SkippableTimer} */
    536  #sessionTimer = null;
    537  #sessionTimeoutMs = SESSION_TIMEOUT_MS;
    538 
         // The client name passed to the constructor; used in the logger prefix.
    539  #name;
         // The timeout timer of the most recent fetch, or null once that fetch's
         // race has finished.
    540  /** @type {SkippableTimer} */
    541  #timeoutTimer = null;
         // The abort controller of the most recent fetch, or null once that fetch
         // has completed.
    542  /** @type {AbortController} */
    543  #fetchController = null;
    544  /** @type {string} */
    545  #lastFetchStatus = null;
         // Deferreds backing `waitForNextResponse()` and `waitForNextSessionReset()`.
         // Each is created on demand and set back to null once resolved.
    546  /** @type {PromiseWithResolvers<?Response|?OHTTPResponse>} */
    547  #nextResponseDeferred = null;
    548  /** @type {PromiseWithResolvers<void>} */
    549  #nextSessionResetDeferred = null;
         // Constructor options; see the constructor documentation.
    550  #cachePeriodMs = 0;
    551  #allowOhttp = false;
    552 
    553  // When caching is enabled, we cache response suggestions from the most recent
    554  // successful request.
    555  #cache = {
    556    /**
    557     * @type {MerinoClientSuggestion[]}
    558     *   The cached suggestions array.
    559     */
    560    suggestions: null,
    561    /**
    562     * @type {string}
    563     *   The cache key: the stringified request URL without session-related
    564     *   params (session ID and sequence number).
    565     */
    566    key: null,
    567    /**
    568     * The date the suggestions were cached as returned by `Date.now()`.
    569     */
    570    dateMs: 0,
    571  };
    572 }