MerinoClient.sys.mjs (18765B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";

const lazy = XPCOMUtils.declareLazy({
  ObliviousHTTP: "resource://gre/modules/ObliviousHTTP.sys.mjs",
  SkippableTimer: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs",
  UrlbarPrefs: "moz-src:///browser/components/urlbar/UrlbarPrefs.sys.mjs",
  UrlbarUtils: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs",
});

/**
 * @import {SkippableTimer} from "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs"
 * @import {OHTTPResponse} from "resource://gre/modules/ObliviousHTTP.sys.mjs"
 */

/**
 * @typedef {object} MerinoClientBaseSuggestion
 * @property {string} request_id
 *   The request id associated with the suggestion.
 * @property {string} source
 *   The source of the suggestion.
 *
 * @typedef {{[key: string]:any} & MerinoClientBaseSuggestion} MerinoClientSuggestion
 *   Details of a suggestion received from Merino. Whilst the base properties are
 *   consistent the suggestion properties may vary depending on the provider.
 */

const SEARCH_PARAMS = Object.freeze({
  CLIENT_VARIANTS: "client_variants",
  PROVIDERS: "providers",
  QUERY: "q",
  SEQUENCE_NUMBER: "seq",
  SESSION_ID: "sid",
});

const SESSION_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes

/**
 * Client class for querying the Merino server. Each instance maintains its own
 * session state including a session ID and sequence number that is included in
 * its requests to Merino.
 */
export class MerinoClient {
  #lazy = XPCOMUtils.declareLazy({
    logger: () =>
      lazy.UrlbarUtils.getLogger({ prefix: `MerinoClient [${this.#name}]` }),
  });

  /**
   * The names of URL search params. A fresh copy is returned on each access.
   */
  static get SEARCH_PARAMS() {
    return { ...SEARCH_PARAMS };
  }

  /**
   * @param {string} [name]
   *   An optional name for the client. It will be included in log messages.
   * @param {object} [options]
   *   Options object
   * @param {boolean} [options.allowOhttp]
   *   Whether the client is allowed to make its requests using OHTTP. When true
   *   and the following prefs are defined, all requests made by the client will
   *   use OHTTP:
   *
   *     browser.urlbar.merino.ohttpConfigURL (Nimbus: merinoOhttpConfigURL)
   *     browser.urlbar.merino.ohttpRelayURL (Nimbus: merinoOhttpRelayURL)
   *
   * @param {number} [options.cachePeriodMs]
   *   Enables caching when nonzero. The client will cache the response
   *   suggestions from its most recent successful request for the specified
   *   period. The client will serve the cached suggestions for all fetches for
   *   the same URL until either the cache period elapses or a successful fetch
   *   for a different URL is made (ignoring session-related URL params like
   *   session ID and sequence number). Caching is per `MerinoClient` instance
   *   and is not shared across instances.
   *
   *   WARNING: Cached suggestions are only ever evicted when new suggestions
   *   are cached. They are not evicted on a timer. If the client has cached
   *   some suggestions and no further fetches are made, they'll stay cached
   *   indefinitely. If your request URLs contain sensitive data that should not
   *   stick around in the object graph indefinitely, you should either not use
   *   caching or you should implement an eviction mechanism.
   *
   *   This cache strategy is intentionally simplistic and designed to be used
   *   by the urlbar with very short cache periods to make sure Firefox doesn't
   *   repeatedly call the same Merino URL on each keystroke in a urlbar
   *   session, which is wasteful and can cause a suggestion to flicker out of
   *   and into the urlbar panel as the user matches it again and again,
   *   especially when Merino latency is high. It is not designed to be a
   *   general caching mechanism. If you need more complex or long-lived
   *   caching, try working with the Merino team to add cache headers to the
   *   relevant responses so you can leverage Firefox's HTTP cache.
   */
  constructor(
    name = "anonymous",
    { allowOhttp = false, cachePeriodMs = 0 } = {}
  ) {
    this.#name = name;
    this.#allowOhttp = allowOhttp;
    this.#cachePeriodMs = cachePeriodMs;
  }

  /**
   * @returns {string}
   *   The name of the client.
   */
  get name() {
    return this.#name;
  }

  /**
   * @returns {number}
   *   If `resetSession()` is not called within this timeout period after a
   *   session starts, the session will time out and the next fetch will begin a
   *   new session.
   */
  get sessionTimeoutMs() {
    return this.#sessionTimeoutMs;
  }
  set sessionTimeoutMs(value) {
    this.#sessionTimeoutMs = value;
  }

  // Note: Cannot be JSDoc due to https://github.com/pyodide/sphinx-js/issues/242
  // The current session ID. Null when there is no active session.
  get sessionID() {
    return this.#sessionID;
  }

  /**
   * @returns {number}
   *   The current sequence number in the current session. Zero when there is no
   *   active session.
   */
  get sequenceNumber() {
    return this.#sequenceNumber;
  }

  // Note: Cannot be JSDoc due to https://github.com/pyodide/sphinx-js/issues/242
  // A string that indicates the status of the last fetch. Null until a fetch
  // records a status. Possible values:
  // success, timeout, network_error, http_error, no_suggestion
  get lastFetchStatus() {
    return this.#lastFetchStatus;
  }

  /**
   * Fetches Merino suggestions.
   *
   * @param {object} options
   *   Options object
   * @param {string} options.query
   *   The search string.
   * @param {string[]} [options.providers]
   *   Array of provider names to request from Merino. If this is given it will
   *   override the `merinoProviders` Nimbus variable and its fallback pref
   *   `browser.urlbar.merino.providers`.
   * @param {number} [options.timeoutMs]
   *   Timeout in milliseconds. This method will return once the timeout
   *   elapses, a response is received, or an error occurs, whichever happens
   *   first. Defaults to the `merinoTimeoutMs` value from `UrlbarPrefs`.
   * @param {{[key: string]: string}} [options.otherParams]
   *   If specified, the otherParams will be added as query params. Currently
   *   used for accuweather's location autocomplete endpoint. A nullish value
   *   is treated the same as an empty object.
   * @returns {Promise<MerinoClientSuggestion[]>}
   *   The Merino suggestions, or an empty array if there's an error, a
   *   timeout, or an unexpected response.
   * @throws {Error}
   *   If `providers` is given but is not an array.
   */
  async fetch({
    query,
    providers = null,
    timeoutMs = lazy.UrlbarPrefs.get("merinoTimeoutMs"),
    otherParams = {},
  }) {
    this.#lazy.logger.debug("Fetch start", { query });

    // Get the endpoint URL. It's empty by default when running tests so they
    // don't hit the network.
    let endpointString = lazy.UrlbarPrefs.get("merinoEndpointURL");
    if (!endpointString) {
      return [];
    }
    let url = URL.parse(endpointString);
    if (!url) {
      let error = new Error(`${endpointString} is not a valid URL`);
      this.#lazy.logger.error("Error creating endpoint URL", error);
      return [];
    }

    // Start setting search params. Leave session-related params for last.
    url.searchParams.set(SEARCH_PARAMS.QUERY, query);

    let clientVariants = lazy.UrlbarPrefs.get("merinoClientVariants");
    if (clientVariants) {
      url.searchParams.set(SEARCH_PARAMS.CLIENT_VARIANTS, clientVariants);
    }

    let providersString;
    if (providers != null) {
      if (!Array.isArray(providers)) {
        throw new Error("providers must be an array if given");
      }
      providersString = providers.join(",");
    } else {
      let value = lazy.UrlbarPrefs.get("merinoProviders");
      if (value) {
        // The Nimbus variable/pref is used only if it's a non-empty string.
        providersString = value;
      }
    }

    // An empty providers string is a valid value and means Merino should
    // receive the request but not return any suggestions, so do not do a simple
    // `if (providersString)` here.
    if (typeof providersString == "string") {
      url.searchParams.set(SEARCH_PARAMS.PROVIDERS, providersString);
    }

    // If otherParams are present add them to the url. The parameter default
    // only covers `undefined`, so also tolerate an explicit `null` here, the
    // same way a null `providers` is tolerated above.
    for (const [param, value] of Object.entries(otherParams ?? {})) {
      url.searchParams.set(param, value);
    }

    // At this point, all search params should be set except for session-related
    // params.

    let details = { query, providers, timeoutMs, url: url.toString() };

    // If caching is enabled, generate the cache key for this request URL.
    let cacheKey;
    if (this.#cachePeriodMs && !MerinoClient._test_disableCache) {
      // Sort the params so the key doesn't depend on the order they were set.
      url.searchParams.sort();
      cacheKey = url.toString();

      // If we have cached suggestions and they're still valid, return them.
      if (
        this.#cache.suggestions &&
        Date.now() < this.#cache.dateMs + this.#cachePeriodMs &&
        this.#cache.key == cacheKey
      ) {
        this.#lazy.logger.debug("Fetch served from cache", details);
        return this.#cache.suggestions;
      }
    }

    // At this point, we're calling Merino.

    // Set up the Merino session ID and related state. The session ID is a UUID
    // without leading and trailing braces.
    if (!this.#sessionID) {
      let uuid = Services.uuid.generateUUID().toString();
      this.#sessionID = uuid.substring(1, uuid.length - 1);
      this.#sequenceNumber = 0;
      this.#sessionTimer?.cancel();

      // Per spec, for the user's privacy, the session should time out and a new
      // session ID should be used if the engagement does not end soon.
      this.#sessionTimer = new lazy.SkippableTimer({
        name: "Merino session timeout",
        time: this.#sessionTimeoutMs,
        logger: this.#lazy.logger,
        callback: () => this.resetSession(),
      });
    }
    url.searchParams.set(SEARCH_PARAMS.SESSION_ID, this.#sessionID);
    url.searchParams.set(
      SEARCH_PARAMS.SEQUENCE_NUMBER,
      this.#sequenceNumber.toString()
    );
    this.#sequenceNumber++;

    this.#lazy.logger.debug("Fetch details", {
      ...details,
      url: url.toString(),
    });

    // Records the fetch status at most once: the function nulls itself out
    // when called, so every later `recordResponse?.()` call is a no-op.
    /** @type {(category: string) => void} */
    let recordResponse = category => {
      this.#lazy.logger.debug("Fetch done", { status: category });
      this.#lastFetchStatus = category;
      recordResponse = null;
    };

    // Set up the timeout timer.
    let timer = (this.#timeoutTimer = new lazy.SkippableTimer({
      name: "Merino timeout",
      time: timeoutMs,
      logger: this.#lazy.logger,
      callback: () => {
        // The fetch timed out.
        this.#lazy.logger.debug("Fetch timed out", { timeoutMs });
        recordResponse?.("timeout");
      },
    }));

    // If there's an ongoing fetch, abort it so there's only one at a time. By
    // design we do not abort fetches on timeout or when the query is canceled
    // so we can record their latency.
    try {
      this.#fetchController?.abort();
    } catch (error) {
      this.#lazy.logger.error("Error aborting previous fetch", error);
    }

    // Do the fetch.
    /** @type {?OHTTPResponse|?Response} */
    let response;
    let controller = (this.#fetchController = new AbortController());
    await Promise.race([
      timer.promise,
      (async () => {
        try {
          // Canceling the timer below resolves its promise, which can resolve
          // the outer promise created by `Promise.race`. This inner async
          // function happens not to await anything after canceling the timer,
          // but if it did, `timer.promise` could win the race and resolve the
          // outer promise without a value. For that reason, we declare
          // `response` in the outer scope and set it here instead of returning
          // the response from this inner function and assuming it will also be
          // returned by `Promise.race`.
          let result = await this.#fetch(url, { signal: controller.signal });
          response = result?.response;
          this.#lazy.logger.debug("Got response", {
            status: response?.status,
            elapsedMs: result ? result.elapsedMs : "n/a",
            ...details,
          });
          if (!response?.ok) {
            recordResponse?.("http_error");
          }
        } catch (error) {
          // Aborted fetches are deliberately neither logged nor recorded.
          if (error.name != "AbortError") {
            this.#lazy.logger.error("Fetch error", error);
            recordResponse?.("network_error");
          }
        } finally {
          // Now that the fetch is done, cancel the timeout timer so it doesn't
          // fire and record a timeout. If it already fired, which it would have
          // on timeout, or was already canceled, this is a no-op.
          timer.cancel();
          // Only clear the controller if a newer fetch hasn't replaced it.
          if (controller == this.#fetchController) {
            this.#fetchController = null;
          }
          this.#nextResponseDeferred?.resolve(response);
          this.#nextResponseDeferred = null;
        }
      })(),
    ]);
    // Only clear the timer if a newer fetch hasn't replaced it.
    if (timer == this.#timeoutTimer) {
      this.#timeoutTimer = null;
    }

    if (!response?.ok) {
      // `recordResponse()` was already called above, no need to call it here.
      return [];
    }

    if (response.status == 204) {
      // No content. We check for this because `response.json()` (below) throws
      // in this case, and since we log the error it can spam the console.
      recordResponse?.("no_suggestion");
      return [];
    }

    // Get the response body as an object.
    /** @type {{suggestions: MerinoClientSuggestion[], request_id: string }} */
    let body;
    try {
      body = /** @type {any} */ (await response.json());
    } catch (error) {
      this.#lazy.logger.error("Error getting response as JSON", error);
    }

    if (body) {
      this.#lazy.logger.debug("Response body", body);
    }

    if (!body?.suggestions?.length) {
      recordResponse?.("no_suggestion");
      return [];
    }

    let { suggestions, request_id } = body;
    if (!Array.isArray(suggestions)) {
      this.#lazy.logger.error("Unexpected response", body);
      recordResponse?.("no_suggestion");
      return [];
    }

    recordResponse?.("success");
    // Stamp each suggestion with the response's request ID and the source.
    suggestions = suggestions.map(suggestion => ({
      ...suggestion,
      request_id,
      source: "merino",
    }));

    if (cacheKey) {
      this.#cache = {
        suggestions,
        key: cacheKey,
        dateMs: Date.now(),
      };
    }

    return suggestions;
  }

  /**
   * Resets the Merino session ID and related state.
   */
  resetSession() {
    this.#sessionID = null;
    this.#sequenceNumber = 0;
    this.#sessionTimer?.cancel();
    this.#sessionTimer = null;
    this.#nextSessionResetDeferred?.resolve();
    this.#nextSessionResetDeferred = null;
  }

  /**
   * Cancels the timeout timer.
   */
  cancelTimeoutTimer() {
    this.#timeoutTimer?.cancel();
  }

  /**
   * Returns a promise that's resolved when the next response is received or a
   * network error occurs.
   *
   * @returns {Promise<?Response|?OHTTPResponse>}
   *   The promise is resolved with the `Response` object or undefined if a
   *   network error occurred.
   */
  waitForNextResponse() {
    if (!this.#nextResponseDeferred) {
      this.#nextResponseDeferred = Promise.withResolvers();
    }
    return this.#nextResponseDeferred.promise;
  }

  /**
   * Returns a promise that's resolved when the session is next reset, including
   * on session timeout.
   *
   * @returns {Promise<void>}
   */
  waitForNextSessionReset() {
    if (!this.#nextSessionResetDeferred) {
      this.#nextSessionResetDeferred = Promise.withResolvers();
    }
    return this.#nextSessionResetDeferred.promise;
  }

  /**
   * Sends the Merino request. Uses OHTTP if `allowOhttp` is true and the Merino
   * OHTTP prefs are defined.
   *
   * @param {URL} url
   *   The request URL.
   * @param {object} options
   *   Options object.
   * @param {AbortSignal} options.signal
   *   An `AbortController.signal` for the fetch.
   * @returns {Promise<?FetchResult>}
   *   The fetch result, or null if the fetch couldn't be started.
   *
   * @typedef {object} FetchResult
   * @property {OHTTPResponse|Response} response
   *   The response object.
   * @property {number} elapsedMs
   *   The duration of the fetch in ms.
   */
  async #fetch(url, { signal }) {
    let configUrl;
    let relayUrl;
    if (this.#allowOhttp) {
      configUrl = lazy.UrlbarPrefs.get("merinoOhttpConfigURL");
      relayUrl = lazy.UrlbarPrefs.get("merinoOhttpRelayURL");
    }

    let useOhttp = configUrl && relayUrl;

    let response;
    let startMs = ChromeUtils.now();
    if (!useOhttp) {
      response = await fetch(url, { signal });
    } else {
      let config = await lazy.ObliviousHTTP.getOHTTPConfig(configUrl);
      if (!config) {
        this.#lazy.logger.error("Couldn't get OHTTP config");
        return null;
      }

      this.#lazy.logger.debug("Sending request using OHTTP", { url });
      response = await lazy.ObliviousHTTP.ohttpRequest(relayUrl, config, url, {
        signal,
        headers: {},
      });
    }

    // Record latency under a label made of the HTTP status, with an "_ohttp"
    // suffix when the request went over OHTTP.
    let elapsedMs = ChromeUtils.now() - startMs;
    let label = response.status.toString();
    if (useOhttp) {
      label += "_ohttp";
    }
    Glean.urlbarMerino.latencyByResponseStatus[label].accumulateSamples([
      elapsedMs,
    ]);

    return { response, elapsedMs };
  }

  static _test_disableCache = false;

  get _test_sessionTimer() {
    return this.#sessionTimer;
  }

  get _test_timeoutTimer() {
    return this.#timeoutTimer;
  }

  get _test_fetchController() {
    return this.#fetchController;
  }

  // State related to the current session.
  /** @type {string} */
  #sessionID = null;
  #sequenceNumber = 0;
  /** @type {SkippableTimer} */
  #sessionTimer = null;
  #sessionTimeoutMs = SESSION_TIMEOUT_MS;

  #name;
  /** @type {SkippableTimer} */
  #timeoutTimer = null;
  /** @type {AbortController} */
  #fetchController = null;
  /** @type {string} */
  #lastFetchStatus = null;
  /** @type {PromiseWithResolvers<?Response|?OHTTPResponse>} */
  #nextResponseDeferred = null;
  /** @type {PromiseWithResolvers<void>} */
  #nextSessionResetDeferred = null;
  #cachePeriodMs = 0;
  #allowOhttp = false;

  // When caching is enabled, we cache response suggestions from the most recent
  // successful request.
  #cache = {
    /**
     * @type {MerinoClientSuggestion[]}
     *   The cached suggestions array.
     */
    suggestions: null,
    /**
     * @type {string}
     *   The cache key: the stringified request URL without session-related
     *   params (session ID and sequence number).
     */
    key: null,
    /**
     * The date the suggestions were cached as returned by `Date.now()`.
     */
    dateMs: 0,
  };
}