tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit fe96a03c97e8fab2d79580c6151eb72656af9a2c
parent 3de6cdd148ec058b4b3287390c0311c3a87547b5
Author: Taimur Hasan <taimurhasan@proton.me>
Date:   Fri, 12 Dec 2025 18:23:48 +0000

Bug 2000033: Integrate Glean MLTelemetry into the inference pipeline r=ai-ondevice-reviewers,gregtatum,rconcepcion

Differential Revision: https://phabricator.services.mozilla.com/D272945

Diffstat:
M toolkit/components/ml/MLTelemetry.sys.mjs | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
M toolkit/components/ml/actors/MLEngineParent.sys.mjs | 86 +++++++++++++++++++++++++++++++++++--------------------------------------------
M toolkit/components/ml/content/backends/ONNXPipeline.mjs | 20 ++++++++++++++++++++
M toolkit/components/ml/metrics.yaml | 29 ++++++++++++++++++++++++++++-
M toolkit/components/ml/tests/browser/browser_ml_semantic_history_search_perf.js | 4 +++-
M toolkit/components/ml/tests/browser/browser_ml_telemetry.js | 45 +++++++++++++++++++++++++++++++++++++++++++--
M toolkit/components/ml/tests/browser/head.js | 20 +++++++++++++-------
7 files changed, 254 insertions(+), 101 deletions(-)

diff --git a/toolkit/components/ml/MLTelemetry.sys.mjs b/toolkit/components/ml/MLTelemetry.sys.mjs @@ -11,6 +11,10 @@ ChromeUtils.defineLazyGetter(lazy, "console", () => { }); }); +ChromeUtils.defineESModuleGetters(lazy, { + isAddonEngineId: "chrome://global/content/ml/Utils.sys.mjs", +}); + /** * MLTelemetry provides a mechanism tracking a "flow" of operations * related to a machine learning feature. A flow is a sequence of related @@ -70,6 +74,20 @@ export class MLTelemetry { } /** + * Returns the label used in telemetry for a given engine ID. + * Converts addon engine IDs to "webextension" label. + * + * @param {string} engineId - The engine ID to convert. + * @returns {string} The Glean label for the engine. + */ + static getGleanLabel(engineId) { + if (lazy.isAddonEngineId(engineId)) { + return "webextension"; + } + return engineId; + } + + /** * Starts a telemetry session for the given flow. * * @param {object} [options] - Session start options. @@ -149,13 +167,12 @@ export class MLTelemetry { * @param {object} options - Engine creation success options. * @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided. * @param {string} options.engineId - The engine identifier (e.g., "pdfjs", "ml-suggest-intent"). - * @param {string} [options.label] - Label for the old timing distribution metric. Defaults to engineId if not provided. * @param {number} options.duration - Engine creation time in milliseconds. */ - recordEngineCreationSuccessFlow({ flowId, engineId, label, duration }) { + recordEngineCreationSuccessFlow({ flowId, engineId, duration }) { const currentFlowId = flowId || this.#flowId; const actualEngineId = engineId; - const actualLabel = label || engineId; + const actualLabel = MLTelemetry.getGleanLabel(engineId); Glean.firefoxAiRuntime.engineCreationSuccessFlow.record({ flow_id: currentFlowId, @@ -185,7 +202,7 @@ export class MLTelemetry { * @param {string} options.featureId - The feature identifier. 
* @param {string} options.taskName - The task name. * @param {string} options.engineId - The engine identifier. - * @param {string} options.error - The error class/message. + * @param {string|object} options.error - The error class/message or object. */ recordEngineCreationFailure({ flowId, @@ -196,6 +213,11 @@ export class MLTelemetry { error, }) { const currentFlowId = flowId || this.#flowId; + // Ensure error is always a string + const errorString = + typeof error === "object" && error !== null + ? String(error.name || error.message || error) + : String(error); Glean.firefoxAiRuntime.engineCreationFailure.record({ flow_id: currentFlowId, @@ -203,7 +225,7 @@ export class MLTelemetry { featureId, taskName, engineId, - error, + error: errorString, }); this.logEventToConsole(this.recordEngineCreationFailure, { @@ -212,67 +234,112 @@ export class MLTelemetry { featureId, taskName, engineId, - error, + error: errorString, }); } /** * Records a successful inference run event. * - * @param {object} options - Inference success options. - * @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided. - * @param {string} [options.engineId] - The engine identifier. Defaults to undefined. - * @param {string} [options.label] - Label for the old timing distribution metric. Defaults to no-label if not provided. - * @param {number} options.tokenizingTime - Time spent tokenizing in milliseconds. - * @param {number} options.inferenceTime - Time spent on inference in milliseconds. + * @param {string} engineId - The engine identifier. + * @param {object} metrics - The inference metrics object. + * @param {number} [metrics.preprocessingTime] - Time spent preprocessing (legacy). + * @param {number} [metrics.tokenizingTime] - Time spent tokenizing in milliseconds. + * @param {number} [metrics.inferenceTime] - Time spent on inference in milliseconds. + * @param {number} [metrics.decodingTime] - Time spent decoding in milliseconds. 
+ * @param {number} [metrics.inputTokens] - Number of input tokens. + * @param {number} [metrics.outputTokens] - Number of output tokens. + * @param {number} [metrics.timeToFirstToken] - Time to first token in milliseconds. + * @param {number} [metrics.tokensPerSecond] - Tokens per second. + * @param {number} [metrics.timePerOutputToken] - Time per output token in milliseconds. */ - recordRunInferenceSuccessFlow({ - flowId, - engineId, - label, - tokenizingTime, - inferenceTime, - }) { - const currentFlowId = flowId || this.#flowId; - const EngineId = engineId || undefined; - const Label = label || "no-label"; + recordRunInferenceSuccessFlow(engineId, metrics) { + try { + const currentFlowId = this.#flowId; + const EngineId = engineId || undefined; + const Label = engineId ? MLTelemetry.getGleanLabel(engineId) : "no-label"; - Glean.firefoxAiRuntime.runInferenceSuccessFlow.record({ - flow_id: currentFlowId, - tokenizing_time: Math.round(tokenizingTime), - inference_time: Math.round(inferenceTime), - }); + // Handle legacy preprocessingTime field + const tokenizingTime = + metrics.preprocessingTime ?? metrics.tokenizingTime; - // Also record the old labeled timing distribution metric - const totalTime = Math.round(tokenizingTime + inferenceTime); - Glean.firefoxAiRuntime.runInferenceSuccess[Label].accumulateSingleSample( - totalTime - ); + // Ensure all metrics are properly rounded/typed for Glean + // This will be updated to use the method from revision(D271263) + const gleanPayload = { + flow_id: currentFlowId, + tokenizing_time: + tokenizingTime != null ? Math.round(tokenizingTime) : undefined, + inference_time: + metrics.inferenceTime != null + ? Math.round(metrics.inferenceTime) + : undefined, + decoding_time: + metrics.decodingTime != null + ? Math.round(metrics.decodingTime) + : undefined, + input_tokens: + metrics.inputTokens != null + ? Math.round(metrics.inputTokens) + : undefined, + output_tokens: + metrics.outputTokens != null + ? 
Math.round(metrics.outputTokens) + : undefined, + time_to_first_token: + metrics.timeToFirstToken != null + ? Math.round(metrics.timeToFirstToken) + : undefined, + tokens_per_second: + metrics.tokensPerSecond != null + ? Math.round(metrics.tokensPerSecond * 100) / 100 + : undefined, + time_per_output_token: + metrics.timePerOutputToken != null + ? Math.round(metrics.timePerOutputToken * 100) / 100 + : undefined, + }; - this.logEventToConsole(this.recordRunInferenceSuccessFlow, { - flowId: currentFlowId, - engineId: EngineId, - label: Label, - tokenizingTime, - inferenceTime, - }); + Glean.firefoxAiRuntime.runInferenceSuccessFlow.record(gleanPayload); + + // record the old labeled timing distribution metric + const totalTime = Math.round( + (tokenizingTime || 0) + + (metrics.inferenceTime || 0) + + (metrics.decodingTime || 0) + ); + + Glean.firefoxAiRuntime.runInferenceSuccess[Label].accumulateSingleSample( + totalTime + ); + + this.logEventToConsole(this.recordRunInferenceSuccessFlow, { + ...gleanPayload, + engineId: EngineId, + label: Label, + }); + } catch (telemetryError) { + lazy.console.error("Failed to record ML telemetry:", telemetryError); + } } /** * Records a failed inference run event. * - * @param {string} error - The error class/message. + * @param {string|object} error - The error class/message or object. * @param {string} [flow_id=this.#flowId] - The flow ID. Uses instance flowId if not provided. */ recordRunInferenceFailure(error, flow_id = this.flowId) { + // Ensure error is always a string + const errorString = error instanceof Error ? error.message : String(error); + Glean.firefoxAiRuntime.runInferenceFailure.record({ flow_id, - error, + error: errorString, }); this.logEventToConsole(this.recordRunInferenceFailure, { flow_id, - error, + error: errorString, }); } diff --git a/toolkit/components/ml/actors/MLEngineParent.sys.mjs b/toolkit/components/ml/actors/MLEngineParent.sys.mjs @@ -2,6 +2,7 @@ * License, v. 2.0. 
If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs"; +import { MLTelemetry } from "chrome://global/content/ml/MLTelemetry.sys.mjs"; /** * @import { MLEngineChild } from "./MLEngineChild.sys.mjs" @@ -15,7 +16,6 @@ const lazy = XPCOMUtils.declareLazy({ clearTimeout: "resource://gre/modules/Timer.sys.mjs", ModelHub: "chrome://global/content/ml/ModelHub.sys.mjs", Progress: "chrome://global/content/ml/Utils.sys.mjs", - isAddonEngineId: "chrome://global/content/ml/Utils.sys.mjs", OPFS: "chrome://global/content/ml/OPFS.sys.mjs", BACKENDS: "chrome://global/content/ml/EngineProcess.sys.mjs", stringifyForLog: "chrome://global/content/ml/Utils.sys.mjs", @@ -222,7 +222,7 @@ export class MLEngineParent extends JSProcessActorParent { }); } - const engineId = pipelineOptions.engineId; + const { featureId, engineId } = pipelineOptions; // Allow notifications callback changes even when reusing engine. this.notificationsCallback = notificationsCallback; @@ -235,6 +235,7 @@ export class MLEngineParent extends JSProcessActorParent { Promise.withResolvers(); MLEngineParent.engineLocks.set(engineId, lockPromise); MLEngineParent.engineCreationAbortSignal.set(engineId, abortSignal); + try { const currentEngine = MLEngine.getInstance(engineId); if (currentEngine) { @@ -273,12 +274,24 @@ export class MLEngineParent extends JSProcessActorParent { const creationTime = ChromeUtils.now() - start; - Glean.firefoxAiRuntime.engineCreationSuccess[ - engine.getGleanLabel() - ].accumulateSingleSample(creationTime); + engine.telemetry.recordEngineCreationSuccessFlow({ + engineId, + duration: creationTime, + }); // TODO - What happens if the engine is already killed here? 
return engine; + } catch (error) { + const { modelId, taskName, flowId } = pipelineOptions; + const telemetry = new MLTelemetry({ featureId, flowId }); + telemetry.recordEngineCreationFailure({ + modelId, + featureId, + taskName, + engineId, + error, + }); + throw error; } finally { MLEngineParent.engineLocks.delete(engineId); MLEngineParent.engineCreationAbortSignal.delete(engineId); @@ -981,18 +994,6 @@ export class MLEngine { notificationsCallback = null; /** - * Returns the label used in telemetry for that engine id - * - * @returns {string} - */ - getGleanLabel() { - if (lazy.isAddonEngineId(this.engineId)) { - return "webextension"; - } - return this.engineId; - } - - /** * Removes an instance of the MLEngine with the given engineId. * * @param {string} engineId - The ID of the engine instance to be removed. @@ -1037,6 +1038,10 @@ export class MLEngine { this.mlEngineParent = mlEngineParent; this.pipelineOptions = pipelineOptions; this.notificationsCallback = notificationsCallback; + this.telemetry = new MLTelemetry({ + featureId: pipelineOptions.featureId, + flowId: pipelineOptions.flowId, + }); } /** @@ -1325,28 +1330,20 @@ export class MLEngine { const request = this.#requests.get(requestId); if (request) { if (error) { - Glean.firefoxAiRuntime.runInferenceFailure.record({ - engineId: this.engineId, - modelId: this.pipelineOptions.modelId, - featureId: this.pipelineOptions.featureId, - }); - } - if (response) { + this.telemetry.recordRunInferenceFailure(error); + request.reject(error); + } else if (response) { // Validate response before returning to caller const validatedResponse = this.#validateResponse(response); if (!validatedResponse) { request.reject(new Error("Response failed security validation")); } else { - const totalTime = - validatedResponse.metrics.tokenizingTime + - validatedResponse.metrics.inferenceTime; - Glean.firefoxAiRuntime.runInferenceSuccess[ - this.getGleanLabel() - ].accumulateSingleSample(totalTime); + 
this.telemetry.recordRunInferenceSuccessFlow( + this.engineId, + validatedResponse.metrics + ); request.resolve(validatedResponse); } - } else { - request.reject(error); } } else { lazy.console.error( @@ -1548,26 +1545,19 @@ export class MLEngine { (resourcesAfter.cpuTime - resourcesBefore.cpuTime) / 1_000_000; const wallMilliseconds = ChromeUtils.now() - beforeRun; const cores = lazy.mlUtils.getOptimalCPUConcurrency(); - const cpuUtilization = cpuMilliseconds / wallMilliseconds / cores; + const cpuUtilization = (cpuMilliseconds / wallMilliseconds / cores) * 100; const memoryBytes = resourcesAfter.memory; - const data = { - // Timing: - cpu_milliseconds: cpuMilliseconds, - wall_milliseconds: wallMilliseconds, + this.telemetry.recordEngineRun({ + cpuMilliseconds, + wallMilliseconds, cores, - cpu_utilization: cpuUtilization, - memory_bytes: memoryBytes, - - // Model information: - engine_id: this.engineId, - model_id: this.pipelineOptions.modelId, - feature_id: this.pipelineOptions.featureId, + cpuUtilization, + memoryBytes, + engineId: this.engineId, + modelId: this.pipelineOptions.modelId, backend: this.pipelineOptions.backend, - }; - - lazy.console?.debug("[Glean.firefoxAiRuntime.engineRun]", data); - Glean.firefoxAiRuntime.engineRun.record(data); + }); }); return resolvers.promise; diff --git a/toolkit/components/ml/content/backends/ONNXPipeline.mjs b/toolkit/components/ml/content/backends/ONNXPipeline.mjs @@ -12,6 +12,7 @@ /** * @typedef {object} PipelineMetrics * @property {number} [preprocessingTime] - Time spent preprocessing inputs (ms). + * @property {number} [tokenizingTime] - Time spent tokenizing (same as preprocessingTime, for Glean consistency) (ms). * @property {number} [inferenceTime] - Time spent running the model (ms). * @property {number} [decodingTime] - Time spent decoding outputs (ms). * @property {number} inputTokens - Number of tokens in the input. 
@@ -792,9 +793,12 @@ export class ONNXPipeline { inputTokens: 0, outputTokens: 0, preprocessingTime: 0, + tokenizingTime: 0, // Same as preprocessingTime, but named for Glean consistency inferenceTime: 0, decodingTime: 0, timeToFirstToken: null, + tokensPerSecond: 0, + timePerOutputToken: 0, runTimestamps: [], }; @@ -971,9 +975,25 @@ export class ONNXPipeline { } else { metrics.decodingTime = metrics.inferenceTime; } + + // Sync tokenizingTime with preprocessingTime for Glean metrics consistency + metrics.tokenizingTime = metrics.preprocessingTime; + + // Calculate throughput metrics if we have the necessary data + if (metrics.inferenceTime > 0 && metrics.outputTokens > 0) { + metrics.tokensPerSecond = + metrics.outputTokens / (metrics.inferenceTime / 1000); + metrics.timePerOutputToken = + metrics.inferenceTime / metrics.outputTokens; + } } catch (e) { lazy.console.debug("Error computing throughput metrics", e); } + + // Merge initialization snapshots from this.#metrics into runTimestamps + // so tests can access all timing data + metrics.runTimestamps = [...this.#metrics, ...metrics.runTimestamps]; + result.metrics = metrics; if (streamer) { diff --git a/toolkit/components/ml/metrics.yaml b/toolkit/components/ml/metrics.yaml @@ -181,6 +181,9 @@ firefox.ai.runtime: engineId: description: Engine id type: string + error: + description: error class or message + type: string run_inference_success_flow: type: event @@ -200,11 +203,35 @@ firefox.ai.runtime: tokenizing_time: type: quantity description: > - Time taken for tokenization in milliseconds + Time taken for tokenization (preprocessing) in milliseconds inference_time: type: quantity description: > Time taken for inference in milliseconds + decoding_time: + type: quantity + description: > + Time taken for decoding in milliseconds + input_tokens: + type: quantity + description: > + Number of tokens in the input prompt + output_tokens: + type: quantity + description: > + Number of tokens generated + 
time_to_first_token: + type: quantity + description: > + Time to first token in milliseconds (null if not applicable) + tokens_per_second: + type: quantity + description: > + Inference throughput in tokens per second (rounded integer) + time_per_output_token: + type: quantity + description: > + Average latency per output token in milliseconds (rounded integer) run_inference_success: type: labeled_timing_distribution diff --git a/toolkit/components/ml/tests/browser/browser_ml_semantic_history_search_perf.js b/toolkit/components/ml/tests/browser/browser_ml_semantic_history_search_perf.js @@ -308,7 +308,9 @@ async function runInferenceAndCollectMetrics({ } const memUsage = await getTotalMemoryUsage(); - const metrics = fetchMetrics(res.metrics); + const metrics = fetchMetrics( + (res.metrics && res.metrics.runTimestamps) || [] + ); let embeddingLatency = 0; for (const [metricName, value] of Object.entries(metrics)) { diff --git a/toolkit/components/ml/tests/browser/browser_ml_telemetry.js b/toolkit/components/ml/tests/browser/browser_ml_telemetry.js @@ -58,11 +58,52 @@ add_task(async function test_default_telemetry() { ); { + info("Test the run_inference_success_flow event"); + const inferenceFlowEvents = + Glean.firefoxAiRuntime.runInferenceSuccessFlow.testGetValue(); + Assert.ok( + inferenceFlowEvents && !!inferenceFlowEvents.length, + "At least one run_inference_success_flow event was recorded" + ); + const lastInferenceEvent = inferenceFlowEvents.at(-1); + const { extra: inferenceExtra } = lastInferenceEvent; + + // Helper to check that a number field is present and >= 0 + const checkNumber = (key, isOptional = false) => { + const value = inferenceExtra[key]; + if (isOptional && (value === null || value === undefined)) { + return; // Optional field not present is OK + } + Assert.notEqual(value, null, `${key} should be present`); + const number = Number(value); // Quantities are stored as strings + Assert.ok(!Number.isNaN(number), `${key} should be a number`); + 
Assert.greaterOrEqual(number, 0, `${key} should be >= 0`); + }; + + // Check flow_id is present + Assert.ok(inferenceExtra.flow_id, "flow_id should be present"); + + // Check all required timing/token metrics + checkNumber("tokenizing_time", true); + checkNumber("inference_time", true); + checkNumber("decoding_time", true); + checkNumber("input_tokens", true); + checkNumber("output_tokens", true); + checkNumber("time_to_first_token", true); + checkNumber("tokens_per_second", true); + checkNumber("time_per_output_token", true); + } + + { info("Test the engine_run event"); await engineInstance.lastResourceRequest; const value = Glean.firefoxAiRuntime.engineRun.testGetValue(); - Assert.equal(value?.length, 1, "One engine_run event was recorded"); - const [{ extra }] = value; + Assert.ok( + value && !!value.length, + "At least one engine_run event was recorded" + ); + const lastEngineRunEvent = value.at(-1); + const { extra } = lastEngineRunEvent; const checkNumber = key => { const value = extra[key]; Assert.notEqual(value, null, `${key} should be present`); diff --git a/toolkit/components/ml/tests/browser/head.js b/toolkit/components/ml/tests/browser/head.js @@ -309,15 +309,21 @@ function fetchMLMetric(metrics, name, key) { } function fetchLatencyMetrics(metrics, isFirstRun) { + let timestamps = []; + if (Array.isArray(metrics?.runTimestamps)) { + timestamps = metrics.runTimestamps; + } else if (Array.isArray(metrics)) { + timestamps = metrics; + } const pipelineLatency = - fetchMLMetric(metrics, PIPELINE_READY_END, WHEN) - - fetchMLMetric(metrics, PIPELINE_READY_START, WHEN); + fetchMLMetric(timestamps, PIPELINE_READY_END, WHEN) - + fetchMLMetric(timestamps, PIPELINE_READY_START, WHEN); const initLatency = - fetchMLMetric(metrics, INIT_END, WHEN) - - fetchMLMetric(metrics, INIT_START, WHEN); + fetchMLMetric(timestamps, INIT_END, WHEN) - + fetchMLMetric(timestamps, INIT_START, WHEN); const runLatency = - fetchMLMetric(metrics, RUN_END, WHEN) - - fetchMLMetric(metrics, 
RUN_START, WHEN); + fetchMLMetric(timestamps, RUN_END, WHEN) - + fetchMLMetric(timestamps, RUN_START, WHEN); return { [`${isFirstRun ? COLD_START_PREFIX : ""}${PIPELINE_READY_LATENCY}`]: pipelineLatency, @@ -573,7 +579,7 @@ async function runInference({ const res = await run(); runEndTime = performance.now(); const decodingTime = runEndTime - startTime; - metrics = fetchMetrics(res.metrics || [], isFirstRun); + metrics = fetchMetrics(res.metrics?.runTimestamps || [], isFirstRun); metrics[`${isFirstRun ? COLD_START_PREFIX : ""}${TOTAL_MEMORY_USAGE}`] = await getTotalMemoryUsage();