commit fe96a03c97e8fab2d79580c6151eb72656af9a2c
parent 3de6cdd148ec058b4b3287390c0311c3a87547b5
Author: Taimur Hasan <taimurhasan@proton.me>
Date: Fri, 12 Dec 2025 18:23:48 +0000
Bug 2000033: Integrate Glean MLTelemetry into the inference pipeline r=ai-ondevice-reviewers,gregtatum,rconcepcion
Differential Revision: https://phabricator.services.mozilla.com/D272945
Diffstat:
7 files changed, 254 insertions(+), 101 deletions(-)
diff --git a/toolkit/components/ml/MLTelemetry.sys.mjs b/toolkit/components/ml/MLTelemetry.sys.mjs
@@ -11,6 +11,10 @@ ChromeUtils.defineLazyGetter(lazy, "console", () => {
});
});
+ChromeUtils.defineESModuleGetters(lazy, {
+ isAddonEngineId: "chrome://global/content/ml/Utils.sys.mjs",
+});
+
/**
* MLTelemetry provides a mechanism tracking a "flow" of operations
* related to a machine learning feature. A flow is a sequence of related
@@ -70,6 +74,20 @@ export class MLTelemetry {
}
/**
+ * Returns the label used in telemetry for a given engine ID.
+ * Converts addon engine IDs to "webextension" label.
+ *
+ * @param {string} engineId - The engine ID to convert.
+ * @returns {string} The Glean label for the engine.
+ */
+ static getGleanLabel(engineId) {
+ if (lazy.isAddonEngineId(engineId)) {
+ return "webextension";
+ }
+ return engineId;
+ }
+
+ /**
* Starts a telemetry session for the given flow.
*
* @param {object} [options] - Session start options.
@@ -149,13 +167,12 @@ export class MLTelemetry {
* @param {object} options - Engine creation success options.
* @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided.
* @param {string} options.engineId - The engine identifier (e.g., "pdfjs", "ml-suggest-intent").
- * @param {string} [options.label] - Label for the old timing distribution metric. Defaults to engineId if not provided.
* @param {number} options.duration - Engine creation time in milliseconds.
*/
- recordEngineCreationSuccessFlow({ flowId, engineId, label, duration }) {
+ recordEngineCreationSuccessFlow({ flowId, engineId, duration }) {
const currentFlowId = flowId || this.#flowId;
const actualEngineId = engineId;
- const actualLabel = label || engineId;
+ const actualLabel = MLTelemetry.getGleanLabel(engineId);
Glean.firefoxAiRuntime.engineCreationSuccessFlow.record({
flow_id: currentFlowId,
@@ -185,7 +202,7 @@ export class MLTelemetry {
* @param {string} options.featureId - The feature identifier.
* @param {string} options.taskName - The task name.
* @param {string} options.engineId - The engine identifier.
- * @param {string} options.error - The error class/message.
+ * @param {string|object} options.error - The error class/message or object.
*/
recordEngineCreationFailure({
flowId,
@@ -196,6 +213,11 @@ export class MLTelemetry {
error,
}) {
const currentFlowId = flowId || this.#flowId;
+ // Ensure error is always a string
+ const errorString =
+ typeof error === "object" && error !== null
+ ? String(error.name || error.message || error)
+ : String(error);
Glean.firefoxAiRuntime.engineCreationFailure.record({
flow_id: currentFlowId,
@@ -203,7 +225,7 @@ export class MLTelemetry {
featureId,
taskName,
engineId,
- error,
+ error: errorString,
});
this.logEventToConsole(this.recordEngineCreationFailure, {
@@ -212,67 +234,112 @@ export class MLTelemetry {
featureId,
taskName,
engineId,
- error,
+ error: errorString,
});
}
/**
* Records a successful inference run event.
*
- * @param {object} options - Inference success options.
- * @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided.
- * @param {string} [options.engineId] - The engine identifier. Defaults to undefined.
- * @param {string} [options.label] - Label for the old timing distribution metric. Defaults to no-label if not provided.
- * @param {number} options.tokenizingTime - Time spent tokenizing in milliseconds.
- * @param {number} options.inferenceTime - Time spent on inference in milliseconds.
+ * @param {string} engineId - The engine identifier.
+ * @param {object} metrics - The inference metrics object.
+ * @param {number} [metrics.preprocessingTime] - Time spent preprocessing (legacy).
+ * @param {number} [metrics.tokenizingTime] - Time spent tokenizing in milliseconds.
+ * @param {number} [metrics.inferenceTime] - Time spent on inference in milliseconds.
+ * @param {number} [metrics.decodingTime] - Time spent decoding in milliseconds.
+ * @param {number} [metrics.inputTokens] - Number of input tokens.
+ * @param {number} [metrics.outputTokens] - Number of output tokens.
+ * @param {number} [metrics.timeToFirstToken] - Time to first token in milliseconds.
+ * @param {number} [metrics.tokensPerSecond] - Tokens per second.
+ * @param {number} [metrics.timePerOutputToken] - Time per output token in milliseconds.
*/
- recordRunInferenceSuccessFlow({
- flowId,
- engineId,
- label,
- tokenizingTime,
- inferenceTime,
- }) {
- const currentFlowId = flowId || this.#flowId;
- const EngineId = engineId || undefined;
- const Label = label || "no-label";
+ recordRunInferenceSuccessFlow(engineId, metrics) {
+ try {
+ const currentFlowId = this.#flowId;
+ const EngineId = engineId || undefined;
+ const Label = engineId ? MLTelemetry.getGleanLabel(engineId) : "no-label";
- Glean.firefoxAiRuntime.runInferenceSuccessFlow.record({
- flow_id: currentFlowId,
- tokenizing_time: Math.round(tokenizingTime),
- inference_time: Math.round(inferenceTime),
- });
+ // Handle legacy preprocessingTime field
+ const tokenizingTime =
+ metrics.preprocessingTime ?? metrics.tokenizingTime;
- // Also record the old labeled timing distribution metric
- const totalTime = Math.round(tokenizingTime + inferenceTime);
- Glean.firefoxAiRuntime.runInferenceSuccess[Label].accumulateSingleSample(
- totalTime
- );
+ // Ensure all metrics are properly rounded/typed for Glean
+ // This will be updated to use the method from revision D271263
+ const gleanPayload = {
+ flow_id: currentFlowId,
+ tokenizing_time:
+ tokenizingTime != null ? Math.round(tokenizingTime) : undefined,
+ inference_time:
+ metrics.inferenceTime != null
+ ? Math.round(metrics.inferenceTime)
+ : undefined,
+ decoding_time:
+ metrics.decodingTime != null
+ ? Math.round(metrics.decodingTime)
+ : undefined,
+ input_tokens:
+ metrics.inputTokens != null
+ ? Math.round(metrics.inputTokens)
+ : undefined,
+ output_tokens:
+ metrics.outputTokens != null
+ ? Math.round(metrics.outputTokens)
+ : undefined,
+ time_to_first_token:
+ metrics.timeToFirstToken != null
+ ? Math.round(metrics.timeToFirstToken)
+ : undefined,
+ tokens_per_second:
+ metrics.tokensPerSecond != null
+ ? Math.round(metrics.tokensPerSecond * 100) / 100
+ : undefined,
+ time_per_output_token:
+ metrics.timePerOutputToken != null
+ ? Math.round(metrics.timePerOutputToken * 100) / 100
+ : undefined,
+ };
- this.logEventToConsole(this.recordRunInferenceSuccessFlow, {
- flowId: currentFlowId,
- engineId: EngineId,
- label: Label,
- tokenizingTime,
- inferenceTime,
- });
+ Glean.firefoxAiRuntime.runInferenceSuccessFlow.record(gleanPayload);
+
+ // record the old labeled timing distribution metric
+ const totalTime = Math.round(
+ (tokenizingTime || 0) +
+ (metrics.inferenceTime || 0) +
+ (metrics.decodingTime || 0)
+ );
+
+ Glean.firefoxAiRuntime.runInferenceSuccess[Label].accumulateSingleSample(
+ totalTime
+ );
+
+ this.logEventToConsole(this.recordRunInferenceSuccessFlow, {
+ ...gleanPayload,
+ engineId: EngineId,
+ label: Label,
+ });
+ } catch (telemetryError) {
+ lazy.console.error("Failed to record ML telemetry:", telemetryError);
+ }
}
/**
* Records a failed inference run event.
*
- * @param {string} error - The error class/message.
+ * @param {string|object} error - The error class/message or object.
* @param {string} [flow_id=this.#flowId] - The flow ID. Uses instance flowId if not provided.
*/
recordRunInferenceFailure(error, flow_id = this.flowId) {
+ // Ensure error is always a string
+ const errorString = error instanceof Error ? error.message : String(error);
+
Glean.firefoxAiRuntime.runInferenceFailure.record({
flow_id,
- error,
+ error: errorString,
});
this.logEventToConsole(this.recordRunInferenceFailure, {
flow_id,
- error,
+ error: errorString,
});
}
diff --git a/toolkit/components/ml/actors/MLEngineParent.sys.mjs b/toolkit/components/ml/actors/MLEngineParent.sys.mjs
@@ -2,6 +2,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
+import { MLTelemetry } from "chrome://global/content/ml/MLTelemetry.sys.mjs";
/**
* @import { MLEngineChild } from "./MLEngineChild.sys.mjs"
@@ -15,7 +16,6 @@ const lazy = XPCOMUtils.declareLazy({
clearTimeout: "resource://gre/modules/Timer.sys.mjs",
ModelHub: "chrome://global/content/ml/ModelHub.sys.mjs",
Progress: "chrome://global/content/ml/Utils.sys.mjs",
- isAddonEngineId: "chrome://global/content/ml/Utils.sys.mjs",
OPFS: "chrome://global/content/ml/OPFS.sys.mjs",
BACKENDS: "chrome://global/content/ml/EngineProcess.sys.mjs",
stringifyForLog: "chrome://global/content/ml/Utils.sys.mjs",
@@ -222,7 +222,7 @@ export class MLEngineParent extends JSProcessActorParent {
});
}
- const engineId = pipelineOptions.engineId;
+ const { featureId, engineId } = pipelineOptions;
// Allow notifications callback changes even when reusing engine.
this.notificationsCallback = notificationsCallback;
@@ -235,6 +235,7 @@ export class MLEngineParent extends JSProcessActorParent {
Promise.withResolvers();
MLEngineParent.engineLocks.set(engineId, lockPromise);
MLEngineParent.engineCreationAbortSignal.set(engineId, abortSignal);
+
try {
const currentEngine = MLEngine.getInstance(engineId);
if (currentEngine) {
@@ -273,12 +274,24 @@ export class MLEngineParent extends JSProcessActorParent {
const creationTime = ChromeUtils.now() - start;
- Glean.firefoxAiRuntime.engineCreationSuccess[
- engine.getGleanLabel()
- ].accumulateSingleSample(creationTime);
+ engine.telemetry.recordEngineCreationSuccessFlow({
+ engineId,
+ duration: creationTime,
+ });
// TODO - What happens if the engine is already killed here?
return engine;
+ } catch (error) {
+ const { modelId, taskName, flowId } = pipelineOptions;
+ const telemetry = new MLTelemetry({ featureId, flowId });
+ telemetry.recordEngineCreationFailure({
+ modelId,
+ featureId,
+ taskName,
+ engineId,
+ error,
+ });
+ throw error;
} finally {
MLEngineParent.engineLocks.delete(engineId);
MLEngineParent.engineCreationAbortSignal.delete(engineId);
@@ -981,18 +994,6 @@ export class MLEngine {
notificationsCallback = null;
/**
- * Returns the label used in telemetry for that engine id
- *
- * @returns {string}
- */
- getGleanLabel() {
- if (lazy.isAddonEngineId(this.engineId)) {
- return "webextension";
- }
- return this.engineId;
- }
-
- /**
* Removes an instance of the MLEngine with the given engineId.
*
* @param {string} engineId - The ID of the engine instance to be removed.
@@ -1037,6 +1038,10 @@ export class MLEngine {
this.mlEngineParent = mlEngineParent;
this.pipelineOptions = pipelineOptions;
this.notificationsCallback = notificationsCallback;
+ this.telemetry = new MLTelemetry({
+ featureId: pipelineOptions.featureId,
+ flowId: pipelineOptions.flowId,
+ });
}
/**
@@ -1325,28 +1330,20 @@ export class MLEngine {
const request = this.#requests.get(requestId);
if (request) {
if (error) {
- Glean.firefoxAiRuntime.runInferenceFailure.record({
- engineId: this.engineId,
- modelId: this.pipelineOptions.modelId,
- featureId: this.pipelineOptions.featureId,
- });
- }
- if (response) {
+ this.telemetry.recordRunInferenceFailure(error);
+ request.reject(error);
+ } else if (response) {
// Validate response before returning to caller
const validatedResponse = this.#validateResponse(response);
if (!validatedResponse) {
request.reject(new Error("Response failed security validation"));
} else {
- const totalTime =
- validatedResponse.metrics.tokenizingTime +
- validatedResponse.metrics.inferenceTime;
- Glean.firefoxAiRuntime.runInferenceSuccess[
- this.getGleanLabel()
- ].accumulateSingleSample(totalTime);
+ this.telemetry.recordRunInferenceSuccessFlow(
+ this.engineId,
+ validatedResponse.metrics
+ );
request.resolve(validatedResponse);
}
- } else {
- request.reject(error);
}
} else {
lazy.console.error(
@@ -1548,26 +1545,19 @@ export class MLEngine {
(resourcesAfter.cpuTime - resourcesBefore.cpuTime) / 1_000_000;
const wallMilliseconds = ChromeUtils.now() - beforeRun;
const cores = lazy.mlUtils.getOptimalCPUConcurrency();
- const cpuUtilization = cpuMilliseconds / wallMilliseconds / cores;
+ const cpuUtilization = (cpuMilliseconds / wallMilliseconds / cores) * 100;
const memoryBytes = resourcesAfter.memory;
- const data = {
- // Timing:
- cpu_milliseconds: cpuMilliseconds,
- wall_milliseconds: wallMilliseconds,
+ this.telemetry.recordEngineRun({
+ cpuMilliseconds,
+ wallMilliseconds,
cores,
- cpu_utilization: cpuUtilization,
- memory_bytes: memoryBytes,
-
- // Model information:
- engine_id: this.engineId,
- model_id: this.pipelineOptions.modelId,
- feature_id: this.pipelineOptions.featureId,
+ cpuUtilization,
+ memoryBytes,
+ engineId: this.engineId,
+ modelId: this.pipelineOptions.modelId,
backend: this.pipelineOptions.backend,
- };
-
- lazy.console?.debug("[Glean.firefoxAiRuntime.engineRun]", data);
- Glean.firefoxAiRuntime.engineRun.record(data);
+ });
});
return resolvers.promise;
diff --git a/toolkit/components/ml/content/backends/ONNXPipeline.mjs b/toolkit/components/ml/content/backends/ONNXPipeline.mjs
@@ -12,6 +12,7 @@
/**
* @typedef {object} PipelineMetrics
* @property {number} [preprocessingTime] - Time spent preprocessing inputs (ms).
+ * @property {number} [tokenizingTime] - Time spent tokenizing (same as preprocessingTime, for Glean consistency) (ms).
* @property {number} [inferenceTime] - Time spent running the model (ms).
* @property {number} [decodingTime] - Time spent decoding outputs (ms).
* @property {number} inputTokens - Number of tokens in the input.
@@ -792,9 +793,12 @@ export class ONNXPipeline {
inputTokens: 0,
outputTokens: 0,
preprocessingTime: 0,
+ tokenizingTime: 0, // Same as preprocessingTime, but named for Glean consistency
inferenceTime: 0,
decodingTime: 0,
timeToFirstToken: null,
+ tokensPerSecond: 0,
+ timePerOutputToken: 0,
runTimestamps: [],
};
@@ -971,9 +975,25 @@ export class ONNXPipeline {
} else {
metrics.decodingTime = metrics.inferenceTime;
}
+
+ // Sync tokenizingTime with preprocessingTime for Glean metrics consistency
+ metrics.tokenizingTime = metrics.preprocessingTime;
+
+ // Calculate throughput metrics if we have the necessary data
+ if (metrics.inferenceTime > 0 && metrics.outputTokens > 0) {
+ metrics.tokensPerSecond =
+ metrics.outputTokens / (metrics.inferenceTime / 1000);
+ metrics.timePerOutputToken =
+ metrics.inferenceTime / metrics.outputTokens;
+ }
} catch (e) {
lazy.console.debug("Error computing throughput metrics", e);
}
+
+ // Merge initialization snapshots from this.#metrics into runTimestamps
+ // so tests can access all timing data
+ metrics.runTimestamps = [...this.#metrics, ...metrics.runTimestamps];
+
result.metrics = metrics;
if (streamer) {
diff --git a/toolkit/components/ml/metrics.yaml b/toolkit/components/ml/metrics.yaml
@@ -181,6 +181,9 @@ firefox.ai.runtime:
engineId:
description: Engine id
type: string
+ error:
+ description: error class or message
+ type: string
run_inference_success_flow:
type: event
@@ -200,11 +203,35 @@ firefox.ai.runtime:
tokenizing_time:
type: quantity
description: >
- Time taken for tokenization in milliseconds
+ Time taken for tokenization (preprocessing) in milliseconds
inference_time:
type: quantity
description: >
Time taken for inference in milliseconds
+ decoding_time:
+ type: quantity
+ description: >
+ Time taken for decoding in milliseconds
+ input_tokens:
+ type: quantity
+ description: >
+ Number of tokens in the input prompt
+ output_tokens:
+ type: quantity
+ description: >
+ Number of tokens generated
+ time_to_first_token:
+ type: quantity
+ description: >
+ Time to first token in milliseconds (omitted if not applicable)
+ tokens_per_second:
+ type: quantity
+ description: >
+ Inference throughput in tokens per second (rounded to two decimal places)
+ time_per_output_token:
+ type: quantity
+ description: >
+ Average latency per output token in milliseconds (rounded to two decimal places)
run_inference_success:
type: labeled_timing_distribution
diff --git a/toolkit/components/ml/tests/browser/browser_ml_semantic_history_search_perf.js b/toolkit/components/ml/tests/browser/browser_ml_semantic_history_search_perf.js
@@ -308,7 +308,9 @@ async function runInferenceAndCollectMetrics({
}
const memUsage = await getTotalMemoryUsage();
- const metrics = fetchMetrics(res.metrics);
+ const metrics = fetchMetrics(
+ (res.metrics && res.metrics.runTimestamps) || []
+ );
let embeddingLatency = 0;
for (const [metricName, value] of Object.entries(metrics)) {
diff --git a/toolkit/components/ml/tests/browser/browser_ml_telemetry.js b/toolkit/components/ml/tests/browser/browser_ml_telemetry.js
@@ -58,11 +58,52 @@ add_task(async function test_default_telemetry() {
);
{
+ info("Test the run_inference_success_flow event");
+ const inferenceFlowEvents =
+ Glean.firefoxAiRuntime.runInferenceSuccessFlow.testGetValue();
+ Assert.ok(
+ inferenceFlowEvents && !!inferenceFlowEvents.length,
+ "At least one run_inference_success_flow event was recorded"
+ );
+ const lastInferenceEvent = inferenceFlowEvents.at(-1);
+ const { extra: inferenceExtra } = lastInferenceEvent;
+
+ // Helper to check that a number field is present and >= 0
+ const checkNumber = (key, isOptional = false) => {
+ const value = inferenceExtra[key];
+ if (isOptional && (value === null || value === undefined)) {
+ return; // Optional field not present is OK
+ }
+ Assert.notEqual(value, null, `${key} should be present`);
+ const number = Number(value); // Quantities are stored as strings
+ Assert.ok(!Number.isNaN(number), `${key} should be a number`);
+ Assert.greaterOrEqual(number, 0, `${key} should be >= 0`);
+ };
+
+ // Check flow_id is present
+ Assert.ok(inferenceExtra.flow_id, "flow_id should be present");
+
+ // Check all required timing/token metrics
+ checkNumber("tokenizing_time", true);
+ checkNumber("inference_time", true);
+ checkNumber("decoding_time", true);
+ checkNumber("input_tokens", true);
+ checkNumber("output_tokens", true);
+ checkNumber("time_to_first_token", true);
+ checkNumber("tokens_per_second", true);
+ checkNumber("time_per_output_token", true);
+ }
+
+ {
info("Test the engine_run event");
await engineInstance.lastResourceRequest;
const value = Glean.firefoxAiRuntime.engineRun.testGetValue();
- Assert.equal(value?.length, 1, "One engine_run event was recorded");
- const [{ extra }] = value;
+ Assert.ok(
+ value && !!value.length,
+ "At least one engine_run event was recorded"
+ );
+ const lastEngineRunEvent = value.at(-1);
+ const { extra } = lastEngineRunEvent;
const checkNumber = key => {
const value = extra[key];
Assert.notEqual(value, null, `${key} should be present`);
diff --git a/toolkit/components/ml/tests/browser/head.js b/toolkit/components/ml/tests/browser/head.js
@@ -309,15 +309,21 @@ function fetchMLMetric(metrics, name, key) {
}
function fetchLatencyMetrics(metrics, isFirstRun) {
+ let timestamps = [];
+ if (Array.isArray(metrics?.runTimestamps)) {
+ timestamps = metrics.runTimestamps;
+ } else if (Array.isArray(metrics)) {
+ timestamps = metrics;
+ }
const pipelineLatency =
- fetchMLMetric(metrics, PIPELINE_READY_END, WHEN) -
- fetchMLMetric(metrics, PIPELINE_READY_START, WHEN);
+ fetchMLMetric(timestamps, PIPELINE_READY_END, WHEN) -
+ fetchMLMetric(timestamps, PIPELINE_READY_START, WHEN);
const initLatency =
- fetchMLMetric(metrics, INIT_END, WHEN) -
- fetchMLMetric(metrics, INIT_START, WHEN);
+ fetchMLMetric(timestamps, INIT_END, WHEN) -
+ fetchMLMetric(timestamps, INIT_START, WHEN);
const runLatency =
- fetchMLMetric(metrics, RUN_END, WHEN) -
- fetchMLMetric(metrics, RUN_START, WHEN);
+ fetchMLMetric(timestamps, RUN_END, WHEN) -
+ fetchMLMetric(timestamps, RUN_START, WHEN);
return {
[`${isFirstRun ? COLD_START_PREFIX : ""}${PIPELINE_READY_LATENCY}`]:
pipelineLatency,
@@ -573,7 +579,7 @@ async function runInference({
const res = await run();
runEndTime = performance.now();
const decodingTime = runEndTime - startTime;
- metrics = fetchMetrics(res.metrics || [], isFirstRun);
+ metrics = fetchMetrics(res.metrics?.runTimestamps || [], isFirstRun);
metrics[`${isFirstRun ? COLD_START_PREFIX : ""}${TOTAL_MEMORY_USAGE}`] =
await getTotalMemoryUsage();