commit 4103369fab82494b3fe519d6d0fcedeec87b42e8
parent 6417e29cfda97edd0032f5109e03c3f02f330854
Author: Taimur Hasan <taimurhasan@proton.me>
Date: Wed, 12 Nov 2025 16:03:37 +0000
Bug 1997016 - Add Glean telemetry for ML workflow sessions r=firefox-ai-ml-reviewers,gregtatum
Differential Revision: https://phabricator.services.mozilla.com/D270909
Diffstat:
4 files changed, 568 insertions(+), 2 deletions(-)
diff --git a/toolkit/components/ml/MLTelemetry.sys.mjs b/toolkit/components/ml/MLTelemetry.sys.mjs
@@ -0,0 +1,323 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Holder for lazily-created helpers; properties are defined on first access.
+const lazy = {};
+
+// `lazy.console` is a console instance prefixed with "MLTelemetry" whose
+// maximum log level is read from the "browser.ml.logLevel" preference.
+ChromeUtils.defineLazyGetter(lazy, "console", () =>
+  console.createInstance({
+    maxLogLevelPref: "browser.ml.logLevel",
+    prefix: "MLTelemetry",
+  })
+);
+
+/**
+ * MLTelemetry provides a mechanism for tracking a "flow" of operations
+ * related to a machine learning feature. A flow is a sequence of related
+ * events that represent a single, complete user-level operation, for example
+ * generating a summary for a page.
+ *
+ * This class uses a correlation ID pattern: every instance owns a flowId
+ * (supplied by the caller or generated in the constructor) that is attached
+ * to each event it records. Most record* methods also accept an explicit flow
+ * ID that overrides the instance's value for that single event.
+ *
+ * @example
+ * new MLTelemetry({ featureId: "ml-suggest-intent" }).sessionStart({ interaction: "button_click" });
+ * @example
+ * new MLTelemetry({ featureId: "ml-suggest-intent", flowId: "1234-5678" }).sessionStart({ interaction: "keyboard_shortcut"});
+ */
+export class MLTelemetry {
+  /** @type {string} */
+  #flowId;
+  /** @type {string|undefined} */
+  #featureId;
+  /** @type {number|undefined} */
+  #startTime;
+
+  /**
+   * Creates a new MLTelemetry instance.
+   *
+   * @param {object} [options] - Configuration options.
+   * @param {string} [options.featureId] - The identifier for the ML feature.
+   * @param {string} [options.flowId] - An optional unique identifier for
+   *   this flow. If not provided, a new UUID will be generated.
+   */
+  constructor(options = {}) {
+    this.#featureId = options.featureId;
+    this.#flowId = options.flowId || crypto.randomUUID();
+
+    this.logEventToConsole(this.constructor, {
+      featureId: this.#featureId,
+      flowId: this.#flowId,
+    });
+  }
+
+  /**
+   * The unique identifier for this flow.
+   *
+   * @returns {string} The flow ID.
+   */
+  get flowId() {
+    return this.#flowId;
+  }
+
+  /**
+   * The feature identifier for this telemetry instance.
+   *
+   * @returns {string|undefined} The feature ID, or undefined when none was
+   *   passed to the constructor.
+   */
+  get featureId() {
+    return this.#featureId;
+  }
+
+  /**
+   * Starts a telemetry session for this flow and records the
+   * `session_start` event.
+   *
+   * @param {object} [options] - Session start options.
+   * @param {string} [options.interaction] - The interaction type (e.g., "button_click", "keyboard_shortcut").
+   * @throws {Error} If a session was already started on this instance.
+   */
+  sessionStart({ interaction } = {}) {
+    // Compare against undefined rather than truthiness so that a start
+    // timestamp of 0 would still count as "started".
+    if (this.#startTime !== undefined) {
+      throw new Error(`Session already started for flowId: ${this.#flowId}`);
+    }
+
+    this.#startTime = ChromeUtils.now();
+
+    const extra = {
+      flow_id: this.#flowId,
+      feature_id: this.#featureId,
+      interaction,
+    };
+
+    Glean.firefoxAiRuntime.sessionStart.record(extra);
+
+    this.logEventToConsole(this.sessionStart, extra);
+  }
+
+  /**
+   * Logs a debug message to the browser console, prefixed with the first
+   * five characters of the flow ID.
+   *
+   * @param {object} caller - The calling function or class; its `name` is logged.
+   * @param {object} [data] - Optional data logged alongside the message. A
+   *   `flowId` or `flow_id` property, when present, is used for the prefix
+   *   instead of the instance's flow ID.
+   */
+  logEventToConsole(caller, data) {
+    // Callers use both `flowId` and `flow_id` as the key, so honour either.
+    const flowId = data?.flowId || data?.flow_id || this.#flowId;
+    // Coerce to a string so that logging never throws on a non-string ID.
+    const id = String(flowId).substring(0, 5);
+    lazy.console.debug("flowId[%s]: %s", id, caller.name, data);
+  }
+
+  /**
+   * Ends the telemetry session and records the final status and duration in
+   * the `session_end` event.
+   *
+   * The stored start time is not cleared, so a later call measures from the
+   * same start again.
+   *
+   * @param {string} status - The final status of the session.
+   * @throws {Error} If no active session found or status parameter is missing.
+   * @returns {number} Duration in milliseconds (unrounded).
+   */
+  endSession(status) {
+    if (!status) {
+      throw new Error("status parameter is required");
+    }
+    if (this.#startTime === undefined) {
+      throw new Error(
+        `sessionStart() was not called for flowId: ${this.#flowId}`
+      );
+    }
+
+    const duration_ms = ChromeUtils.now() - this.#startTime;
+    Glean.firefoxAiRuntime.sessionEnd.record({
+      flow_id: this.#flowId,
+      // session_end declares feature_id as an extra key in metrics.yaml, so
+      // send it with the event rather than only logging it.
+      feature_id: this.#featureId,
+      duration: Math.round(duration_ms),
+      status,
+    });
+
+    this.logEventToConsole(this.endSession, {
+      flowId: this.#flowId,
+      feature_id: this.#featureId,
+      status,
+      duration_ms,
+    });
+    return duration_ms;
+  }
+
+  /**
+   * Records a successful engine creation event, both as a flow event and as
+   * a sample of the older labeled timing distribution.
+   *
+   * @param {object} options - Engine creation success options.
+   * @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided.
+   * @param {string} options.engineId - The engine identifier (e.g., "pdfjs", "ml-suggest-intent").
+   * @param {string} [options.label] - Label for the old timing distribution metric. Defaults to engineId if not provided.
+   * @param {number} options.duration - Engine creation time in milliseconds.
+   */
+  recordEngineCreationSuccessFlow({ flowId, engineId, label, duration }) {
+    const currentFlowId = flowId || this.#flowId;
+    const actualLabel = label || engineId;
+    const roundedDuration = Math.round(duration);
+
+    Glean.firefoxAiRuntime.engineCreationSuccessFlow.record({
+      flow_id: currentFlowId,
+      engineId,
+      duration: roundedDuration,
+    });
+
+    // Also record the old labeled timing distribution metric
+    Glean.firefoxAiRuntime.engineCreationSuccess[
+      actualLabel
+    ].accumulateSingleSample(roundedDuration);
+
+    this.logEventToConsole(this.recordEngineCreationSuccessFlow, {
+      flowId: currentFlowId,
+      engineId,
+      label: actualLabel,
+      duration,
+    });
+  }
+
+  /**
+   * Records a failed engine creation event.
+   *
+   * @param {object} options - Engine creation failure options.
+   * @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided.
+   * @param {string} options.modelId - The model identifier.
+   * @param {string} options.featureId - The feature identifier.
+   * @param {string} options.taskName - The task name.
+   * @param {string} options.engineId - The engine identifier.
+   * @param {string} options.error - The error class/message.
+   */
+  recordEngineCreationFailure({
+    flowId,
+    modelId,
+    featureId,
+    taskName,
+    engineId,
+    error,
+  }) {
+    const currentFlowId = flowId || this.#flowId;
+
+    Glean.firefoxAiRuntime.engineCreationFailure.record({
+      flow_id: currentFlowId,
+      modelId,
+      featureId,
+      taskName,
+      engineId,
+      error,
+    });
+
+    this.logEventToConsole(this.recordEngineCreationFailure, {
+      flowId: currentFlowId,
+      modelId,
+      featureId,
+      taskName,
+      engineId,
+      error,
+    });
+  }
+
+  /**
+   * Records a successful inference run event, both as a flow event and as a
+   * sample (tokenizing + inference time) of the older labeled timing
+   * distribution.
+   *
+   * @param {object} options - Inference success options.
+   * @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided.
+   * @param {string} [options.engineId] - The engine identifier. Only included
+   *   in the console log, not in the recorded event. Defaults to undefined.
+   * @param {string} [options.label] - Label for the old timing distribution metric. Defaults to no-label if not provided.
+   * @param {number} options.tokenizingTime - Time spent tokenizing in milliseconds.
+   * @param {number} options.inferenceTime - Time spent on inference in milliseconds.
+   */
+  recordRunInferenceSuccessFlow({
+    flowId,
+    engineId,
+    label,
+    tokenizingTime,
+    inferenceTime,
+  }) {
+    const currentFlowId = flowId || this.#flowId;
+    // Any falsy engine ID (e.g. an empty string) is logged as undefined.
+    const loggedEngineId = engineId || undefined;
+    const actualLabel = label || "no-label";
+
+    Glean.firefoxAiRuntime.runInferenceSuccessFlow.record({
+      flow_id: currentFlowId,
+      tokenizing_time: Math.round(tokenizingTime),
+      inference_time: Math.round(inferenceTime),
+    });
+
+    // Also record the old labeled timing distribution metric
+    const totalTime = Math.round(tokenizingTime + inferenceTime);
+    Glean.firefoxAiRuntime.runInferenceSuccess[
+      actualLabel
+    ].accumulateSingleSample(totalTime);
+
+    this.logEventToConsole(this.recordRunInferenceSuccessFlow, {
+      flowId: currentFlowId,
+      engineId: loggedEngineId,
+      label: actualLabel,
+      tokenizingTime,
+      inferenceTime,
+    });
+  }
+
+  /**
+   * Records a failed inference run event.
+   *
+   * @param {string} error - The error class/message.
+   * @param {string} [flow_id] - The flow ID. Uses instance flowId if not provided.
+   */
+  recordRunInferenceFailure(error, flow_id = this.flowId) {
+    const extra = { flow_id, error };
+
+    Glean.firefoxAiRuntime.runInferenceFailure.record(extra);
+
+    this.logEventToConsole(this.recordRunInferenceFailure, extra);
+  }
+
+  /**
+   * Records an engine run event. All numeric values are rounded to integers
+   * before being recorded.
+   *
+   * @param {object} options - Engine run options.
+   * @param {number} options.cpuMilliseconds - The combined milliseconds of every cpu core that was running.
+   * @param {number} options.wallMilliseconds - The amount of wall time the run request took.
+   * @param {number} options.cores - The number of cores on the machine.
+   * @param {number} options.cpuUtilization - The percentage of the user's CPU used (0-100).
+   * @param {number} options.memoryBytes - The number of RSS bytes for the inference process.
+   * @param {string} options.engineId - The engine identifier.
+   * @param {string} options.modelId - The model identifier.
+   * @param {string} options.backend - The backend that is being used.
+   * @param {string} [options.flow_id] - The flow ID. Uses instance flowId if not provided.
+   * @param {string} [options.feature_id] - The feature identifier. Uses instance featureId if not provided.
+   */
+  recordEngineRun({
+    cpuMilliseconds,
+    wallMilliseconds,
+    cores,
+    cpuUtilization,
+    memoryBytes,
+    engineId,
+    modelId,
+    backend,
+    flow_id = this.#flowId,
+    feature_id = this.#featureId,
+  }) {
+    const payload = {
+      flow_id,
+      cpu_milliseconds: Math.round(cpuMilliseconds),
+      wall_milliseconds: Math.round(wallMilliseconds),
+      cores: Math.round(cores),
+      cpu_utilization: Math.round(cpuUtilization),
+      memory_bytes: Math.round(memoryBytes),
+      feature_id,
+      engine_id: engineId,
+      model_id: modelId,
+      backend,
+    };
+
+    Glean.firefoxAiRuntime.engineRun.record(payload);
+
+    this.logEventToConsole(this.recordEngineRun, payload);
+  }
+}
diff --git a/toolkit/components/ml/jar.mn b/toolkit/components/ml/jar.mn
@@ -27,6 +27,7 @@ toolkit.jar:
content/global/ml/backends/OpenAIPipeline.mjs (content/backends/OpenAIPipeline.mjs)
content/global/ml/backends/StaticEmbeddingsPipeline.mjs (content/backends/StaticEmbeddingsPipeline.mjs)
content/global/ml/openai.mjs (vendor/openai/dist/openai.mjs)
+ content/global/ml/MLTelemetry.sys.mjs (MLTelemetry.sys.mjs)
#ifdef NIGHTLY_BUILD
content/global/ml/ort.webgpu-dev.mjs (vendor/ort.webgpu-dev.mjs)
content/global/ml/transformers-dev.js (vendor/transformers-dev.js)
diff --git a/toolkit/components/ml/metrics.yaml b/toolkit/components/ml/metrics.yaml
@@ -8,10 +8,90 @@
---
$schema: moz://mozilla.org/schemas/glean/metrics/2-0-0
$tags:
- - 'Core :: Machine Learning'
-
+ - "Core :: Machine Learning"
firefox.ai.runtime:
+ session_start:
+ type: event
+ description: >
+ Marks the beginning of an ML inference session or workflow.
+ bugs:
+ - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
+ data_reviews:
+ - https://phabricator.services.mozilla.com/D270909
+ data_sensitivity:
+ - interaction
+ notification_emails:
+ - thasan@mozilla.com
+ expires: never
+ extra_keys:
+ flow_id: &flow_id
+ type: string
+ description: >
+ Unique identifier for this ML workflow session. Used to correlate all
+ events within this session across components.
+ feature_id:
+ type: string
+ description: >
+ The feature initiating this session
+ interaction:
+ type: string
+ description: >
+ The interaction type that initiated this session
+
+ session_end:
+ type: event
+ description: >
+ Marks the end of an ML inference session or workflow.
+ This event is recorded when the session completes successfully or is terminated.
+ bugs:
+ - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
+ data_reviews:
+ - https://phabricator.services.mozilla.com/D270909
+ data_sensitivity:
+ - interaction
+ notification_emails:
+ - thasan@mozilla.com
+ expires: never
+ extra_keys:
+ flow_id: *flow_id
+ feature_id:
+ type: string
+ description: >
+ The feature associated with this session.
+ duration:
+ type: quantity
+ description: >
+ Total session duration in milliseconds.
+ status:
+ type: string
+ description: >
+ Session completion status
+
+ engine_creation_success_flow:
+ type: event
+ description: >
+ Records a successful engine creation within a tracked flow
+ bugs:
+ - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
+ data_reviews:
+ - https://phabricator.services.mozilla.com/D270909
+ data_sensitivity:
+ - interaction
+ notification_emails:
+ - thasan@mozilla.com
+ expires: never
+ extra_keys:
+ flow_id: *flow_id
+ engineId:
+ type: string
+ description: >
+ The Engine Id of the created inference engine
+ duration:
+ type: quantity
+ description: >
+ Engine creation time in milliseconds
+
engine_creation_success:
type: labeled_timing_distribution
time_unit: millisecond
@@ -46,14 +126,18 @@ firefox.ai.runtime:
An inference engine has failed to create
bugs:
- https://bugzilla.mozilla.org/show_bug.cgi?id=1891685
+ - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
data_reviews:
- https://phabricator.services.mozilla.com/D238583
+ - https://phabricator.services.mozilla.com/D270909
data_sensitivity:
- interaction
notification_emails:
- tziade@mozilla.com
+ - tshasan@mozilla.com
expires: never
extra_keys:
+ flow_id: *flow_id
modelId:
type: string
description: model id
@@ -76,14 +160,18 @@ firefox.ai.runtime:
An inference run has failed
bugs:
- https://bugzilla.mozilla.org/show_bug.cgi?id=1891685
+ - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
data_reviews:
- https://phabricator.services.mozilla.com/D238583
+ - https://phabricator.services.mozilla.com/D270909
data_sensitivity:
- interaction
notification_emails:
- tziade@mozilla.com
+ - tshasan@mozilla.com
expires: never
extra_keys:
+ flow_id: *flow_id
modelId:
type: string
description: model id
@@ -94,6 +182,30 @@ firefox.ai.runtime:
description: Engine id
type: string
+ run_inference_success_flow:
+ type: event
+ description: >
+ Records a successful inference run within a tracked flow
+ bugs:
+ - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
+ data_reviews:
+ - https://phabricator.services.mozilla.com/D270909
+ data_sensitivity:
+ - interaction
+ notification_emails:
+ - thasan@mozilla.com
+ expires: never
+ extra_keys:
+ flow_id: *flow_id
+ tokenizing_time:
+ type: quantity
+ description: >
+ Time taken for tokenization in milliseconds
+ inference_time:
+ type: quantity
+ description: >
+ Time taken for inference in milliseconds
+
run_inference_success:
type: labeled_timing_distribution
time_unit: millisecond
@@ -190,11 +302,13 @@ firefox.ai.runtime:
error:
description: error class
type: string
+
engine_run:
type: event
description: >
The performance and memory characteristics of a single run of the inference engine.
extra_keys:
+ flow_id: *flow_id
cpu_milliseconds:
description: The combined milliseconds of every cpu core that was running.
type: quantity
@@ -230,13 +344,16 @@ firefox.ai.runtime:
description: The backend that is being used, e.g. onnx, onnx-native, wllama
bugs:
- https://bugzilla.mozilla.org/show_bug.cgi?id=1992779
+ - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
data_reviews:
- https://bugzilla.mozilla.org/show_bug.cgi?id=1992779
+ - https://phabricator.services.mozilla.com/D270909
data_sensitivity:
- interaction
notification_emails:
- tziade@mozilla.com
- gtatum@mozilla.com
+ - thasan@mozilla.com
expires: never
model_management:
diff --git a/toolkit/components/ml/tests/browser/browser_ml_telemetry.js b/toolkit/components/ml/tests/browser/browser_ml_telemetry.js
@@ -15,6 +15,10 @@ const { sinon } = ChromeUtils.importESModule(
"resource://testing-common/Sinon.sys.mjs"
);
+const { MLTelemetry } = ChromeUtils.importESModule(
+ "chrome://global/content/ml/MLTelemetry.sys.mjs"
+);
+
function getGleanCount(metricsName, engineId = "default-engine") {
var metrics = Glean.firefoxAiRuntime[metricsName];
@@ -561,3 +565,124 @@ add_task(async function test_model_download_telemetry_mixed() {
wasmBufferStub.restore();
promiseStub.restore();
});
+
+// Returns the most recent event reported by the metric's testGetValue(),
+// or null when it reports nothing (a falsy value or an empty list).
+function getLastEvent(gleanMetric) {
+  const recorded = gleanMetric.testGetValue() || [];
+  if (!recorded.length) {
+    return null;
+  }
+  return recorded.at(-1);
+}
+
+// Waits until the metric's last event differs (by strict inequality) from the
+// one observed when this helper was called, then returns the newest event.
+// NOTE(review): the baseline is captured at call time, and the callers in this
+// file record their event *before* calling. Presumably this only resolves
+// because testGetValue() hands back fresh objects on every call — confirm.
+async function waitForGleanEvent(gleanMetric) {
+  const baseline = getLastEvent(gleanMetric);
+  const hasNewEvent = () => getLastEvent(gleanMetric) !== baseline;
+  await TestUtils.waitForCondition(hasNewEvent, "Waiting for new Glean event");
+  return getLastEvent(gleanMetric);
+}
+
+/**
+ * Verifies that an MLTelemetry instance constructed without a flowId
+ * generates one itself and attaches it to the sessionStart event.
+ */
+add_task(async function test_ml_telemetry_flow_id_auto_generated() {
+  info("Starting MLTelemetry test: Constructor auto-generates flowId");
+
+  const telemetry = new MLTelemetry({ featureId: "feature-auto-id" });
+  telemetry.sessionStart({ interaction: "test-1" });
+
+  const { extra } = await waitForGleanEvent(
+    Glean.firefoxAiRuntime.sessionStart
+  );
+  const recordedFlowId = extra.flow_id;
+
+  Assert.ok(recordedFlowId, "An event was recorded with a flow_id");
+  Assert.equal(
+    recordedFlowId,
+    telemetry.flowId,
+    "Glean's recorded flow_id matches the instance's flowId"
+  );
+  // Only the length is checked here, not the full UUID format.
+  Assert.equal(
+    recordedFlowId.length,
+    36,
+    "The auto-generated flow_id looks like a UUID"
+  );
+});
+
+/**
+ * Verifies that a flowId handed to the MLTelemetry constructor is the one
+ * exposed by the instance and attached to the sessionStart event.
+ */
+add_task(async function test_ml_telemetry_flow_id_provided() {
+  info("Starting MLTelemetry test: Constructor accepts provided flowId");
+
+  const providedFlowId = "my-custom-flow-id-69420";
+  const telemetry = new MLTelemetry({
+    featureId: "feature-custom-id",
+    flowId: providedFlowId,
+  });
+  telemetry.sessionStart({ interaction: "test-2" });
+
+  const { extra } = await waitForGleanEvent(
+    Glean.firefoxAiRuntime.sessionStart
+  );
+  const recordedFlowId = extra.flow_id;
+
+  Assert.ok(recordedFlowId, "An event was recorded with a flow_id");
+  Assert.equal(
+    recordedFlowId,
+    providedFlowId,
+    "Glean's recorded flow_id matches the provided flowId"
+  );
+  Assert.equal(
+    recordedFlowId,
+    telemetry.flowId,
+    "Glean's recorded flow_id also matches the instance's flowId"
+  );
+});
+
+/**
+ * Tests that the flowId set on the instance is used by all
+ * telemetry methods (e.g., sessionStart and endSession).
+ */
+add_task(async function test_ml_telemetry_flow_id_persistent_on_instance() {
+  info("Starting MLTelemetry test: Instance flowId persists across methods");
+
+  const telemetry3 = new MLTelemetry({
+    featureId: "feature-persistent",
+    flowId: "my-instance-flow-id-789",
+  });
+
+  // Check sessionStart
+  telemetry3.sessionStart({ interaction: "test-3" });
+  let startEvent = await waitForGleanEvent(Glean.firefoxAiRuntime.sessionStart);
+  Assert.equal(
+    startEvent.extra.flow_id,
+    "my-instance-flow-id-789",
+    "sessionStart event used the instance flowId"
+  );
+
+  // Check sessionEnd. endSession() takes the status string positionally, not
+  // an options object.
+  telemetry3.endSession("ok");
+  let endEvent = await waitForGleanEvent(Glean.firefoxAiRuntime.sessionEnd);
+
+  Assert.ok(
+    endEvent.extra.flow_id,
+    "endSession event was recorded with a flow_id"
+  );
+  Assert.equal(
+    endEvent.extra.flow_id,
+    "my-instance-flow-id-789",
+    "endSession event used the *same* instance flowId"
+  );
+  Assert.equal(
+    endEvent.extra.status,
+    "ok",
+    "endSession event recorded the status string that was passed in"
+  );
+
+  // Final check that the instance property itself wasn't modified
+  Assert.equal(
+    telemetry3.flowId,
+    "my-instance-flow-id-789",
+    "The instance's flowId property remained unchanged"
+  );
+});