[ tor-browser ].git.dasho

commit 4103369fab82494b3fe519d6d0fcedeec87b42e8
parent 6417e29cfda97edd0032f5109e03c3f02f330854
Author: Taimur Hasan <taimurhasan@proton.me>
Date:   Wed, 12 Nov 2025 16:03:37 +0000

Bug 1997016 - Add Glean telemetry for ML workflow sessions r=firefox-ai-ml-reviewers,gregtatum

Differential Revision: https://phabricator.services.mozilla.com/D270909

Diffstat:
A toolkit/components/ml/MLTelemetry.sys.mjs  | 323 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M toolkit/components/ml/jar.mn  | 1 +
M toolkit/components/ml/metrics.yaml  | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M toolkit/components/ml/tests/browser/browser_ml_telemetry.js  | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

4 files changed, 568 insertions(+), 2 deletions(-)
diff --git a/toolkit/components/ml/MLTelemetry.sys.mjs b/toolkit/components/ml/MLTelemetry.sys.mjs
@@ -0,0 +1,323 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+const lazy = {};
+
+// Lazily build a console instance for this module. Its maximum log level is
+// read from the "browser.ml.logLevel" preference.
+ChromeUtils.defineLazyGetter(lazy, "console", () =>
+  console.createInstance({
+    maxLogLevelPref: "browser.ml.logLevel",
+    prefix: "MLTelemetry",
+  })
+);
+
+/**
+ * MLTelemetry provides a mechanism for tracking a "flow" of operations
+ * related to a machine learning feature. A flow is a sequence of related
+ * events that represent a single, complete user-level operation, for example
+ * generating a summary for a page.
+ *
+ * Each instance owns one flowId (a correlation ID). The session methods always
+ * use the instance flowId; the record* methods accept an optional flow ID
+ * override and fall back to the instance flowId when none is given.
+ *
+ * @example
+ * new MLTelemetry({ featureId: "ml-suggest-intent" }).sessionStart({ interaction: "button_click" });
+ * @example
+ * new MLTelemetry({ featureId: "ml-suggest-intent", flowId: "1234-5678" }).sessionStart({ interaction: "keyboard_shortcut"});
+ */
+export class MLTelemetry {
+  /** @type {string} */
+  #flowId;
+  /** @type {string|undefined} */
+  #featureId;
+  /** @type {number|undefined} */
+  #startTime;
+
+  /**
+   * Creates a new MLTelemetry instance.
+   *
+   * @param {object} [options] - Configuration options.
+   * @param {string} [options.featureId] - The identifier for the ML feature.
+   * @param {string} [options.flowId] - An optional unique identifier for
+   * this flow. If not provided, a new UUID will be generated.
+   */
+  constructor(options = {}) {
+    this.#featureId = options.featureId;
+    this.#flowId = options.flowId || crypto.randomUUID();
+
+    this.logEventToConsole(this.constructor, {
+      featureId: this.#featureId,
+      flowId: this.#flowId,
+    });
+  }
+
+  /**
+   * The unique identifier for this flow.
+   *
+   * @returns {string} The flow ID.
+   */
+  get flowId() {
+    return this.#flowId;
+  }
+
+  /**
+   * The feature identifier for this telemetry instance.
+   *
+   * @returns {string|undefined} The feature ID, or undefined when none was
+   * passed to the constructor.
+   */
+  get featureId() {
+    return this.#featureId;
+  }
+
+  /**
+   * Starts a telemetry session for this instance's flow and records the
+   * session_start event.
+   *
+   * @param {object} [options] - Session start options.
+   * @param {string} [options.interaction] - The interaction type (e.g., "button_click", "keyboard_shortcut").
+   * @throws {Error} If a session was already started on this instance.
+   */
+  sessionStart({ interaction } = {}) {
+    // Compare against undefined instead of relying on truthiness, so that a
+    // timestamp of 0 would still count as a started session.
+    if (this.#startTime !== undefined) {
+      throw new Error(`Session already started for flowId: ${this.#flowId}`);
+    }
+
+    this.#startTime = ChromeUtils.now();
+
+    const extra = {
+      flow_id: this.flowId,
+      feature_id: this.featureId,
+      interaction,
+    };
+    Glean.firefoxAiRuntime.sessionStart.record(extra);
+
+    this.logEventToConsole(this.sessionStart, extra);
+  }
+
+  /**
+   * Logs a debug message to the browser console, prefixed with the first five
+   * characters of the flow ID.
+   *
+   * @param {object} caller - The calling function or class; its `name` is logged.
+   * @param {object} [data] - Optional data passed along to the console call.
+   * A `flowId` or `flow_id` entry, when present, is used for the prefix.
+   */
+  logEventToConsole(caller, data) {
+    // Callers in this class use both `flowId` and `flow_id` as the key, so
+    // honor either spelling before falling back to the instance flow ID.
+    const flowId = String(data?.flowId || data?.flow_id || this.#flowId);
+    const id = flowId.substring(0, 5);
+    lazy.console.debug("flowId[%s]: %s", id, caller.name, data);
+  }
+
+  /**
+   * Ends the telemetry session and records the final status and duration.
+   *
+   * @param {string} status - The final status of the session.
+   * @throws {Error} If sessionStart() was not called or status is missing.
+   * @returns {number} Duration in milliseconds (not rounded).
+   */
+  endSession(status) {
+    if (!status) {
+      throw new Error("status parameter is required");
+    }
+    if (this.#startTime === undefined) {
+      throw new Error(
+        `sessionStart() was not called for flowId: ${this.#flowId}`
+      );
+    }
+
+    const duration_ms = ChromeUtils.now() - this.#startTime;
+    Glean.firefoxAiRuntime.sessionEnd.record({
+      flow_id: this.#flowId,
+      // session_end declares a feature_id extra key, so report it here just
+      // like sessionStart() does.
+      feature_id: this.#featureId,
+      duration: Math.round(duration_ms),
+      status,
+    });
+
+    this.logEventToConsole(this.endSession, {
+      flowId: this.#flowId,
+      feature_id: this.#featureId,
+      status,
+      duration_ms,
+    });
+    return duration_ms;
+  }
+
+  /**
+   * Records a successful engine creation event, and also accumulates the
+   * duration into the older labeled timing distribution metric.
+   *
+   * @param {object} options - Engine creation success options.
+   * @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided.
+   * @param {string} options.engineId - The engine identifier (e.g., "pdfjs", "ml-suggest-intent").
+   * @param {string} [options.label] - Label for the old timing distribution metric. Defaults to engineId if not provided.
+   * @param {number} options.duration - Engine creation time in milliseconds.
+   */
+  recordEngineCreationSuccessFlow({ flowId, engineId, label, duration }) {
+    const currentFlowId = flowId || this.#flowId;
+    const actualLabel = label || engineId;
+    const roundedDuration = Math.round(duration);
+
+    Glean.firefoxAiRuntime.engineCreationSuccessFlow.record({
+      flow_id: currentFlowId,
+      engineId,
+      duration: roundedDuration,
+    });
+
+    // Also record the old labeled timing distribution metric
+    Glean.firefoxAiRuntime.engineCreationSuccess[
+      actualLabel
+    ].accumulateSingleSample(roundedDuration);
+
+    this.logEventToConsole(this.recordEngineCreationSuccessFlow, {
+      flowId: currentFlowId,
+      engineId,
+      label: actualLabel,
+      duration,
+    });
+  }
+
+  /**
+   * Records a failed engine creation event.
+   *
+   * @param {object} options - Engine creation failure options.
+   * @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided.
+   * @param {string} options.modelId - The model identifier.
+   * @param {string} options.featureId - The feature identifier.
+   * @param {string} options.taskName - The task name.
+   * @param {string} options.engineId - The engine identifier.
+   * @param {string} options.error - The error class/message.
+   */
+  recordEngineCreationFailure({
+    flowId,
+    modelId,
+    featureId,
+    taskName,
+    engineId,
+    error,
+  }) {
+    const currentFlowId = flowId || this.#flowId;
+
+    Glean.firefoxAiRuntime.engineCreationFailure.record({
+      flow_id: currentFlowId,
+      modelId,
+      featureId,
+      taskName,
+      engineId,
+      error,
+    });
+
+    this.logEventToConsole(this.recordEngineCreationFailure, {
+      flowId: currentFlowId,
+      modelId,
+      featureId,
+      taskName,
+      engineId,
+      error,
+    });
+  }
+
+  /**
+   * Records a successful inference run event, and also accumulates the total
+   * (tokenizing + inference) time into the older labeled timing distribution.
+   *
+   * @param {object} options - Inference success options.
+   * @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided.
+   * @param {string} [options.engineId] - The engine identifier. Only used for console logging.
+   * @param {string} [options.label] - Label for the old timing distribution metric. Defaults to "no-label" if not provided.
+   * @param {number} options.tokenizingTime - Time spent tokenizing in milliseconds.
+   * @param {number} options.inferenceTime - Time spent on inference in milliseconds.
+   */
+  recordRunInferenceSuccessFlow({
+    flowId,
+    engineId,
+    label,
+    tokenizingTime,
+    inferenceTime,
+  }) {
+    const currentFlowId = flowId || this.#flowId;
+    const actualLabel = label || "no-label";
+
+    Glean.firefoxAiRuntime.runInferenceSuccessFlow.record({
+      flow_id: currentFlowId,
+      tokenizing_time: Math.round(tokenizingTime),
+      inference_time: Math.round(inferenceTime),
+    });
+
+    // Also record the old labeled timing distribution metric
+    const totalTime = Math.round(tokenizingTime + inferenceTime);
+    Glean.firefoxAiRuntime.runInferenceSuccess[
+      actualLabel
+    ].accumulateSingleSample(totalTime);
+
+    this.logEventToConsole(this.recordRunInferenceSuccessFlow, {
+      flowId: currentFlowId,
+      // Falsy engine IDs are logged as undefined.
+      engineId: engineId || undefined,
+      label: actualLabel,
+      tokenizingTime,
+      inferenceTime,
+    });
+  }
+
+  /**
+   * Records a failed inference run event.
+   *
+   * @param {string} error - The error class/message.
+   * @param {string} [flow_id=this.#flowId] - The flow ID. Uses instance flowId if not provided.
+   */
+  recordRunInferenceFailure(error, flow_id = this.flowId) {
+    const extra = { flow_id, error };
+
+    Glean.firefoxAiRuntime.runInferenceFailure.record(extra);
+
+    this.logEventToConsole(this.recordRunInferenceFailure, extra);
+  }
+
+  /**
+   * Records an engine run event. Numeric values are rounded to integers
+   * before being recorded.
+   *
+   * @param {object} options - Engine run options.
+   * @param {string} [options.flow_id] - The flow ID. Uses instance flowId if not provided.
+   * @param {number} options.cpuMilliseconds - The combined milliseconds of every cpu core that was running.
+   * @param {number} options.wallMilliseconds - The amount of wall time the run request took.
+   * @param {number} options.cores - The number of cores on the machine.
+   * @param {number} options.cpuUtilization - The percentage of the user's CPU used (0-100).
+   * @param {number} options.memoryBytes - The number of RSS bytes for the inference process.
+   * @param {string} [options.feature_id] - The feature identifier. Uses instance featureId if not provided.
+   * @param {string} options.engineId - The engine identifier.
+   * @param {string} options.modelId - The model identifier.
+   * @param {string} options.backend - The backend that is being used.
+   */
+  recordEngineRun({
+    cpuMilliseconds,
+    wallMilliseconds,
+    cores,
+    cpuUtilization,
+    memoryBytes,
+    engineId,
+    modelId,
+    backend,
+    flow_id = this.#flowId,
+    feature_id = this.#featureId,
+  }) {
+    const payload = {
+      flow_id,
+      cpu_milliseconds: Math.round(cpuMilliseconds),
+      wall_milliseconds: Math.round(wallMilliseconds),
+      cores: Math.round(cores),
+      cpu_utilization: Math.round(cpuUtilization),
+      memory_bytes: Math.round(memoryBytes),
+      feature_id,
+      engine_id: engineId,
+      model_id: modelId,
+      backend,
+    };
+
+    Glean.firefoxAiRuntime.engineRun.record(payload);
+
+    this.logEventToConsole(this.recordEngineRun, payload);
+  }
+}
diff --git a/toolkit/components/ml/jar.mn b/toolkit/components/ml/jar.mn
@@ -27,6 +27,7 @@ toolkit.jar:
     content/global/ml/backends/OpenAIPipeline.mjs         (content/backends/OpenAIPipeline.mjs)
     content/global/ml/backends/StaticEmbeddingsPipeline.mjs (content/backends/StaticEmbeddingsPipeline.mjs)
     content/global/ml/openai.mjs                          (vendor/openai/dist/openai.mjs)
+    content/global/ml/MLTelemetry.sys.mjs                 (MLTelemetry.sys.mjs)
 #ifdef NIGHTLY_BUILD
     content/global/ml/ort.webgpu-dev.mjs        	        (vendor/ort.webgpu-dev.mjs)
     content/global/ml/transformers-dev.js       	        (vendor/transformers-dev.js)
diff --git a/toolkit/components/ml/metrics.yaml b/toolkit/components/ml/metrics.yaml
@@ -8,10 +8,90 @@
 ---
 $schema: moz://mozilla.org/schemas/glean/metrics/2-0-0
 $tags:
-  - 'Core :: Machine Learning'
-
+  - "Core :: Machine Learning"
 
 firefox.ai.runtime:
+  session_start:
+    type: event
+    description: >
+      Marks the beginning of an ML inference session or workflow.
+    bugs:
+      - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
+    data_reviews:
+      - https://phabricator.services.mozilla.com/D270909
+    data_sensitivity:
+      - interaction
+    notification_emails:
+      - thasan@mozilla.com
+    expires: never
+    extra_keys:
+      flow_id: &flow_id
+        type: string
+        description: >
+          Unique identifier for this ML workflow session. Used to correlate all
+          events within this session across components.
+      feature_id:
+        type: string
+        description: >
+          The feature initiating this session
+      interaction:
+        type: string
+        description: >
+          The interaction type that initiated this session
+
+  session_end:
+    type: event
+    description: >
+      Marks the end of an ML inference session or workflow.
+      This event is recorded when the session completes successfully or is terminated.
+    bugs:
+      - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
+    data_reviews:
+      - https://phabricator.services.mozilla.com/D270909
+    data_sensitivity:
+      - interaction
+    notification_emails:
+      - thasan@mozilla.com
+    expires: never
+    extra_keys:
+      flow_id: *flow_id
+      feature_id:
+        type: string
+        description: >
+          The feature associated with this session.
+      duration:
+        type: quantity
+        description: >
+          Total session duration in milliseconds.
+      status:
+        type: string
+        description: >
+          Session completion status
+
+  engine_creation_success_flow:
+    type: event
+    description: >
+      Records a successful engine creation within a tracked flow
+    bugs:
+      - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
+    data_reviews:
+      - https://phabricator.services.mozilla.com/D270909
+    data_sensitivity:
+      - interaction
+    notification_emails:
+      - thasan@mozilla.com
+    expires: never
+    extra_keys:
+      flow_id: *flow_id
+      engineId:
+        type: string
+        description: >
+          The Engine Id of the created inference engine
+      duration:
+        type: quantity
+        description: >
+          Engine creation time in milliseconds
+
   engine_creation_success:
     type: labeled_timing_distribution
     time_unit: millisecond
@@ -46,14 +126,18 @@ firefox.ai.runtime:
       An inference engine has failed to create
     bugs:
       - https://bugzilla.mozilla.org/show_bug.cgi?id=1891685
+      - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
     data_reviews:
       - https://phabricator.services.mozilla.com/D238583
+      - https://phabricator.services.mozilla.com/D270909
     data_sensitivity:
       - interaction
     notification_emails:
       - tziade@mozilla.com
+      - tshasan@mozilla.com
     expires: never
     extra_keys:
+      flow_id: *flow_id
       modelId:
         type: string
         description: model id
@@ -76,14 +160,18 @@ firefox.ai.runtime:
       An inference run has failed
     bugs:
       - https://bugzilla.mozilla.org/show_bug.cgi?id=1891685
+      - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
     data_reviews:
       - https://phabricator.services.mozilla.com/D238583
+      - https://phabricator.services.mozilla.com/D270909
     data_sensitivity:
       - interaction
     notification_emails:
       - tziade@mozilla.com
+      - tshasan@mozilla.com
     expires: never
     extra_keys:
+      flow_id: *flow_id
       modelId:
         type: string
         description: model id
@@ -94,6 +182,30 @@ firefox.ai.runtime:
         description: Engine id
         type: string
 
+  run_inference_success_flow:
+    type: event
+    description: >
+      Records a successful inference run within a tracked flow
+    bugs:
+      - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
+    data_reviews:
+      - https://phabricator.services.mozilla.com/D270909
+    data_sensitivity:
+      - interaction
+    notification_emails:
+      - thasan@mozilla.com
+    expires: never
+    extra_keys:
+      flow_id: *flow_id
+      tokenizing_time:
+        type: quantity
+        description: >
+          Time taken for tokenization in milliseconds
+      inference_time:
+        type: quantity
+        description: >
+          Time taken for inference in milliseconds
+
   run_inference_success:
     type: labeled_timing_distribution
     time_unit: millisecond
@@ -190,11 +302,13 @@ firefox.ai.runtime:
       error:
         description: error class
         type: string
+
   engine_run:
     type: event
     description: >
       The performance and memory characteristics of a single run of the inference engine.
     extra_keys:
+      flow_id: *flow_id
       cpu_milliseconds:
         description: The combined milliseconds of every cpu core that was running.
         type: quantity
@@ -230,13 +344,16 @@ firefox.ai.runtime:
         description: The backend that is being used, e.g. onnx, onnx-native, wllama
     bugs:
       - https://bugzilla.mozilla.org/show_bug.cgi?id=1992779
+      - https://bugzilla.mozilla.org/show_bug.cgi?id=1997016
     data_reviews:
       - https://bugzilla.mozilla.org/show_bug.cgi?id=1992779
+      - https://phabricator.services.mozilla.com/D270909
     data_sensitivity:
       - interaction
     notification_emails:
       - tziade@mozilla.com
       - gtatum@mozilla.com
+      - thasan@mozilla.com
     expires: never
 
 model_management:
diff --git a/toolkit/components/ml/tests/browser/browser_ml_telemetry.js b/toolkit/components/ml/tests/browser/browser_ml_telemetry.js
@@ -15,6 +15,10 @@ const { sinon } = ChromeUtils.importESModule(
   "resource://testing-common/Sinon.sys.mjs"
 );
 
+const { MLTelemetry } = ChromeUtils.importESModule(
+  "chrome://global/content/ml/MLTelemetry.sys.mjs"
+);
+
 function getGleanCount(metricsName, engineId = "default-engine") {
   var metrics = Glean.firefoxAiRuntime[metricsName];
 
@@ -561,3 +565,124 @@ add_task(async function test_model_download_telemetry_mixed() {
   wasmBufferStub.restore();
   promiseStub.restore();
 });
+
+/**
+ * Returns the most recent event reported by the metric's testGetValue(), or
+ * null when there is none.
+ */
+function getLastEvent(gleanMetric) {
+  const recorded = gleanMetric.testGetValue() || [];
+  if (!recorded.length) {
+    return null;
+  }
+  return recorded.at(-1);
+}
+
+// A helper to wait for a new Glean event.
+//
+// Snapshots the metric's last event, waits until getLastEvent() returns
+// something that is not strictly equal (!==) to that snapshot, and then
+// returns the metric's current last event.
+//
+// NOTE(review): the tests in this file call sessionStart()/endSession()
+// *before* invoking this helper, so `originalEvent` may already be the event
+// of interest. In that case the `!==` check only succeeds if testGetValue()
+// hands back fresh objects on every call - confirm against the Glean test API.
+async function waitForGleanEvent(gleanMetric) {
+  const originalEvent = getLastEvent(gleanMetric);
+  await TestUtils.waitForCondition(() => {
+    return getLastEvent(gleanMetric) !== originalEvent;
+  }, "Waiting for new Glean event");
+  return getLastEvent(gleanMetric);
+}
+
+/**
+ * Verifies that an MLTelemetry instance created without a flowId still
+ * records a flow_id on sessionStart, and that it matches the instance's
+ * flowId and has the length of a UUID string.
+ */
+add_task(async function test_ml_telemetry_flow_id_auto_generated() {
+  info("Starting MLTelemetry test: Constructor auto-generates flowId");
+
+  const telemetry = new MLTelemetry({ featureId: "feature-auto-id" });
+  telemetry.sessionStart({ interaction: "test-1" });
+
+  const event = await waitForGleanEvent(Glean.firefoxAiRuntime.sessionStart);
+  const recordedFlowId = event.extra.flow_id;
+
+  Assert.ok(recordedFlowId, "An event was recorded with a flow_id");
+  Assert.equal(
+    recordedFlowId,
+    telemetry.flowId,
+    "Glean's recorded flow_id matches the instance's flowId"
+  );
+  Assert.equal(
+    recordedFlowId.length,
+    36,
+    "The auto-generated flow_id looks like a UUID"
+  );
+});
+
+/**
+ * Verifies that a flowId passed to the MLTelemetry constructor is the one
+ * exposed by the instance and recorded on the sessionStart event.
+ */
+add_task(async function test_ml_telemetry_flow_id_provided() {
+  info("Starting MLTelemetry test: Constructor accepts provided flowId");
+
+  const telemetry = new MLTelemetry({
+    featureId: "feature-custom-id",
+    flowId: "my-custom-flow-id-69420",
+  });
+  telemetry.sessionStart({ interaction: "test-2" });
+
+  const event = await waitForGleanEvent(Glean.firefoxAiRuntime.sessionStart);
+  const recordedFlowId = event.extra.flow_id;
+
+  Assert.ok(recordedFlowId, "An event was recorded with a flow_id");
+  Assert.equal(
+    recordedFlowId,
+    "my-custom-flow-id-69420",
+    "Glean's recorded flow_id matches the provided flowId"
+  );
+  Assert.equal(
+    recordedFlowId,
+    telemetry.flowId,
+    "Glean's recorded flow_id also matches the instance's flowId"
+  );
+});
+
+/**
+ * Tests that the flowId set on the instance is used by the session
+ * telemetry methods (sessionStart and endSession), and that endSession
+ * records the status string it was given.
+ */
+add_task(async function test_ml_telemetry_flow_id_persistent_on_instance() {
+  info("Starting MLTelemetry test: Instance flowId persists across methods");
+
+  const telemetry3 = new MLTelemetry({
+    featureId: "feature-persistent",
+    flowId: "my-instance-flow-id-789",
+  });
+
+  // Check sessionStart
+  telemetry3.sessionStart({ interaction: "test-3" });
+  let startEvent = await waitForGleanEvent(Glean.firefoxAiRuntime.sessionStart);
+  Assert.equal(
+    startEvent.extra.flow_id,
+    "my-instance-flow-id-789",
+    "sessionStart event used the instance flowId"
+  );
+
+  // Check sessionEnd. endSession() takes the status as a plain string, not an
+  // options object.
+  telemetry3.endSession("ok");
+  let endEvent = await waitForGleanEvent(Glean.firefoxAiRuntime.sessionEnd);
+
+  Assert.ok(
+    endEvent.extra.flow_id,
+    "endSession event was recorded with a flow_id"
+  );
+  Assert.equal(
+    endEvent.extra.flow_id,
+    "my-instance-flow-id-789",
+    "endSession event used the *same* instance flowId"
+  );
+  Assert.equal(
+    endEvent.extra.status,
+    "ok",
+    "endSession event recorded the provided status"
+  );
+
+  // Final check that the instance property itself wasn't modified
+  Assert.equal(
+    telemetry3.flowId,
+    "my-instance-flow-id-789",
+    "The instance's flowId property remained unchanged"
+  );
+});

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

A	toolkit/components/ml/MLTelemetry.sys.mjs	\|	323	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	toolkit/components/ml/jar.mn	\|	1	+
M	toolkit/components/ml/metrics.yaml	\|	121	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M	toolkit/components/ml/tests/browser/browser_ml_telemetry.js	\|	125	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++