[ tor-browser ].git.dasho

commit e30793607c53fbcd31bf2411944dfb36675d3e79
parent 29933f495e98918fb343238dde25f551489e2e5c
Author: Serban Stanca <sstanca@mozilla.com>
Date:   Wed, 10 Dec 2025 03:49:25 +0200

Revert "Bug 2000033, Bug 2000682: Integrate Glean MLTelemetry into the inference pipeline r=ai-ondevice-reviewers,gregtatum,rconcepcion" for causing perftests bustages.

This reverts commit 5989e4930b3768e4914e5414b88f93ec580a3fc7.

This reverts commit 416543f28ae08552f777d44acd259c087b3fb3aa.

Diffstat:
M toolkit/components/ml/MLTelemetry.sys.mjs  | 151 ++++++++++++++++++++++---------------------------------------------------------
M toolkit/components/ml/actors/MLEngineParent.sys.mjs  | 86 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M toolkit/components/ml/content/backends/ONNXPipeline.mjs  | 253 +++++++++++++++++++++++++------------------------------------------------------
M toolkit/components/ml/metrics.yaml  | 29 +----------------------------
M toolkit/components/ml/tests/browser/browser_ml_telemetry.js  | 45 ++-------------------------------------------

5 files changed, 172 insertions(+), 392 deletions(-)
diff --git a/toolkit/components/ml/MLTelemetry.sys.mjs b/toolkit/components/ml/MLTelemetry.sys.mjs
@@ -11,10 +11,6 @@ ChromeUtils.defineLazyGetter(lazy, "console", () => {
   });
 });
 
-ChromeUtils.defineESModuleGetters(lazy, {
-  isAddonEngineId: "chrome://global/content/ml/Utils.sys.mjs",
-});
-
 /**
  * MLTelemetry provides a mechanism tracking a "flow" of operations
  * related to a machine learning feature. A flow is a sequence of related
@@ -74,20 +70,6 @@ export class MLTelemetry {
   }
 
   /**
-   * Returns the label used in telemetry for a given engine ID.
-   * Converts addon engine IDs to "webextension" label.
-   *
-   * @param {string} engineId - The engine ID to convert.
-   * @returns {string} The Glean label for the engine.
-   */
-  static getGleanLabel(engineId) {
-    if (lazy.isAddonEngineId(engineId)) {
-      return "webextension";
-    }
-    return engineId;
-  }
-
-  /**
    * Starts a telemetry session for the given flow.
    *
    * @param {object} [options] - Session start options.
@@ -167,12 +149,13 @@ export class MLTelemetry {
    * @param {object} options - Engine creation success options.
    * @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided.
    * @param {string} options.engineId - The engine identifier (e.g., "pdfjs", "ml-suggest-intent").
+   * @param {string} [options.label] - Label for the old timing distribution metric. Defaults to engineId if not provided.
    * @param {number} options.duration - Engine creation time in milliseconds.
    */
-  recordEngineCreationSuccessFlow({ flowId, engineId, duration }) {
+  recordEngineCreationSuccessFlow({ flowId, engineId, label, duration }) {
     const currentFlowId = flowId || this.#flowId;
     const actualEngineId = engineId;
-    const actualLabel = MLTelemetry.getGleanLabel(engineId);
+    const actualLabel = label || engineId;
 
     Glean.firefoxAiRuntime.engineCreationSuccessFlow.record({
       flow_id: currentFlowId,
@@ -202,7 +185,7 @@ export class MLTelemetry {
    * @param {string} options.featureId - The feature identifier.
    * @param {string} options.taskName - The task name.
    * @param {string} options.engineId - The engine identifier.
-   * @param {string|object} options.error - The error class/message or object.
+   * @param {string} options.error - The error class/message.
    */
   recordEngineCreationFailure({
     flowId,
@@ -213,11 +196,6 @@ export class MLTelemetry {
     error,
   }) {
     const currentFlowId = flowId || this.#flowId;
-    // Ensure error is always a string
-    const errorString =
-      typeof error === "object" && error !== null
-        ? String(error.name || error.message || error)
-        : String(error);
 
     Glean.firefoxAiRuntime.engineCreationFailure.record({
       flow_id: currentFlowId,
@@ -225,7 +203,7 @@ export class MLTelemetry {
       featureId,
       taskName,
       engineId,
-      error: errorString,
+      error,
     });
 
     this.logEventToConsole(this.recordEngineCreationFailure, {
@@ -234,112 +212,67 @@ export class MLTelemetry {
       featureId,
       taskName,
       engineId,
-      error: errorString,
+      error,
     });
   }
 
   /**
    * Records a successful inference run event.
    *
-   * @param {string} engineId - The engine identifier.
-   * @param {object} metrics - The inference metrics object.
-   * @param {number} [metrics.preprocessingTime] - Time spent preprocessing (legacy).
-   * @param {number} [metrics.tokenizingTime] - Time spent tokenizing in milliseconds.
-   * @param {number} [metrics.inferenceTime] - Time spent on inference in milliseconds.
-   * @param {number} [metrics.decodingTime] - Time spent decoding in milliseconds.
-   * @param {number} [metrics.inputTokens] - Number of input tokens.
-   * @param {number} [metrics.outputTokens] - Number of output tokens.
-   * @param {number} [metrics.timeToFirstToken] - Time to first token in milliseconds.
-   * @param {number} [metrics.tokensPerSecond] - Tokens per second.
-   * @param {number} [metrics.timePerOutputToken] - Time per output token in milliseconds.
+   * @param {object} options - Inference success options.
+   * @param {string} [options.flowId] - The flow ID. Uses instance flowId if not provided.
+   * @param {string} [options.engineId] - The engine identifier. Defaults to undefined.
+   * @param {string} [options.label] - Label for the old timing distribution metric. Defaults to no-label if not provided.
+   * @param {number} options.tokenizingTime - Time spent tokenizing in milliseconds.
+   * @param {number} options.inferenceTime - Time spent on inference in milliseconds.
    */
-  recordRunInferenceSuccessFlow(engineId, metrics) {
-    try {
-      const currentFlowId = this.#flowId;
-      const EngineId = engineId || undefined;
-      const Label = engineId ? MLTelemetry.getGleanLabel(engineId) : "no-label";
-
-      // Handle legacy preprocessingTime field
-      const tokenizingTime =
-        metrics.preprocessingTime ?? metrics.tokenizingTime;
-
-      // Ensure all metrics are properly rounded/typed for Glean
-      // This will be updated to use the method from revision(D271263)
-      const gleanPayload = {
-        flow_id: currentFlowId,
-        tokenizing_time:
-          tokenizingTime != null ? Math.round(tokenizingTime) : undefined,
-        inference_time:
-          metrics.inferenceTime != null
-            ? Math.round(metrics.inferenceTime)
-            : undefined,
-        decoding_time:
-          metrics.decodingTime != null
-            ? Math.round(metrics.decodingTime)
-            : undefined,
-        input_tokens:
-          metrics.inputTokens != null
-            ? Math.round(metrics.inputTokens)
-            : undefined,
-        output_tokens:
-          metrics.outputTokens != null
-            ? Math.round(metrics.outputTokens)
-            : undefined,
-        time_to_first_token:
-          metrics.timeToFirstToken != null
-            ? Math.round(metrics.timeToFirstToken)
-            : undefined,
-        tokens_per_second:
-          metrics.tokensPerSecond != null
-            ? Math.round(metrics.tokensPerSecond * 100) / 100
-            : undefined,
-        time_per_output_token:
-          metrics.timePerOutputToken != null
-            ? Math.round(metrics.timePerOutputToken * 100) / 100
-            : undefined,
-      };
-
-      Glean.firefoxAiRuntime.runInferenceSuccessFlow.record(gleanPayload);
+  recordRunInferenceSuccessFlow({
+    flowId,
+    engineId,
+    label,
+    tokenizingTime,
+    inferenceTime,
+  }) {
+    const currentFlowId = flowId || this.#flowId;
+    const EngineId = engineId || undefined;
+    const Label = label || "no-label";
 
-      // record the old labeled timing distribution metric
-      const totalTime = Math.round(
-        (tokenizingTime || 0) +
-          (metrics.inferenceTime || 0) +
-          (metrics.decodingTime || 0)
-      );
+    Glean.firefoxAiRuntime.runInferenceSuccessFlow.record({
+      flow_id: currentFlowId,
+      tokenizing_time: Math.round(tokenizingTime),
+      inference_time: Math.round(inferenceTime),
+    });
 
-      Glean.firefoxAiRuntime.runInferenceSuccess[Label].accumulateSingleSample(
-        totalTime
-      );
+    // Also record the old labeled timing distribution metric
+    const totalTime = Math.round(tokenizingTime + inferenceTime);
+    Glean.firefoxAiRuntime.runInferenceSuccess[Label].accumulateSingleSample(
+      totalTime
+    );
 
-      this.logEventToConsole(this.recordRunInferenceSuccessFlow, {
-        ...gleanPayload,
-        engineId: EngineId,
-        label: Label,
-      });
-    } catch (telemetryError) {
-      lazy.console.error("Failed to record ML telemetry:", telemetryError);
-    }
+    this.logEventToConsole(this.recordRunInferenceSuccessFlow, {
+      flowId: currentFlowId,
+      engineId: EngineId,
+      label: Label,
+      tokenizingTime,
+      inferenceTime,
+    });
   }
 
   /**
    * Records a failed inference run event.
    *
-   * @param {string|object} error - The error class/message or object.
+   * @param {string} error - The error class/message.
    * @param {string} [flow_id=this.#flowId] - The flow ID. Uses instance flowId if not provided.
    */
   recordRunInferenceFailure(error, flow_id = this.flowId) {
-    // Ensure error is always a string
-    const errorString = error instanceof Error ? error.message : String(error);
-
     Glean.firefoxAiRuntime.runInferenceFailure.record({
       flow_id,
-      error: errorString,
+      error,
     });
 
     this.logEventToConsole(this.recordRunInferenceFailure, {
       flow_id,
-      error: errorString,
+      error,
     });
   }
 
diff --git a/toolkit/components/ml/actors/MLEngineParent.sys.mjs b/toolkit/components/ml/actors/MLEngineParent.sys.mjs
@@ -2,7 +2,6 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
-import { MLTelemetry } from "chrome://global/content/ml/MLTelemetry.sys.mjs";
 
 /**
  * @import { MLEngineChild } from "./MLEngineChild.sys.mjs"
@@ -16,6 +15,7 @@ const lazy = XPCOMUtils.declareLazy({
   clearTimeout: "resource://gre/modules/Timer.sys.mjs",
   ModelHub: "chrome://global/content/ml/ModelHub.sys.mjs",
   Progress: "chrome://global/content/ml/Utils.sys.mjs",
+  isAddonEngineId: "chrome://global/content/ml/Utils.sys.mjs",
   OPFS: "chrome://global/content/ml/OPFS.sys.mjs",
   BACKENDS: "chrome://global/content/ml/EngineProcess.sys.mjs",
   stringifyForLog: "chrome://global/content/ml/Utils.sys.mjs",
@@ -222,7 +222,7 @@ export class MLEngineParent extends JSProcessActorParent {
       });
     }
 
-    const { featureId, engineId } = pipelineOptions;
+    const engineId = pipelineOptions.engineId;
 
     // Allow notifications callback changes even when reusing engine.
     this.notificationsCallback = notificationsCallback;
@@ -235,7 +235,6 @@ export class MLEngineParent extends JSProcessActorParent {
       Promise.withResolvers();
     MLEngineParent.engineLocks.set(engineId, lockPromise);
     MLEngineParent.engineCreationAbortSignal.set(engineId, abortSignal);
-
     try {
       const currentEngine = MLEngine.getInstance(engineId);
       if (currentEngine) {
@@ -274,24 +273,12 @@ export class MLEngineParent extends JSProcessActorParent {
 
       const creationTime = ChromeUtils.now() - start;
 
-      engine.telemetry.recordEngineCreationSuccessFlow({
-        engineId,
-        duration: creationTime,
-      });
+      Glean.firefoxAiRuntime.engineCreationSuccess[
+        engine.getGleanLabel()
+      ].accumulateSingleSample(creationTime);
 
       // TODO - What happens if the engine is already killed here?
       return engine;
-    } catch (error) {
-      const { modelId, taskName, flowId } = pipelineOptions;
-      const telemetry = new MLTelemetry({ featureId, flowId });
-      telemetry.recordEngineCreationFailure({
-        modelId,
-        featureId,
-        taskName,
-        engineId,
-        error,
-      });
-      throw error;
     } finally {
       MLEngineParent.engineLocks.delete(engineId);
       MLEngineParent.engineCreationAbortSignal.delete(engineId);
@@ -994,6 +981,18 @@ export class MLEngine {
   notificationsCallback = null;
 
   /**
+   * Returns the label used in telemetry for that engine id
+   *
+   * @returns {string}
+   */
+  getGleanLabel() {
+    if (lazy.isAddonEngineId(this.engineId)) {
+      return "webextension";
+    }
+    return this.engineId;
+  }
+
+  /**
    * Removes an instance of the MLEngine with the given engineId.
    *
    * @param {string} engineId - The ID of the engine instance to be removed.
@@ -1038,10 +1037,6 @@ export class MLEngine {
     this.mlEngineParent = mlEngineParent;
     this.pipelineOptions = pipelineOptions;
     this.notificationsCallback = notificationsCallback;
-    this.telemetry = new MLTelemetry({
-      featureId: pipelineOptions.featureId,
-      flowId: pipelineOptions.flowId,
-    });
   }
 
   /**
@@ -1330,20 +1325,28 @@ export class MLEngine {
         const request = this.#requests.get(requestId);
         if (request) {
           if (error) {
-            this.telemetry.recordRunInferenceFailure(error);
-            request.reject(error);
-          } else if (response) {
+            Glean.firefoxAiRuntime.runInferenceFailure.record({
+              engineId: this.engineId,
+              modelId: this.pipelineOptions.modelId,
+              featureId: this.pipelineOptions.featureId,
+            });
+          }
+          if (response) {
             // Validate response before returning to caller
             const validatedResponse = this.#validateResponse(response);
             if (!validatedResponse) {
               request.reject(new Error("Response failed security validation"));
             } else {
-              this.telemetry.recordRunInferenceSuccessFlow(
-                this.engineId,
-                validatedResponse.metrics
-              );
+              const totalTime =
+                validatedResponse.metrics.tokenizingTime +
+                validatedResponse.metrics.inferenceTime;
+              Glean.firefoxAiRuntime.runInferenceSuccess[
+                this.getGleanLabel()
+              ].accumulateSingleSample(totalTime);
               request.resolve(validatedResponse);
             }
+          } else {
+            request.reject(error);
           }
         } else {
           lazy.console.error(
@@ -1545,19 +1548,26 @@ export class MLEngine {
         (resourcesAfter.cpuTime - resourcesBefore.cpuTime) / 1_000_000;
       const wallMilliseconds = ChromeUtils.now() - beforeRun;
       const cores = lazy.mlUtils.getOptimalCPUConcurrency();
-      const cpuUtilization = (cpuMilliseconds / wallMilliseconds / cores) * 100;
+      const cpuUtilization = cpuMilliseconds / wallMilliseconds / cores;
       const memoryBytes = resourcesAfter.memory;
 
-      this.telemetry.recordEngineRun({
-        cpuMilliseconds,
-        wallMilliseconds,
+      const data = {
+        // Timing:
+        cpu_milliseconds: cpuMilliseconds,
+        wall_milliseconds: wallMilliseconds,
         cores,
-        cpuUtilization,
-        memoryBytes,
-        engineId: this.engineId,
-        modelId: this.pipelineOptions.modelId,
+        cpu_utilization: cpuUtilization,
+        memory_bytes: memoryBytes,
+
+        // Model information:
+        engine_id: this.engineId,
+        model_id: this.pipelineOptions.modelId,
+        feature_id: this.pipelineOptions.featureId,
         backend: this.pipelineOptions.backend,
-      });
+      };
+
+      lazy.console?.debug("[Glean.firefoxAiRuntime.engineRun]", data);
+      Glean.firefoxAiRuntime.engineRun.record(data);
     });
 
     return resolvers.promise;
diff --git a/toolkit/components/ml/content/backends/ONNXPipeline.mjs b/toolkit/components/ml/content/backends/ONNXPipeline.mjs
@@ -9,20 +9,6 @@
  * @typedef {import("../../content/Utils.sys.mjs").ProgressAndStatusCallbackParams} ProgressAndStatusCallbackParams
  */
 
-/**
- * @typedef {object} PipelineMetrics
- * @property {number} [preprocessingTime] - Time spent preprocessing inputs (ms).
- * @property {number} [tokenizingTime] - Time spent tokenizing (same as preprocessingTime, for Glean consistency) (ms).
- * @property {number} [inferenceTime] - Time spent running the model (ms).
- * @property {number} [decodingTime] - Time spent decoding outputs (ms).
- * @property {number} inputTokens - Number of tokens in the input.
- * @property {number} outputTokens - Number of tokens in the output.
- * @property {number|null} [timeToFirstToken] - Time to the first generated token (ms).
- * @property {number} [tokensPerSecond] - Inference throughput (tokens/s).
- * @property {number} [timePerOutputToken] - Latency per output token (ms).
- * @property {Array<object>} [runTimestamps] - Timeline of execution events.
- */
-
 /* eslint-disable-next-line mozilla/reject-import-system-module-from-non-system */
 import { AppConstants } from "resource://gre/modules/AppConstants.sys.mjs";
 
@@ -77,7 +63,7 @@ let transformers = null;
  * @function importTransformers
  * @param {string} backend - The backend to use (e.g. "onnx-native" or "onnx").
  * @returns {Promise<import("chrome://global/content/ml/transformers-dev.js")>}
- * A promise that resolves once the Transformers library is imported.
+ *   A promise that resolves once the Transformers library is imported.
  */
 export async function importTransformers(backend) {
   if (transformers) {
@@ -177,10 +163,7 @@ async function echo(request, _model, _tokenizer, _processor, config) {
 
   return {
     metrics: {
-      preprocessingTime: 0,
-      decodingTime: 0,
-      inputTokens: 0,
-      outputTokens: 0,
+      tokenizingTime: 0,
     },
     output: result,
   };
@@ -206,16 +189,10 @@ async function imageToText(request, model, tokenizer, processor, _config) {
   let result = {
     metrics: {
       inferenceTime: 0,
-      preprocessingTime: 0,
-      decodingTime: 0,
-      inputTokens: null,
-      outputTokens: 0,
+      tokenizingTime: 0,
     },
   };
-  // Destructure to simplify assignments
-  const { metrics } = result;
-
-  let startLoad = ChromeUtils.now();
+  let start = Date.now();
   let rawImage;
 
   if ("url" in request) {
@@ -229,28 +206,27 @@ async function imageToText(request, model, tokenizer, processor, _config) {
     );
   }
 
-  lazy.console.debug("Image loaded in ", ChromeUtils.now() - startLoad);
+  lazy.console.debug("Image loaded in ", Date.now() - start);
 
-  const startProcessing = ChromeUtils.now();
   const { pixel_values } = await processor(rawImage);
-  metrics.preprocessingTime += ChromeUtils.now() - startProcessing;
+  result.metrics.tokenizingTime += Date.now() - start;
   const toReturn = [];
   const streamer = request.options?.streamer;
   for (const batch of pixel_values) {
     batch.dims = [1, ...batch.dims];
-    const startInference = ChromeUtils.now();
+    start = Date.now();
     const output = await model.generate({ inputs: batch, streamer });
-    metrics.inferenceTime += ChromeUtils.now() - startInference;
-    const startDecoding = ChromeUtils.now();
+    result.metrics.inferenceTime += Date.now() - start;
+    start = Date.now();
     const decoded = tokenizer
       .batch_decode(output, {
         skip_special_tokens: true,
       })
       .map(x => ({ generated_text: x.trim() }));
-    metrics.decodingTime += ChromeUtils.now() - startDecoding;
+    result.metrics.tokenizingTime += Date.now() - start;
     toReturn.push(decoded);
   }
-  lazy.console.debug("Inference done in ", ChromeUtils.now() - startProcessing);
+  lazy.console.debug("Inference done in ", Date.now() - start);
   result.output = toReturn[0][0].generated_text;
 
   // Bug 1918220 - replace the result for models with that bug
@@ -291,14 +267,11 @@ async function textToGoal(
 ) {
   const result = {
     metrics: {
-      preprocessingTime: 0,
+      tokenizingTime: 0,
       inferenceTime: 0,
-      inputTokens: 0,
-      outputTokens: 0,
     },
     output: [],
   };
-  const { metrics } = result;
 
   const texts = request.args?.[0] ?? [];
   const taskTypes = request.args?.[1] ?? []; // ["query", "page", ...]
@@ -309,7 +282,7 @@ async function textToGoal(
     const task = taskTypes[i] ?? "query";
     const domain = domains[i] ?? "";
 
-    const startToken = ChromeUtils.now();
+    const startToken = Date.now();
 
     const encoded = await tokenizer(text, {
       padding: "max_length",
@@ -317,7 +290,7 @@ async function textToGoal(
       max_length: 64,
       return_attention_mask: true,
     });
-    metrics.preprocessingTime += ChromeUtils.now() - startToken;
+    result.metrics.tokenizingTime += Date.now() - startToken;
     const input_ids = encoded.input_ids.ort_tensor;
     const attention_mask = encoded.attention_mask.ort_tensor;
     const domain_vocab = modelConfig["transformers.js_config"].domain_vocab;
@@ -345,11 +318,10 @@ async function textToGoal(
       task_type,
     };
 
-    const startInfer = ChromeUtils.now();
+    const startInfer = Date.now();
     const session = model.sessions.model;
     const output = await session.run(inputs);
-    metrics.inferenceTime += ChromeUtils.now() - startInfer;
-    metrics.inputTokens += encoded.input_ids.ort_tensor.dims[1];
+    result.metrics.inferenceTime += Date.now() - startInfer;
 
     result.output.push({
       embedding: Array.from(output.embedding.data),
@@ -649,7 +621,7 @@ export class ONNXPipeline {
 
   async #metricsSnapShot({ name, snapshot = {} }) {
     if (!("when" in snapshot)) {
-      snapshot.when = ChromeUtils.now();
+      snapshot.when = Date.now();
     }
     this.#metrics.push({ name, ...snapshot });
   }
@@ -667,7 +639,7 @@ export class ONNXPipeline {
    */
   static async initialize(mlEngineWorker, runtime, options, errorFactory) {
     let snapShot = {
-      when: ChromeUtils.now(),
+      when: Date.now(),
     };
 
     if (options.logLevel) {
@@ -788,114 +760,86 @@ export class ONNXPipeline {
   async run(request, requestId, inferenceProgressCallback = null) {
     lazy.console.debug("Running task: ", this.#config.taskName);
 
-    /** @type {PipelineMetrics} */
-    const metrics = {
-      inputTokens: 0,
-      outputTokens: 0,
-      preprocessingTime: 0,
-      tokenizingTime: 0, // Same as preprocessingTime, but named for Glean consistency
-      inferenceTime: 0,
-      decodingTime: 0,
-      timeToFirstToken: null,
-      tokensPerSecond: 0,
-      timePerOutputToken: 0,
-      runTimestamps: [],
-    };
-
-    /**
-     * Helper to record a timestamp in the metrics timeline.
-     *
-     * @param {string} name
-     */
-    const snapshot = name => {
-      metrics.runTimestamps.push({ name, when: ChromeUtils.now() });
-    };
-
-    const runStartTime = ChromeUtils.now();
-    snapshot("runStart");
+    let result;
+    await this.#metricsSnapShot({ name: "runStart" });
 
     const tokenizer =
       this.#genericPipelineFunction?.tokenizer ?? this.#tokenizer;
 
-    if (this.#genericPipelineFunction && tokenizer && request.args?.[0]) {
-      try {
-        const inputs = [request.args[0]].flat();
-        for (const text of inputs) {
-          if (typeof text === "string") {
-            const encoded = await tokenizer.encode(text);
-            metrics.inputTokens += encoded.length;
-          }
-        }
-      } catch (e) {
-        lazy.console.debug(
-          "Could not count input tokens for generic pipeline",
-          e
-        );
-      }
-    }
-
     const progressInfo = {
       ok: true,
       id: request.id ?? requestId,
     };
 
     const streamerOptions = {
-      perTokens: true,
+      perTokens: false,
       skipPrompt: true,
       returnTokens: false,
       ...request.streamerOptions,
     };
 
-    let streamer;
+    let streamer = undefined;
     let chunkTokens = [];
-    // Removed unused chunkText declaration here
-
-    let firstTokenTimestamp = null;
-    if (tokenizer) {
+    let chunkText = "";
+    let nextTokensArePrompt = !streamerOptions.skipPrompt;
+    let restoreTokenizer = false;
+
+    if (tokenizer && inferenceProgressCallback) {
+      const flushPrompts = _tokens => {
+        streamer.token_cache = _tokens;
+        streamer.end();
+        streamer.tokenizer = {
+          decode: () => {
+            streamer.token_cache = [];
+            return "";
+          },
+        };
+        restoreTokenizer = true;
+        streamer.next_tokens_are_prompt = false;
+      };
       streamer = new transformers.TextStreamer(tokenizer, {
         skip_prompt: streamerOptions.skipPrompt,
         decode_kwargs: {
           skip_special_tokens: true,
         },
         token_callback_function: tokens => {
-          // Record Time To First Token on the very first callback
-          const now = ChromeUtils.now();
-          if (metrics.timeToFirstToken === null) {
-            metrics.timeToFirstToken = now - runStartTime;
-            firstTokenTimestamp = now;
-          }
-
-          metrics.outputTokens += tokens.length;
-
-          // Only proceed with buffering if we have a callback to call
-          if (!inferenceProgressCallback) {
-            return;
-          }
-
-          if (streamerOptions.perTokens) {
-            // Logic handled in callback_function
-          } else {
-            // Append newly received tokens.
-            chunkTokens.push(tokens);
-          }
-        },
-        // Per-word (or per-token if perTokens=true) callback function
-        callback_function: text => {
-          if (!inferenceProgressCallback) {
-            return;
+          if (restoreTokenizer) {
+            streamer.tokenizer = tokenizer;
+            restoreTokenizer = false;
           }
           if (streamerOptions.perTokens) {
+            if (nextTokensArePrompt) {
+              flushPrompts(tokens);
+            }
+
             inferenceProgressCallback({
               ...progressInfo,
               metadata: {
-                text,
-                tokens: streamerOptions.returnTokens ? chunkTokens : null,
+                text: chunkText,
+                tokens: streamerOptions.returnTokens ? tokens : null,
+                isPrompt: nextTokensArePrompt,
                 requestId,
-                isPrompt: false, // skipping prompt, so assumed false
               },
               type: lazy.Progress.ProgressType.INFERENCE,
               statusText: lazy.Progress.ProgressStatusText.IN_PROGRESS,
             });
+
+            // We have sent the text, now resetting it
+            chunkText = "";
+          } else {
+            // Append newly received tokens.
+            chunkTokens.push(tokens);
+
+            if (nextTokensArePrompt) {
+              flushPrompts(tokens);
+            }
+          }
+          nextTokensArePrompt = false;
+        },
+        // Per-word callback function
+        callback_function: text => {
+          if (streamerOptions.perTokens) {
+            chunkText = text;
           } else {
             inferenceProgressCallback({
               ...progressInfo,
@@ -903,7 +847,7 @@ export class ONNXPipeline {
                 text,
                 tokens: streamerOptions.returnTokens ? chunkTokens : null,
                 requestId,
-                isPrompt: false,
+                isPrompt: nextTokensArePrompt,
               },
               type: lazy.Progress.ProgressType.INFERENCE,
               statusText: lazy.Progress.ProgressStatusText.IN_PROGRESS,
@@ -915,13 +859,13 @@ export class ONNXPipeline {
       });
     }
 
-    // Inject streamer into request options
-    const requestWithCallback = {
-      ...request,
-      options: { ...request.options, streamer },
-    };
-
-    let result;
+    // Override streamer in options
+    const requestWithCallback = inferenceProgressCallback
+      ? {
+          ...request,
+          options: { ...request.options, streamer },
+        }
+      : request;
 
     if (this.#genericPipelineFunction) {
       if (this.#config.modelId === "test-echo") {
@@ -929,19 +873,15 @@ export class ONNXPipeline {
           output: requestWithCallback.args,
           config: this.#config,
           multiThreadSupported: isMultiThreadSupported(),
-          metrics: { ...metrics },
         };
       } else {
-        const start = ChromeUtils.now();
-        let output = await this.#genericPipelineFunction(
+        result = await this.#genericPipelineFunction(
           ...requestWithCallback.args,
           requestWithCallback.options || {}
         );
-        metrics.inferenceTime = ChromeUtils.now() - start;
-        if (output instanceof transformers.Tensor) {
-          output = output.tolist();
+        if (result instanceof transformers.Tensor) {
+          result = result.tolist();
         }
-        result = output;
       }
     } else {
       result = await this.#pipelineFunction(
@@ -952,44 +892,9 @@ export class ONNXPipeline {
         this.#config,
         this.#modelConfig
       );
-      result.metrics ??= {};
-    }
-
-    if (result.metrics) {
-      for (const [key, value] of Object.entries(result.metrics)) {
-        if (value !== undefined && value !== null) {
-          metrics[key] = value;
-        }
-      }
-    }
-
-    snapshot("runEnd");
-    const runEndTime = ChromeUtils.now();
-
-    // Calculate metrics
-    try {
-      // If we streamed, decoding time is Time(End) - Time(FirstToken).
-      // Otherwise, we fallback to inferenceTime (e.g. for embeddings or image-to-text without streaming).
-      if (metrics.timeToFirstToken !== null && firstTokenTimestamp !== null) {
-        metrics.decodingTime = runEndTime - firstTokenTimestamp;
-      } else {
-        metrics.decodingTime = metrics.inferenceTime;
-      }
-
-      // Sync tokenizingTime with preprocessingTime for Glean metrics consistency
-      metrics.tokenizingTime = metrics.preprocessingTime;
-
-      // Calculate throughput metrics if we have the necessary data
-      if (metrics.inferenceTime > 0 && metrics.outputTokens > 0) {
-        metrics.tokensPerSecond =
-          metrics.outputTokens / (metrics.inferenceTime / 1000);
-        metrics.timePerOutputToken =
-          metrics.inferenceTime / metrics.outputTokens;
-      }
-    } catch (e) {
-      lazy.console.debug("Error computing throughput metrics", e);
     }
-    result.metrics = metrics;
+    await this.#metricsSnapShot({ name: "runEnd" });
+    result.metrics = this.#metrics;
 
     if (streamer) {
       inferenceProgressCallback?.({
diff --git a/toolkit/components/ml/metrics.yaml b/toolkit/components/ml/metrics.yaml
@@ -181,9 +181,6 @@ firefox.ai.runtime:
       engineId:
         description: Engine id
         type: string
-      error:
-        description: error class or message
-        type: string
 
   run_inference_success_flow:
     type: event
@@ -203,35 +200,11 @@ firefox.ai.runtime:
       tokenizing_time:
         type: quantity
         description: >
-          Time taken for tokenization (preprocessing) in milliseconds
+          Time taken for tokenization in milliseconds
       inference_time:
         type: quantity
         description: >
           Time taken for inference in milliseconds
-      decoding_time:
-        type: quantity
-        description: >
-          Time taken for decoding in milliseconds
-      input_tokens:
-        type: quantity
-        description: >
-          Number of tokens in the input prompt
-      output_tokens:
-        type: quantity
-        description: >
-          Number of tokens generated
-      time_to_first_token:
-        type: quantity
-        description: >
-          Time to first token in milliseconds (null if not applicable)
-      tokens_per_second:
-        type: quantity
-        description: >
-          Inference throughput in tokens per second (rounded integer)
-      time_per_output_token:
-        type: quantity
-        description: >
-          Average latency per output token in milliseconds (rounded integer)
 
   run_inference_success:
     type: labeled_timing_distribution
diff --git a/toolkit/components/ml/tests/browser/browser_ml_telemetry.js b/toolkit/components/ml/tests/browser/browser_ml_telemetry.js
@@ -58,52 +58,11 @@ add_task(async function test_default_telemetry() {
   );
 
   {
-    info("Test the run_inference_success_flow event");
-    const inferenceFlowEvents =
-      Glean.firefoxAiRuntime.runInferenceSuccessFlow.testGetValue();
-    Assert.ok(
-      inferenceFlowEvents && !!inferenceFlowEvents.length,
-      "At least one run_inference_success_flow event was recorded"
-    );
-    const lastInferenceEvent = inferenceFlowEvents.at(-1);
-    const { extra: inferenceExtra } = lastInferenceEvent;
-
-    // Helper to check that a number field is present and >= 0
-    const checkNumber = (key, isOptional = false) => {
-      const value = inferenceExtra[key];
-      if (isOptional && (value === null || value === undefined)) {
-        return; // Optional field not present is OK
-      }
-      Assert.notEqual(value, null, `${key} should be present`);
-      const number = Number(value); // Quantities are stored as strings
-      Assert.ok(!Number.isNaN(number), `${key} should be a number`);
-      Assert.greaterOrEqual(number, 0, `${key} should be >= 0`);
-    };
-
-    // Check flow_id is present
-    Assert.ok(inferenceExtra.flow_id, "flow_id should be present");
-
-    // Check all required timing/token metrics
-    checkNumber("tokenizing_time", true);
-    checkNumber("inference_time", true);
-    checkNumber("decoding_time", true);
-    checkNumber("input_tokens", true);
-    checkNumber("output_tokens", true);
-    checkNumber("time_to_first_token", true);
-    checkNumber("tokens_per_second", true);
-    checkNumber("time_per_output_token", true);
-  }
-
-  {
     info("Test the engine_run event");
     await engineInstance.lastResourceRequest;
     const value = Glean.firefoxAiRuntime.engineRun.testGetValue();
-    Assert.ok(
-      value && !!value.length,
-      "At least one engine_run event was recorded"
-    );
-    const lastEngineRunEvent = value.at(-1);
-    const { extra } = lastEngineRunEvent;
+    Assert.equal(value?.length, 1, "One engine_run event was recorded");
+    const [{ extra }] = value;
     const checkNumber = key => {
       const value = extra[key];
       Assert.notEqual(value, null, `${key} should be present`);

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	toolkit/components/ml/MLTelemetry.sys.mjs	\|	151	++++++++++++++++++++++---------------------------------------------------------
M	toolkit/components/ml/actors/MLEngineParent.sys.mjs	\|	86	++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M	toolkit/components/ml/content/backends/ONNXPipeline.mjs	\|	253	+++++++++++++++++++++++++------------------------------------------------------
M	toolkit/components/ml/metrics.yaml	\|	29	+----------------------------
M	toolkit/components/ml/tests/browser/browser_ml_telemetry.js	\|	45	++-------------------------------------------