commit 8055c1e680713a954e044cbf4534377fda15b6cc
parent 90a4dc6c5759cc7e9a3a59d0fc61caf198d2ad8b
Author: Greg Tatum <tatum.creative@gmail.com>
Date: Thu, 9 Oct 2025 18:03:53 +0000
Bug 1992779 - Measure CPU and memory usage for inference engine runs in telemetry; r=nordzilla
Differential Revision: https://phabricator.services.mozilla.com/D267817
Diffstat:
5 files changed, 181 insertions(+), 51 deletions(-)
diff --git a/toolkit/components/ml/actors/MLEngineParent.sys.mjs b/toolkit/components/ml/actors/MLEngineParent.sys.mjs
@@ -3,31 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
-/**
- * @typedef {object} Lazy
- * @property {typeof import("resource://services-settings/remote-settings.sys.mjs").RemoteSettings} RemoteSettings
- * @property {typeof import("resource://services-settings/Utils.sys.mjs").Utils} Utils
- * @property {typeof import("resource://gre/actors/TranslationsParent.sys.mjs").TranslationsParent} TranslationsParent
- * @property {typeof setTimeout} setTimeout
- * @property {typeof clearTimeout} clearTimeout
- * @property {typeof import("chrome://global/content/ml/ModelHub.sys.mjs").ModelHub} ModelHub
- * @property {typeof import("chrome://global/content/ml/Utils.sys.mjs").Progress} Progress
- * @property {typeof import("chrome://global/content/ml/Utils.sys.mjs").isAddonEngineId} isAddonEngineId
- * @property {typeof import("chrome://global/content/ml/OPFS.sys.mjs").OPFS} OPFS
- * @property {typeof import("chrome://global/content/ml/EngineProcess.sys.mjs").BACKENDS} BACKENDS
- */
-
-/** @type {Lazy} */
-const lazy = {};
-
-ChromeUtils.defineLazyGetter(lazy, "console", () => {
- return console.createInstance({
- maxLogLevelPref: "browser.ml.logLevel",
- prefix: "GeckoMLEngineParent",
- });
-});
-
-ChromeUtils.defineESModuleGetters(lazy, {
+const lazy = XPCOMUtils.declareLazy({
RemoteSettings: "resource://services-settings/remote-settings.sys.mjs",
Utils: "resource://services-settings/Utils.sys.mjs",
TranslationsParent: "resource://gre/actors/TranslationsParent.sys.mjs",
@@ -39,27 +15,16 @@ ChromeUtils.defineESModuleGetters(lazy, {
OPFS: "chrome://global/content/ml/OPFS.sys.mjs",
BACKENDS: "chrome://global/content/ml/EngineProcess.sys.mjs",
stringifyForLog: "chrome://global/content/ml/Utils.sys.mjs",
+ console: () =>
+ console.createInstance({
+ maxLogLevelPref: "browser.ml.logLevel",
+ prefix: "GeckoMLEngineParent",
+ }),
+ mlUtils: { service: "@mozilla.org/ml-utils;1", iid: Ci.nsIMLUtils },
+ CHECK_FOR_MEMORY: { pref: "browser.ml.checkForMemory" },
+ MINIMUM_PHYSICAL_MEMORY: { pref: "browser.ml.minimumPhysicalMemory" },
});
-XPCOMUtils.defineLazyServiceGetter(
- lazy,
- "mlUtils",
- "@mozilla.org/ml-utils;1",
- "nsIMLUtils"
-);
-
-XPCOMUtils.defineLazyPreferenceGetter(
- lazy,
- "CHECK_FOR_MEMORY",
- "browser.ml.checkForMemory"
-);
-
-XPCOMUtils.defineLazyPreferenceGetter(
- lazy,
- "MINIMUM_PHYSICAL_MEMORY",
- "browser.ml.minimumPhysicalMemory"
-);
-
const ONE_GiB = 1024 * 1024 * 1024;
const RS_RUNTIME_COLLECTION = "ml-onnx-runtime";
const RS_INFERENCE_OPTIONS_COLLECTION = "ml-inference-options";
@@ -988,6 +953,14 @@ export class MLEngine {
engineId;
/**
+ * Allow tests to await on the last resource request, as this is not exposed
+ * in the response, @see {MLEngine#run}.
+ *
+ * @type {null | Promise<void>}
+ */
+ lastResourceRequest = null;
+
+ /**
* Callback to call when receiving an initializing progress status.
*
* @type {?function(ProgressAndStatusCallbackParams):void}
@@ -1444,12 +1417,35 @@ export class MLEngine {
}
/**
+   * Take a snapshot of the inference process's resource counters, as reported
+   * by `ChromeUtils.requestProcInfo()`.
+   *
+   * Failures are logged rather than thrown, so callers only have to handle a
+   * `null` result.
+   *
+   * @returns {Promise<null | { cpuTime: null | number, memory: null | number}>}
+   *   `null` when no child process of type "inference" is listed or when the
+   *   process information could not be requested; otherwise that process's
+   *   `cpuTime` and `memory` values, with missing values normalized to `null`.
+   */
+  async getInferenceResources() {
+    try {
+      const { children } = await ChromeUtils.requestProcInfo();
+      // Only the first matching child is needed, so stop at the first hit
+      // instead of filtering the whole list.
+      const inference = children.find(child => child.type == "inference");
+      if (!inference) {
+        lazy.console.log(
+          "Could not find the inference process cpu information."
+        );
+        return null;
+      }
+      return {
+        cpuTime: inference.cpuTime ?? null,
+        memory: inference.memory ?? null,
+      };
+    } catch (error) {
+      lazy.console.error(error);
+      return null;
+    }
+  }
+
+ /**
* Run the inference request
*
* @param {Request} request
* @returns {Promise<Response>}
*/
- run(request) {
+ async run(request) {
const resolvers = Promise.withResolvers();
const requestId = this.#nextRequestId++;
this.#requests.set(requestId, resolvers);
@@ -1463,6 +1459,9 @@ export class MLEngine {
throw new Error("Port does not exist");
}
+ const resourcesPromise = this.getInferenceResources();
+ const beforeRun = ChromeUtils.now();
+
this.#port.postMessage(
{
type: "EnginePort:Run",
@@ -1472,6 +1471,50 @@ export class MLEngine {
},
transferables
);
+
+    // Report resource usage once the run has settled. The promise is kept on
+    // the instance so that tests can wait for the telemetry to be recorded.
+    this.lastResourceRequest = Promise.all([
+      resourcesPromise,
+      resolvers.promise.catch(() => {
+        // Catch this error so that we don't trigger an unhandled promise rejection.
+        return false;
+      }),
+    ]).then(async ([resourcesBefore, result]) => {
+      if (!result) {
+        // The request failed, do not report the telemetry.
+        return;
+      }
+      const resourcesAfter = await this.getInferenceResources();
+      if (
+        !resourcesBefore ||
+        !resourcesAfter ||
+        resourcesBefore.cpuTime === null ||
+        resourcesAfter.cpuTime === null
+      ) {
+        // Both CPU samples are required. getInferenceResources() may report a
+        // null cpuTime, and subtracting would coerce null to 0 and record a
+        // bogus CPU time, so skip the event instead.
+        return;
+      }
+
+      // Convert nanoseconds to milliseconds
+      const cpuMilliseconds =
+        (resourcesAfter.cpuTime - resourcesBefore.cpuTime) / 1_000_000;
+      const wallMilliseconds = ChromeUtils.now() - beforeRun;
+      const cores = lazy.mlUtils.getOptimalCPUConcurrency();
+      // CPU time spent per unit of wall time, averaged over the cores.
+      const cpuUtilization = cpuMilliseconds / wallMilliseconds / cores;
+      const memoryBytes = resourcesAfter.memory;
+
+      const data = {
+        // Timing:
+        cpu_milliseconds: cpuMilliseconds,
+        wall_milliseconds: wallMilliseconds,
+        cores,
+        cpu_utilization: cpuUtilization,
+        memory_bytes: memoryBytes,
+
+        // Model information:
+        engine_id: this.engineId,
+        model_id: this.pipelineOptions.modelId,
+        feature_id: this.pipelineOptions.featureId,
+        backend: this.pipelineOptions.backend,
+      };
+
+      lazy.console.debug("[Glean.firefoxAiRuntime.engineRun]", data);
+      Glean.firefoxAiRuntime.engineRun.record(data);
+    });
+
return resolvers.promise;
}
diff --git a/toolkit/components/ml/content/EngineProcess.sys.mjs b/toolkit/components/ml/content/EngineProcess.sys.mjs
@@ -813,20 +813,24 @@ export class PipelineOptions {
}
// Validating featureId
if (key === "featureId") {
- if (FEATURES.hasOwnProperty(options[key])) {
+ const featureId = options[key];
+ if (FEATURES.hasOwnProperty(featureId)) {
// if featureId is set and engineId is not set, we set it
if (
options.engineId == null ||
options.engineId === DEFAULT_ENGINE_ID
) {
- options.engineId = FEATURES[options[key]].engineId;
+ options.engineId = FEATURES[featureId].engineId;
this.engineId = options.engineId;
}
- } else {
+ } else if (
+ // Allow tests to define a feature id.
+ featureId != "test-feature"
+ ) {
// we want an explicit list of features.
throw new PipelineOptionsValidationError(
key,
- options[key],
+ featureId,
`Should be one of ${Object.keys(FEATURES).join(", ")}`
);
}
@@ -883,7 +887,9 @@ export class PipelineOptions {
if (
key === "backend" &&
- !Object.values(BACKENDS).includes(options[key])
+ !Object.values(BACKENDS).includes(options[key]) &&
+ // Allow tests to define a test backend.
+ options[key] != "test-backend"
) {
throw new PipelineOptionsValidationError(
key,
diff --git a/toolkit/components/ml/metrics.yaml b/toolkit/components/ml/metrics.yaml
@@ -188,6 +188,54 @@ firefox.ai.runtime:
error:
description: error class
type: string
+ engine_run:
+ type: event
+ description: >
+ The performance and memory characteristics of a single run of the inference engine.
+ extra_keys:
+ cpu_milliseconds:
+ description: The combined milliseconds of every cpu core that was running.
+ type: quantity
+ wall_milliseconds:
+ description: The amount of wall time the run request took.
+ type: quantity
+ cores:
+ description: The number of cores on the machine.
+ type: quantity
+ cpu_utilization:
+ description: The fraction of the user's CPU used, computed as CPU time / wall time / cores.
+ type: quantity
+ memory_bytes:
+ description: The number of RSS bytes for the inference process.
+ type: quantity
+ feature_id:
+ description: >
+ A unique id to describe the feature. e.g. pdfjs-alt-text, suggest-NER,
+ link-preview.
+ type: string
+ engine_id:
+ type: string
+ description: >
+ Similar to the feature id, but is used to ensure only one engine is created
+ under this name.
+ model_id:
+ type: string
+ description: >
+ The identifier for what model is being used by the engine,
+ e.g. Xenova/all-MiniLM-L6-v2
+ backend:
+ type: string
+ description: The backend that is being used, e.g. onnx, onnx-native, wllama
+ bugs:
+ - https://bugzilla.mozilla.org/show_bug.cgi?id=1992779
+ data_reviews:
+ - https://bugzilla.mozilla.org/show_bug.cgi?id=1992779
+ data_sensitivity:
+ - interaction
+ notification_emails:
+ - tziade@mozilla.com
+ - gtatum@mozilla.com
+ expires: never
model_management:
remove_initiated:
diff --git a/toolkit/components/ml/tests/browser/browser_ml_telemetry.js b/toolkit/components/ml/tests/browser/browser_ml_telemetry.js
@@ -3,7 +3,13 @@
requestLongerTimeout(2);
-const RAW_PIPELINE_OPTIONS = { taskName: "moz-echo", timeoutMS: -1 };
+const RAW_PIPELINE_OPTIONS = {
+ taskName: "moz-echo",
+ timeoutMS: -1,
+ modelId: "Mozilla/test",
+ featureId: "test-feature",
+ backend: "test-backend",
+};
const { sinon } = ChromeUtils.importESModule(
"resource://testing-common/Sinon.sys.mjs"
@@ -47,6 +53,31 @@ add_task(async function test_default_telemetry() {
"The text get echoed exercising the whole flow."
);
+  {
+    info("Test the engine_run event");
+    // Wait for the resource measurement started by the run before reading the
+    // recorded events.
+    await engineInstance.lastResourceRequest;
+    // Named `events` so that the `value` local in checkNumber() does not
+    // shadow it.
+    const events = Glean.firefoxAiRuntime.engineRun.testGetValue();
+    Assert.equal(events?.length, 1, "One engine_run event was recorded");
+    const [{ extra }] = events;
+    // Assert that the extra key is present and holds a positive number.
+    const checkNumber = key => {
+      const value = extra[key];
+      Assert.notEqual(value, null, `${key} should be present`);
+      const number = Number(value); // Quantities are stored as strings.
+      Assert.ok(!Number.isNaN(number), `${key} should be a number`);
+      Assert.greater(number, 0, `${key} should be greater than 0`);
+    };
+    checkNumber("cpu_milliseconds");
+    checkNumber("wall_milliseconds");
+    checkNumber("cores");
+    checkNumber("cpu_utilization");
+    checkNumber("memory_bytes");
+
+    Assert.equal(extra.feature_id, "test-feature");
+    Assert.equal(extra.engine_id, "default-engine");
+    Assert.equal(extra.model_id, "Mozilla/test");
+    Assert.equal(extra.backend, "test-backend");
+  }
+
Assert.equal(res.output.dtype, "q8", "The config was enriched by RS");
ok(
!EngineProcess.areAllEnginesTerminated(),
diff --git a/toolkit/components/ml/tests/browser/head.js b/toolkit/components/ml/tests/browser/head.js
@@ -65,6 +65,8 @@ async function setup({
backend,
});
+ Services.fog.testResetFOG();
+
await SpecialPowers.pushPrefEnv({
set: [
// Enabled by default.